/* base.c
 *
 * Copyright (C) 2005, 2006, 2007 Stephane Germain
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

/**
   \file
   \brief Common functions.
   \author Stephane Germain <germste@gmail.com>
*/

/**
   \mainpage

   \section Description

   This is a library with functions to estimate the items and
   abilities from the responses of subjects to a questionnaire.

   The supported IRT (Item Response Theory) model are the Rasch model,
   the 2PLM (two parameter logistic model) and the 3PLM (three
   parameter logistic model). The multivariate logistic model and
   non parametric methods are also supported.

   The estimations methods available are the MMLE (Marginal Maximum
   Likelihood Estimator) and the BME (Bayes Modal Estimator)
   for the parametric estimation of items, the PMMLE (Penalized MMLE)
   and kernel smoothing for the non parametric estimation of items, and
   the ML (Maximum Likelihood) and the EAP (Expected A Posteriori)
   estimators for the abilities.

   Two command line programs called "irt" and "mirt" are also provided.

   \section Download

   See http://libirt.sf.net .

   \section References

   Baker, F.B. & Kim, S.-H. (2004). Item response theory:
   parameter estimation techniques. Second Edition. Dekker, New York.

   Ramsay, J.O. (1991). Kernel smoothing approaches to nonparametric item
   characteristic curve estimation. Biometrika, 56, 611-630.

   \todo A graded version of the PMMLE  with a common logit and
   an "intercept" for each boundary (with zero mean)

   \todo Different models on each item

   \todo DIF

   \todo Equating

   \todo Dimensionality test

   \todo Bidimensional latent variable

   \todo Continuous item

   \todo Threads
*/

#include "libirt.h"
#if HAVE_CONFIG_H
#  include <config.h>
#endif
#include <math.h>
#include <gsl/gsl_cdf.h>

/**
   \brief The version of the library.
*/
const char *libirt_version = VERSION;

/**
   \brief Control the verbosity level.

   By default it is 0 (silent).
*/
int libirt_verbose = 0;

/**
   \brief Compute the logit transformation.

   \return The logit of \em p.
*/
double
logit (double p)
{
  return log (p / (1 - p));
}

/**
   \brief Compute the logistic transformation.

   \return The logistic of \em t.
*/
double
logistic (double t)
{
  return 1 / (1 + exp (-t));
}

/**
   \brief Generate the quadrature classes.
   
   @param[in] nbr_quad The number of classes.
   @param[in] from The starting value for the middle points of the classes.
   @param[in] to The ending value for the middle points of the classes.
   @param[out] quad_points A vector(classes) with the middle points of
   each class.
   @param[out] quad_weights A vector(classes) with the normal weights of 
   each class.
*/
void
quadrature (int nbr_quad, double from, double to,
	    gsl_vector * quad_points, gsl_vector * quad_weights)
{
  int k;
  double step, x, cdf, oldcdf;

  /* the distance between two middle point */
  step = (to - from) / (nbr_quad - 1.0);

  /* the first point */
  x = from;
  /* no cumulative prob yet */
  oldcdf = 0;
  /* for each class except the last one */
  for (k = 0; k < nbr_quad - 1; k++)
    {
      /* the cumulative prob including this class */
      cdf = gsl_cdf_ugaussian_P (x + step / 2.0);
      /* the weight */
      gsl_vector_set (quad_weights, k, cdf - oldcdf);
      /* the middle point */
      gsl_vector_set (quad_points, k, x);
      /* save the old cumulative prob */
      oldcdf = cdf;
      x += step;
    }
  /* set the last class with a cumulative cdf of 1 */
  gsl_vector_set (quad_weights, k, 1 - oldcdf);
  gsl_vector_set (quad_points, k, x);
}

/**
  \brief Used in patterns_count to build a triary tree of the unique patterns.  

  Each leaf will be a unique pattern
*/
typedef struct
{
  /**  The number of subjects that went thru this node. */
  int count;

  /** Used to stored the position of this pattern (for leaf only). */
  int id;

  /** Pointer to the node with a failurw at the next item (for non leaf only). */
  void *false;

  /** Pointer to the node with a success at the next item (for non leaf only). */
  void *true;

  /** Pointer to the node with a non response at the next item (for non leaf only). */
  void *blank;

  /** Pointer to the next allocated node (used to free the nodes). */
  void *next;
} pattern_node;

/**
   \brief Group the identical patterns from \em subjects into \em patterns.

   @param[in] subjects A matrix(subjects x items) of binary responses.
   @param[out] patterns A matrix(patterns x items) with the uniques patterns.
   @param[out] counts A vector(patterns) with the count of each pattern.
   @param[out] index A vector(subjects) with the new position of each \em subjects
   in \em patterns.

   \warning This function allocate the memory for \em patterns, \em counts and \em index
*/
void
patterns_counts (gsl_matrix_int * subjects, gsl_vector_int ** index,
		 gsl_matrix_int ** patterns, gsl_vector ** counts)
{
  int nbr_subject, i, j, k, nbr_item, nbr_pattern;
  pattern_node *root, *cur, **next, **node_indx, *ptr;

  /* allocate the root of the tree */
  root = (pattern_node *) malloc (sizeof (pattern_node));
  root->false = NULL;
  root->true = NULL;
  root->blank = NULL;
  root->next = NULL;

  nbr_subject = subjects->size1;
  nbr_item = subjects->size2;

  *index = gsl_vector_int_alloc (nbr_subject);
  /* for each subject, node_indx will store a pointer
     to the node representing its pattern */
  node_indx =
    (pattern_node **) malloc (sizeof (pattern_node *) * nbr_subject);

  nbr_pattern = 0;

  /* used to create the list of allocated node */
  ptr = root;

  /* for each subject */
  for (j = 0; j < nbr_subject; j++)
    {
      /* start at the root of the tree */
      cur = root;

      /* for each item */
      for (i = 0; i < nbr_item; i++)
	{
	  /* branch to the corresponding node */
	  switch (gsl_matrix_int_get (subjects, j, i))
	    {
	    case TRUE:
	      next = (pattern_node **) & (cur->true);
	      break;
	    case FALSE:
	      next = (pattern_node **) & (cur->false);
	      break;
	    case BLANK:
	      next = (pattern_node **) & (cur->blank);
	      break;
	    }
	  /* if the node dont exists */
	  if (!*next)
	    /* create it */
	    {
	      *next = (pattern_node *) malloc (sizeof (pattern_node));
	      (*next)->true = NULL;
	      (*next)->false = NULL;
	      (*next)->blank = NULL;
	      (*next)->next = NULL;
	      (*next)->count = 1;
	      (*next)->id = -1;
	      ptr->next = *next;
	      ptr = *next;
	    }
	  else
	    /* else increment its count */
	    {
	      ((*next)->count)++;
	    }
	  cur = *next;
	}
      node_indx[j] = cur;
      if (cur->count == 1)
	{
	  nbr_pattern++;
	}
    }

  /* now copy the unique patterns and their counts */
  *patterns = gsl_matrix_int_alloc (nbr_pattern, nbr_item);
  *counts = gsl_vector_alloc (nbr_pattern);
  k = 0;
  /* for each subjects */
  for (j = 0; j < nbr_subject; j++)
    {
      /* if its the first subject to claims this pattern */
      if (node_indx[j]->id < 0)
	/* add the pattern to patterns and set its position */
	{
	  node_indx[j]->id = k;
	  gsl_vector_set (*counts, k, node_indx[j]->count);
	  for (i = 0; i < nbr_item; i++)
	    {
	      gsl_matrix_int_set (*patterns, k, i,
				  gsl_matrix_int_get (subjects, j, i));
	    }
	  k++;
	}
      /* set index */
      gsl_vector_int_set (*index, j, node_indx[j]->id);
    }

  /* free the nodes */
  while (root)
    {
      ptr = root->next;
      free (root);
      root = ptr;
    }

  free (node_indx);
}

/**
   \brief Compute the likelihood of each patterns.

   @param[in] patterns A matrix (patterns x items) of binary responses.
   @param[in] probs A matrix(items x classes) with response functions.
   @param[out] like A matrix(patterns x classes) with the likelihood of each pattern
   in each quadrature class.
   
   \warning The memory for \em likel should be allocated before.
*/
void
likelihood (gsl_matrix_int * patterns, gsl_matrix * probs,
	    gsl_matrix * like)
{
  int nbr_quad, nbr_pattern, nbr_item, i, j, k, resp;
  double prob, like_tmp;

  nbr_quad = probs->size2;
  nbr_pattern = patterns->size1;
  nbr_item = patterns->size2;

  /* for each pattern */
  for (j = 0; j < nbr_pattern; j++)
    {
      /* for each class */
      for (k = 0; k < nbr_quad; k++)
	{
	  like_tmp = 1;
	  /* for each item */
	  for (i = 0; i < nbr_item; i++)
	    {
	      /* get the response to item i in pattern j */
	      resp = gsl_matrix_int_get (patterns, j, i);
	      /* compute the prob of success to this item in this class */
	      prob = gsl_matrix_get (probs, i, k);
	      if (resp == BLANK)
		{
		  /* Adjust the likelihood for the missing data. */
		  like_tmp *= prob * prob + (1 - prob) * (1 - prob);
		}
	      else
		{
		  /* compute the likelihood of this response */
		  like_tmp *= resp ? prob : (1 - prob);
		}
	    }
	  gsl_matrix_set (like, j, k, like_tmp);
	}
    }
}

/**
   \brief Compute the likelihood of each multiple choice patterns.

   @param[in] patterns A matrix (patterns x options) of binary responses.
   @param[in] probs A matrix(options x classes) with response functions.
   @param[in] nbr_options A vector(items) with the number of option of each items.
   @param[in] items_pos A vector(items) with the position of the first option of each item
   in patterns.
   @param[out] like A matrix(patterns x classes) with the likelihood of each pattern
   in each quadrature class.
   
   \warning The memory for \em likel should be allocated before.
*/
void
likelihood_mc (gsl_matrix_int * patterns, gsl_matrix * probs,
	       gsl_vector_int * nbr_options, gsl_vector_int * items_pos, 
	       gsl_matrix * like)
{
  int nbr_quad, nbr_pattern, nbr_item, i, j, k, resp, pos, nbr_option, o;
  double prob, like_tmp, blank_tmp;

  nbr_quad = probs->size2;
  nbr_pattern = patterns->size1;
  nbr_item = nbr_options->size;

  /* for each pattern */
  for (j = 0; j < nbr_pattern; j++)
    {
      /* for each class */
      for (k = 0; k < nbr_quad; k++)
	{
	  like_tmp = 1;
	  /* for each item */
	  for (i = 0; i < nbr_item; i++)
	    {
	      nbr_option = gsl_vector_int_get(nbr_options, i);
	      pos = gsl_vector_int_get(items_pos, i);
	      blank_tmp = 0;
	      /* for each option */
	      for (o = 0; o < nbr_option; o++)
		{
		  /* get the response to item i in pattern j */
		  resp = gsl_matrix_int_get (patterns, j, pos+o);
		  /* compute the prob of success to this item in this class */
		  prob = gsl_matrix_get (probs, pos+o, k);
		  if (resp == BLANK)
		    {
		      /* Adjust the likelihood for the missing data. */
		      blank_tmp += prob * prob;
		    }
		  else if(resp == TRUE)
		    {
		      /* compute the likelihood of this response */
		      like_tmp *= prob;
		    }
		}
	      if(blank_tmp != 0) like_tmp *= blank_tmp;
	    }
	  gsl_matrix_set (like, j, k, like_tmp);
	}
    }
}

/**
   \brief Compute the posteriori probabilities.
   
   @param[in] patterns A matrix (patterns x items) of binary responses.
   @param[in] probs A matrix(items x classes) with response functions.
   @param[in] quad_weights A vector(classes) with the prior weights of each quadrature class.
   @param[out] post A matrix(patterns x classes) with the probability that a pattern
   is in a quadrature class.
   
   \warning The memory for \em post should be allocated before.
*/
void
posteriors (gsl_matrix_int * patterns, gsl_matrix * probs,
	    gsl_vector * quad_weights, gsl_matrix * post)
{
  int nbr_quad, nbr_pattern, nbr_item, i, j, k, resp;
  double prob, post_tmp, marg_tmp;

  nbr_quad = quad_weights->size;
  nbr_pattern = patterns->size1;
  nbr_item = patterns->size2;

  /* for each pattern */
  for (j = 0; j < nbr_pattern; j++)
    {
      marg_tmp = 0;
      /* for each class */
      for (k = 0; k < nbr_quad; k++)
	{
	  post_tmp = 1;
	  /* for each item */
	  for (i = 0; i < nbr_item; i++)
	    {
	      /* get the response to item i in pattern j */
	      resp = gsl_matrix_int_get (patterns, j, i);
	      /* compute the prob of success to this item in this class */
	      prob = gsl_matrix_get (probs, i, k);
	      if (resp == BLANK)
		{
		  /* Adjust the posteriori probability for the missing data. */
		  post_tmp *= prob * prob + (1 - prob) * (1 - prob);
		}
	      else
		{
		  /* compute the prob of this response */
		  post_tmp *= resp ? prob : (1 - prob);
		}
	    }
	  post_tmp *= gsl_vector_get (quad_weights, k);
	  gsl_matrix_set (post, j, k, post_tmp);

	  /* update the marginal */
	  marg_tmp += post_tmp;
	}
      /* divide by the marg */
      for (k = 0; k < nbr_quad; k++)
	{
	  gsl_matrix_set (post, j, k, gsl_matrix_get (post, j, k) / marg_tmp);
	}
    }
}

/**
   \brief Compute the posteriori probabilities for multiple choice patterns.
   
   @param[in] patterns A matrix (patterns x options) of binary responses.
   @param[in] probs A matrix(options x classes) with response functions.
   @param[in] nbr_options A vector(items) with the number of option of each items.
   @param[in] items_pos A vector(items) with the position of the first option of each item
   in patterns.
   @param[in] quad_weights A vector(classes) with the prior weights of each quadrature class.
   @param[out] post A matrix(patterns x classes) with the probability that a pattern
   is in a quadrature class.
   
   \warning The memory for \em post should be allocated before.
*/
void
posteriors_mc (gsl_matrix_int * patterns, gsl_matrix * probs,
	       gsl_vector_int * nbr_options, gsl_vector_int * items_pos, 
	       gsl_vector * quad_weights, gsl_matrix * post)
{
  int nbr_quad, nbr_pattern, nbr_item, i, j, k, resp,
    o, nbr_option, pos;
  double prob, post_tmp, marg_tmp, blank_tmp;

  nbr_quad = quad_weights->size;
  nbr_pattern = patterns->size1;
  nbr_item = nbr_options->size;

  /* for each pattern */
  for (j = 0; j < nbr_pattern; j++)
    {
      marg_tmp = 0;
      /* for each class */
      for (k = 0; k < nbr_quad; k++)
	{
	  post_tmp = 1;
	  /* for each item */
	  for (i = 0; i < nbr_item; i++)
	    {
	      nbr_option = gsl_vector_int_get(nbr_options, i);
	      pos = gsl_vector_int_get(items_pos, i);
	      blank_tmp = 0;
	      /* for each option */
	      for (o = 0; o < nbr_option; o++)
		{
		  /* get the response to option o in pattern j */
		  resp = gsl_matrix_int_get (patterns, j, pos+o);
		  /* compute the prob of success to this item in this class */
		  prob = gsl_matrix_get (probs, pos+o, k);
		  if (resp == BLANK)
		    {
		      /* Adjust the posteriori probability for the missing data. */
		      blank_tmp += prob * prob;
		    }
		  else if(resp == TRUE)
		    {
		      /* compute the prob of this response */
		      post_tmp *= prob;
		    }
		}
	      if(blank_tmp != 0) post_tmp *= blank_tmp;
	    }
	  post_tmp *= gsl_vector_get (quad_weights, k);
	  gsl_matrix_set (post, j, k, post_tmp);

	  /* update the marginal */
	  marg_tmp += post_tmp;
	}
      /* divide by the marg */
      for (k = 0; k < nbr_quad; k++)
	{
	  gsl_matrix_set (post, j, k, gsl_matrix_get (post, j, k) / marg_tmp);
	}
    }
}

/**
   \brief Compute the expected numbers of subjects.
   
   @param[in] patterns A matrix (patterns x items) of binary responses.
   @param[in] counts A vector(patterns) with the count of each pattern.
   @param[in] post A matrix(patterns x classes) with the probability that a pattern
   is in a quadrature class.
   @param[in] probs A matrix(items x classes) with response functions.
   Used to replace the missing data.
   @param[out] quad_sizes A vector(classes) with the expected number of
   subjects in the class.
   @param[out] quad_freqs A matrix(items x classes) with the expected number of
   subjects in the class having a success at the item.
   
   \warning The memory for \em quad_sizes and \em quad_freqs should be allocated before.
*/
void
frequencies (gsl_matrix_int * patterns, gsl_vector * counts,
	     gsl_matrix * post, gsl_matrix * probs,
	     gsl_vector * quad_sizes, gsl_matrix * quad_freqs)
{
  int nbr_quad, nbr_pattern, nbr_item, i, j, k, resp;
  double freq_tmp, size_tmp;

  nbr_quad = post->size2;
  nbr_pattern = patterns->size1;
  nbr_item = patterns->size2;

  /* for each class */
  for (k = 0; k < nbr_quad; k++)
    {
      /* for each item */
      for (i = 0; i < nbr_item; i++)
	{
	  size_tmp = 0;
	  freq_tmp = 0;
	  /* for each pattern */
	  for (j = 0; j < nbr_pattern; j++)
	    {
	      /* get the response to item i in pattern j */
	      resp = gsl_matrix_int_get (patterns, j, i);

	      if (resp == BLANK)
		{
		  /* adjust the expected frequency for the missing data. */
		  resp = gsl_matrix_get(probs, i, k);
		}

	      /* update the expected size and frequency */

	      /** \todo Recomputing the quad_size for each item is unecessary. */
	      size_tmp += (counts ? gsl_vector_get (counts, j) : 1)
		* gsl_matrix_get (post, j, k);

	      freq_tmp += (counts ? gsl_vector_get (counts, j) : 1) * resp 
		* gsl_matrix_get (post, j, k);
	    }
	  if (quad_sizes)
	    gsl_vector_set (quad_sizes, k, size_tmp);
	  if (quad_freqs)
	    gsl_matrix_set (quad_freqs, i, k, freq_tmp);
	}
    }
}

/**
   \brief Recompute the quadrature weights based on the observations.
   
   @param[in] nbr_subject the number of subjects.
   @param[in] quad_sizes A vector(classes) with the expected number of
   subjects in the class.
   @param[in,out] quad_points A vector(classes) with the middle points of
   each class.
   @param[out] quad_weights A vector(classes) with the prior weights of
   each quadrature class.
   
   \warning The memory for \em quad_weights should be allocated before.
*/
void
adjust_quad_weights (double nbr_subject, gsl_vector * quad_sizes, 
		     gsl_vector * quad_points, gsl_vector * quad_weights)
{
  int nbr_quad, k;
  double tot=0, mean=0, var=0, weight, tmp;

  nbr_quad = quad_sizes->size;

  /* correct for the number of subject anc compute the mean */
  for (k = 0; k < nbr_quad; k++)
    {
      weight = gsl_vector_get(quad_sizes, k) / nbr_subject;
      gsl_vector_set(quad_weights, k, weight);
      tot += weight;
      mean += weight * gsl_vector_get(quad_points, k);
    }
  mean /= tot; /* keep the sum to 1 */

  /* compute the variance */
  for (k = 0; k < nbr_quad; k++)
    {
      weight = gsl_vector_get(quad_weights, k);
      weight /= tot; /* keep the sum to 1 */
      gsl_vector_set(quad_weights, k, weight);
      tmp = gsl_vector_get(quad_points, k) - mean;
      tmp *= tmp;
      var += weight * tmp;
    }
  var = sqrt(var);

  /* standardize */
  /* \todo Center and rescale the weights without changing the points. */
  for (k = 0; k < nbr_quad; k++)
    {
      tmp = gsl_vector_get(quad_points, k);
      tmp -= mean;
      tmp /= var;
      gsl_vector_set(quad_points, k, tmp);
    }
}

/**
   \brief Check for degenerate items.

   For each items, check if it's almost either all success or
   all failures, and if so enable the ignore flag and set the prob
   accordingly.
   
   @param[in] patterns A matrix (patterns x items) of binary responses.
   @param[out] probs A matrix (items x classes) of response functions.
   @param[out] thresholds A vector(items) with the threshold parameters of each item.
   @param[out] ignore A vector (items) of flag.
   
   \return The number of items to ignore.

   \warning The memory for the outputs must be allocated before.
*/
int
set_ignore (gsl_matrix_int * patterns, gsl_matrix * probs, 
	    gsl_vector * thresholds, gsl_vector_int * ignore)
{
  int nbr_quad, nbr_pattern, nbr_item, i, j, k, nbr_true, nbr_false, nbr_blank, nbr_ignore;
  double pct, prob, thresh;

  nbr_pattern = patterns->size1;
  nbr_item = patterns->size2;

  nbr_ignore = 0;

  /* for each item */
  for (i = 0; i < nbr_item; i++)
    {
      nbr_true = 0;
      nbr_false = 0;
      nbr_blank = 0;

      /* for each pattern except the first */
      for (j = 0; j < nbr_pattern; j++)
	{
	  /* count the number of true and false */
	  switch (gsl_matrix_int_get (patterns, j, i))
	    {
	    case TRUE:
	      nbr_true++;
	      break;
	    case FALSE:
	      nbr_false++;
	      break;
	    case BLANK:
	      nbr_blank++;
	      break;
	    }
	}

      pct = nbr_true / ((double)nbr_pattern - nbr_blank);

      if (pct <= VERY_SMALL_FREQ || pct >= 1-VERY_SMALL_FREQ)
	{
	  nbr_ignore++;

	  if (libirt_verbose > 2)
	    printf("Item %d has %10.5e%% success, ignoring it.\n", i+1, pct);

	  /* set the prob and thresh for the degenerate */
	  if (nbr_true > nbr_false)
	    {
	      prob = 1 - VERY_SMALL_PROB;
	      thresh = -10;
	    }
	  else
	    {
	      prob = VERY_SMALL_PROB;
	      thresh = 10;
	    }
	      
	  if (probs) 
	    {
	      nbr_quad = probs->size2;
	      /* for each class */
	      for (k = 0; k < nbr_quad; k++)
		gsl_matrix_set(probs, i, k, prob);
	    }

	  if (thresholds)
	    gsl_vector_set(thresholds, i, thresh);

	  /* set the ignore flag */
	  gsl_vector_int_set(ignore, i, 1);
	}
      else
	{
	  /* set the ignore flag */
	  gsl_vector_int_set(ignore, i, 0);
	}
    }

  return nbr_ignore;
}

/**
   \brief Check for degenerate multiple choice items.

   For each items, check if a option is all success,
   and if so enable the ignore flag and set the prob accordingly.
   
   @param[in] patterns A matrix (patterns x options) of binary responses.
   @param[in] nbr_options A vector(items) with the number of option of each items.
   @param[in] items_pos A vector(items) with the position of the first option of each item
   in patterns.
   @param[out] probs A matrix (options x classes) of option response functions.
   @param[out] thresholds A vector(options) with the threshold parameters of each logit.
   @param[out] ignore A vector (items) of flag.
   
   \return The number of items to ignore.

   \warning The memory for the outputs must be allocated before.
*/
int
set_ignore_mc (gsl_matrix_int * patterns,
	       gsl_vector_int * nbr_options, gsl_vector_int * items_pos,
	       gsl_matrix * probs, gsl_vector * thresholds, gsl_vector_int * ignore)
{
  int nbr_quad, nbr_pattern, nbr_item, i, j, k, o, win_option, 
    nbr_true, nbr_false, nbr_blank, nbr_ignore, pos, nbr_option;
  double pct, prob, thresh;

  nbr_pattern = patterns->size1;
  nbr_item = nbr_options->size;

  nbr_ignore = 0;

  /* for each item */
  for (i = 0; i < nbr_item; i++)
    {
      gsl_vector_int_set(ignore, i, 0);
      pos = gsl_vector_int_get(items_pos, i);
      nbr_option = gsl_vector_int_get(nbr_options, i);

      /* for each option */
      for (o = 0; o < nbr_option; o++)
	{

	  nbr_true = 0;
	  nbr_false = 0;
	  nbr_blank = 0;

	  /* for each pattern */
	  for (j = 0; j < nbr_pattern; j++)
	    {
	      /* count the number of true and false */
	      switch (gsl_matrix_int_get (patterns, j, pos+o))
		{
		case TRUE:
		  nbr_true++;
		  break;
		case FALSE:
		  nbr_false++;
		  break;
		case BLANK:
		  nbr_blank++;
		  break;
		}
	    }

	  pct = nbr_true / ((double)nbr_pattern - nbr_blank);

	  if (pct >= 1-VERY_SMALL_FREQ)
	    {
	      /* set the ignore flag */
	      gsl_vector_int_set(ignore, i, 1);
	      win_option = o;
	      break;
	    }
	}

      if(gsl_vector_int_get(ignore, i))
	{
	  nbr_ignore++;

	  if (libirt_verbose > 2)
	    printf("Option %d of item %d has %10.5e%% success, ignoring the item.\n",
		   o+1, i+1, 100*pct);

	  /* set the prob and thresh for the most frequent option */
	  prob = 1 - VERY_SMALL_PROB;
	  thresh = 1;

	  if (probs) 
	    {
	      nbr_quad = probs->size2;
	      /* for each class */
	      for (k = 0; k < nbr_quad; k++)
		gsl_matrix_set(probs, pos+win_option, k, prob);
	    }

	  if (thresholds)
	    gsl_vector_set(thresholds, pos+win_option, thresh);

	  /* set prob and thresh for the rest */
	  for (o = 0; o < nbr_option; o++)
	    if(o != win_option)
	      {
		if (probs) 
		  {
		    nbr_quad = probs->size2;
		    /* for each class */
		    for (k = 0; k < nbr_quad; k++)
		      gsl_matrix_set(probs, pos+o, k, (1-prob)/(nbr_option-1));
		  }
		
		if (thresholds)
		  gsl_vector_set(thresholds, pos+o, -20*thresh/nbr_option);
	      }
	}
    }
      
  return nbr_ignore;
}

/**
   \brief Reset the probabilities to be inside the open interval (0,1).

   @param[in,out] probs A matrix (items x classes) of response functions.
   @param[in] eps The minimal distance allowed from 0 or 1.
*/
void
constrain_probs (gsl_matrix * probs, double eps)
{
  int nbr_quad, nbr_item, i, k;
  double prob;

  nbr_quad = probs->size2;
  nbr_item = probs->size1;

  /* for each item */
  for (i = 0; i < nbr_item; i++)
    {
      /* for each class */
      for (k = 0; k < nbr_quad; k++)
	{
	  prob = gsl_matrix_get(probs, i, k);
	  if (prob < eps) prob = eps;
	  if (prob > 1 - eps) prob = 1 - eps;
	  gsl_matrix_set(probs, i, k, prob);
	}      
    }
}

/**
   \brief Transform a matrix with multiple choice items into a matrix
   with binary items by creating a new "item"for each option.

   @param[in] patterns_mc A matrix(patterns x items) of multiple choice responses.
   @param[in] nbr_options A vector(items) with the number of option of each items.
   @param[out] patterns A matrix(patterns x options) of binary responses.
   @param[out] items_pos A vector(items) with the position of the first option of each item
   in patterns.

   \warning The memory for the outputs must be allocated before.
*/
void
patterns_expand (gsl_matrix_int * patterns_mc, gsl_vector_int * nbr_options, 
		 gsl_matrix_int * patterns, gsl_vector_int * items_pos)
{
  int i, j, k, pos, resp, nbr_option;
  int nbr_pattern = patterns_mc->size1;
  int nbr_item = patterns_mc->size2;

  gsl_matrix_int_set_all(patterns, FALSE);
  pos = 0;
  for (i = 0; i < nbr_item; i++)
    {
      nbr_option = gsl_vector_int_get(nbr_options, i);
      gsl_vector_int_set(items_pos, i, pos);
      for (j = 0; j < nbr_pattern; j++)
	{
	  resp = gsl_matrix_int_get(patterns_mc, j, i);
	  if (resp != BLANK && (resp < 1 || resp > nbr_option))
	    {
	      if (libirt_verbose > 0)
		printf("Item %d has an option out of range (%d), converting to blank.\n",
		       i+1, resp);
	      resp = BLANK;
	    }
	  if (resp != BLANK)
	    {
	      gsl_matrix_int_set(patterns, j, pos+resp-1, TRUE);
	    }
	  else
	    {
	      for(k = 0; k < nbr_option; k++)
		gsl_matrix_int_set(patterns, j, pos+k, BLANK);
	    }
	}
      pos += nbr_option;
    }
}

/**
   \brief Compute the items' characteristic curves from the options' characteristic curves.

   @param[in] probs A matrix (options x classes) of options' characteristic curves.
   @param[in] probs_stddev A matrix (options x classes) of occ standard error.
   @param[in] options_weights A vector (options) of options' weights.
   @param[in] nbr_options A vector(items) with the number of option of each items.
   @param[in] items_pos A vector(items) with the position of the first option of each item.
   @param[out] iccs A matrix (items+1 x classes) of items' characteristic curves
   and the test characteristic curve.
   @param[out] iccs_stddev A matrix (items+1 x classes) of icc standard error.

   \warning The memory for the outputs must be allocated before.
*/
void
icc_from_probs (gsl_matrix *probs, gsl_matrix *probs_stddev,
		gsl_vector *options_weights,
		gsl_vector_int *nbr_options, gsl_vector_int *items_pos,
		gsl_matrix *iccs, gsl_matrix *iccs_stddev)
{
  int nbr_item = nbr_options->size, nbr_quad = probs->size2,
    nbr_option, pos, i, k, o;
  double icc, var;

  for (i = 0; i < nbr_item; i++)
    {
      nbr_option = gsl_vector_int_get(nbr_options, i);
      pos = gsl_vector_int_get(items_pos, i);
      for (k = 0; k < nbr_quad; k++)
	{
	  icc = 0;
	  var = 0;
	  for (o = 0; o < nbr_option; o++)
	    {
	      icc += gsl_vector_get(options_weights, pos+o) 
		* gsl_matrix_get(probs, pos+o, k);
	      var += gsl_vector_get(options_weights, pos+o)
                * gsl_vector_get(options_weights, pos+o)
                * gsl_matrix_get(probs_stddev, pos+o, k)
		* gsl_matrix_get(probs_stddev, pos+o, k);
	    }
	  gsl_matrix_set(iccs, i, k, icc);
	  if (iccs_stddev) gsl_matrix_set(iccs_stddev, i, k, sqrt(var));
	}
    }

  /* compute the tcc */
  for (k = 0; k < nbr_quad; k++)
    {
      icc = 0;
      var = 0;
      for (i = 0; i < nbr_item; i++)
	{
	  icc += gsl_matrix_get(iccs, i, k);
	  var += gsl_matrix_get(iccs_stddev, i, k)
	    * gsl_matrix_get(iccs_stddev, i, k);
	}
      gsl_matrix_set(iccs, nbr_item, k, icc);
      if (iccs_stddev) gsl_matrix_set(iccs_stddev, nbr_item, k, sqrt(var));
    }
}

