/******************************************************************************/
/*                                                                            */
/*      ctk_HMM.cpp                                                           */
/*                                                                            */
/*      Support for HMMs                                                      */
/*                                                                            */
/*      Author: Jon Barker, Sheffield University                              */
/*                                                                            */
/*      CTK VERSION 1.3.5  Apr 22, 2007                              */
/*  */
/******************************************************************************/

#include "ctk-config.h"

#include <cmath>
#include <cassert>

#include <string>
#include <algorithm>
#include <numeric>
#include <map>
#include <functional>
#include <fstream>

#include "boost/smart_ptr.hpp"

#include "ctk_local.hh"
#include "ctk_error.hh"

#include "ctk_function_classes.hh"

#include "ctk_HMM_builder.hh"
#include "ctk_HMM_edit.hh"
#include "ctk_feature_vector.hh"

#include "ctk_dsp.hh"
#include "ctk_HMM.hh"

// Allow boost::shared_ptr to be written unqualified in this file
using boost::shared_ptr;

// Presumably tags for the kinds of HTK definition/list file recognised by the
// loader; they are not referenced in the part of the file visible here.
enum {HTK15_DEF_FILE, HTK20_DEF_FILE, HTK15_LIST_FILE, HTK20_LIST_FILE, HTK_LIST_FILE_ERROR};

// Convert a string to upper case
extern void make_upper(string &token);

// ID counters. The HMMMixture and HMMState constructors pre-increment the
// matching counter and store the result, so the first ID handed out is 1.
static int global_next_mix_ID = 0;
static int global_next_state_ID = 0;

// Defined elsewhere; used by read_dictionary() to turn the remainder of a
// dictionary line into a list of strings.
extern list<string> string_to_list_string(const string &string);

/******************************************************************************/

// Return the last component of a '/'-separated path, i.e. everything after the
// final '/'. When 'fullfilename' contains no '/', it is returned unchanged.
string get_filename(const string &fullfilename) {
  // rfind() returns string::size_type. Storing it in a narrower unsigned int
  // truncates string::npos on LP64 platforms, so the "not found" test below
  // would never succeed; keep the full-width type.
  const string::size_type pos = fullfilename.rfind('/');
  if (pos==string::npos)
    return fullfilename;
  return fullfilename.substr(pos+1);
}

// Return the directory part of a '/'-separated path, i.e. everything before
// the final '/'. When 'fullfilename' contains no '/', "." is returned.
string get_filepath(const string &fullfilename) {
  // rfind() returns string::size_type. Storing it in a narrower unsigned int
  // truncates string::npos on LP64 platforms, so a name with no '/' used to
  // fall through to substr() and return the whole name instead of ".".
  const string::size_type pos = fullfilename.rfind('/');
  if (pos==string::npos)
    return string(".");
  return fullfilename.substr(0, pos);
}



// Read the pronunciation dictionary  that maps HMM logical names onto sequences
// of HMM physical names
void read_dictionary(const string &file_name, map<string, list<string> > &dictionary) {
  
  char buffer[CTK_CHAR_BUFFER_SIZE];

  ifstream istr(file_name.c_str());
   
  if (istr==NULL) {
    cerr << "Cannot open dictionary file: " << file_name  << endl;
    throw(CTKError(__FILE__, __LINE__));
  }

  string word;
  while (istr>>word) {
    istr.getline(buffer,CTK_CHAR_BUFFER_SIZE,'\n');
    list<string> pronunciation = string_to_list_string(buffer);
    dictionary[word]=pronunciation;
  }
   
}


// Read the label file and construct the HMM logical name-> HMM output label map
void read_hmm_label_file(const string &a_file_name, map<string, string> &name_label_map) {
  
  // Note: Name -> Label is  a many-to-one mapping e.g. "one_male"->"1"  "one_female"->"1"
  // HMM names must be unique. But many HMMs can share the same output label.

  FILE *labelfile;
  
  if ((labelfile=fopen(a_file_name.c_str(), "r"))==NULL) {
    cerr << "Cannot open label file: " << a_file_name.c_str() << endl;
    throw(CTKError(__FILE__, __LINE__));
  }

  char line[255];
  
  while (fgets(line, 255, labelfile)!=NULL) {

    string label, a_name;

    // Get the first word on the line - i.e. the label
    char *tokenPtr = strtok(line, " ");
    if (tokenPtr==NULL) break;
    label=string(tokenPtr);
    label.erase(remove(label.begin(), label.end(), '\n'), label.end());  // strip carriage return char
    if (label.size()==0) break;

    // Get each of the model names that follow the label and add 'label/name' pairs to the map
    tokenPtr = strtok(NULL, " ");
    while (tokenPtr != NULL) {
      a_name=string(tokenPtr);
      a_name.erase(remove(a_name.begin(), a_name.end(), '\n'), a_name.end());  // strip carriage return char
      name_label_map.insert(make_pair(a_name, label));
      tokenPtr = strtok(NULL, " ");
    }

  }

}

/******************************************************************************/
/*									      */
/*	CLASS NAME: HMMMixture			         	       	      */
/*									      */
/******************************************************************************/

// Construct a mixture component from a vector of means ('amu') and a vector
// that is stored as 'ivar' ('aivar'). Marginals and delta features start
// switched off, a fresh ID is assigned, and all derived per-feature constants
// are precomputed.
HMMMixture::HMMMixture(const vector<HMMFloat> &amu, const vector<HMMFloat> &aivar): mu(amu), ivar(aivar) {

  use_deltas=false;
  use_delta_marginals=false;
  use_marginals=false;

  // IDs are issued sequentially from the file-level counter
  global_next_mix_ID+=1;
  mix_ID = global_next_mix_ID;

#if defined (_DEBUG)
  // Debug builds insist on non-empty parameter vectors
  assert(ivar.size());
  assert(mu.size());
#endif

  do_all_precomputation();

}

// Nothing to release explicitly: the destructor body is intentionally empty.
HMMMixture::~HMMMixture() {}

// Attach a new missing-data mask and flag that the mask has changed.
void HMMMixture::set_missing_data_mask(shared_ptr<MaskVector> amask_vector) {
  mask_has_changed=true;
  mask_vector=amask_vector;
}

// Attach a new observation vector. The same vector is forwarded to the
// parallel mixture when one is set, and the cached per-feature likelihoods
// and marginals are marked invalid.
void HMMMixture::set_observed_data(shared_ptr<FeatureVector> afeature_vector) {
  feature_vector=afeature_vector;

  if (parallel_mixture.get()!=0) {
    parallel_mixture->set_observed_data(feature_vector);
  }

  data_has_changed=true;
  marginals_valid=false;
  likelihoods_valid=false;
}

// Attach a parallel mixture, hand it the current observation vector, and flag
// that the parallel mixture has changed.
// Note: the argument is dereferenced unconditionally.
void HMMMixture::set_parallel_mixture(shared_ptr<HMMMixture> aparallel_mixture) {
  parallel_mixture=aparallel_mixture;

  // Keep the partner looking at the same data as this mixture
  parallel_mixture->set_observed_data(feature_vector);

  parallel_mixture_has_changed=true;
}


// Calculate feature likelihoods - but skip computation if the data is fully missing ie. mask=0
void HMMMixture::calculate_masked_likelihood() const {

  vector<HMMFloat>::const_iterator mup=mu.begin(),
    ivarsm05p=ivarsm05.begin(),
    likconp=likcon.begin(),
    maskp=mask_vector->begin();

  likelihoods.resize(0);

  for (vector<HMMFloat>::const_iterator datap=feature_vector->begin(), data_end=feature_vector->end(use_deltas); datap!=data_end; ++datap, ++mup, ++ivarsm05p, ++likconp, ++maskp)
    likelihoods.push_back((*maskp!=0.0F)?calc_feature_likelihood(*datap, *mup, *ivarsm05p, *likconp):0.0F);

}


// Calculate all feature likelihoods (irrespective of mask)
void HMMMixture::calculate_full_likelihood() const {

  vector<HMMFloat>::const_iterator mup=mu.begin(),
    ivarsm05p=ivarsm05.begin(),
    likconp=likcon.begin();

  likelihoods.resize(0);

  for (vector<HMMFloat>::const_iterator datap=feature_vector->begin(), data_end=feature_vector->end(use_deltas); datap!=data_end; ++datap, ++mup, ++ivarsm05p, ++likconp)
    likelihoods.push_back(calc_feature_likelihood(*datap, *mup, *ivarsm05p, *likconp)); 

  likelihoods_valid=true;
}



// Return the sum of the feature likelihoods over those features whose mask
// value is exactly 1 (fully present). Other features contribute nothing.
HMMFloat HMMMixture::accumulate_masked_likelihood() const {

  typedef vector<HMMFloat>::const_iterator ConstIt;

  ConstIt mean_it=mu.begin();
  ConstIt scale_it=ivarsm05.begin();
  ConstIt const_it=likcon.begin();
  ConstIt mask_it=mask_vector->begin();

  HMMFloat total=0.0F;

  ConstIt obs_it=feature_vector->begin();
  const ConstIt obs_end=feature_vector->end(use_deltas);

  for (; obs_it!=obs_end; ++obs_it, ++mean_it, ++scale_it, ++const_it, ++mask_it) {
    if (*mask_it!=1.0F)
      continue;
    total+=calc_feature_likelihood(*obs_it, *mean_it, *scale_it, *const_it);
  }

  return total;
}



// Calculate feature marginals - but skip computation if the data is fully present ie. mask=1
void HMMMixture::calculate_masked_marginal() const {

  vector<HMMFloat>::const_iterator mup=mu.begin();
  vector<HMMFloat>::const_iterator sqrtivar05p=sqrtivar05.begin();
  vector<HMMFloat>::const_iterator erfmup=erfmu.begin();
  vector<HMMFloat>::const_iterator maskp=mask_vector->begin();
  vector<HMMFloat>::const_iterator lik0p=lik0.begin();
  
  vector<HMMFloat>::const_iterator marg_normp=feature_vector->marg_norm_begin();

  marginals.resize(0);
  
  for (vector<HMMFloat>::const_iterator lower_boundsp=feature_vector->lower_bounds_begin(), upper_boundsp=feature_vector->upper_bounds_begin(),upper_bounds_end=feature_vector->upper_bounds_end(false); upper_boundsp!=upper_bounds_end; ++upper_boundsp, ++mup, ++sqrtivar05p, ++erfmup, ++marg_normp, ++maskp, ++lik0p) 
    marginals.push_back((*maskp!=1.0F)?calc_feature_marginal(*lower_boundsp, *upper_boundsp, *mup, *sqrtivar05p, *erfmup, *marg_normp, *lik0p):0.0F);

  if (use_delta_marginals) {
    
    int nfeatures = feature_vector->non_delta_size();

    vector<HMMFloat>::const_iterator ivarsm05p=ivarsm05.begin()+nfeatures,
      likconp=likcon.begin()+nfeatures;
    
    for (vector<HMMFloat>::const_iterator  lower_boundsp=feature_vector->lower_bounds_end(false), upper_boundsp=feature_vector->upper_bounds_end(false),upper_bounds_end=feature_vector->upper_bounds_end(true); upper_boundsp!=upper_bounds_end; ++upper_boundsp, ++mup, ++sqrtivar05p, ++erfmup, ++marg_normp, ++maskp, ++lik0p, ++ivarsm05p, ++likconp) {
      if (*upper_boundsp>*lower_boundsp)
	marginals.push_back((*maskp!=1.0F)?calc_feature_marginal(*lower_boundsp, *upper_boundsp, *mup, *sqrtivar05p, *erfmup, *marg_normp, *lik0p):0.0F);
      else
	marginals.push_back((*maskp!=1.0F)?calc_feature_likelihood(*upper_boundsp, *mup, *ivarsm05p, *likconp):0.0F);
    }
    
  }

}

// Rebuild 'marginals' with one entry per feature, irrespective of the mask,
// and mark the cached marginals as valid.
//
// The first loop covers the non-delta features. When use_delta_marginals is
// set, a second loop covers the delta features: where the upper bound exceeds
// the lower bound the bounded marginal is used, otherwise the likelihood at
// the upper bound is used instead.
//
// Fix: the non-delta loop now advances the lower-bound iterator, as the delta
// loop already did. Previously every non-delta feature was evaluated against
// the first feature's lower bound.
void HMMMixture::calculate_full_marginal() const {

  vector<HMMFloat>::const_iterator mup=mu.begin();
  vector<HMMFloat>::const_iterator sqrtivar05p=sqrtivar05.begin();
  vector<HMMFloat>::const_iterator erfmup=erfmu.begin();
  vector<HMMFloat>::const_iterator lik0p=lik0.begin();

  vector<HMMFloat>::const_iterator marg_normp=feature_vector->marg_norm_begin();

  marginals.resize(0);

  // Non-delta features
  for (vector<HMMFloat>::const_iterator lower_boundsp=feature_vector->lower_bounds_begin(), upper_boundsp=feature_vector->upper_bounds_begin(), upper_bounds_end=feature_vector->upper_bounds_end(false); upper_boundsp!=upper_bounds_end; ++lower_boundsp, ++upper_boundsp, ++mup, ++sqrtivar05p, ++erfmup, ++marg_normp, ++lik0p)
    marginals.push_back(calc_feature_marginal(*lower_boundsp, *upper_boundsp, *mup, *sqrtivar05p, *erfmup, *marg_normp, *lik0p));

  if (use_delta_marginals) {

    int nfeatures = feature_vector->non_delta_size();

    // The likelihood constants are indexed from the first delta feature
    vector<HMMFloat>::const_iterator ivarsm05p=ivarsm05.begin()+nfeatures,
      likconp=likcon.begin()+nfeatures;

    // Delta features: the remaining iterators carry on from where the first loop stopped
    for (vector<HMMFloat>::const_iterator lower_boundsp=feature_vector->lower_bounds_end(false), upper_boundsp=feature_vector->upper_bounds_end(false), upper_bounds_end=feature_vector->upper_bounds_end(use_deltas); upper_boundsp!=upper_bounds_end; ++lower_boundsp, ++upper_boundsp, ++mup, ++sqrtivar05p, ++erfmup, ++marg_normp, ++lik0p, ++ivarsm05p, ++likconp) {
      if (*upper_boundsp>*lower_boundsp) {
        marginals.push_back(calc_feature_marginal(*lower_boundsp, *upper_boundsp, *mup, *sqrtivar05p, *erfmup, *marg_normp, *lik0p));
      } else {
        marginals.push_back(calc_feature_likelihood(*upper_boundsp, *mup, *ivarsm05p, *likconp));
      }
    }
  }

  marginals_valid=true;

}


// Return the sum of the marginals over those features whose mask value is
// exactly 0 (fully missing). Returns 0 when use_marginals is off.
//
// The first loop covers the non-delta features. When use_delta_marginals is
// set, a second loop covers the delta features: where the upper bound exceeds
// the lower bound the bounded marginal is used, otherwise the likelihood at
// the upper bound is used instead.
//
// Fix: the lower-bound iterator is now advanced together with the other
// iterators. Previously it stayed on the first element of its range, so every
// feature was evaluated against the first feature's lower bound.
HMMFloat HMMMixture::accumulate_masked_marginal() const {

  HMMFloat total=0.0F;

  vector<HMMFloat>::const_iterator marg_normp=feature_vector->marg_norm_begin();
  vector<HMMFloat>::const_iterator mup=mu.begin();
  vector<HMMFloat>::const_iterator sqrtivar05p=sqrtivar05.begin();
  vector<HMMFloat>::const_iterator erfmup=erfmu.begin();
  vector<HMMFloat>::const_iterator maskp=mask_vector->begin();
  vector<HMMFloat>::const_iterator lik0p=lik0.begin();

  if (!use_marginals) return 0.0;

  // Non-delta features
  for (vector<HMMFloat>::const_iterator lower_boundsp=feature_vector->lower_bounds_begin(), upper_boundsp=feature_vector->upper_bounds_begin(), upper_bounds_end=feature_vector->upper_bounds_end(false); upper_boundsp!=upper_bounds_end; ++lower_boundsp, ++upper_boundsp, ++mup, ++sqrtivar05p, ++erfmup, ++marg_normp, ++maskp, ++lik0p) {
    if (*maskp==0.0F) {
      total+=calc_feature_marginal(*lower_boundsp, *upper_boundsp, *mup, *sqrtivar05p, *erfmup, *marg_normp, *lik0p);
    }
  }

  if (use_delta_marginals) {

    int nfeatures = feature_vector->non_delta_size();

    // The likelihood constants are indexed from the first delta feature
    vector<HMMFloat>::const_iterator ivarsm05p=ivarsm05.begin()+nfeatures,
      likconp=likcon.begin()+nfeatures;

    // Delta features: the remaining iterators carry on from where the first loop stopped
    for (vector<HMMFloat>::const_iterator lower_boundsp=feature_vector->lower_bounds_end(false), upper_boundsp=feature_vector->upper_bounds_end(false), upper_bounds_end=feature_vector->upper_bounds_end(true); upper_boundsp!=upper_bounds_end; ++lower_boundsp, ++upper_boundsp, ++mup, ++sqrtivar05p, ++erfmup, ++marg_normp, ++maskp, ++lik0p, ++ivarsm05p, ++likconp) {
      if (*maskp==0.0F) {
        if (*upper_boundsp>*lower_boundsp)
          total+=calc_feature_marginal(*lower_boundsp, *upper_boundsp, *mup, *sqrtivar05p, *erfmup, *marg_normp, *lik0p);
        else
          total+=calc_feature_likelihood(*upper_boundsp, *mup, *ivarsm05p, *likconp);
      }
    }

  }

  return total;

}

// Treats the observed data as observations of a masking source and, for each
// selected feature, computes the expected value of this distribution given
// that it generates a value less than the masking level.
//
// Returns one value per observed feature; features whose 'mask' entry is
// false get 0.
// NOTE(review): the original comment said terms are computed "for which the
// mask is 0", but the code computes them where the mask entry is true -
// confirm which polarity callers intend.
vector<HMMFloat> HMMMixture::get_expected_values(const vector<bool> &mask) const {

  vector<HMMFloat> expected;

  vector<HMMFloat>::const_iterator mean_it=mu.begin();
  vector<HMMFloat>::const_iterator ivar_it=ivar.begin();
  vector<bool>::const_iterator flag_it=mask.begin();

  vector<HMMFloat>::const_iterator obs_it=feature_vector->begin();
  const vector<HMMFloat>::const_iterator obs_end=feature_vector->end(use_deltas);

  for (; obs_it!=obs_end; ++obs_it, ++mean_it, ++ivar_it, ++flag_it) {

    HMMFloat x=0.0;

    if (!*flag_it) {
      // Not selected: store the zero placeholder
      expected.push_back(x);
      continue;
    }

    x=*obs_it-*mean_it;
    HMMFloat norm=2.0/(1.0+erft(x*sqrt(*ivar_it)/sqrt(2.0)));
    x=-sqrt(2/M_PI) * sqrt(1.0/ *ivar_it) * norm * exp(-x*x * *ivar_it /2.0);
    x=x+*mean_it;

    // Diagnostic: report any expectation that lands above the observation
    if (x>*obs_it)
      cerr << *mean_it << " " << 1.0 / *ivar_it << " "  << *obs_it << " " << norm << " "<< x << "\n";

    expected.push_back(x);
  }

  return expected;
}


void HMMMixture::do_all_precomputation() {

  HMMFloat tmp_sqrtivar05, tmp_likcon, tmp_ivarsm05;

  ivarsm05.resize(0);
  sqrtivar.resize(0);
  sqrtivar05.resize(0);
  likcon.resize(0);
  lik0.resize(0);
  erfmu.resize(0);
  
  for (vector<HMMFloat>::iterator mup=mu.begin(), mup_end=mu.end(), ivarp=ivar.begin(); mup!=mup_end; ++mup, ++ivarp) {
    HMMFloat amu=*mup, aivar=*ivarp;
    
    ivarsm05.push_back(tmp_ivarsm05=(-0.5*aivar));
    sqrtivar.push_back(sqrt(aivar));
    sqrtivar05.push_back(tmp_sqrtivar05=sqrt(0.5*aivar));
    likcon.push_back(tmp_likcon=(-0.5 * log(2.0*M_PI/aivar)));     // Precomputed constant used in likelihood computation
    
    erfmu.push_back(erf(-amu*tmp_sqrtivar05));        // Guassian integral from -inf to 0
    lik0.push_back(amu*amu*tmp_ivarsm05+tmp_likcon);  // likelihood at x=0. 
  }
}


/***** The Erf Table *******************************/

// Lookup table of erf values, allocated and filled by make_erf_table() during
// static initialisation.
// NOTE(review): make_erf_table() reads the two constants defined below; this
// relies on them being constant-initialised before this dynamic initialiser
// runs - confirm Float and UInteger16 are built-in arithmetic types.
HMMFloat *HMMMixture::erf_table = HMMMixture::make_erf_table();

// Largest argument covered by the table
const Float HMMMixture::ERF_TABLE_MAX = 3.60F;
// Number of table entries per unit of argument
const UInteger16 HMMMixture::ERF_TABLE_RESOLUTION = 1000;

// Allocate and fill the erf lookup table: entry i holds
// erf(i / ERF_TABLE_RESOLUTION) for i = 0 .. ERF_TABLE_MAX*ERF_TABLE_RESOLUTION.
// The array is allocated with new[] and returned to the caller.
HMMFloat *HMMMixture::make_erf_table() {
  const UInteger16 table_size=(UInteger16)(ERF_TABLE_MAX*ERF_TABLE_RESOLUTION)+1;

  HMMFloat *table = new HMMFloat[table_size];

  UInteger16 idx=0;
  while (idx<table_size) {
    table[idx]=erf((HMMFloat)idx/ERF_TABLE_RESOLUTION);
    ++idx;
  }

  return table;
}

/***** The Log Table *******************************/

// Lookup tables for natural logs, each allocated and filled once during static
// initialisation by the make_log_table_* function of the same name:
//   log_table_exponent       - 256 entries,  log(2^(i-127))
//   log_table_mantissa_8bit  - 256 entries,  log(1 + i/256)
//   log_table_mantissa_10bit - 1024 entries, log(1 + i/1024)
HMMFloat *HMMMixture::log_table_exponent = HMMMixture::make_log_table_exponent();
HMMFloat *HMMMixture::log_table_mantissa_8bit = HMMMixture::make_log_table_mantissa_8bit();
HMMFloat *HMMMixture::log_table_mantissa_10bit = HMMMixture::make_log_table_mantissa_10bit();

// Allocate and fill a 256-entry table whose entry i holds log(2^(i-127)).
// The array is allocated with new[] and returned to the caller.
float *HMMMixture::make_log_table_exponent() {
  float *table = new float[256];

  int idx=0;
  while (idx<256) {
    table[idx]=log(pow(2.0,idx-127));
    ++idx;
  }

  return table;
}

// Allocate and fill a 256-entry table whose entry i holds log(1 + i/256).
// The array is allocated with new[] and returned to the caller.
float *HMMMixture::make_log_table_mantissa_8bit() {
  float *table = new float[256];

  int idx=0;
  while (idx<256) {
    table[idx]=log(1.0+((float)(idx)/256.0));
    ++idx;
  }

  return table;
}

// Allocate and fill a 1024-entry table whose entry i holds log(1 + i/1024).
// The array is allocated with new[] and returned to the caller.
float *HMMMixture::make_log_table_mantissa_10bit() {
  float *table = new float[1024];

  int idx=0;
  while (idx<1024) {
    table[idx]=log(1.0+((float)(idx)/1024.0));
    ++idx;
  }

  return table;
}

// Friend function for writing HMMMixture class details.
//
// Writes a "~m" macro header carrying the mixture ID, then the mean vector,
// then the variance vector. The class stores 'ivar', so each variance is
// written as 1/ivar.
//
// Changes from the previous version:
//  - "<Mean>" is now followed by a space before the vector size, matching
//    "<Variance> " here and the tags written for HMMState.
//  - the variance line is written with an explicit loop instead of bind1st,
//    which has been removed from the standard library in C++17. The value
//    written is unchanged: 1.0/ivar evaluated in double and printed as float.
ostream& operator<< (ostream& out, const HMMMixture& x) {
  out << "~m \"mix" << x.get_ID() << "\"" << endl;

  out << "<Mean> " << x.mu.size() << endl;
  copy(x.mu.begin(), x.mu.end(), ostream_iterator<float>(out, " "));
  out << endl;

  out << "<Variance> " << x.ivar.size() << endl;
  for (vector<HMMFloat>::const_iterator ivarp=x.ivar.begin(); ivarp!=x.ivar.end(); ++ivarp)
    out << static_cast<float>(1.0 / *ivarp) << " ";
  out << endl;

  return out;
}

/******************************************************************************/
/*									      */
/*	CLASS NAME: HMMMixtureStandard		              	       	      */
/*									      */
/******************************************************************************/

// Conventional Gaussian probability calculation: the sum of all per-feature
// likelihoods. The value is cached and only recomputed after the observed
// data has changed.
HMMFloat HMMMixtureStandard::get_prob() const {

  if (!get_data_has_changed())
    return prob;

  calculate_full_likelihood();
  prob=accumulate(get_likelihoods_begin(), get_likelihoods_end(), 0.0F);
  clear_data_has_changed();

  return prob;
}

/******************************************************************************/
/*									      */
/*	CLASS NAME: HMMMixtureMD		              	       	      */
/*									      */
/******************************************************************************/

// MD probability calculation using marginalisation or bounded marginalisation
// over missing data: summed likelihoods of the present features plus summed
// marginals of the missing ones. The value is cached and only recomputed
// after the data or the mask has changed.
HMMFloat HMMMixtureMD::get_prob() const {

  if (!get_data_has_changed() && !get_mask_has_changed())
    return prob;

  prob=0.0F;
  prob+=accumulate_masked_likelihood();
  prob+=accumulate_masked_marginal();

  clear_data_has_changed();
  clear_mask_has_changed();

  return prob;
}


/******************************************************************************/
/*									      */
/*	CLASS NAME: HMMMixtureMDSoft		              	       	      */
/*									      */
/******************************************************************************/

// MD probability calculation with soft masks

//  Note: BUG_2
//
// The method get_prob() can be compiled with a deliberate bug to simulate
// the behaviour of CTKv1.1.0. This is useful if running systems that have been tuned 
// under CTKv1.1.0 and hence were put out of tune when the bug was corrected
//

// Soft-mask probability. The value is cached in 'prob' and only recomputed
// after the data or the mask has changed.
//
// For each feature with mask weight w, likelihood lik and marginal marg:
//   w == 0      -> add marg
//   w == 1      -> add lik
//   otherwise   -> add logT8_add(lik + log_mask, marg + log_one_minus_mask)
// Delta features take part in this scheme only when deltas are in use AND
// delta marginals are enabled; otherwise they are handled by the second loop.
HMMFloat HMMMixtureMDSoft::get_prob() const {

  if (get_data_has_changed() || get_mask_has_changed()) {
    calculate_masked_likelihood();
    calculate_masked_marginal();

    const shared_ptr<MaskVector> mv=get_mask();
    vector<HMMFloat>::const_iterator wp, wp2, wp_end;
    vector<HMMFloat>::const_iterator lp = get_likelihoods_begin();
    vector<HMMFloat>::const_iterator mp = get_marginals_begin();
    vector<HMMFloat>::const_iterator lwp = mv->log_mask_begin();
    vector<HMMFloat>::const_iterator lomwp = mv->log_one_minus_mask_begin();
    prob=0.0F;

    // Static features
    bool deltas_with_bounds = get_use_deltas() && get_use_delta_marginals();

    // Accumulate probs for features. Note, delta features can't contribute unless they have bounds
    HMMFloat lik;
    
    // wp2 starts where this loop's mask range ends and advances in step with
    // wp; it is only dereferenced when WITH_V1_1_0_BUGS is defined.
    for (wp=mv->begin(), wp2=wp_end=mv->end(deltas_with_bounds); wp!=wp_end; ++wp, ++lp, ++mp, ++lwp, ++lomwp, ++wp2) {
      HMMFloat w=*wp;

      lik=*lp;

#ifdef WITH_V1_1_0_BUGS
      // Deliberate reproduction of the CTKv1.1.0 behaviour: halve the
      // likelihood whenever the mask value read through wp2 is non-zero.
      if (*wp2) {lik/=2.0;} 
#endif

      if (w==0.0F)
	prob+=*mp;
      else if (w==1.0F)
	prob+=lik;
      else {
	prob+=logT8_add(lik+*lwp, *mp+*lomwp) ;
      }
    }

    // If delta features have no bounds then they can only contribute likelihoods and only when they are
    // fully present i.e. where the mask contains a 1.0. 
    
    if (!deltas_with_bounds) {
      // Delta features - use discrete mask and likelihoods only
      if (get_use_deltas()) {
	// wp and lp carry on from where the first loop stopped; wp2 restarts
	// at the beginning of the mask.
	for (wp2=mv->begin(),wp_end=mv->end(true); wp!=wp_end; ++wp, ++lp, ++wp2) {
	  if (*wp==1.0F) {
#ifdef WITH_V1_1_0_BUGS
	    prob+=((*wp2==1.0)?*lp/2.0:*lp);
#else
	    prob+=*lp;
#endif
	  }
	}
      }
    }
    
    
    clear_data_has_changed();
    clear_mask_has_changed();
  }


  return prob;
}

/******************************************************************************/
/*									      */
/*	CLASS NAME: HMMMixturePMC		              	       	      */
/*									      */
/******************************************************************************/

// MD probability calculation for PMC system with single state gaussian noise model

// Probability for the PMC system. The value is cached in 'prob' and only
// recomputed after the data or the parallel mixture has changed.
//
// Naming used below: slp/smp iterate over this mixture's likelihoods and
// marginals, nlp/nmp over the parallel mixture's. Each term added to 'prob'
// is logT8_add(own likelihood + parallel marginal,
//              own marginal + parallel likelihood).
// NOTE(review): only this mixture's likelihoods and marginals are recomputed
// here; the parallel mixture's vectors are read as they stand - confirm they
// are kept up to date elsewhere.
HMMFloat HMMMixturePMC::get_prob() const {

  //  const float log05 = -0.30103;   // log10(0.5)
  //  const float PRIOR_SCALE = 0.5;
  
  if (get_data_has_changed() || get_parallel_mixture_has_changed()) {
    calculate_full_likelihood();
    calculate_full_marginal();
 
    
    vector<HMMFloat>::const_iterator slp = get_likelihoods_begin();
    vector<HMMFloat>::const_iterator nlp = get_parallel_mixture()->get_likelihoods_begin();
    vector<HMMFloat>::const_iterator smp = get_marginals_begin();
    vector<HMMFloat>::const_iterator nmp = get_parallel_mixture()->get_marginals_begin();
    prob=0.0F;
    
    // Accumulate probs for features. Note, delta features do not contribute

    bool use_deltas=get_use_deltas();

    if (!use_deltas) {
      // No deltas: one term per feature in the observation vector
      for (vector<HMMFloat>::const_iterator datap=get_feature_vector()->begin(), data_end=get_feature_vector()->end(use_deltas); datap!=data_end; ++nlp, ++nmp, ++slp, ++smp, ++datap) {
	prob+=logT8_add(*slp+*nmp, *smp+ *nlp) ;  
      }
    } else {

      const shared_ptr<MaskVector> mv=get_mask();

      //
      // The commented out sections below come from a more sophisticated version of the code where I was
      // trying to use the mask as an input that would bias interpretation towards one model or the other.
      // i.e. as a way of incorporating prior belief. I've taken it out for now.
      
      //      vector<HMMFloat>::const_iterator lm=mv->log_mask_begin();
      //      vector<HMMFloat>::const_iterator lomm=mv->log_one_minus_mask_begin();

      // First pass: one term per non-delta feature
      for (vector<HMMFloat>::const_iterator datap=get_feature_vector()->begin(), data_end=get_feature_vector()->end(false); datap!=data_end; ++nlp, ++nmp, ++slp, ++smp,  ++datap) { //++lm, ++lomm,
	prob+=logT8_add(*slp+*nmp, *smp+ *nlp) ;  
	//	prob+=logT8_add(*slp+*nmp+PRIOR_SCALE * (*lm-log05), *smp+ *nlp+ PRIOR_SCALE * (*lomm-log05)) ;  // JON
      }

      // These declarations shadow the outer smp/nmp and restart both marginal
      // iterators from the beginning, whereas slp/nlp carry on from where the
      // first pass stopped.
      vector<HMMFloat>::const_iterator smp = get_marginals_begin();
      vector<HMMFloat>::const_iterator nmp = get_parallel_mixture()->get_marginals_begin();
      
      // Second pass: runs over the non-delta range again, so the continuing
      // likelihood iterators are paired with the restarted marginals.
      // NOTE(review): presumably this pairs each delta likelihood with the
      // marginal of its static feature - confirm the intent.
      for (vector<HMMFloat>::const_iterator datap=get_feature_vector()->begin(), data_end=get_feature_vector()->end(false); datap!=data_end; ++nlp, ++nmp, ++slp, ++smp, ++datap) {// ++lm, ++lomm,
	  prob+=logT8_add(*slp+*nmp, *smp+ *nlp) ; 
	  //	  prob+=logT8_add(*slp+*nmp+PRIOR_SCALE * (*lm-log05), *smp+ *nlp+PRIOR_SCALE * (*lomm-log05)) ;  // JON
      }
      
    }
    
    
    clear_data_has_changed();
    clear_parallel_mixture_has_changed();

  }

  //  cerr << "PROB=" << prob << "\n"; // JON
  
  return prob;

  
}

/******************************************************************************/
/*									      */
/*	CLASS NAME: HMMMixtureMultisource	              	       	      */
/*									      */
/******************************************************************************/

// MD probability calculation for Multisource decoding.
// i.e. discrete masks applied as in MDBounds, but deltas have to be treated differently


// Probability for multisource decoding with a discrete mask. The value is
// cached in 'prob' and only recomputed after the data or the mask has changed;
// the per-feature likelihoods and marginals are only recomputed after the data
// has changed.
//
// This code is still under development ... it is still uncertain how best to
// handle delta features.
//
// Three cases:
//  - no deltas: each static feature adds its marginal where the mask is 0,
//    otherwise its likelihood;
//  - deltas with delta marginals: the same rule over all features;
//  - deltas without delta marginals: static features add the marginal where
//    the mask is 0, otherwise a weighted likelihood; delta features add a
//    weighted likelihood only, and nothing where the mask is 0. The weights
//    are 0.5/0.5 when a feature and its delta are both present (mask == 1),
//    and 1.0 for whichever one is present alone.
//
// Fix: the delta-only loop no longer advances the marginals iterator. In that
// case the marginals hold static features only, so the iterator was being
// stepped beyond the end of the vector even though it was never read.
HMMFloat HMMMixtureMultisource::get_prob() const {

  if (get_data_has_changed()) {
    calculate_full_likelihood();
    calculate_full_marginal();
  }

  if (get_data_has_changed() || get_mask_has_changed()) {

    const shared_ptr<MaskVector> mv=get_mask();
    vector<HMMFloat>::const_iterator maskp1, maskp2, maskp_end;
    vector<HMMFloat>::const_iterator lp = get_likelihoods_begin();
    vector<HMMFloat>::const_iterator mp = get_marginals_begin();

    prob=0.0F;

    if (get_use_deltas()) {

      if (get_use_delta_marginals()) {
        // Every feature, static or delta, has both a likelihood and a marginal
        for (maskp1=mv->begin(), maskp_end=mv->end(true); maskp1!=maskp_end; ++maskp1, ++lp, ++mp) {
          prob+= (*maskp1==0.0F)?*mp:*lp;
        }
      } else {
        // Deltas .... little bit complicated because non-deltas use likelihoods and marginals, deltas use likelihoods only
        // If a feature and its delta are both present then the likelihood term is the average of both

        // Calculate weights for likelihood terms, so that 0.5(delta+non_delta) is used when both are present
        vector<HMMFloat> weights(mv->size(), 0.0);

        vector<HMMFloat>::iterator weightp1, weightp2;

        weightp1=weights.begin();
        weightp2=weights.begin()+mv->non_delta_size();

        // maskp1/weightp1 walk the static half, maskp2/weightp2 the delta half
        for (maskp1=mv->begin(), maskp_end=maskp2=mv->end(false); maskp1!=maskp_end; ++maskp1, ++maskp2, ++weightp1, ++weightp2) {
          if ((*maskp1==1.0)&&(*maskp2==1.0))
            {*weightp1=0.5; *weightp2=0.5;}
          else if (*maskp1==1.0)
            *weightp1=1.0;
          else if (*maskp2==1.0)
            *weightp2=1.0;
        }

        // Non deltas - marginal and weighted likelihood
        for (maskp1=mv->begin(), maskp_end=mv->end(false), weightp1=weights.begin(); maskp1!=maskp_end; ++maskp1, ++lp, ++mp, ++weightp1) {
          prob+= (*maskp1==0.0F)?*mp:*lp * *weightp1;
        }

        // deltas - weighted likelihood only; maskp1, lp and weightp1 carry on
        // from the end of the static features
        for (maskp_end=mv->end(true); maskp1!=maskp_end; ++maskp1, ++lp, ++weightp1) {
          prob+= (*maskp1==0.0F)? 0.0:*lp * *weightp1;
        }

      }

    } else {
      // No Deltas
      for (maskp1=mv->begin(), maskp_end=mv->end(false); maskp1!=maskp_end; ++maskp1, ++lp, ++mp) {
        prob+= (*maskp1==0.0F)?*mp:*lp;
      }
    }

    clear_data_has_changed();
    clear_mask_has_changed();
  }

  return prob;
}



/******************************************************************************/
/*									      */
/*	CLASS NAME: HMMMixtureMultisourceSoft	              	       	      */
/*									      */
/******************************************************************************/

// MD probability calculation for Multisource decoding with soft mask.


// Probability for multisource decoding with a soft mask. The value is cached
// in 'prob' and only recomputed after the data or the mask has changed.
//
// For each static feature with mask weight w:
//   w == 0      -> add the marginal
//   w == 1      -> add the likelihood
//   otherwise   -> add logT8_add(likelihood + log_mask,
//                                marginal + log_one_minus_mask)
// Throws CTKError when delta features are in use (not implemented).
HMMFloat HMMMixtureMultisourceSoft::get_prob() const {

  // Per-feature terms depend on the data only
  if (get_data_has_changed()) {
    calculate_full_likelihood();
    calculate_full_marginal();
  }

  if (!get_data_has_changed() && !get_mask_has_changed())
    return prob;

  if (get_use_deltas()) {
    cerr << "Delta features not yet implemented for MultisourceSoft\n";
    throw(CTKError(__FILE__, __LINE__));
  }

  const shared_ptr<MaskVector> mv=get_mask();
  vector<HMMFloat>::const_iterator weight_it, weight_end;
  vector<HMMFloat>::const_iterator lik_it = get_likelihoods_begin();
  vector<HMMFloat>::const_iterator marg_it = get_marginals_begin();
  vector<HMMFloat>::const_iterator log_w_it = mv->log_mask_begin();
  vector<HMMFloat>::const_iterator log_1mw_it = mv->log_one_minus_mask_begin();
  prob=0.0F;

  // Static features
  weight_it=mv->begin();
  weight_end=mv->end(false);
  while (weight_it!=weight_end) {
    const HMMFloat w=*weight_it;

    if (w==0.0F) {
      prob+=*marg_it;
    } else if (w==1.0F) {
      prob+=*lik_it;
    } else {
      prob+=logT8_add(*lik_it+*log_w_it, *marg_it+*log_1mw_it) ;
    }

    ++weight_it;
    ++lik_it;
    ++marg_it;
    ++log_w_it;
    ++log_1mw_it;
  }

  clear_data_has_changed();
  clear_mask_has_changed();

  return prob;
}




/******************************************************************************/
/*									      */
/*	CLASS NAME: HMMState			         	       	      */
/*									      */
/******************************************************************************/


// Default constructor: a state with 0 gaussian mixtures, which acts as a
// non-emitting state. The cached probability starts at the most negative
// finite float, no max mixture is recorded (-1), and a fresh ID is assigned.
HMMState::HMMState(): mixture(), mix_weights(), voicing(0.0), max_duration(0) {
  global_next_state_ID+=1;
  state_ID = global_next_state_ID;

  max_mixture=-1;
  prob=-numeric_limits<float>::max();

  num_mixes=mixture.size();
}

// Construct a state from its mixture components, mixture weights, voicing
// value and maximum duration. The cached probability starts at the most
// negative finite float, no max mixture is recorded (-1), and a fresh ID is
// assigned. The mixture pointers are copied as given.
HMMState::HMMState(const vector<HMMMixture*> &amixture, const vector<HMMFloat> &amix_weights, float avoicing, int amax_duration): mixture(amixture), mix_weights(amix_weights), voicing(avoicing), max_duration(amax_duration) {
  global_next_state_ID+=1;
  state_ID = global_next_state_ID;

  max_mixture=-1;
  prob=-numeric_limits<float>::max();

  num_mixes=mixture.size();
}

// Nothing is released here: the destructor body is intentionally empty, so
// the mixture pointers held by the state are not deleted by it.
HMMState::~HMMState() {}


// Number of mixture slots in this state, as recorded at construction.
Integer HMMState::get_num_mixes() const
{
  return num_mixes;
}

// A state emits when it has at least one mixture slot.
bool HMMState::emits() const
{
  return 0<num_mixes;
}

// Calculate state probability based on component mixture probabilities
void HMMState::calc_prob(bool max_approx_param) const {

  prob=0.0;
  if (emits()==false) return;

  prob=-1e40; // very large negative number
  //  prob= mixture[0]->get_prob() + mix_weights[0];
  
  if (!max_approx_param) {
    for (Integer m=0; m<get_num_mixes(); ++m) {
      //      cerr << "LOG ADD " << prob << " " << mixture[m]->get_prob()+mix_weights[m];
      if (mixture[m]!=NULL)
	prob=log_add(prob, mixture[m]->get_prob()+mix_weights[m]);
      //      cerr << " " << prob << "\n";
    }
    
  } else {
    HMMFloat p;
    for (Integer m=0; m<get_num_mixes(); ++m) {
      if (mixture[m]!=NULL) {
	if ((p=(mixture[m]->get_prob()+mix_weights[m]))>prob) {
	  prob=p;
	}
      }
    }
  }

}


// Return the probability stored by the most recent calc_prob() call (or the
// construction-time value if calc_prob() has not been called).
HMMFloat HMMState::get_prob() const
{
  return prob;
}

// Find the max likelihood mixture and store index in max_mixture
void HMMState::store_max_mixture() const {

  max_mixture=0;
  HMMFloat p = -1e40, newp;
  
  if (emits()==true) {
    
    for (Integer m=0; m<get_num_mixes(); ++m) {
      if (mixture[m]!=NULL) {
	if ((newp=mixture[m]->get_prob())>p) {
	  p=newp;
	  max_mixture=m;  
	}
      }
    }
  }
  
}
  
// Two states are equal when voicing, max duration, number of mixes, the
// mixture pointers and the mixture weights all match. Mixtures are compared
// by pointer value, not by content. Checks run in that order and stop at the
// first mismatch.
bool HMMState::operator==(HMMState x) {
  return voicing==x.voicing
    && max_duration==x.max_duration
    && num_mixes==x.num_mixes
    && equal(mixture.begin(), mixture.end(), x.mixture.begin())
    && equal(mix_weights.begin(), mix_weights.end(), x.mix_weights.begin());
}

// Friend function for writing HMMState class details.
//
// Writes a "~s" macro header carrying the state ID, the max duration (only
// when non-zero), the number of mixes, and then for every non-NULL mixture
// its 1-based index, exp(mix weight), and a "~m" reference to the mixture ID.
ostream& operator<< (ostream& out, const HMMState& x) {
  out << "~s \"state" << x.get_ID() << "\"" << endl;

  if (x.max_duration!=0) {
    out << "<MaxDuration> " << x.max_duration << endl;
  }

  out << "<NumMixes> " << x.num_mixes << endl;

  for (int m=0; m<x.num_mixes; ++m) {
    if (x.mixture[m]==NULL)
      continue;
    out << "<Mixture> " << m+1 << " " << exp(x.mix_weights[m]) << endl;
    out << "~m \"mix" << x.mixture[m]->get_ID() << "\"" << endl ;
  }

  return out;
}

/******************************************************************************/
/*									      */
/*	CLASS NAME: Transitions		                      	       	      */
/*									      */
/******************************************************************************/

// Construct an n x n transition matrix via resize().
Transitions::Transitions(int n)
{
  this->resize(n);
}

// Return the stored value for the transition from state 'from' to state 'to'.
// No bounds checking is performed.
HMMFloat Transitions::get_transition_prob(unsigned int from, unsigned int to) const {
  const vector<HMMFloat> &from_row=probabilities[from];
  return from_row[to];
}

// Return a copy of one row of the matrix, i.e. all transitions out of state
// 'row'. No bounds checking is performed.
vector<HMMFloat> Transitions::get_row(unsigned int row) const {
  const vector<HMMFloat> &selected=probabilities[row];
  return selected;
}

// Store 'prob' as the value of the transition from 'from' to 'to'.
// Returns CTK_FAILURE, leaving the matrix untouched, when either index is out
// of range; CTK_SUCCESS otherwise.
CTKStatus Transitions::add_transition(unsigned int from, unsigned int to, HMMFloat prob) {
  if (from<probabilities.size() && to<probabilities[from].size()) {
    probabilities[from][to]=prob;
    return CTK_SUCCESS;
  }
  return CTK_FAILURE;
}

void Transitions::clear() {
  resize(0);
}

// Resize the matrix to n x n. Existing entries that remain in range keep
// their values; newly created entries are 0.
void Transitions::resize(int n) {
  probabilities.resize(n);

  int row=0;
  while (row<n) {
    probabilities[row].resize(n,0.0);
    ++row;
  }
}

// Number of rows in the transition matrix.
int Transitions::size() const
{
  const int n_rows=probabilities.size();
  return n_rows;
}

// Normalise so that exits probs from each state sum to 1.0
void Transitions::normalise() {

  int trans_size = probabilities.size();

  for (int i=0; i<trans_size; ++i) {
    vector<HMMFloat> &row=probabilities[i];
    HMMFloat total=accumulate(row.begin(), row.end(), 0.0);

    if (total!=0.0)
      transform(row.begin(), row.end(), row.begin(), bind2nd(divides<HMMFloat>(), total));

  }  

}



// Remove 'dead' interior states from the transition matrix.
//
// A state is considered dead when, ignoring its self-transition, either
//   - nothing transitions into it (its column sums to ~0), or
//   - it transitions to nothing (its row sums to ~0).
// Killing a state zeroes its outgoing row (first case) or incoming column
// (second case), which may in turn kill further states, so the search is
// repeated until a full pass finds nothing new. The first and last states
// (index 0 and size-1) are never candidates.
//
// Returns the indices of the removed states, in order of discovery, expressed
// in terms of the matrix as it was before removal.
// NOTE(review): multi_erase is defined elsewhere; it is presumed to erase the
// listed indices from the container - confirm it copes with unsorted indices.
vector<unsigned int> Transitions::rationalise() {

  vector<unsigned int> dead_states;

  // An empty matrix has nothing to prune. Without this guard the unsigned
  // expression size()-1 below wraps around and the search never terminates.
  if (probabilities.empty()) return dead_states;

  const unsigned int num_states=probabilities.size();

  bool from_state_deleted=true;
  bool to_state_deleted=true;

  while (from_state_deleted || to_state_deleted) {

    // Pass 1: find a live state with no incoming transitions
    from_state_deleted=false;
    for (unsigned int j=1; j<num_states-1; ++j) {
      if (find(dead_states.begin(), dead_states.end(), j)==dead_states.end()) {
        float prob=0.0;
        for (unsigned int i=0; i<num_states; ++i) {
          if (i==j) continue;           // self-transitions do not count
          prob+=probabilities[i][j];
        }
        if (prob<numeric_limits<float>::min()) {
          dead_states.push_back(j);
          from_state_deleted=true;
          // Unreachable state: remove all of its outgoing transitions
          for (unsigned int k=0; k<num_states; ++k)
            probabilities[j][k]=0.0;
          break;                        // restart the scan after every deletion
        }
      }
    }

    // Pass 2: find a live state with no outgoing transitions
    to_state_deleted=false;
    for (unsigned int i=1; i<num_states-1; ++i) {
      if (find(dead_states.begin(), dead_states.end(), i)==dead_states.end()) {
        float prob=0.0;
        for (unsigned int j=0; j<num_states; ++j) {
          if (i==j) continue;           // self-transitions do not count
          prob+=probabilities[i][j];
        }
        if (prob<numeric_limits<float>::min()) {
          dead_states.push_back(i);
          to_state_deleted=true;
          // Dead-end state: remove all transitions leading into it
          for (unsigned int k=0; k<num_states; ++k)
            probabilities[k][i]=0.0;
          break;                        // restart the scan after every deletion
        }
      }
    }
  }

  // Drop the dead rows, then the dead columns from each remaining row
  multi_erase(probabilities, dead_states);
  for (unsigned int i=0; i<probabilities.size(); ++i)
    multi_erase(probabilities[i], dead_states);

  return dead_states;
}

// Check that the transition matrix is well formed:
//   - every row except the last sums to 1.0 (within 1.0e-6) and contains no
//     negative values;
//   - the last row (transitions from the 'end' state) sums to exactly 0.0.
// An empty matrix is rejected. Returns CTK_SUCCESS or CTK_FAILURE; every
// failure is reported on cerr.
CTKStatus Transitions::validate() const {

  // Guard: end()-1 below is undefined for an empty matrix
  if (probabilities.empty()) {
    cerr << "HMM FILE ERROR: Transition probability matrix is empty." << endl;
    return CTK_FAILURE;
  }

  const vector<vector<HMMFloat> >::const_iterator last_row = probabilities.end()-1;

  for (vector<vector<HMMFloat> >::const_iterator tpp = probabilities.begin(); tpp!=last_row; ++tpp) {
    if (fabs(accumulate(tpp->begin(), tpp->end(), 0.0)-1.0)>1.0e-6) {
      cerr << "HMM FILE ERROR: State transitions probabilities do not sum to one." << endl;
      return CTK_FAILURE;
    }
    for (vector<HMMFloat>::const_iterator pp=tpp->begin(); pp!=tpp->end(); ++pp) {
      if (*pp<0.0) {
        cerr << "HMM FILE ERROR: -ve values in transition probability matrix" << endl;
        return CTK_FAILURE;
      }
    }
  }

  // Last row (transitions from 'end' state) should all be 0.0
  if (accumulate(last_row->begin(), last_row->end(), 0.0)!=0.0) {
    cerr << "HMM FILE ERROR: Transitions from the end state do not sum to zero." << endl;
    return CTK_FAILURE;
  }

  return CTK_SUCCESS;
}

// Friend function for writing a Transitions matrix.
// Writes a "<TransP> N" header followed by one line per row, every value
// being followed by a single space.
ostream& operator<< (ostream& out, const Transitions& x) {

  const int num_rows = x.probabilities.size();

  out << "<TransP> " << num_rows << endl;

  for (int r=0; r<num_rows; ++r) {
    const vector<HMMFloat> &row=x.probabilities[r];

    for (vector<HMMFloat>::const_iterator pp=row.begin(); pp!=row.end(); ++pp)
      out << *pp << " ";

    out << endl;
  }

  return out;
}

/******************************************************************************/
/*									      */
/*	CLASS NAME: HMM			                 	       	      */
/*									      */
/******************************************************************************/


// Construct an HMM from a list of state pointers, a transition matrix, the
// feature vector size and a name. The state pointers are copied, not the
// states themselves.
// num_states is taken from the constructor argument rather than from the
// 'states' member, so the result does not depend on the order in which the
// members are declared (and therefore initialised) in the class.
HMM::HMM(const vector<HMMState*> &astates, const Transitions &atrans, int avec_size, const string &aname):
  states(astates),
  pTrans(atrans),
  name(aname),
  num_states(astates.size()),
  vec_size(avec_size)
{}

// Construct an HMM as above, additionally passing 'filename' to the
// ReadOnceFile base class.
// num_states is taken from the constructor argument rather than from the
// 'states' member, so the result does not depend on the order in which the
// members are declared (and therefore initialised) in the class.
HMM::HMM(const vector<HMMState*> &astates, const Transitions &atrans, int avec_size, const string &aname, const string &filename):
  ReadOnceFile(filename),
  states(astates),
  pTrans(atrans),
  name(aname),
  num_states(astates.size()),
  vec_size(avec_size)
{}

// Destructor: performs no explicit clean-up; in particular the HMMState
// objects pointed to by 'states' are not deleted here.
HMM::~HMM() {}

// Number of states recorded for this HMM at construction time.
Integer HMM::get_num_states() const {
  const Integer count=num_states;
  return count;
}

// Return the state at position 'state_no' (0-based), or NULL when the index
// is outside [0, num_states). Negative indices are rejected as well as those
// that are too large.
HMMState* HMM::get_state(Integer state_no) const {
  if (state_no<0 || state_no>=num_states) return NULL;
  return states[state_no];
}


// Friend function for writing HMM class details.
// Emits a "~h" macro: the header line, one <State> entry per stored state
// (referencing the state by its "~s" ID) and the transition matrix.
// The written <NumStates> is the stored state count plus 2 and written state
// numbers start at 2.
ostream& operator<< (ostream& out, const HMM& x) {
  out << "~h \"" << x.name << "\"" << endl;
  out << "<BeginHMM>" << endl;

  out << "<NumStates> " << x.get_num_states() + 2
      << " <StreamInfo> 1 " << x.get_vec_size()
      << " <VecSize> " << x.get_vec_size() << endl;

  for (int s=0; s<x.num_states; ++s) {
    out << "<State> " << s+2 << endl;
    out << "~s \"state" << x.states[s]->get_ID() << "\"" << endl;
  }

  out << x.pTrans << endl;
  out << "<EndHMM>" << endl;

  return out;
}

/******************************************************************************/
/*									      */
/*	CLASS NAME: SetOfHMMs		                 	       	      */
/*									      */
/******************************************************************************/

extern string expand_environment_variables(const string &word);

// Load a set of HMMs from 'a_filename'.
//
//   a_filename         - file passed to the ReadOnceFile base; its type is
//                        detected with detect_HMM_file_type()
//   owner_blockname    - key used, together with the file name, for the
//                        file_table_fetch_entry/file_table_add_entry calls
//                        in the HTK15 list-file branch
//   a_name_label_map   - initial name -> label map; a default is built if empty
//   a_dictionary       - initial name -> list of HMM names; a default is built
//                        if empty
//   amixture_prototype - stored in mixture_prototype (deleted in the destructor)
//
// On failure error_status is set to CTK_FAILURE and construction stops early.
// NOTE(review): when ro_fp() is NULL the constructor returns with error_status
// still CTK_SUCCESS - confirm callers detect an unopened file some other way.
SetOfHMMs::SetOfHMMs(string a_filename, const string &owner_blockname, map<string, string> &a_name_label_map, map<string, list<string> > &a_dictionary, HMMMixture *amixture_prototype): ReadOnceFile(a_filename), name_label_map(a_name_label_map), dictionary(a_dictionary), mixture_prototype(amixture_prototype) {

  error_status=CTK_SUCCESS;
  
  char buffer[MAX_STRING_SIZE+1];
  
  // Build a "%<MAX_STRING_SIZE>s" format so that word reads are bounded by the buffer size
  sprintf(read_word_format_string,"%%%ds",MAX_STRING_SIZE);
 
  if (ro_fp()==NULL) return;

  HMM_list.resize(0);

  // Directory part of the file name; load_macro_file() prepends it to relative macro file names
  hmmpath=get_filepath(a_filename);
  

  int hmm_file_type = detect_HMM_file_type(ro_fp());
  
  if (hmm_file_type==HTK20_DEF_FILE) {
    // HTK_VERSION 2.0 onwards

    // Consume macros until read_macro() reports there is nothing more to read
    while (read_macro(ro_fp()));
    
  } else if (hmm_file_type==HTK20_LIST_FILE) {

    // Each word in the file names a macro file to be read in full
    char buffer1[MAX_STRING_SIZE+1];
    while (fscanf(ro_fp(), read_word_format_string ,buffer1)==1) {
      
      string hmmfullfilename=expand_environment_variables(buffer1);
      FILE *fp = fopen(hmmfullfilename.c_str(), "r");
      if (fp!=NULL) {
	while(read_macro(fp));
	fclose(fp);
      } else {
	// A listed file that cannot be opened aborts the load (no message is printed here)
	error_status=CTK_FAILURE;
	break;
      }
    }
    
  } else if (hmm_file_type==HTK15_DEF_FILE) {
    // HTK_VERSION 1.5
    
    fscanf(ro_fp(), read_word_format_string , buffer); // strip of #!MMF!# line
    
    HMMBuilder hmm_builder(this);
      
    // Each remaining word is treated as an HMM name followed by its definition
    while (fscanf(ro_fp(), read_word_format_string, buffer)==1) {
      
      string name=string(buffer);
      if (name==".") continue;
      
      // Remove quotes
      name.erase(remove(name.begin(), name.end(), '\"'), name.end());      
	
      //      HMM *hmmp = new HMM(this, ro_fp(), name, label);
      HMM *hmmp = hmm_builder.getHMMObject(ro_fp(), name);
      
      store_HMM(hmmp);
    }
  } else if (hmm_file_type==HTK15_LIST_FILE) {

    rewind(ro_fp());
    
    // Each line is "<hmm file> [<name>]"; lines with no words are skipped
    char line[MAX_STRING_SIZE+1];
    char buffer1[MAX_STRING_SIZE+1], buffer2[MAX_STRING_SIZE+1];
    while (fgets(line, MAX_STRING_SIZE, ro_fp())!=NULL) {
      int nwords = sscanf(line,"%s %s", buffer1, buffer2);
      if (nwords<1) continue;
      HMM *hmmp;
      
      string hmmfullfilename=expand_environment_variables(buffer1);
      hmmpath=get_filepath(hmmfullfilename); // i.e. path without the name
      
      string name;
      if (nwords==2) 
	name=string(buffer2);
      else {
	// If 'name' is not supplied then use the file name (minus the file path)
	name=get_filename(hmmfullfilename);  
	
      }
      
      // Reuse an HMM already registered for this file/owner pair; otherwise build it and register it
      if ((hmmp=(HMM*)file_table_fetch_entry(hmmfullfilename, owner_blockname))==NULL) {
	HMMBuilder hmm_builder(this);
	hmmp = hmm_builder.getHMMObject(hmmfullfilename, name);
	file_table_add_entry(hmmp, hmmfullfilename, owner_blockname);
      } 
      
      store_HMM(hmmp);      
    }
  } else if (hmm_file_type==HTK_LIST_FILE_ERROR) {
    cerr << "Error reading HMM file.\n";
    error_status=CTK_FAILURE;
  }

  fclose(ro_fp());

  if (error_status==CTK_FAILURE) return;

  // Fill in whichever of the name->label map and dictionary were not supplied.
  // The order matters: the map is derived from the dictionary if one exists,
  // otherwise from the loaded HMMs, and only then is a default dictionary
  // derived from the map.
  if (name_label_map.size()==0 && dictionary.size()!=0)
    make_default_name_label_map_from_dictionary();

  if (name_label_map.size()==0)
    make_default_name_label_map_from_HMMs();
  
  if (dictionary.size()==0)
    make_default_dictionary();

  // Keep only the HMMs that the dictionary actually refers to
  vector<HMM *> new_HMM_list = make_HMM_list_from_dictionary();

  //  cerr << "HMM_list size = " << HMM_list.size() << "\n";
  //  cerr << "HERE: new_HMM_list size = " << new_HMM_list.size() << "\n";
  replace_HMM_list(new_HMM_list);
  //  cerr << "HMM_list size = " << HMM_list.size() << "\n";

  
  /* Output dictionary and name_label_map for debugging
  cerr << "DICTIONARY:\n";
  for (map<string, list<string> >::const_iterator dp=dictionary.begin(); dp!=dictionary.end(); ++dp) {
    cerr << dp->first << " " << dp->second.size() << "\n";
  }

  cerr << "\nNAME->LABEL\n";
  for (map<string, string>::const_iterator mp=name_label_map.begin(); mp!=name_label_map.end(); ++mp) {
    cerr << mp->first << " -> " << mp->second << "\n";
  }
  */
  
  // Make list of possible grammar unit names and possible output labels
  make_name_and_label_lists();
  
  // remove duplicate states and mixtures from state and mixture lists
  remove_duplicates(state_list);
  remove_duplicates(mixture_list);

  
}

// Friend function for writing a complete SetOfHMMs.
// Output order: the "~o" global options line, then every mixture, every
// state and finally every HMM held in the respective lists.
ostream& operator<< (ostream& out, const SetOfHMMs& x) {

  // Write global variable line
  out << "~o <VECSIZE> " << x.get_vec_size() << " <DIAGC> <NULLD> <USER>" << endl;

  // Write mixtures
  const unsigned int num_mixtures=x.mixture_list.size();
  for (unsigned int m=0; m<num_mixtures; ++m) {
    out << *x.mixture_list[m];
  }

  // Write states
  const unsigned int num_states=x.state_list.size();
  for (unsigned int s=0; s<num_states; ++s) {
    out << *x.state_list[s];
  }

  // Write HMMs
  const unsigned int num_hmms=x.HMM_list.size();
  for (unsigned int h=0; h<num_hmms; ++h) {
    out << *x.HMM_list[h];
  }

  return out;

}


// Construct a sorted list of unique output labels and grammar unit names
void SetOfHMMs::make_name_and_label_lists() {
  
  labels.resize(0);
  names.resize(0);

  for (map<string, list<string> >::const_iterator dp=dictionary.begin(); dp!=dictionary.end(); ++dp) {
    names.push_back(dp->first);
    labels.push_back(name_label_map[dp->first]);
  }

  // Remove duplicates from label list
  remove_duplicates(names);
  remove_duplicates(labels);
}


// Construct default pronunciation dictory:
// Default dictionary maps every model logical name onto a single physical model of
// the same name
void SetOfHMMs::make_default_dictionary() {

  dictionary.clear();
  for (map<string, string>::const_iterator mp=name_label_map.begin(); mp!=name_label_map.end(); ++mp) {
    string logical = mp->first;
    list<string> physical;
    physical.push_back(logical);
    dictionary[logical]=physical;
  }

}

// Make a list of all HMMs used in the dictionary.
// Every HMM name appearing in any pronunciation is looked up with
// get_HMM_by_name(); names with no matching HMM are skipped. Duplicates are
// removed from the result with remove_duplicates().
vector<HMM *> SetOfHMMs::make_HMM_list_from_dictionary() {
  vector<HMM *> used_hmms;

  typedef map<string, list<string> >::const_iterator DictIt;
  typedef list<string>::const_iterator NameIt;

  for (DictIt entry=dictionary.begin(); entry!=dictionary.end(); ++entry) {
    for (NameIt namep=entry->second.begin(); namep!=entry->second.end(); ++namep) {
      HMM *found = get_HMM_by_name(*namep);
      if (found==NULL) continue;
      used_hmms.push_back(found);
    }
  }

  remove_duplicates(used_hmms);

  return used_hmms;
}

// Construct default name label map from dictionary if it exists
void SetOfHMMs::make_default_name_label_map_from_dictionary() {
  name_label_map.clear();
  for (map<string, list<string> >::const_iterator dp=dictionary.begin(); dp!=dictionary.end(); ++dp) {
    string logical = dp->first;
    name_label_map[logical]=logical;
  }
}

// Construct default name label map from list of HMM names
void SetOfHMMs::make_default_name_label_map_from_HMMs() {
  name_label_map.clear();
  for (HMMConstIt hmmpp=HMM_list.begin(); hmmpp!=HMM_list.end(); ++hmmpp) {
    string logical = (*hmmpp)->getName();
    name_label_map[logical]=logical;
  }
}

// Read-only access to the output label list built by make_name_and_label_lists().
const vector<string> &SetOfHMMs::get_label_list() const {
  return labels;
}
// Read-only access to the grammar unit name list built by make_name_and_label_lists().
const vector<string> &SetOfHMMs::get_name_list() const {
  return names;
}

// Store pointers to a newly built HMM and to its states and mixtures.
// The HMM is appended to HMM_list, each of its states to state_list and each
// non-NULL mixture of those states to mixture_list; duplicates are then
// removed from the state and mixture lists.
// NULL mixture slots are skipped, matching replace_HMM_list(): code that
// walks mixture_list (e.g. the stream output operator) dereferences every
// entry without a NULL check.
// NOTE(review): 'hmmp' itself is dereferenced unconditionally - confirm that
// the builders never hand back NULL.
void SetOfHMMs::store_HMM(HMM *hmmp) {
  HMM_list.push_back(hmmp);
  const Integer num_states=hmmp->get_num_states();

  for (int s=0; s<num_states; ++s) {
    HMMState* statep=hmmp->get_state(s);
    state_list.push_back(statep);
    const int num_mixes = statep->get_num_mixes();
    for (int m=0; m<num_mixes; ++m) {
      HMMMixture *mixp=statep->get_mixture(m);
      if (mixp!=NULL)
        mixture_list.push_back(mixp);
    }
  }

  // Make sure that there are no duplicates  // JON !!
  remove_duplicates(state_list);
  remove_duplicates(mixture_list);

}

// Destructor: releases the mixture prototype only.
//
// The individual HMMs in HMM_list are deliberately NOT deleted here; they
// are only deleted when a new SetOfHMMs tries to load them. This is part of
// the ReadOnceFile persistence mechanism.
SetOfHMMs::~SetOfHMMs() {
  //  cerr << "Destroying SetOfHMMs." << endl;
  delete mixture_prototype;
}

// Find the first HMM in HMM_list whose name equals 'name' (const overload).
// Returns NULL when there is no such HMM.
const HMM *SetOfHMMs::get_HMM_by_name(const string &name) const {
  for (HMMConstIt hmmpp=HMM_list.begin(); hmmpp!=HMM_list.end(); ++hmmpp) {
    if ((*hmmpp)->getName()!=name) continue;
    return *hmmpp;
  }
  return NULL;
}

// Find the first HMM in HMM_list whose name equals 'name' (non-const overload).
// Returns NULL when there is no such HMM.
HMM *SetOfHMMs::get_HMM_by_name(const string &name) {
  for (HMMIt hmmpp=HMM_list.begin(); hmmpp!=HMM_list.end(); ++hmmpp) {
    if ((*hmmpp)->getName()!=name) continue;
    return *hmmpp;
  }
  return NULL;
}



// Load every macro from 'macro_filename'.
// A name that does not start with '/' is taken relative to hmmpath. A file
// that has already been loaded (it is recorded in macro_list) is not read
// again.
// Returns true if the file was read now or on an earlier call, false if it
// could not be opened. Exceptions raised while reading a macro propagate to
// the caller; the file is closed first so that the handle is not leaked.
bool SetOfHMMs::load_macro_file(string macro_filename) {

  // If first char is not / then prepend the hmmpath
  if (macro_filename.empty() || macro_filename[0]!='/') {
    macro_filename=hmmpath+"/"+macro_filename;
  }

  if (find(macro_list.begin(), macro_list.end(), macro_filename)!=macro_list.end()) return true;

  FILE *fp=fopen(macro_filename.c_str(), "r");

  if (fp==NULL) return false;

  // READ MACRO FILE
  try {
    while (read_macro(fp));
  } catch (...) {
    fclose(fp);
    throw;
  }

  fclose(fp);

  // Only a file that was read through to the end is remembered as loaded
  macro_list.push_back(macro_filename);

  return true;
}

// Return the label associated with 'name' in name_label_map,
// or an empty string when the name is not in the map.
string SetOfHMMs::lookup_label(const string &name) const {
  const map<string, string>::const_iterator hit = name_label_map.find(name);
  if (hit==name_label_map.end())
    return string();
  return hit->second;
}

// Return the sequence of physical HMM names that the dictionary gives as the
// pronunciation of grammar unit 'name', or an empty list when the name is
// not in the dictionary.
list<string> SetOfHMMs::lookup_pronunciation(const string &name) const {
  const map<string, list<string> >::const_iterator hit=dictionary.find(name);
  if (hit==dictionary.end())
    return list<string>();
  return hit->second;
}

// Read one macro definition from 'fp' and store the result.
//
// The first word selects the macro type (either letter case is accepted):
//   ~h  HMM           - built and passed to store_HMM()
//   ~s  state         - stored in HMM_state_macros under the macro name
//   ~m  mixture       - stored in HMM_mixture_macros under the macro name
//   ~t  transitions   - stored in HMM_transition_macros under the macro name
//   ~v  variance      - stored in HMM_variance_macros under the macro name
//   ~o  global options
// Returns true if a macro was read, false if a word could not be read (e.g.
// at end of file). An unknown keyword is reported through error_in_HMM_file();
// an incomplete mixture definition throws CTKError.
// NOTE(review): macro names are stored exactly as read, i.e. including any
// surrounding quotes, except for ~h where quotes are stripped.
bool SetOfHMMs::read_macro(FILE *fp) {
  char buffer1[MAX_STRING_SIZE+1];
  char buffer2[MAX_STRING_SIZE+1];

  if (fscanf(fp, read_word_format_string, buffer1)!=1) return false;

  if (strcmp(buffer1,"~h")==0 || strcmp(buffer1,"~H")==0) {
    if (fscanf(fp, read_word_format_string, buffer2)!=1) return false;

    string name=string(buffer2);
    // Remove quotes
    name.erase(remove(name.begin(), name.end(), '\"'), name.end());

    //      HMM *hmmp = new HMM(this, ro_fp(), name, label);
    HMMBuilder hmm_builder(this);
    HMM *hmmp = hmm_builder.getHMMObject(fp, name);

    store_HMM(hmmp);
  } else if (strcmp(buffer1,"~s")==0 || strcmp(buffer1,"~S")==0) {
    if (fscanf(fp, read_word_format_string, buffer2)!=1) return false;
    HMMStateBuilder state_builder(this, true);     // last parameter signifies that we are in a macro def
      //    HMMState *sp = new HMMState(this, fp, vec_size, true);   // last parameter signifies that we are in a macro def
    HMMState *sp = state_builder.getHMMStateObject(fp);
    HMM_state_macros[buffer2]=sp;
  } else if (strcmp(buffer1,"~m")==0 || strcmp(buffer1,"~M")==0) {
    if (fscanf(fp, read_word_format_string, buffer2)!=1) return false;
    HMMMixtureBuilder mixture_builder(this, true);     // last parameter signifies that we are in a macro def
    HMMMixture *mp = mixture_builder.getHMMMixtureObject(fp);
    if (mp==NULL) {
      cerr << "Incomplete mixture macro definition for mixture ~m=" << buffer2 << "\n";
      throw(CTKError(__FILE__, __LINE__));
    }
    HMM_mixture_macros[buffer2]=mp;
  } else if (strcmp(buffer1,"~t")==0 || strcmp(buffer1,"~T")==0) {
    char buffer3[MAX_STRING_SIZE+1];
    if (fscanf(fp, read_word_format_string, buffer2)!=1) return false;
    // Discard redundant TransP tag. Like every other read in this function,
    // a failed read ends the macro instead of carrying on with a truncated file.
    if (fscanf(fp, read_word_format_string, buffer3)!=1) return false;
    HMMTransitionBuilder transition_builder(this, true);     // last parameter signifies that we are in a macro def
    Transitions trans = transition_builder.getHMMTransitionObject(fp);
    HMM_transition_macros[buffer2]=trans;
  } else if (strcmp(buffer1,"~v")==0 || strcmp(buffer1,"~V")==0) {
    if (fscanf(fp, read_word_format_string, buffer2)!=1) return false;
    HMMVarianceBuilder variance_builder(this, true);     // last parameter signifies that we are in a macro def
    vector<HMMFloat> *vp = variance_builder.getHMMVarianceObject(fp);
    HMM_variance_macros[buffer2]=vp;

  } else if (strcmp(buffer1,"~o")==0 || strcmp(buffer1,"~O")==0) {
    // Global options carry no macro name
    HMMGlobalBuilder global_builder(this, true);     // last parameter signifies that we are in a macro def
    global_builder.getHMMGlobalObject(fp);
  } else {
    // Unknown keyword
    error_in_HMM_file(fp);
    error_status=CTK_FAILURE;
    return false;
  }

  return true;

}

  
// Report an unrecognised keyword in an HMM file and throw CTKError.
// The word printed is the next word read from 'fp'; if no word can be read
// (e.g. at end of file) an empty keyword is printed instead of the contents
// of an uninitialised buffer.
void SetOfHMMs::error_in_HMM_file(FILE *fp) const {
  char buffer[MAX_STRING_SIZE+1];
  buffer[0]='\0';

  if (fscanf(fp, read_word_format_string, buffer)!=1)
    buffer[0]='\0';

  cerr << "HMM::read_file unknown keyword " << buffer << endl;
  throw(CTKError(__FILE__, __LINE__));
}

// Number of HMMs currently held in HMM_list.
Integer SetOfHMMs::get_num_HMMs() const {
  const Integer count=HMM_list.size();
  return count;
}

// Sum of get_num_states() over every HMM in HMM_list. States shared between
// HMMs are counted once for each HMM that uses them.
Integer SetOfHMMs::get_total_num_states() const {
  Integer running_total=0;
  for (unsigned int h=0; h<HMM_list.size(); ++h) {
    running_total+=HMM_list[h]->get_num_states();
  }
  return running_total;
}

// Feature vector size of the set, taken from the first HMM in HMM_list.
// Returns 0 when the list is empty or its first entry is NULL.
Integer SetOfHMMs::get_vec_size() const {
  // begin() of an empty vector must not be dereferenced
  if (HMM_list.empty() || HMM_list.front()==NULL) return 0;
  return HMM_list.front()->get_vec_size();
}

// Write a four-line summary of the set to 'outfile': number of HMMs, total
// and unique state counts, feature vector size and number of mixtures.
// The vector size is taken from the first HMM and reported as 0 when the set
// holds no HMMs (or the first entry is NULL).
void SetOfHMMs::display(ostream &outfile) {
  Integer vec_size=0;
  if (!HMM_list.empty() && HMM_list.front()!=NULL)
    vec_size=HMM_list.front()->get_vec_size();

  outfile << "num_HMMs = " << HMM_list.size() << endl;
  outfile << "num_states = " << get_total_num_states() << " ("<<state_list.size() <<" unique states)" << endl;
  outfile << "vec_size = " << vec_size << endl;
  outfile << "num_dist = " << mixture_list.size() << endl;
}


// Hand the missing data mask to every non-NULL mixture in mixture_list.
void SetOfHMMs::set_missing_data_mask(shared_ptr<MaskVector> mask_vector) {
  for (HMMMixtureIt mpp=mixture_list.begin(); mpp!=mixture_list.end(); ++mpp) {
    if (*mpp==NULL) continue;
    (*mpp)->set_missing_data_mask(mask_vector);
  }
}

// Hand the observed feature vector to every non-NULL mixture in mixture_list.
void SetOfHMMs::set_observed_data(shared_ptr<FeatureVector> feature_vector) {
  for (HMMMixtureIt mpp=mixture_list.begin(); mpp!=mixture_list.end(); ++mpp) {
    if (*mpp==NULL) continue;
    (*mpp)->set_observed_data(feature_vector);
  }
}

// Hand the parallel mixture to every non-NULL mixture in mixture_list.
void SetOfHMMs::set_parallel_mixture(shared_ptr<HMMMixture> parallel_mixture) {
  for (HMMMixtureIt mpp=mixture_list.begin(); mpp!=mixture_list.end(); ++mpp) {
    if (*mpp==NULL) continue;
    (*mpp)->set_parallel_mixture(parallel_mixture);
  }
}

void SetOfHMMs::set_use_marginals(bool use_marginals) {
  HMMMixtureIt mix_end=mixture_list.end();
  for (HMMMixtureIt mpp=mixture_list.begin(); mpp!=mix_end; ++mpp) {
    if (*mpp!=NULL)
      (*mpp)->set_use_marginals(use_marginals);
  }
}
void SetOfHMMs::set_use_delta_marginals(bool use_delta_marginals) {
  HMMMixtureIt mix_end=mixture_list.end();
  for (HMMMixtureIt mpp=mixture_list.begin(); mpp!=mix_end; ++mpp) {
    if (*mpp!=NULL)
      (*mpp)->set_use_delta_marginals(use_delta_marginals);
  }
}

void SetOfHMMs::set_use_deltas(bool use_deltas) {
  HMMMixtureIt mix_end=mixture_list.end();
  for (HMMMixtureIt mpp=mixture_list.begin(); mpp!=mix_end; ++mpp) {
    if (*mpp!=NULL)
      (*mpp)->set_use_deltas(use_deltas);
  }
}


// Fill 'likelihoods' with get_prob() of every state of every HMM, visiting
// the HMMs in HMM_list order and the states of each HMM in index order.
// Any previous content of 'likelihoods' is discarded.
void SetOfHMMs::construct_likelihood_vector(vector<Float> &likelihoods) {

  likelihoods.clear();

  for (unsigned int h=0; h<HMM_list.size(); ++h) {
    HMM *model=HMM_list[h];
    const int state_count=model->get_num_states();
    for (int s=0; s<state_count; ++s) {
      likelihoods.push_back(model->get_state(s)->get_prob());
    }
  }
}

void SetOfHMMs::set_likelihoods(const vector<Float> &likelihoods) {

  vector<Float>::const_iterator lp=likelihoods.begin();
  
  for (HMMIt hmmpp=HMM_list.begin(); hmmpp!=HMM_list.end(); ++hmmpp) { 
    HMM *hmmp=*hmmpp;
    int num_states=hmmp->get_num_states();
    for (int s=0; s<num_states; ++s) {
      (*hmmp).get_state(s)->set_prob(*lp++);
    }
  }
}

// Construct the vector of max likelihood mixture indices, one entry for each
// state of each HMM.
// Every state in state_list is first asked to store_max_mixture(); the values
// of get_max_mixture() are then collected in HMM_list order, state index
// order. Any previous content of 'winning_mixture' is discarded.
void SetOfHMMs::construct_winning_mixture_vector(vector<Float> &winning_mixture) {

  for (unsigned int i=0; i<state_list.size(); ++i) {
    state_list[i]->store_max_mixture();
  }

  winning_mixture.clear();

  for (unsigned int h=0; h<HMM_list.size(); ++h) {
    HMM *model=HMM_list[h];
    const int state_count=model->get_num_states();
    for (int s=0; s<state_count; ++s) {
      winning_mixture.push_back(model->get_state(s)->get_max_mixture());
    }
  }
}

// Ask every state in state_list to compute its probability, passing
// 'max_mixtures_flag' through to HMMState::calc_prob().
void SetOfHMMs::calc_prob(bool max_mixtures_flag) {
  for (unsigned int i=0; i<state_list.size(); ++i) {
    state_list[i]->calc_prob(max_mixtures_flag);
  }
}



// Try and work out what sort of HMM file this is:
// Either 1)  An HTK v1.5 MMF file                              (HTK15_DEF_FILE)
//        2)  A list of HMM definitions compatible with HTK 1.5 (HTK15_LIST_FILE)
//        3)  An HTK v2.0 and later macro file                  (HTK20_DEF_FILE)
//        4)  A list of macro files compatible with HTK 2.0 and later
//                                                              (HTK20_LIST_FILE)
//
// The decision is based on the first word of the file and, for list files,
// on the first word of the first file listed. The stream is rewound before
// returning. HTK_LIST_FILE_ERROR is returned when the file holds no word at
// all or when its first word is neither a recognised tag nor the name of a
// file that can be opened.
int SetOfHMMs::detect_HMM_file_type(FILE *ro_fp) {
  char buffer[MAX_STRING_SIZE+1];

  buffer[0]='\0';
  const int nread=fscanf(ro_fp, read_word_format_string, buffer);
  rewind(ro_fp);

  // An empty file has no first word to classify
  if (nread!=1)
    return HTK_LIST_FILE_ERROR;

  // If the file starts with #!MMF!# then it is an HTK1.5 definition file
  if (strcmp(buffer,"#!MMF!#")==0)
    return HTK15_DEF_FILE;

  string token(buffer);
  make_upper(token);

  // If the file starts with ~X or BEGINHMM then it is probably a definition file
  if ((buffer[0]=='~' && strlen(buffer)==2) || token=="<BEGINHMM>")
    return HTK20_DEF_FILE;


  // Is the first word a valid filename?
  string hmmfullfilename=expand_environment_variables(buffer);
  FILE *fp=fopen(hmmfullfilename.c_str(), "r");

  if (fp==NULL)
    return HTK_LIST_FILE_ERROR;


  // This is a file list, but what type? HTK1.5 (a list of anonymous HMM defs)
  //   HTK2.0 (a list of general macro files)
  // Look at the contents of the first file listed to find out...
  bool macro_def=false;
  bool hmm_def=false;

  // Start from an empty word so that a failed read cannot be mistaken for
  // the previous buffer contents (the file name)
  buffer[0]='\0';
  if (fscanf(fp, read_word_format_string, buffer)!=1)
    buffer[0]='\0';
  fclose(fp);

  if (buffer[0]=='~')
    macro_def=true;

  string token2(buffer);
  make_upper(token2);
  if (token2=="<BEGINHMM>")
    hmm_def=true;

  // File started with a macro definition - therefore this is HTK2.0 or later
  if (macro_def==true)
    return HTK20_LIST_FILE;

  // File started with an HMM definition (with no ~h) - therefore this is an HTK1.5 list
  if (hmm_def==true)
    return HTK15_LIST_FILE;

  return HTK20_LIST_FILE;  // File is dodgy

}


// Apply a transformation to a set of HMMs
void SetOfHMMs::edit_HMMs(const HMMEdit *edit) {
  vector<HMM *> new_HMM_list;
  for (HMMIt hmmpp=HMM_list.begin(); hmmpp!=HMM_list.end(); ++hmmpp) {
    string name = (*hmmpp)->getName();
    cerr << "NAME: " << name << "\n";
    new_HMM_list.push_back(edit->edit(**hmmpp));
  }

  // Replace the HMM list - taking care of underlying state and mixture lists
  replace_HMM_list(new_HMM_list);

  delete edit;
}




// Replace the HMM list with 'new_HMM_list', taking care of the underlying
// state and mixture lists.
// Fresh state and mixture lists are gathered from the new HMMs (NULL mixture
// slots are skipped), de-duplicated, and then swapped in with replace_list()
// in the order: mixtures, states, HMMs.
// NOTE(review): replace_list is defined elsewhere; per the original comment
// it deletes anything in the old list that is no longer used.
void SetOfHMMs::replace_HMM_list(vector<HMM *> &new_HMM_list) {

  vector<HMMState *> gathered_states;
  vector<HMMMixture *> gathered_mixtures;

  // Construct new state and mixture lists from the new HMM list
  for (unsigned int h=0; h<new_HMM_list.size(); ++h) {
    HMM *model=new_HMM_list[h];
    for (int s=0; s<model->get_num_states(); ++s) {
      HMMState *state=model->get_state(s);
      gathered_states.push_back(state);

      for (int m=0; m<state->get_num_mixes(); ++m) {
        HMMMixture *mix=state->get_mixture(m);
        if (mix==NULL) continue;
        gathered_mixtures.push_back(mix);
      }
    }
  }

  // Make sure that there are no duplicates
  remove_duplicates(gathered_states);
  remove_duplicates(gathered_mixtures);

  // Replace old lists with new lists - delete any things in the old lists that are no longer used
  replace_list(mixture_list, gathered_mixtures);
  replace_list(state_list, gathered_states);
  replace_list(HMM_list, new_HMM_list);
}


/******************************************************************************/
// Non class methods

// ----- Various distance metrics -----

// Return the Kullback-Leiber distance between two mixture components
float distance_KL(const HMMMixture &mix1, const HMMMixture &mix2) {

  const vector<HMMFloat> &mu1=mix1.get_mu();
  const vector<HMMFloat> &mu2=mix2.get_mu();
  const vector<HMMFloat> &ivar1=mix1.get_ivar();
  const vector<HMMFloat> &ivar2=mix2.get_ivar();

  float distance=0.0;
  for (vector<HMMFloat>::const_iterator mu1p=mu1.begin(), mu2p=mu2.begin(), mu1p_end=mu1.end(), ivar1p=ivar1.begin(), ivar2p=ivar2.begin(); mu1p!=mu1p_end; ++mu1p, ++mu2p, ++ivar1p, ++ivar2p) {
    float diff=*mu1p-*mu2p;
    float ivar = *ivar1p + *ivar2p;
    distance+=(diff * diff * ivar) + *ivar1p/ *ivar2p + *ivar2p/ *ivar1p - 2;
  }
  distance=distance/2.0;
  
  return distance;
  
}

// Return the Bhattacharyya distance between two mixture components
float distance_BHA(const HMMMixture &mix1, const HMMMixture &mix2) {

  const vector<HMMFloat> &mu1=mix1.get_mu();
  const vector<HMMFloat> &mu2=mix2.get_mu();
  const vector<HMMFloat> &ivar1=mix1.get_ivar();
  const vector<HMMFloat> &ivar2=mix2.get_ivar();

  double distance=0.0;
  double logdetvarsum=0.0, logdetvarprod=0.0;
  for (vector<HMMFloat>::const_iterator mu1p=mu1.begin(), mu2p=mu2.begin(), mu1p_end=mu1.end(), ivar1p=ivar1.begin(), ivar2p=ivar2.begin(); mu1p!=mu1p_end; ++mu1p, ++mu2p, ++ivar1p, ++ivar2p) {
    double diff=*mu1p-*mu2p;
    double varsum = 1.0/ *ivar1p + 1.0/ *ivar2p;
    double varprod = 1.0/(*ivar1p * *ivar2p);
    logdetvarsum+=log(varsum);
    logdetvarprod+=log(varprod);
    distance+=(diff * diff * 1.0/varsum);
  }

  distance=distance/4.0+0.5*(logdetvarsum-mu1.size()*log(2.0)-0.5*logdetvarprod);

  
  return distance;

}

// Return the Mahalanobis distance between two mixture components
float distance_MAH(const HMMMixture &mix1, const HMMMixture &mix2) {

  const vector<HMMFloat> &mu1=mix1.get_mu();
  const vector<HMMFloat> &mu2=mix2.get_mu();
  const vector<HMMFloat> &ivar1=mix1.get_ivar();
  const vector<HMMFloat> &ivar2=mix2.get_ivar();

  float distance=0.0;
  for (vector<HMMFloat>::const_iterator mu1p=mu1.begin(), mu2p=mu2.begin(), mu1p_end=mu1.end(), ivar1p=ivar1.begin(), ivar2p=ivar2.begin(); mu1p!=mu1p_end; ++mu1p, ++mu2p, ++ivar1p, ++ivar2p) {
    float diff=*mu1p-*mu2p;
    distance+=(diff * diff * *ivar1p * *ivar2p);
  }
  distance=distance/mu1.size();
  
  return distance;
}

// Return the Euclidian distance between two mixture components - assumes they have the same size
float distance_EUC(const HMMMixture &mix1, const HMMMixture &mix2) {

  const vector<HMMFloat> &mu1=mix1.get_mu();
  const vector<HMMFloat> &mu2=mix2.get_mu();

  float distance=0.0;
  for (vector<HMMFloat>::const_iterator mu1p=mu1.begin(), mu2p=mu2.begin(), mu1p_end=mu1.end(); mu1p!=mu1p_end; ++mu1p, ++mu2p) {
    float diff=*mu1p-*mu2p;
    distance+=(diff*diff);
  }
  
  return distance;
}

//
// 
//
//

/* End of ctk_HMM.cpp */
