/******************************************************************************/
/*									      */
/*	ctk_HMM_decoder.cpp	 	       			              */
/*									      */
/*	Block for HMM decoding                                                */
/*									      */
/*	Author: Jon Barker, Sheffield University			      */
/*									      */
/*      CTK VERSION 1.3.5  Apr 22, 2007		         	      */
/*									      */
/******************************************************************************/
 
#include "ctk-config.h"

#include <cmath>
#include <vector>
#include <algorithm>
#include <numeric>
#include <fstream>
#include <sstream>

#include "ctk_local.hh"
#include "ctk_error.hh"

#include "ctk_dsp.hh"

#include "ctk_function_classes.hh"
#include "ctk_param.hh"
#include "ctk_socket.hh"
#include "ctk_data_descriptor.hh"

#include "ctk_reco.hh"
#include "ctk_decoder.hh"
#include "ctk_HMM_decoder.hh"
#include "ctk_feature_vector.hh"


const Boolean PARAM_DEFAULT_DISPLAY_GROUPS          = false;    // Default for DISPLAY_GROUPS: if true, the group mask is displayed after decoding
const char *PARAM_DEFAULT_GRAMMAR_FORMAT           = "EBNF";   // Default format for the grammar file (must be one of VALID_GRAMMAR_FORMATS)
const char *PARAM_DEFAULT_MASK_OUTPUT_FILENAME = "\0";   // Default output mask file name (empty, i.e. no mask file is written)

// Grammar formats accepted by the GRAMMAR_FORMAT parameter; the list ends with an empty-string entry.
static const char *VALID_GRAMMAR_FORMATS[] = {"SLF",           // Standard Lattice Format - i.e. HTK v2.0 onwards
					      "EBNF",          // Extended Backus-Naur Form grammar - i.e. HTK v1.5
					      "\0"};  

/******************************************************************************/
/*                                                                            */
/*       CLASS NAME: HMMDecoderBlock                                          */
/*                                                                            */
/******************************************************************************/

const int PARTIAL_TRACEBACK_PERIOD = 40;      // Number of frames processed between successive partial tracebacks (see process_frame)


// Construct an HMM decoder block and register all of its parameters.
//
//   a_name                 - block name, forwarded to CTKObject and Block
//   a_type                 - block type, forwarded to Block
//   performs_probability   - true if this block computes state probabilities
//                            from feature data itself; in that case the
//                            MAX_APPROX, HAS_DELTAS and USE_DELTAS parameters
//                            are registered too.
//
// Every pointer and flag that reset() or close() inspects is given a defined
// initial value here, so that no member is read before it has been written.
HMMDecoderBlock::HMMDecoderBlock(const string &a_name, const string &a_type, bool performs_probability):CTKObject(a_name),Block(a_name, a_type) {

  decoder_performs_probability_calculation=performs_probability;

  // Decoder, HMM sets and the compiled hypothesis filter do not exist yet.
  // noise_hmms in particular is compared against NULL in reset() even when
  // no noise model file has been configured.
  decoder=NULL;
  hmms=NULL;
  noise_hmms=NULL;
  filter=NULL;

  logfile=NULL;
  logfile2=NULL;
  timitfile=NULL;

  // Flags that reset() reads unconditionally, or that are otherwise only
  // written through set_use_bounds()/set_use_delta_bounds().
  use_bounds=false;
  use_delta_bounds=false;
  max_mixtures=false;
  has_deltas=false;
  use_deltas=false;

  // These parameters only exist if the block performs probability calculation
  max_mixtures_param=NULL;
  has_deltas_param=NULL;
  use_deltas_param=NULL;

  // ----- Set up probability calculation parameters -----

  if (decoder_performs_probability_calculation) {

    // Set up MAX_APPROX parameter
    max_mixtures_param = new ParamBool("MAX_APPROX",0);
    max_mixtures_param->set_helptext("If set ON then the sum of the probabilities of the Gaussian mixtures is approximated by the probability of the most probable mixture.<p> This is usually a good approximation and can speed up recognition performace without adversely effecting recognition performance.");
    parameters->register_parameter(max_mixtures_param);

    // Set up HAS_DELTAS parameter
    has_deltas_param = new ParamBool("HAS_DELTAS",0);
    has_deltas_param->set_helptext("If set ON then the input feature vector is split into two halves and the 2nd half (i.e. the half with larger indices) is consider to represent the delta features derived from the lower half. <p> It is important to set this parameter correctly when using decoders that treat delta features and non-deltas features differently.");
    parameters->register_parameter(has_deltas_param);

    // Set up USE_DELTAS parameter
    use_deltas_param = new ParamBool("USE_DELTAS");
    use_deltas_param->set_helptext("If set ON then the decoder will employ the delta features, else they will be ignored.<p> This parameter will default to ON if HAS_DELTAS is ON i.e. it is only necessary to set it explicitly if the representation has deltas but you want to ignore them.");
    parameters->register_parameter(use_deltas_param);
  }

  // ----- Set up HMM decoder parameters -----

  // Set up LOG_FILE parameter
  log_file_param = new ParamString("LOG_FILE");
  log_file_param->set_helptext("The name of the file to which to write hypothesised transcription. <p> If this is left unset or is set to an empty string then results will be written to the terminal.");
  parameters->register_parameter(log_file_param);

  // Set up LOG_FILE_2 parameter
  log_file2_param = new ParamString("LOG_FILE_2");
  log_file2_param->set_helptext("A second file to which more detailed information may be written. i.e. word boundaries, state alignments etc. <p> If this is left unset or is set to an empty string then results will be written to the terminal.");
  parameters->register_parameter(log_file2_param);

  // Set up TIMIT_OUTPUT_FILE parameter
  timitfile_param = new ParamString("TIMIT_OUTPUT_FILE");
  timitfile_param->set_helptext("A file for recording the recognition hypothesis in TIMIT format.");
  parameters->register_parameter(timitfile_param);

  // Set up SAMPLES_PER_FRAME parameter
  samples_per_frame_param = new ParamInt("SAMPLES_PER_FRAME",1);
  samples_per_frame_param->set_helptext("Number of signal samples corresponding to a single feature frame. This only needs to be set if a correct sample-aligned TIMIT output transcription is required.");
  samples_per_frame_param->install_validator(new Validator(Validator::VLOWER, 1.0));
  parameters->register_parameter(samples_per_frame_param);

  // Set up WORD_PENALTY parameter
  word_creation_penalty_param = new ParamFloat("WORD_PENALTY",0.0);
  word_creation_penalty_param->set_helptext("Sets a penalty imposed on tokens every time they reach a word boundary. <p> Setting this greater than 0.0 reduces Insertions and increases Deletions. <p> This provides a simple way of tuning a system that has a large Insertion/Deletion imbalance.");
  parameters->register_parameter(word_creation_penalty_param);

  // Set up PRUNING_BEAMWIDTH parameter
  pruning_beamwidth_param = new ParamFloat("PRUNING_BEAMWIDTH",0.0);
  pruning_beamwidth_param->set_helptext("Sets the size of the pruning beam width in unit of logprobability. <p> Note, setting the width to 0 (the default value) will turn pruning OFF<p>");
  pruning_beamwidth_param->install_validator(new Validator(Validator::VLOWER, 0.0));
  parameters->register_parameter(pruning_beamwidth_param);

  // Set up HMM_FILE parameter
  hmm_file_param = new ParamString("HMM_FILE");
  hmm_file_param->set_helptext("The name of the file where the list of HMM description files is stored.");
  parameters->register_parameter(hmm_file_param);

  // Set up NOISE_HMM_FILE parameter
  noise_hmm_file_param = new ParamString("NOISE_HMM_FILE");
  noise_hmm_file_param->set_helptext("The name of the file where the list of noise HMM description files is stored.");
  parameters->register_parameter(noise_hmm_file_param, CTK_PARAM_TYPE_HIDDEN);

  // Set up GRAMMAR_FORMAT parameter
  grammar_format_param = new ParamEnumerated("GRAMMAR_FORMAT", VALID_GRAMMAR_FORMATS, PARAM_DEFAULT_GRAMMAR_FORMAT);
  grammar_format_param->set_helptext("Format of the grammar file. Either Standard Lattice File (SLF) or Extended Backus-Naur Form (EBNF).");
  parameters->register_parameter(grammar_format_param);

  // Set up GRAMMAR_FILE parameter
  grammar_filename_param = new ParamString("GRAMMAR_FILE");
  grammar_filename_param->set_helptext("The name of the file where the grammar is stored.");
  parameters->register_parameter(grammar_filename_param);

  // Set up NOISE_GRAMMAR_FILE parameter
  noise_grammar_filename_param = new ParamString("NOISE_GRAMMAR_FILE");
  noise_grammar_filename_param->set_helptext("The name of the file where the noise grammar is stored.");
  parameters->register_parameter(noise_grammar_filename_param, CTK_PARAM_TYPE_HIDDEN);

  // Set up LABEL_FILE parameter
  label_file_param = new ParamString("LABEL_FILE");
  label_file_param->set_helptext("A string pointing to the label file which associates model names with model labels");
  parameters->register_parameter(label_file_param);

  // Set up DICTIONARY parameter
  dictionary_param = new ParamString("DICTIONARY");
  dictionary_param->set_helptext("A string pointing to the pronunciation dictionary file");
  parameters->register_parameter(dictionary_param);

  // Set up FIRST_TOKEN parameter
  first_token_param = new ParamString("FIRST_TOKEN");
  first_token_param->set_helptext("If this parameter is set to the label of an HMM then the decoding if forced to start with this HMM."); 
  parameters->register_parameter(first_token_param);

  // Set up FINAL_TOKEN parameter
  final_token_param = new ParamString("FINAL_TOKEN");
  final_token_param->set_helptext("If this parameter is set to the label of an HMM then the decoding if forced to end with this HMM.");
  parameters->register_parameter(final_token_param);

  // Set up TRANSCRIPTION parameter
  transcription_param = new ParamString("TRANSCRIPTION","\0");
  transcription_param->set_helptext("A string composed of the sequence of HMM character labels representing the correct decoding for the input sequence being decoded. <p> This parameter only needs to be set if the user wishes to calculate recognition performance statistics on the recognition output.");
  parameters->register_parameter(transcription_param);

  // Set up SILENCE parameter
  silence_param = new ParamString("SILENCE","\0");
  silence_param->set_helptext("A string composed of a space seprated list of HMM output labels for all HMMs consider to be silence for the purposes of recognition statistics. <p> These labels are removed from both the reference transcription and the recognition hypothesis before scoring is performed.");
  parameters->register_parameter(silence_param);

  // Set up NBEST parameter
  num_hyp_param = new ParamInt("NBEST",1);
  num_hyp_param->set_helptext("The N-best number of hypotheses to output. <p> By default only the 1-best is displayed. <p> The N-best list is calculated using a fast but approximate word-lattice technique.");
  num_hyp_param->install_validator(new Validator(Validator::VLOWER, 1.0));
  parameters->register_parameter(num_hyp_param);

  // Set up STATE_PATH parameter
  state_path_param = new ParamBool("STATE_PATH",0);
  state_path_param->set_helptext("Record state path through models - this requires a small computational overhead and should be turned off in not required.");
  parameters->register_parameter(state_path_param);

  // Set up USE_POSTERIORS parameter
  use_posteriors_param = new ParamBool("USE_POSTERIORS",0);
  use_posteriors_param->set_helptext("If set ON then segregation hypotheses will be merged based on a 'posterior probability' normalisation, else the older mean-likelihood normalisation will be used.");
  parameters->register_parameter(use_posteriors_param, CTK_PARAM_TYPE_HIDDEN);

  // Set up HYPOTHESIS_FILTER parameter
  // NOTE(review): the second argument is a literal 0 here, whereas the other
  // string parameters pass "\0" - confirm which ParamString constructor this
  // selects and that it leaves the parameter unset.
  hypothesis_filter_param = new ParamString("HYPOTHESIS_FILTER",0);
  hypothesis_filter_param->set_helptext("A regular expression that can be supplied to filter decoding hypotheses from an N-best list. i.e. the decoder will  search down the N-best list for the highest scoring decoding whos labels are not matched by the filter. If no valid decodings are found in the top 50-best, then the top result is returned regardless of the filter.");
  parameters->register_parameter(hypothesis_filter_param);

  // Set up OUTPUT_CONFUSIONS parameter
  output_confusions_param = new ParamBool("OUTPUT_CONFUSIONS", 0);
  output_confusions_param->set_helptext("If set ON the the confusion matrix will be written to the logfile after all utterances have been decoded.");
  parameters->register_parameter(output_confusions_param);

  // Set up DUMP_PARAMETERS parameter
  dump_parameters_param = new ParamBool("DUMP_PARAMETERS", 0);
  dump_parameters_param->set_helptext("If set ON then the value of all the decoder parameters will be appended to the end of the log file.");
  parameters->register_parameter(dump_parameters_param);

  // Set up DISPLAY_GROUPS parameter
  display_groups_param = new ParamBool("DISPLAY_GROUPS", PARAM_DEFAULT_DISPLAY_GROUPS);
  display_groups_param->set_helptext("If set ON then the mask employed to produce the winning hypothesis will be displayed.");
  parameters->register_parameter(display_groups_param, CTK_PARAM_TYPE_HIDDEN);

  // Set up MASK_OUTPUT_FILENAME parameter
  // (the help text used to be a copy of the DISPLAY_GROUPS text; this
  // parameter names the file handed to save_group_mask() in close())
  mask_output_filename_param = new ParamString("MASK_OUTPUT_FILENAME", PARAM_DEFAULT_MASK_OUTPUT_FILENAME);
  mask_output_filename_param->set_helptext("The name of a file to which the mask employed to produce the winning hypothesis will be written. <p> If this is left unset or is set to an empty string then no mask file is written.");
  parameters->register_parameter(mask_output_filename_param, CTK_PARAM_TYPE_HIDDEN);

  // No grammar has been used to build a decoder yet
  grammar="\0";
  noise_grammar="\0";

  // Running recognition statistics accumulated over all utterances
  overall_stats = new RecoStats();

}

// Destructor: releases the running recognition statistics allocated in the
// constructor. The HMM sets are not deleted here (an earlier attempt to
// release them via file_table_delete_entry is no longer active), and the
// decoder itself is deleted in close_final().
HMMDecoderBlock::~HMMDecoderBlock()
{
  delete overall_stats;
}


// Load and build the set of HMMs listed in the file named by 'hmm_file_param'.
//
// If LABEL_FILE is set, a name->label map is read from it with
// read_hmm_label_file(); if DICTIONARY is set, a pronunciation dictionary is
// read with read_dictionary(). Both (possibly empty) tables are handed to the
// SetOfHMMs constructor along with this block's name and mixture prototype.
//
// Returns a newly allocated SetOfHMMs.
SetOfHMMs *HMMDecoderBlock::load_hmms(ParamString *hmm_file_param) {

  // Model name -> output label (stays empty when LABEL_FILE is not set)
  map<string, string> labels_by_model_name;
  if (label_file_param->get_set_flag()) {
    read_hmm_label_file(label_file_param->get_value(), labels_by_model_name);
  }

  // Pronunciation dictionary (stays empty when DICTIONARY is not set)
  map<string, list<string> > pronunciations;
  if (dictionary_param->get_set_flag()) {
    read_dictionary(dictionary_param->get_value(), pronunciations);
  }

  SetOfHMMs *model_set = new SetOfHMMs(hmm_file_param->get_value(), getname(), labels_by_model_name, pronunciations, get_HMM_mixture_prototype());
  return model_set;
}

void HMMDecoderBlock::reset() {

  Block::reset();

  group_buffer.resize(0);
  mask_buffer.resize(0);
  display_groups=display_groups_param->get_value();
  mask_output_filename=mask_output_filename_param->get_value();
  
#ifdef _HAS_MATLAB
  if (display_groups_param->get_value())  // Only start up MATLAB if the display parameter is set
    matlab_startup();
#endif
  
  bool new_models_loaded=false;

  if (ReadOnceFile::interactive_mode_==false) {
    // If batch mode 
    // Retrieve hmms from cache if they exist else load them from the file
    if ((hmms=(SetOfHMMs*)file_table_fetch_entry_ignoring_timestamp(hmm_file_param->get_value(), getname()+":signal"))==NULL) {
      hmms=load_hmms(hmm_file_param);
      // Note that file is stored with a block specific name so that ctk blocks can have personal copies
      file_table_add_entry(hmms, hmm_file_param->get_value(), getname()+":signal");
      new_models_loaded=true;
    }
    
    // Retrieve noise hmms from cache if they exist else load them from the file
    if (uses_noise_models()) {
      if ((noise_hmms=(SetOfHMMs*)file_table_fetch_entry_ignoring_timestamp(noise_hmm_file_param->get_value(), getname()+":noise"))==NULL) {
	noise_hmms=load_hmms(noise_hmm_file_param);
	// Note that file is stored with a block specific name so that ctk blocks can have personal copies
	file_table_add_entry(noise_hmms, noise_hmm_file_param->get_value(), getname()+":noise");
	new_models_loaded=true;
      }
    }
  }else {
    // If interactive mode 
    hmms=load_hmms(hmm_file_param);
    if (uses_noise_models()) {
      noise_hmms=load_hmms(noise_hmm_file_param);
    }
    new_models_loaded=true;
  }

  
  if (hmms->get_error_status()==CTK_FAILURE) {
    cerr << "Error reading HMM definitions\n";
    throw(CTKError(__FILE__, __LINE__));
  }
    
  if ((first_token_param->get_set_flag()||final_token_param->get_set_flag()) && (grammar_filename_param->get_set_flag())) {
    cerr << "Warning: Use of the GRAMMAR_FILE parameter overides the use of FIRST_TOKEN and FINAL_TOKEN parameters." << endl;
  }
    
  word_creation_penalty=word_creation_penalty_param->get_value();
  num_hyp=num_hyp_param->get_value();

  // Set up hmm for probability calculation
  if (decoder_performs_probability_calculation) {
    max_mixtures=max_mixtures_param->get_value();
    has_deltas=has_deltas_param->get_value();
    
    if (use_deltas_param->get_set_flag()==0) {
      // If use_deltas not set explicitily then just use them if they are there
      use_deltas=has_deltas;
    } else {
      use_deltas=use_deltas_param->get_value();
      // Can't use deltas if they are not there!
      if (use_deltas && !has_deltas) {
	cerr << "Inconsistent decoder parameter settings:\n";
	cerr << "HMMDecoderBlock: Cannot have USE_DELTAS=TRUE and HAS_DELTAS=FALSE!" << endl;
	throw(CTKError(__FILE__, __LINE__));
      }
      if (!use_deltas && use_delta_bounds) {
	cerr << "Inconsistent decoder parameter settings:\n";
	cerr << "If USE_DELTA_BOUNDS = TRUE, the USE_DELTAS must be set TRUE also.\n";
	throw(CTKError(__FILE__, __LINE__));
      }
      
    }

    hmms->set_use_deltas(use_deltas);
    hmms->set_use_marginals(use_bounds);
    hmms->set_use_delta_marginals(use_delta_bounds);

    if (noise_hmms!=NULL) {
      noise_hmms->set_use_deltas(use_deltas);
      noise_hmms->set_use_marginals(use_bounds);
      noise_hmms->set_use_delta_marginals(use_delta_bounds);
    }

  }

  
  use_posteriors=use_posteriors_param->get_value();

  if (hypothesis_filter_param->get_set_flag()) {
    decoder->setRequiresNBestTraceInfo(true);   // Make sure decoder stores NBest traceback info    
    filter=compile_regular_expression(hypothesis_filter_param->get_value());
  } else
    filter=0;

  // Only record group info if it is require for generating a mask.
  Decoder::setRequiresGroupInfo(display_groups || mask_output_filename.size()>0);

  if (new_models_loaded) {
    hmms->display(cerr);
    if (noise_hmms!=NULL) noise_hmms->display(cerr);
  }
  
  // Build a new decoder if new HMMs have just been loaded or if the grammar has changed
  if (noise_hmms==NULL) {
    // Simple signal HMM
    if (new_models_loaded ||
	(grammar_filename_param->get_set_flag() && grammar_filename_param->get_value()!=grammar)) {
      delete decoder;
      decoder=buildDecoder(hmms, grammar_filename_param);   // Build the decoder network
    }
  } else {
    // Product Signal/Noise HMM
    if (new_models_loaded ||
	// Build a new decoder if new HMMs have just been loaded or if one of the grammars has changed
	(grammar_filename_param->get_set_flag() && grammar_filename_param->get_value()!=grammar) ||
	(noise_grammar_filename_param->get_set_flag() && noise_grammar_filename_param->get_value()!=noise_grammar) 

	) {
      Decoder *decoder1=buildDecoder(hmms, grammar_filename_param);   // Build the decoder network
      Decoder *decoder2=buildDecoder(noise_hmms, noise_grammar_filename_param);   // Build the decoder network
      delete decoder;
      decoder = new Decoder(*decoder1, *decoder2);
    }
  }
  
  
  if (grammar_filename_param->get_set_flag())
    grammar=grammar_filename_param->get_value();
  if (noise_grammar_filename_param->get_set_flag())
    noise_grammar=noise_grammar_filename_param->get_value();
    

  resetDecoder(state_path_param->get_value(), num_hyp);     // Put the decoder network in a good initial state


  Integer nfeatures=hmms->get_vec_size();

  Integer nfeatures_supplied=(*input_sockets)[0]->get_data_descriptor()->get_storage();
  int nfeatures_needed = (has_deltas && !use_deltas)?nfeatures/2:nfeatures;
  if (nfeatures_needed>nfeatures_supplied) {
    cerr << "HMMs require input data with dimensionality " << nfeatures_needed << ". Data supplied only has dimensionality of " << nfeatures_supplied << "." << endl;
    throw (CTKError(__FILE__, __LINE__));
  }
  
  logfile=open_logfile(log_file_param, stdout);   // use 'stdout' if log_file_param is not set

  if (log_file2_param->get_set_flag() && log_file2_param->get_value().size()!=0) {
    // Test to see if file already exists
    bool file_exists = test_file_exists(log_file2_param->get_value());
      
    logfile2=open_logfile(log_file2_param, NULL);  

    // If file didn't exist before then it is being opened for the first time and we must write the xml header
    if (!file_exists) {
      fprintf(logfile2,"<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>\n");
      fprintf(logfile2,"<!DOCTYPE asr_results SYSTEM \"asr_results.dtd\">\n\n");

      // Print the details for the decoder settings
      fprintf(logfile2,"<asr_results>\n\n");

      print_decoder_details_XML(logfile2);

    }
  }
  
  // Default timit file is stdout for the 'decoder only' decoders and
  // no output for the older 'prob_calc + decoding' decoders
  // (This wrinkle maintains backward compatiblility with older scripts)
  FILE* default_timitfile=decoder_performs_probability_calculation?NULL:stdout;
  timitfile=open_logfile(timitfile_param, default_timitfile);
  
}

// Test for file existence. 
// Return true if file there exists a file called 'name', else return false.
bool HMMDecoderBlock::test_file_exists(const string &name) const {
  ofstream file(name.c_str(), ios::out|ios::in);
  if (!file)
    return false;
  else
    return true;
}

void HMMDecoderBlock::print_decoder_details_XML(FILE *fp) {
  fprintf(fp, "<CTK version=\"%s\">\n",CTK_VERSION_STRING);
  fprintf(fp, "<decoder type=\"%s\">\n",get_blocktype().c_str());
  fprintf_parameters_XML(fp);
  fprintf(fp, "</decoder>\n\n");
  fprintf(fp, "</CTK>\n\n");

}

regex_t *HMMDecoderBlock::compile_regular_expression(const string &regex) const {

  regex_t *compiled_regex = new regex_t;
  
  int err_no=0; 
  if((err_no=regcomp(compiled_regex, regex.c_str(), REG_EXTENDED))!=0) {
    size_t length = regerror (err_no, compiled_regex, NULL, 0);
    char *buffer = (char*)malloc(length);
    regerror(err_no, compiled_regex, buffer, length);
    cerr << buffer << endl; /* Print the error */
    free(buffer);
    regfree(compiled_regex);
    delete compiled_regex;
    throw(CTKError(__FILE__, __LINE__));
  }

  return compiled_regex;
}

// Returns true if the NOISE_HMM_FILE parameter has been set.
bool HMMDecoderBlock::uses_noise_models() const
{
  const bool noise_file_given = noise_hmm_file_param->get_set_flag();
  return noise_file_given;
}

// Build a decoder network for the HMM set 'hmms'.
//
// If 'grammar_filename_param' is set the decoder is built from that grammar
// file, using the GRAMMAR_FORMAT parameter. Otherwise a decoder is built from
// the HMM set alone, optionally constrained by the FIRST_TOKEN and FINAL_TOKEN
// HMM names, which are checked against the HMM set first.
//
// Returns a newly allocated Decoder.
// Throws CTKError if FIRST_TOKEN or FINAL_TOKEN does not name an HMM in 'hmms'.
Decoder *HMMDecoderBlock::buildDecoder(SetOfHMMs *hmms, ParamString *grammar_filename_param) {

  // General FSG-file decoder constructor
  if (grammar_filename_param->get_set_flag()) {
    return new Decoder(hmms, word_creation_penalty_param->get_value(), grammar_filename_param->get_value(), grammar_format_param->get_value());
  }

  // Default loop grammar decoder constructor.
  // A token name left empty means that the parameter was not set.
  string first_token_name;
  if (first_token_param->get_set_flag()) {
    first_token_name=first_token_param->get_value();
    if (hmms->get_HMM_by_name(first_token_name)==NULL) {
      cerr << "FIRST_TOKEN has an invalid name. No HMM found named: " << first_token_param->get_value() << ".\n";
      throw(CTKError(__FILE__, __LINE__));
    }
  }

  string final_token_name;
  if (final_token_param->get_set_flag()) {
    final_token_name=final_token_param->get_value();
    if (hmms->get_HMM_by_name(final_token_name)==NULL) {
      cerr << "FINAL_TOKEN has an invalid name. No HMM found named:" << final_token_param->get_value() << " .\n";
      throw(CTKError(__FILE__, __LINE__));
    }
  }

  return new Decoder(hmms, first_token_name, final_token_name, word_creation_penalty_param->get_value());
}

// Put the decoder back into a good initial state and apply the per-run
// settings: state path recording, N-best list size and pruning beam width
// (the latter read from the PRUNING_BEAMWIDTH parameter).
//
// Terminates the program if no decoder has been built.
void HMMDecoderBlock::resetDecoder(bool state_path_recording, int num_hyp)
{
  if (!decoder) {
    cerr << "Internal error! No decoder present!" << endl;
    exit(-1);
  }

  decoder->reset();

  // Per-run decoder configuration
  decoder->activateStatePathRecording(state_path_recording);
  decoder->setNBestListSize(num_hyp);
  decoder->setPruningBeamwidth(pruning_beamwidth_param->get_value());
}


// Open the log file named by 'file_param' in append mode ("a+"), creating it
// if necessary.
//
// Returns 'default_file' unchanged if the parameter is not set or holds an
// empty string; otherwise returns the newly opened stream.
// Throws CTKError if the file cannot be opened.
FILE *HMMDecoderBlock::open_logfile(ParamString *file_param, FILE *default_file/*=stdout*/) {

  // Parameter not set: fall back to the default
  if (!file_param->get_set_flag())
    return default_file;

  // Parameter set to an empty name: fall back to the default as well
  const string path=file_param->get_value();
  if (path.empty())
    return default_file;

  // Reopen or create log file in append mode
  FILE *opened=fopen(path.c_str(), "a+");
  if (opened==NULL) {
    cerr << "Cannot open log file: " << file_param->get_value() << endl;
    throw(CTKError(__FILE__, __LINE__));
  }

  return opened;
}

void HMMDecoderBlock::close() {

  // Retrieve the N best hypotheses
  CTKStatus back_trace_status=decoder->backTrace(hypotheses, 0.0);

  if (back_trace_status==CTK_SUCCESS) {
    // Optional filtering applied to winning hypothesis
    if (hypothesis_filter_param->get_set_flag()) {
      if (hypotheses[0]->matches_filter(filter)) {
	//      cerr << "rescoring hypotheses" << endl;
	hypotheses[0]=decoder->get_compliant_hyp(filter);
      }
      regfree(filter); /* Free the regular expression data structure */
      delete filter;
    }
  
    // Display groups used -e.g. for Multisource decoding  
#ifdef _HAS_MATLAB
    if (display_groups) 
      display_group_mask();
#endif
    if (mask_output_filename.size()>0)
      save_group_mask(mask_output_filename);
    
  }
  
  list<string> silences;
  istringstream isstr(silence_param->get_value());
  string word;
  while (isstr>>word) silences.push_back(word);

  // Get winning hypothesis ... if there is one.
  list<string> hyp;
  if (back_trace_status==CTK_SUCCESS)
    hyp=strip_silence(hypotheses[0]->get_solution_raw(), silences);
  string hyp_string = list_string_to_string(hyp);
  
  fprintf(logfile, "\r%s         \t", hyp_string.c_str());
  
  // Record correct utterance
  list<string> transcription_strings=string_to_list_string(transcription_param->get_value());
  list<string> correct=strip_silence(transcription_strings,silences);
  string correct_string = list_string_to_string(correct);
  fprintf(logfile, "%s\t", correct_string.c_str());

  // Record detailed info in 2nd log file
  if (logfile2!=NULL) {
    fprintf(logfile2,"<utterance>\n");
    fprintf(logfile2,"<target>");
    fprintf(logfile2, "%s", correct_string.c_str());
    fprintf(logfile2,"</target>\n");
    if (back_trace_status==CTK_SUCCESS) {
      // Record Nbest hypotheses
      for (UInteger i=0; i<hypotheses.size(); ++i) {
	fprintf(logfile2,"<decoding>\n");
	fprintf(logfile2,"<nbest> %d </nbest>\n",i+1);
	write_hypothesis(hypotheses[i], logfile2);
	fprintf(logfile2,"</decoding>\n");
      }
    } else {
      fprintf(logfile2,"<decoding>\n DECODING FAILED - NO SURVIVING TOKENS!\n</decoding>\n");
    }
    fprintf(logfile2,"</utterance>\n");
    fprintf(logfile2,"\n");
  }

  // write hypothesis in TIMIT format 
  if (timitfile!=NULL)
    hypotheses[0]->write_TIMIT_format(timitfile, samples_per_frame_param->get_value());
  
  // Calculate and record per utterance recognition stats
  vector<string> labelv=hmms->get_label_list();
  list<string> label_list;
  copy(labelv.begin(), labelv.end(), back_inserter(label_list));
  list<string> labels=strip_silence(label_list, silences);
  RecoStats *stats = new RecoStats(labels);

  list<string> hyp_source;
  if (back_trace_status==CTK_SUCCESS)
    hyp_source=strip_silence(hypotheses[0]->get_solution_source(), silences);
  stats->calc_stats(hyp_source, correct);
  stats->print_stats(logfile);
  fprintf(logfile, "\t");

  // Calcultate and record running stats
  *overall_stats += *stats;
  overall_stats->print_stats(logfile);
  fprintf(logfile, "  \n");
  
  
  // Delete stats and hypotheses
  delete stats;
  for (UInteger i=0; i<hypotheses.size(); ++i)
    delete hypotheses[i];
  hypotheses.resize(0);

  
  // Close log files
  if (logfile!=stderr && logfile!=stdout && logfile!=NULL)
    fclose(logfile);
  
  if (logfile2!=stderr && logfile2!=stdout && logfile2!=NULL) {
    fclose(logfile2);
  }

  if (timitfile!=stderr && timitfile!=stdout && timitfile!=NULL) {
    fclose(timitfile);
  }

  // Call parent close
  Block::close();
      
}


// Second stage of closing: calls the decoder's cleanUp() and then hands over
// to the parent's close2().
void HMMDecoderBlock::close2()
{
  decoder->cleanUp();

  // Call parent close2
  Block::close2();
}

void HMMDecoderBlock::close_final() {
  
  logfile=open_logfile(log_file_param);
  
  if (output_confusions_param->get_value()) {
    fprintf(logfile, "\n");
    overall_stats->print_confusions(logfile);
  }
  
  if (logfile!=stderr && logfile!=stdout)
    fclose(logfile);

  // Write parameter to log file
  if (dump_parameters_param->get_value()) {
    bool dumped=false;
    if (log_file_param->get_set_flag()) {
      string a_filename=log_file_param->get_value();
      if (a_filename.size()>0) {	ofstream *ostr=new ofstream();
	ostr->open(a_filename.c_str(), ios::app);
	display_parameters(*ostr, "\0");
	ostr->close();
	delete ostr;
	dumped=true;
      }
    }
    if (!dumped)
	display_parameters(cout, "\0");

  }

  
  logfile2=open_logfile(log_file2_param, NULL);   // no log_file2 output if log_file2_param is not set
  if (logfile2!=NULL) {
    fprintf(logfile2, "</asr_results>\n");
    fclose(logfile2);
  }
      
  // Remove the decoder
  delete decoder;
  decoder=NULL;
  
  Block::close_final();
  
}


// Write the hypothesis 'hyp' to 'file' by calling its write_all() method.
void HMMDecoderBlock::write_hypothesis(RecoHypothesis *hyp, FILE *file)
{
  hyp->write_all(file);
}

// Set the use_bounds flag; reset() forwards it to the HMM sets via
// set_use_marginals().
void HMMDecoderBlock::set_use_bounds(Boolean x)
{
  this->use_bounds = x;
}

// Set the use_delta_bounds flag; reset() forwards it to the HMM sets via
// set_use_delta_marginals().
void HMMDecoderBlock::set_use_delta_bounds(Boolean x)
{
  this->use_delta_bounds = x;
}

// Process a single frame of input.
//
//   frame          - feature data if the block performs the probability
//                    calculation itself, otherwise the likelihoods to employ
//   fmask          - optional missing data mask for the signal HMMs
//   nmask          - optional missing data mask for the noise HMMs
//   lower_bounds,
//   upper_bounds   - optional bounds passed on to the FeatureVector
//   use_marginal_normalisation - passed on to the FeatureVector
//
// After the probabilities are available one token passing step is performed,
// the state likelihoods (output 0) and winning mixtures (output 1) are sent
// to any connected output sockets, and the decoder is advanced to the next
// frame. When logging to stdout/stderr a partial traceback is displayed every
// PARTIAL_TRACEBACK_PERIOD frames.
void HMMDecoderBlock::process_frame(const CTKVector &frame, CTKVector *fmask/*=NULL*/, CTKVector *nmask/*=NULL*/, CTKVector *lower_bounds/*=NULL*/, CTKVector *upper_bounds/*=NULL*/, bool use_marginal_normalisation/*=true*/) {

  if (decoder_performs_probability_calculation) {
    // Calculate probabilities given the feature data

    shared_ptr<FeatureVector> feature_vector(new FeatureVector(frame, lower_bounds, upper_bounds, has_deltas));
    feature_vector->set_marginal_normalisation(use_marginal_normalisation);

    hmms->set_observed_data(feature_vector);

    if (fmask!=NULL) {
      shared_ptr<MaskVector> mask_vector(new MaskVector(*fmask, has_deltas));
      hmms->set_missing_data_mask(mask_vector);
    }

    // add together the mixes in log domain
    hmms->calc_prob(max_mixtures);

    if (noise_hmms!=NULL) {
      noise_hmms->set_observed_data(feature_vector);
      if (nmask!=NULL) {
        shared_ptr<MaskVector> mask_vector(new MaskVector(*nmask, has_deltas));
        noise_hmms->set_missing_data_mask(mask_vector);
      }
      noise_hmms->calc_prob(max_mixtures);
    }

  } else {
    // Employ the probabilities provided
    hmms->set_likelihoods(frame);
  }

  // Perform the viterbi token pass step

  decoder->passTokens();

  // Send state likelihoods to output
  if ((*output_sockets)[0]->connected()) {
    vector<Float> *likelihoods = new vector<Float>;
    hmms->construct_likelihood_vector(*likelihoods);
    (*output_sockets)[0]->put_vector(likelihoods);
  }

  // Send winning mixture numbers to output
  if (output_sockets->size()>1 && (*output_sockets)[1]->connected()) {
    vector<Float> *winning_mixtures = new vector<Float>;
    hmms->construct_winning_mixture_vector(*winning_mixtures);
    (*output_sockets)[1]->put_vector(winning_mixtures);
  }

  decoder->nextFrame();

  decoder->reset_current_group_hypothesis();

  // Display partial traceback if the output is going to stdout or stderr rather than a logfile
  if (logfile==stdout || logfile==stderr) {
    ostream& out = (logfile==stdout)?cout:cerr;
    if (decoder->getFrame()%PARTIAL_TRACEBACK_PERIOD==0) {
      decoder->backTrace(hypotheses, 0.0);
      for (UInteger i=0; i<hypotheses.size(); ++i) {
        out << *hypotheses[i] << "  ";
      }
      out << "     \r" << flush;

      // close() deletes the hypotheses it obtains from backTrace(), i.e. the
      // caller owns them. Release the partial ones in the same way so that
      // they do not pile up over a long utterance.
      for (UInteger i=0; i<hypotheses.size(); ++i)
        delete hypotheses[i];
      hypotheses.resize(0);
    }
  }

}


// Version of process_frame specialised for multisource decoding
// set  soft_multisource == true  to use multisource decoder with soft missing data masks  
void HMMDecoderBlock::process_frame_multisource(const CTKVector &frame, const CTKVector &fmask, CTKVector *flower_bounds, CTKVector *fupper_bounds, vector<Integer> *groups, float md_weight, bool soft_multisource /*=false*/) {

  group_buffer.push_back(vector<Integer>(*groups));
  mask_buffer.push_back(vector<Float>(fmask));

  // Send the feature vector to every HMM Gaussian distribution
  shared_ptr<FeatureVector> feature_vector(new FeatureVector(frame, flower_bounds, fupper_bounds, has_deltas, md_weight));
  
  hmms->set_observed_data(feature_vector);  
  if (noise_hmms!=NULL) 
    noise_hmms->set_observed_data(feature_vector);  
  
  decoder->reset_current_group_hypothesis();

  decoder->nextFrame();

  // Send winning mixture numbers to output
  if ((*output_sockets)[0]->connected()) {
    (*output_sockets)[0]->put_sample(decoder->get_num_group_hypotheses());  
  }

  if (get_verbosity_level()>0) {
    cerr << "frame " << decoder->getFrame() << ": Nsegregations = " << decoder->get_num_group_hypotheses() << "\n";
    //    cerr << "Word records: " << WordRecord::getRecordListSize() << "\n";
    //    cerr << sizeof(Token) << " " << sizeof(WordRecord) << "\n";
  } 

  // Process each group hypothesis -
  do {
    //  The mask is created dynamically according to the group mask stored in the decoder.
    vector<Float> hypothesised_mask(fmask.size());

    if (soft_multisource)
      decoder->make_soft_mask_hypothesis(*groups, fmask, hypothesised_mask, 0);     
    //   decoder->make_discrete_mask_hypothesis(*groups, fmask, hypothesised_mask);     
    else
      decoder->make_discrete_mask_hypothesis(*groups, fmask, hypothesised_mask, 0);

    // The mask is sent to every HMM Gaussian distributuib
    shared_ptr<MaskVector> mask_vector(new MaskVector(hypothesised_mask, has_deltas));
    hmms->set_missing_data_mask(mask_vector);
    
    // add together the mixes in log domain  
    hmms->calc_prob(max_mixtures);

    if (get_verbosity_level()>10) {
      cerr << "mask : ";
      copy(hypothesised_mask.begin(), hypothesised_mask.end(), ostream_iterator<HMMFloat>(cerr, " "));
      cerr << endl;
    }
    
    if (noise_hmms!=NULL) {
      //  The mask is created dynamically according to the group mask stored in the decoder.
      vector<Float> hypothesised_mask(fmask.size());
      
      if (soft_multisource)
	decoder->make_soft_mask_hypothesis(*groups, fmask, hypothesised_mask, 1);     
      //   decoder->make_discrete_mask_hypothesis(*groups, fmask, hypothesised_mask);     
      else
	decoder->make_discrete_mask_hypothesis(*groups, fmask, hypothesised_mask, 1);
      
      // The mask is sent to every HMM Gaussian distributuib
      shared_ptr<MaskVector> mask_vector2(new MaskVector(hypothesised_mask, has_deltas));
      noise_hmms->set_missing_data_mask(mask_vector2);
    
      noise_hmms->calc_prob(max_mixtures);
      
      if (get_verbosity_level()>10) {
	cerr << "maskN: ";
	copy(hypothesised_mask.begin(), hypothesised_mask.end(), ostream_iterator<HMMFloat>(cerr, " "));
	cerr << endl;
      }
    }

    // Pass the tokens for this particular hypothesis
    decoder->passTokens();
  
  } while (decoder->next_group_hypothesis());

}

// Plot the group mask of the winning hypothesis in MATLAB.
//
// Compiled to an empty function unless _HAS_MATLAB is defined. For every buffered
// frame a discrete mask is rebuilt from the group record of hypotheses[0] and
// written as one column of a (width x number-of-frames) matrix named "groups",
// which is then displayed with imagesc.
//
// Reads group_buffer[0] and hypotheses[0], so at least one frame must have been
// buffered and at least one hypothesis must exist.
void HMMDecoderBlock::display_group_mask() {

#ifdef _HAS_MATLAB

  Integer width=group_buffer[0].size();

  mxArray *mx = mxCreateDoubleMatrix(width, group_buffer.size(), mxREAL);
  mxSetName(mx,"groups");

  // mxGetPr() yields a double*; write through a double* and convert each value
  // explicitly so the matrix is filled correctly whatever type Float is.
  double *dest=mxGetPr(mx);
  
  vector<Float> outmask;
  outmask.resize(width);

  // Reconstruct the group hypothesis from the group record string stored in the winning recognition hypothesis
  GroupHypothesis winning_group_hypothesis(hypotheses[0]->get_group_record());

  vector<vector<Float> >::iterator mbp = mask_buffer.begin();
  for (vector<vector<Integer> >::iterator gbp=group_buffer.begin(); gbp!=group_buffer.end(); ++gbp, ++mbp) {
    winning_group_hypothesis.make_discrete_mask(*gbp, *mbp, outmask, false);

    for (vector<Float>::iterator mp=outmask.begin(); mp!=outmask.end(); ++mp) {
      *dest++=static_cast<double>(*mp);
    }
  }
  

  engPutArray(ep, mx);
  engEvalString(ep,"figure; h=imagesc(groups); axis xy; colorbar");
  mxDestroyArray(mx);
#endif
  
}


void HMMDecoderBlock::save_group_mask(const string &filename) {

  Integer width=group_buffer[0].size();

  vector<Float> outmask;
  float outmask_float;
  
  outmask.resize(width);

  // Reconstruct the group hypothesis from the group record string stored in the winning recognition hypothesis
  GroupHypothesis winning_group_hypothesis(hypotheses[0]->get_group_record());

  FILE *fd;
  if ((fd=(FILE*)fopen(filename.c_str(),"wb"))==(FILE*)NULL) throw(FileErrorOFOF(__FILE__,__LINE__, filename.c_str()));

  vector<vector<Float> >::iterator mbp = mask_buffer.begin();
  for (vector<vector<Integer> >::iterator gbp=group_buffer.begin(); gbp!=group_buffer.end(); ++gbp, ++mbp) {
    winning_group_hypothesis.make_discrete_mask(*gbp, *mbp, outmask, false);

    for (vector<Float>::iterator omp=outmask.begin(), omp_end=outmask.end(); omp!=omp_end; ++omp) {
      outmask_float=*omp;
      fwrite(&outmask_float, sizeof(float), 1, fd);
    }
  }
  
  fclose(fd);
}


// Round mask values that are close to 0 or 1 so that they become exactly 0 or 1.
// This can greatly speed up the probability calculations without having a significant effect on the result.
//
// mask  - mask to modify in place
// round - tolerance: values below 'round' become 0.0, values above 1-'round' become 1.0;
//         all other values are left untouched
void HMMDecoderBlock::round_mask(CTKVector *mask, Float round) {
  const Float one_minus_round=1.0-round;

  // Iterators are used instead of &(*mask)[i] so that an empty mask is handled
  // and no element one past the end is ever indexed.
  for (vector<Float>::iterator maskp=mask->begin(), maskp_end=mask->end(); maskp!=maskp_end; ++maskp) {
    if (*maskp<round) *maskp=0.0;
    else if (*maskp>one_minus_round) *maskp=1.0;
  }
  
}

// Return a copy of 'words' with every entry that also appears in 'silences' left out.
// The order of the remaining words is unchanged.
list<string> HMMDecoderBlock::strip_silence(const list<string> &words, const list<string> &silences) {

  list<string> kept;

  for (list<string>::const_iterator word=words.begin(); word!=words.end(); ++word) {
    const bool is_silence = find(silences.begin(), silences.end(), *word)!=silences.end();
    if (is_silence)
      continue;
    kept.push_back(*word);
  }

  return kept;
}

/******************************************************************************/
/*                                                                            */
/*       CLASS NAME: HMMDecoderOnlyBlock                                         */
/*                                                                            */
/******************************************************************************/

// HMM Decoder accepting likelihoods - i.e. raw decoder, no prob calculation

// Type name ("HMMDecoder") and help text for HMMDecoderOnlyBlock
const string HMMDecoderOnlyBlock::type_name = "HMMDecoder";
const string HMMDecoderOnlyBlock::help_text = HMM_DECODER_ONLY_BLOCK_HELP_TEXT;

// Constructor. The base HMMDecoderBlock is built with its third argument false
// (this block does not perform the probability calculation). One input socket
// and one output socket are created, both described as "state likelihoods".
HMMDecoderOnlyBlock::HMMDecoderOnlyBlock(const string &a_name):CTKObject(a_name),HMMDecoderBlock(a_name, type_name, false /*i.e. does not perform probability calculation*/) {

  // Input socket for state likelihoods
  make_input_sockets(1);
  input_sockets->set_description("in1", "state likelihoods");

  // Output socket for state likelihoods
  make_output_sockets(1);
  output_sockets->set_description("out1", "state likelihoods");
}
 
// Allocate a new HMMDecoderOnlyBlock named 'n' (or named after this block when 'n'
// is empty), pass it to copy_this_block_to() and return the result.
Block* HMMDecoderOnlyBlock::clone(const string &n) const{
  const string clone_name = n.empty() ? getname() : n;
  Block *duplicate = new HMMDecoderOnlyBlock(clone_name);
  return copy_this_block_to(duplicate);
}

// Reset the block. Nothing specific to this class: forwards to HMMDecoderBlock::reset().
void HMMDecoderOnlyBlock::reset() {

  HMMDecoderBlock::reset();
    
}
 

void HMMDecoderOnlyBlock::compute() {

  CTKVector *data;

  (*input_sockets)[0]->get_vector(data);
 
  process_frame(*data);

  delete data;
  
}

/******************************************************************************/
/*                                                                            */
/*       CLASS NAME: StandardHMMDecoderBlock                                  */
/*                                                                            */
/******************************************************************************/

// Standard GMM likelihoods followed by HMM Decoder

// Type name ("HMMDecoderStandard") and help text for StandardHMMDecoderBlock
const string StandardHMMDecoderBlock::type_name = "HMMDecoderStandard";
const string StandardHMMDecoderBlock::help_text = HMM_DECODER_STANDARD_BLOCK_HELP_TEXT;

// Constructor. Creates one input socket ("data") and two output sockets
// ("state likelihoods" and "max likelihood mixture number").
StandardHMMDecoderBlock::StandardHMMDecoderBlock(const string &a_name):CTKObject(a_name),HMMDecoderBlock(a_name, type_name) {

  // Input socket for the data
  make_input_sockets(1);
  input_sockets->set_description("in1", "data");

  // Output sockets for state likelihoods and the max likelihood mixture number
  make_output_sockets(2);
  output_sockets->set_description("out1", "state likelihoods");
  output_sockets->set_description("out2", "max likelihood mixture number");
}
 
void StandardHMMDecoderBlock::build_output_data_descriptors() {
  
  // Construct a DataDescriptor for 1-dimension likelihood data
  DataDescriptor *dd = new DataDescriptor();
  CTKVector axis;
  for (Integer k=0; k<hmms->get_total_num_states(); ++k)
    axis.push_back(k);
  dd->add_inner_dimension(string("STATE"), axis);   
  (*output_sockets)[0]->set_data_descriptor(dd);
  (*output_sockets)[1]->set_data_descriptor(new DataDescriptor(*dd));
  
}


// Allocate a new StandardHMMDecoderBlock named 'n' (or named after this block when
// 'n' is empty), pass it to copy_this_block_to() and return the result.
Block* StandardHMMDecoderBlock::clone(const string &n) const{
  const string clone_name = n.empty() ? getname() : n;
  Block *duplicate = new StandardHMMDecoderBlock(clone_name);
  return copy_this_block_to(duplicate);
}

// Reset the block. Nothing specific to this class: forwards to HMMDecoderBlock::reset().
void StandardHMMDecoderBlock::reset() {

  HMMDecoderBlock::reset();
    
}
 

void StandardHMMDecoderBlock::compute() {

  CTKVector *data;

  (*input_sockets)[0]->get_vector(data);

  process_frame(*data);

  delete data;
  
}


/******************************************************************************/
/*                                                                            */
/*       CLASS NAME: BaseMD_HMMDecoderBlock                                   */
/*                                                                            */
/******************************************************************************/

// Constructor for the common base of the missing-data decoder blocks.
// Creates and registers four boolean parameters: USE_BOUNDS, USE_DELTA_BOUNDS,
// USE_ERF_TABLE and USE_LOG_TABLE. The last two are registered with
// CTK_PARAM_TYPE_DEPRECATED.
//
// a_name - block name, forwarded to CTKObject and HMMDecoderBlock
// a_type - block type name, forwarded to HMMDecoderBlock
BaseMD_HMMDecoderBlock::BaseMD_HMMDecoderBlock(const string &a_name, const string &a_type):CTKObject(a_name),HMMDecoderBlock(a_name, a_type) {

  // Set up the USE_BOUNDS parameter
  use_bounds_param = new ParamBool("USE_BOUNDS",0);
  use_bounds_param->set_helptext("If set ON then bounded marginals are used when (non-delta) data is missing.");
  parameters->register_parameter(use_bounds_param);

  // Set up the USE_DELTA_BOUNDS parameter
  use_delta_bounds_param = new ParamBool("USE_DELTA_BOUNDS",0);
  use_delta_bounds_param->set_helptext("If set ON then bounded marginals are used when *delta* data is missing.");
  parameters->register_parameter(use_delta_bounds_param);

  // Set up the USE_ERF_TABLE parameter (registered as deprecated)
  use_erf_table_param = new ParamBool("USE_ERF_TABLE",1);
  use_erf_table_param->set_helptext("If set ON a table is used to lookup value for erf(), otherwise it is computed exactly. <p> Using the table can speed up the probability calculations without significantly effecting recognition performance.");
  parameters->register_parameter(use_erf_table_param, CTK_PARAM_TYPE_DEPRECATED);

  // Set up the USE_LOG_TABLE parameter (registered as deprecated)
  use_log_table_param = new ParamBool("USE_LOG_TABLE",0);
  use_log_table_param->set_helptext("If set ON a table is used to lookup values for log(), otherwise it is computed exactly. <p> The can speed up the probability calculation, but may have an effect on recognition performance.");  
  parameters->register_parameter(use_log_table_param, CTK_PARAM_TYPE_DEPRECATED);
}


// Check the bounds parameters for consistency, apply them, then reset the base decoder.
// Throws CTKError when USE_DELTA_BOUNDS is on while USE_BOUNDS is off.
void BaseMD_HMMDecoderBlock::reset() {

  const bool delta_bounds_without_bounds = !use_bounds_param->get_value() && use_delta_bounds_param->get_value();
  if (delta_bounds_without_bounds) {
    cerr << "Inconsistent decoder parameter settings:\n"
         << "Can only use bounds on delta features, if you use bounds on non-delta features too.\n"
         << "i.e. if USE_DELTA_BOUNDS = TRUE, the USE_BOUNDS must be set TRUE also.\n";
    throw(CTKError(__FILE__, __LINE__));
  }

  // Each setting starts out false and takes the parameter's value only when the
  // parameter's set flag is raised.
  set_use_bounds(false);
  if (use_bounds_param->get_set_flag())
    set_use_bounds(use_bounds_param->get_value());

  set_use_delta_bounds(false);
  if (use_delta_bounds_param->get_set_flag())
    set_use_delta_bounds(use_delta_bounds_param->get_value());

  HMMDecoderBlock::reset();
}
 

// Close the block. Nothing specific to this class: forwards to HMMDecoderBlock::close().
void BaseMD_HMMDecoderBlock::close() {
  HMMDecoderBlock::close();
}



/******************************************************************************/
/*                                                                            */
/*       CLASS NAME: MD_HMMDecoderBlock                                       */
/*                                                                            */
/******************************************************************************/

// Type name ("HMMDecoderMD") and help text for MD_HMMDecoderBlock
const string MD_HMMDecoderBlock::type_name = "HMMDecoderMD";
const string MD_HMMDecoderBlock::help_text = HMM_DECODER_MD_BLOCK_HELP_TEXT;

// Constructor. Creates four input sockets (data, mask, lower_bounds, upper_bounds),
// of which the two bounds inputs are marked optional, and two output sockets
// ("state likelihoods" and "max likelihood mixture number").
MD_HMMDecoderBlock::MD_HMMDecoderBlock(const string &a_name):CTKObject(a_name),BaseMD_HMMDecoderBlock(a_name, type_name) {

  // Inputs for Data, Mask and Bounds
  make_input_sockets(4);
  
  input_sockets->set_description("in1", "data");
  input_sockets->set_description("in2", "mask");
  input_sockets->set_description("in3", "lower_bounds");
  input_sockets->set_description("in4", "upper_bounds");
  // Both bounds inputs are optional
  input_sockets->set_optional("in3");
  input_sockets->set_optional("in4");
  
  // Output sockets for state likelihoods and the max likelihood mixture number
  make_output_sockets(2);
  output_sockets->set_description("out1", "state likelihoods");
  output_sockets->set_description("out2", "max likelihood mixture number");
}

void MD_HMMDecoderBlock::build_output_data_descriptors() {
  
  // Construct a DataDescriptor for 1-dimension likelihood data
  DataDescriptor *dd = new DataDescriptor();
  CTKVector axis;
  for (Integer k=0; k<hmms->get_total_num_states(); ++k)
    axis.push_back(k);
  dd->add_inner_dimension(string("STATE"), axis);   
  (*output_sockets)[0]->set_data_descriptor(dd);
  (*output_sockets)[1]->set_data_descriptor(new DataDescriptor(*dd));
  
}

// Allocate a new MD_HMMDecoderBlock named 'n' (or named after this block when 'n'
// is empty), pass it to copy_this_block_to() and return the result.
Block* MD_HMMDecoderBlock::clone(const string &n) const{
  const string clone_name = n.empty() ? getname() : n;
  Block *duplicate = new MD_HMMDecoderBlock(clone_name);
  return copy_this_block_to(duplicate);
}

void MD_HMMDecoderBlock::reset() {

  BaseMD_HMMDecoderBlock::reset();

  // Check the inputs all have the same shape
  if (input_shape_check()==false) {
    Integer x1=(*input_sockets)[0]->get_data_descriptor()->get_storage();
    Integer x2=(*input_sockets)[1]->get_data_descriptor()->get_storage();
    cerr << "BaseMD_HMMDecoderBlock:: Inputs have unequal widths (data=" << x1 << ", mask=" << x2;
    if ((*input_sockets)[2]->connected())
      cerr << ", Lower Bounds=" << (*input_sockets)[2]->get_data_descriptor()->get_storage();
    if ((*input_sockets)[3]->connected())
      cerr << ", Upper Bounds=" << (*input_sockets)[3]->get_data_descriptor()->get_storage();
    cerr << ")" << endl;
    throw(CTKError(__FILE__, __LINE__));
  }
}
 

void MD_HMMDecoderBlock::compute() {
  CTKVector *data=NULL;
  CTKVector *mask=NULL;
  CTKVector *lower_bounds=NULL;
  CTKVector *upper_bounds=NULL;
  
  (*input_sockets)[0]->get_vector(data);
  (*input_sockets)[1]->get_vector(mask);

  if ((*input_sockets)[2]->connected())
    (*input_sockets)[2]->get_vector(lower_bounds);

  if ((*input_sockets)[3]->connected())
    (*input_sockets)[3]->get_vector(upper_bounds);
  
  
  process_frame(*data, mask, NULL, lower_bounds, upper_bounds);

  delete data;
  delete mask;
  delete lower_bounds;
  delete upper_bounds;
  
}

// Return a newly allocated HMMMixtureMD as the mixture prototype for this block.
// The object is created with 'new'; presumably the caller takes ownership - TODO confirm.
HMMMixture *MD_HMMDecoderBlock::get_HMM_mixture_prototype() const {
  return new HMMMixtureMD();
  
}
/******************************************************************************/
/*                                                                            */
/*       CLASS NAME: MD_PHMMDecoderBlock                                       */
/*                                                                            */
/******************************************************************************/

// Type name ("PHMMDecoderMD") and help text for MD_PHMMDecoderBlock
const string MD_PHMMDecoderBlock::type_name = "PHMMDecoderMD";
const string MD_PHMMDecoderBlock::help_text = PHMM_DECODER_MD_BLOCK_HELP_TEXT;

// Constructor. Un-hides the NOISE_HMM_FILE and NOISE_GRAMMAR_FILE parameters and
// creates five input sockets (data, mask, nmask, lower_bounds, upper_bounds), of
// which the two bounds inputs are marked optional, plus two output sockets
// ("state likelihoods" and "max likelihood mixture number").
MD_PHMMDecoderBlock::MD_PHMMDecoderBlock(const string &a_name):CTKObject(a_name),BaseMD_HMMDecoderBlock(a_name, type_name) {

  // Inputs for Data, Mask, Noise Mask and Bounds
  make_input_sockets(5);
  
  // Make the noise model parameters visible for this block
  unset_parameter_hidden("NOISE_HMM_FILE");
  unset_parameter_hidden("NOISE_GRAMMAR_FILE");
  
  input_sockets->set_description("in1", "data");
  input_sockets->set_description("in2", "mask");
  input_sockets->set_description("in3", "nmask");
  input_sockets->set_description("in4", "lower_bounds");
  input_sockets->set_description("in5", "upper_bounds");
  // Both bounds inputs are optional
  input_sockets->set_optional("in4");
  input_sockets->set_optional("in5");
  
  // Output sockets for state likelihoods and the max likelihood mixture number
  make_output_sockets(2);
  output_sockets->set_description("out1", "state likelihoods");
  output_sockets->set_description("out2", "max likelihood mixture number");
}

void MD_PHMMDecoderBlock::build_output_data_descriptors() {
  
  // Construct a DataDescriptor for 1-dimension likelihood data
  DataDescriptor *dd = new DataDescriptor();
  CTKVector axis;
  for (Integer k=0; k<hmms->get_total_num_states(); ++k)
    axis.push_back(k);
  dd->add_inner_dimension(string("STATE"), axis);   
  (*output_sockets)[0]->set_data_descriptor(dd);
  (*output_sockets)[1]->set_data_descriptor(new DataDescriptor(*dd));
  
}

// Allocate a new MD_PHMMDecoderBlock named 'n' (or named after this block when 'n'
// is empty), pass it to copy_this_block_to() and return the result.
Block* MD_PHMMDecoderBlock::clone(const string &n) const{
  const string clone_name = n.empty() ? getname() : n;
  Block *duplicate = new MD_PHMMDecoderBlock(clone_name);
  return copy_this_block_to(duplicate);
}

// Reset the base class, then verify that all inputs have the same shape.
// On a mismatch the storage sizes of the data, mask and nmask inputs (and of any
// connected bounds inputs) are printed to cerr and a CTKError is thrown.
void MD_PHMMDecoderBlock::reset() {

  BaseMD_HMMDecoderBlock::reset();

  // Check the inputs all have the same shape
  if (input_shape_check()==false) {
    Integer x1=(*input_sockets)[0]->get_data_descriptor()->get_storage();
    Integer x2=(*input_sockets)[1]->get_data_descriptor()->get_storage();
    Integer x3=(*input_sockets)[2]->get_data_descriptor()->get_storage();
    cerr << "BaseMD_HMMDecoderBlock:: Inputs have unequal widths (data=" << x1 << ", mask=" << x2 << ", nmask=" << x3;
    // In this block the bounds arrive on sockets [3] (lower) and [4] (upper);
    // report the width of the same socket whose connection is tested.
    if ((*input_sockets)[3]->connected())
      cerr << ", Lower Bounds=" << (*input_sockets)[3]->get_data_descriptor()->get_storage();
    if ((*input_sockets)[4]->connected())
      cerr << ", Upper Bounds=" << (*input_sockets)[4]->get_data_descriptor()->get_storage();
    cerr << ")" << endl;
    throw(CTKError(__FILE__, __LINE__));
  }
}
 

// Handle one frame: read the data, mask and noise-mask vectors (plus the
// lower/upper bounds when those inputs are connected), pass them to
// process_frame(), then free every vector that was read.
void MD_PHMMDecoderBlock::compute() {
  CTKVector *data=NULL;
  CTKVector *mask=NULL;
  CTKVector *nmask=NULL;
  CTKVector *lower_bounds=NULL;   // stays NULL when in4 is not connected
  CTKVector *upper_bounds=NULL;   // stays NULL when in5 is not connected
  
  (*input_sockets)[0]->get_vector(data);
  (*input_sockets)[1]->get_vector(mask);

  (*input_sockets)[2]->get_vector(nmask);

  if ((*input_sockets)[3]->connected())
    (*input_sockets)[3]->get_vector(lower_bounds);

  if ((*input_sockets)[4]->connected())
    (*input_sockets)[4]->get_vector(upper_bounds);
  
  
  process_frame(*data, mask, nmask, lower_bounds, upper_bounds);

  delete data;
  delete mask;
  // The noise mask is read from its socket exactly like the mask, so it has to be
  // released here too; previously it was leaked on every frame.
  delete nmask;
  delete lower_bounds;
  delete upper_bounds;
  
}

// Return a newly allocated HMMMixtureMD as the mixture prototype for this block.
// The object is created with 'new'; presumably the caller takes ownership - TODO confirm.
HMMMixture *MD_PHMMDecoderBlock::get_HMM_mixture_prototype() const {
  return new HMMMixtureMD();
}

/******************************************************************************/
/*                                                                            */
/*       CLASS NAME: BoundsUD_HMMDecoderBlock                                 */
/*                                                                            */
/******************************************************************************/


// Type name ("HMMDecoderMDSoft") and help text for BoundsUD_HMMDecoderBlock
const string BoundsUD_HMMDecoderBlock::type_name = "HMMDecoderMDSoft";
const string BoundsUD_HMMDecoderBlock::help_text = HMM_DECODER_UDBOUNDS_BLOCK_HELP_TEXT;

// Constructor. Registers the ONE_ZERO_ROUNDING parameter (validated against
// 0.0 and 0.5), sets USE_BOUNDS to true and hides it, and creates four input
// sockets (data, mask, lower_bounds, upper_bounds) and two output sockets.
// Only the lower_bounds input ("in3") is marked optional here.
BoundsUD_HMMDecoderBlock::BoundsUD_HMMDecoderBlock(const string &a_name):CTKObject(a_name),BaseMD_HMMDecoderBlock(a_name, type_name) {

  // Set up ONE_ZERO_ROUNDING parameter
  one_zero_rounding_param = new ParamFloat("ONE_ZERO_ROUNDING",0.0);
  one_zero_rounding_param->set_helptext("This defines a tolerance of between 0.0 and 0.5. Fuzzy mask values that lie within this tolerence of either 0.0 or 1.0 are rounded to 0.0 and 1.0 respectively. <p> Rounding the mask values in this way speeds up the probability calculation, however it may also effect recognition performance.");
  one_zero_rounding_param->install_validator(new Validator(0.0, 0.5));
  parameters->register_parameter(one_zero_rounding_param);
  
  set_parameter("USE_BOUNDS", true);
  set_parameter_hidden("USE_BOUNDS");  // USE_BOUNDS doesn't apply to soft masks ... i.e. they always use bounds


  // Inputs for Data, Mask and Bounds
  make_input_sockets(4);
  
  input_sockets->set_description("in1", "data");
  input_sockets->set_description("in2", "mask");
  input_sockets->set_description("in3", "lower_bounds");
  input_sockets->set_description("in4", "upper_bounds");
  // Only the lower bounds input is optional
  input_sockets->set_optional("in3");

  // Output sockets for state likelihoods and the max likelihood mixture number
  make_output_sockets(2);
  output_sockets->set_description("out1", "state likelihoods");
  output_sockets->set_description("out2", "max likelihood mixture number");
}

void BoundsUD_HMMDecoderBlock::build_output_data_descriptors() {
  
  // Construct a DataDescriptor for 1-dimension likelihood data
  DataDescriptor *dd = new DataDescriptor();
  CTKVector axis;
  for (Integer k=0; k<hmms->get_total_num_states(); ++k)
    axis.push_back(k);
  dd->add_inner_dimension(string("STATE"), axis);   
  (*output_sockets)[0]->set_data_descriptor(dd);
  (*output_sockets)[1]->set_data_descriptor(new DataDescriptor(*dd));
  
}

// Allocate a new BoundsUD_HMMDecoderBlock named 'n' (or named after this block when
// 'n' is empty), pass it to copy_this_block_to() and return the result.
Block* BoundsUD_HMMDecoderBlock::clone(const string &n) const{
  const string clone_name = n.empty() ? getname() : n;
  Block *duplicate = new BoundsUD_HMMDecoderBlock(clone_name);
  return copy_this_block_to(duplicate);
}

void BoundsUD_HMMDecoderBlock::reset() {

  BaseMD_HMMDecoderBlock::reset();

  // Check the inputs all have the same shape
  if (input_shape_check()==false) {
    Integer x1=(*input_sockets)[0]->get_data_descriptor()->get_storage();
    Integer x2=(*input_sockets)[1]->get_data_descriptor()->get_storage();
    cerr << "BaseMD_HMMDecoderBlock:: Inputs have unequal widths (Data=" << x1 << ", mask=" << x2;
    if ((*input_sockets)[2]->connected())
      cerr << ", Lower Bounds=" << (*input_sockets)[2]->get_data_descriptor()->get_storage();
    if ((*input_sockets)[3]->connected())
      cerr << ", Upper Bounds=" << (*input_sockets)[3]->get_data_descriptor()->get_storage();
    cerr << ")" << endl;
    throw(CTKError(__FILE__, __LINE__));
  }
}
 

void BoundsUD_HMMDecoderBlock::compute() {
  CTKVector *data=NULL;
  CTKVector *mask=NULL;
  CTKVector *lower_bounds=NULL;
  CTKVector *upper_bounds=NULL;
  
  (*input_sockets)[0]->get_vector(data);
  (*input_sockets)[1]->get_vector(mask);

  if ((*input_sockets)[2]->connected())
    (*input_sockets)[2]->get_vector(lower_bounds);

  if ((*input_sockets)[3]->connected())
    (*input_sockets)[3]->get_vector(upper_bounds);
  
  if (one_zero_rounding_param->get_value()!=0.0) {
    round_mask(mask, one_zero_rounding_param->get_value());
  }
  
  process_frame(*data, mask, NULL, lower_bounds, upper_bounds);

  delete data;
  delete mask;
  delete lower_bounds;
  delete upper_bounds;
  
}

// Return a newly allocated HMMMixtureMDSoft as the mixture prototype for this block.
// The object is created with 'new'; presumably the caller takes ownership - TODO confirm.
HMMMixture *BoundsUD_HMMDecoderBlock::get_HMM_mixture_prototype() const {
  return new HMMMixtureMDSoft();
}

/******************************************************************************/
/*                                                                            */
/*       CLASS NAME: PMC_HMMDecoderBlock                                 */
/*                                                                            */
/******************************************************************************/

// Type name ("HMMDecoderPMC") and help text for PMC_HMMDecoderBlock
const string PMC_HMMDecoderBlock::type_name = "HMMDecoderPMC";
const string PMC_HMMDecoderBlock::help_text = HMM_DECODER_PMC_BLOCK_HELP_TEXT;

// Constructor. Registers the noise adaptation parameters (ADAPTIVE,
// USE_PATHS_FOR_LIKELIHOODS, TRACKING, MEMORY_FACTOR, BIAS, LIKELIHOOD_THRESHOLD),
// fixes and hides USE_BOUNDS (true) and USE_DELTA_BOUNDS (false), and creates
// six input sockets and five output sockets.
PMC_HMMDecoderBlock::PMC_HMMDecoderBlock(const string &a_name):CTKObject(a_name),BaseMD_HMMDecoderBlock(a_name, type_name) {

  // Set up ADAPTIVE parameter
  adaptive_param = new ParamBool("ADAPTIVE", false);
  adaptive_param->set_helptext("If true then noise model adaptation is switched on");
  parameters->register_parameter(adaptive_param);

  // Set up USE_PATHS_FOR_LIKELIHOODS parameter (held in adaptation_uses_paths_param)
  adaptation_uses_paths_param = new ParamBool("USE_PATHS_FOR_LIKELIHOODS", false);
  adaptation_uses_paths_param->set_helptext("If true then use distribution likelihoods including paths, else use current distribution likelihood");
  parameters->register_parameter(adaptation_uses_paths_param);

  // Set up TRACKING parameter
  tracking_param = new ParamBool("TRACKING", true);
  tracking_param->set_helptext("If true use 'tracking adaptation' else use 'refined estimation' adaptation");
  parameters->register_parameter(tracking_param);

  // Set up MEMORY_FACTOR parameter (validated against 0.0 and 1.0)
  memory_factor_param = new ParamFloat("MEMORY_FACTOR", 0.95);
  memory_factor_param->set_helptext("Controls speed of adaptation x_new = (1-MF) x + MF x_old");
  memory_factor_param->install_validator(new Validator(0.0, 1.0));
  parameters->register_parameter(memory_factor_param);

  // Set up BIAS parameter
  bias_param = new ParamFloat("BIAS", 25.0);
  bias_param->set_helptext("speech/background bias");
  parameters->register_parameter(bias_param);

  // Set up LIKELIHOOD_THRESHOLD parameter
  likelihood_threshold_param = new ParamFloat("LIKELIHOOD_THRESHOLD", 10.0);
  likelihood_threshold_param->set_helptext("The larger the threshold the more speech states are summed");
  parameters->register_parameter(likelihood_threshold_param);

  
  set_parameter("USE_BOUNDS", true);
  set_parameter_hidden("USE_BOUNDS");  // USE_BOUNDS doesn't apply to soft masks ... i.e. they always use bounds

  set_parameter("USE_DELTA_BOUNDS", false);
  set_parameter_hidden("USE_DELTA_BOUNDS");  // USE_DELTA_MARGINALS doesn't apply to PMC ... i.e. they always use these in cases where they are using deltas

  // Inputs for Data, Noise Means, Noise Variance and Bounds, plus one input described as unused
  make_input_sockets(6);
  
  input_sockets->set_description("in1", "data");
  input_sockets->set_description("in2", "noise means");
  input_sockets->set_description("in3", "noise variance");
  input_sockets->set_description("in4", "lower_bounds");
  input_sockets->set_description("in5", "upper_bounds");
  input_sockets->set_description("in6", "(unused)");
  // Only the lower bounds input and the unused input are optional
  input_sockets->set_optional("in4");
  input_sockets->set_optional("in6");

  // Output sockets for state likelihoods, mixture numbers, the mask and the noise estimates
  make_output_sockets(5);
  output_sockets->set_description("out1", "state likelihoods");
  output_sockets->set_description("out2", "max likelihood mixture number");
  output_sockets->set_description("out3", "missing data mask");
  output_sockets->set_description("out4", "noise mean estimate");
  output_sockets->set_description("out5", "noise variance estimate");
}

void PMC_HMMDecoderBlock::build_output_data_descriptors() {
  
  // Construct a DataDescriptor for 1-dimension likelihood data
  DataDescriptor *dd = new DataDescriptor();
  CTKVector axis;
  for (Integer k=0; k<hmms->get_total_num_states(); ++k)
    axis.push_back(k);
  dd->add_inner_dimension(string("STATE"), axis);   
  (*output_sockets)[0]->set_data_descriptor(dd);
  (*output_sockets)[1]->set_data_descriptor(new DataDescriptor(*dd));
  
  DataDescriptor *dd1 = new DataDescriptor();
  CTKVector axis1;
  
  int width =   (*input_sockets)[0]->get_data_descriptor()->get_storage();
  
  for (Integer kk=0; kk<width; ++kk)
    axis1.push_back(kk);

  dd1->add_inner_dimension(string("FREQ"), axis1);   
  (*output_sockets)[2]->set_data_descriptor(new DataDescriptor(*dd1));
  (*output_sockets)[3]->set_data_descriptor(new DataDescriptor(*dd1));
  (*output_sockets)[4]->set_data_descriptor(new DataDescriptor(*dd1));
  
}

// Allocate a new PMC_HMMDecoderBlock named 'n' (or named after this block when 'n'
// is empty), pass it to copy_this_block_to() and return the result.
Block* PMC_HMMDecoderBlock::clone(const string &n) const{
  const string clone_name = n.empty() ? getname() : n;
  Block *duplicate = new PMC_HMMDecoderBlock(clone_name);
  return copy_this_block_to(duplicate);
}

// Reset the block ready for a new run.
//
// Marks the next frame as the first, copies the value of use_deltas_param into
// USE_DELTA_BOUNDS, resets the base class, verifies that all inputs have the same
// shape (printing the widths and throwing CTKError on a mismatch), and
// re-initialises the adaptation state from the current parameter values.
void PMC_HMMDecoderBlock::reset() {

  first_frame=true;  // Next frame is the first frame to be processed
  
  // USE_DELTA_BOUNDS follows use_deltas_param; the parameters are unlocked
  // around the update and relocked afterwards.
  unlock_parameters();  
  set_parameter("USE_DELTA_BOUNDS", use_deltas_param->get_value());
  relock_parameters();
 
  BaseMD_HMMDecoderBlock::reset();

  // Check the inputs all have the same shape
  if (input_shape_check()==false) {
    Integer x1=(*input_sockets)[0]->get_data_descriptor()->get_storage();
    Integer x2=(*input_sockets)[1]->get_data_descriptor()->get_storage();
    Integer x3=(*input_sockets)[2]->get_data_descriptor()->get_storage();
    cerr << "BaseMD_HMMDecoderBlock:: Inputs have unequal widths (Data=" << x1 << ", noise means=" << x2 << ", noise variance = " << x3 << "\n";
    // In this block the bounds arrive on sockets [3] (lower) and [4] (upper);
    // report the width of the same socket whose connection is tested.
    if ((*input_sockets)[3]->connected())
      cerr << ", Lower Bounds=" << (*input_sockets)[3]->get_data_descriptor()->get_storage();
    if ((*input_sockets)[4]->connected())
      cerr << ", Upper Bounds=" << (*input_sockets)[4]->get_data_descriptor()->get_storage();
    cerr << ")" << endl;
    throw(CTKError(__FILE__, __LINE__));
  }

  // Cache the adaptation settings
  memory_factor = memory_factor_param->get_value();

  nprob_scale = bias_param->get_value();
  likelihood_threshold = likelihood_threshold_param->get_value();
  adaptation_uses_paths = adaptation_uses_paths_param->get_value();
  
  // NOTE(review): any vector previously held in 's' is not freed here - confirm
  // it is released elsewhere (e.g. in close()).
  s = new CTKVector();

  // Counters start at 10; the size 64 is hard-coded
  nvarcount.resize(64);
  for (int i=0;i<64; ++i)
    nvarcount[i]=10;
  ncount=10;
}
 

// Process one frame.
//
// Inputs (by socket index, as read below):
//   0: data frame            1: noise means          2: noise variances
//   3: lower bounds (opt.)   4: upper bounds (opt.)  5: mask passed to process_frame (opt.)
// Outputs (by socket index, only written when connected):
//   2: soft speech/noise mask   3: current noise means   4: current noise variances
//
// Steps:
//   1. Fetch the inputs; build a noise mixture from the current noise means and
//      variances and hand it to the HMM set as the parallel mixture.
//   2. Run process_frame() on the data.
//   3. Over all emitting nodes scoring within likelihood_threshold of the best
//      node, accumulate per-channel "speech" (sprob) and "noise" (nprob) scores
//      and derive a mask from them.
//   4. If adaptive_param is set, update the noise means/variances in place for
//      use on the next frame.
void PMC_HMMDecoderBlock::compute() {
  CTKVector *dummy=NULL;
  CTKVector *data=NULL;
  CTKVector *lower_bounds=NULL;
  CTKVector *upper_bounds=NULL;
  CTKVector *fmask=NULL;
  
  (*input_sockets)[0]->get_vector(data);

  // The noise statistics are taken from the inputs on every frame in
  // non-adaptive mode, but only on the first frame in adaptive mode; on later
  // adaptive frames the inputs are still read (and discarded) and the
  // previously stored, adapted `means`/`variance` are used instead.
  if (first_frame || adaptive_param->get_value()==false) {
    (*input_sockets)[1]->get_vector(means);
    (*input_sockets)[2]->get_vector(variance);
  } else {
    (*input_sockets)[1]->get_vector(dummy);
    delete dummy;
    (*input_sockets)[2]->get_vector(dummy);
    delete dummy;
  }

  // First adaptive frame: seed `s` with variance + mean^2 per channel
  // (the adaptation code below recovers variance as s - mean^2).
  // NOTE(review): the channel count 32 is hard-coded here and in the
  // adaptation loops below rather than derived from the input width -- confirm
  // the inputs always have at least 32 elements.
  if (first_frame && adaptive_param->get_value()==true) {
    for (int i=0; i<32; i++) { 
      float v=(*variance)[i];
      float m=(*means)[i];
      s->push_back(v+m*m);
    }

  }
  
  first_frame=false;

  // Optional inputs: the pointers stay NULL when the socket is unconnected
  if ((*input_sockets)[3]->connected())
    (*input_sockets)[3]->get_vector(lower_bounds);

  if ((*input_sockets)[4]->connected())
    (*input_sockets)[4]->get_vector(upper_bounds);

  if ((*input_sockets)[5]->connected())
    (*input_sockets)[5]->get_vector(fmask);


  int width = (*input_sockets)[0]->get_data_descriptor()->get_storage();
  
  // variances are stored as inverse variances (1/variance) so reciprocate variances and cast to HMMFloats
  vector<HMMFloat> ivariance;
  for (vector<Float>::iterator varp=variance->begin(), varp_end=variance->end(); varp!=varp_end; ++varp)
    ivariance.push_back(1.0/ *varp);    

  // Cast means to HMMFloats 
  vector<HMMFloat> fmeans;
  for (vector<Float>::iterator meanp=means->begin(), meanp_end=means->end(); meanp!=meanp_end; ++meanp)
    fmeans.push_back(*meanp);    

  // Build the noise model for this frame from the current noise statistics
  shared_ptr<HMMMixture> noise_mixture(new HMMMixtureStandard(fmeans, ivariance));
  noise_mixture->set_use_marginals(true);
  noise_mixture->set_use_deltas(get_has_deltas());
  noise_mixture->set_use_delta_marginals(get_has_deltas());
  
  //  cerr << *noise_mixture;
  
  shared_ptr<FeatureVector> feature_vector(new FeatureVector(*data, lower_bounds, upper_bounds, get_has_deltas()));
  
  feature_vector->set_marginal_normalisation(false); 
  

  // Evaluate the noise model on this frame and register it with the HMM set
  noise_mixture->set_observed_data(feature_vector);
  noise_mixture->calculate_full_marginal();
  noise_mixture->calculate_full_likelihood();
  hmms->set_parallel_mixture(noise_mixture);  

  
  process_frame(*data, fmask, NULL, lower_bounds, upper_bounds, false); /* false=>Do not use marginal normalisation */  
  

  const EmittingNodeBase *emitting_node;
  const HMMState *top_state;

  float best_score;

  // Reference score: either the best token (path) score or the best
  // instantaneous state probability, depending on adaptation_uses_paths.
  if (adaptation_uses_paths) {
    emitting_node =  get_decoder()->getTopEmittingNode();
    top_state = emitting_node->getHMMState();
    best_score=(emitting_node)->getToken().getScore();
  } else {
    emitting_node =  get_decoder()->getInstantTopEmittingNode();
    top_state = emitting_node->getHMMState();
    best_score=top_state->get_prob();
  }

  // Per-channel accumulators, combined below with logT8_add()
  vector<float> sprob(width,0.0);
  vector<float> nprob(width,0.0);
  //  vector<float> sprob(width,-10000.0);
  //  vector<float> nprob(width,-10000.0);

  // [0,d1) is covered by the first inner loop below, [d1,d2) by the second;
  // d1 is half the width when the block has deltas.
  int d1 = width;
  int d2 = width;
  if (get_has_deltas()) d1=d1/2;
  

  list<EmittingNodeBase*>::const_iterator enp=get_decoder()->getEmittingNodesBegin();
  list<EmittingNodeBase*>::const_iterator enp_end=get_decoder()->getEmittingNodesEnd();

  float score;
  float total_delta_score = -100000.0;


  // Only used to call logT8_add() in the first pass
  HMMMixtureStandard dummy_mix;
  
  // Pass 1: combine (score - best_score) over all nodes within the likelihood
  // threshold; the result is subtracted from each node's contribution in pass 2.
  for (; enp!=enp_end; ++enp) {
    if (adaptation_uses_paths)
      score=(*enp)->getToken().getScore();              // Path score
    else
      score=(*enp)->getHMMState()->get_prob();    // Instant score
    
    float delta_score=score-best_score;
    if (delta_score>-likelihood_threshold) {
      //      cerr << ".";   // JON
      //      total_delta_score=log_add(total_delta_score,delta_score);
      total_delta_score=dummy_mix.logT8_add(total_delta_score,delta_score);

    }
  }

  //  cerr << " " << total_delta_score << "\n"; // JON

  enp=get_decoder()->getEmittingNodesBegin();
  enp_end=get_decoder()->getEmittingNodesEnd();

  // Pass 2: for every retained node and every mixture of its state, add a
  // term to each channel's speech and noise accumulators.
  for (; enp!=enp_end; ++enp) {
    if (adaptation_uses_paths)
      score=(*enp)->getToken().getScore();              // Path score
    else
      score=(*enp)->getHMMState()->get_prob();    // Instant score
    float delta_score=score-best_score;

    top_state = (*enp)->getHMMState();

    if (delta_score>-likelihood_threshold) {
      
      for (int i=0; i<top_state->get_num_mixes(); ++i) {
	float weight=top_state->get_mixture_weight(i);
	const HMMMixture * mix = top_state->get_mixture(i);
	
	vector<HMMFloat>::const_iterator nlp =noise_mixture->get_likelihoods_begin();
	vector<HMMFloat>::const_iterator slp =mix->get_likelihoods_begin();
	vector<HMMFloat>::const_iterator nmp = noise_mixture->get_marginals_begin();
	vector<HMMFloat>::const_iterator smp =mix->get_marginals_begin();
	
	// sprob pairs the state mixture's likelihood with the noise marginal;
	// nprob pairs the state mixture's marginal with the noise likelihood.
	for(int j=0; j<d1; ++j, ++nmp, ++nlp, ++smp, ++slp) { 
	  sprob[j]=mix->logT8_add(sprob[j], *slp+*nmp+weight+delta_score-total_delta_score);
	  nprob[j]=mix->logT8_add(nprob[j], *smp+*nlp+weight+delta_score-total_delta_score);
	  //	  	  sprob[j]=log_add(sprob[j], *slp+*nmp+weight+delta_score-total_delta_score);
	  //	  	  nprob[j]=log_add(nprob[j], *smp+*nlp+weight+delta_score-total_delta_score);
	}
	
	// NOTE(review): this branch tests get_use_deltas() whereas d1 above is
	// derived from get_has_deltas() -- confirm the two are meant to differ.
	// NOTE(review): nmp/smp are re-declared here (shadowing the outer
	// iterators) and so restart from the beginning of the marginals, while
	// nlp/slp carry on from where the first loop stopped -- confirm that the
	// marginals are laid out so that this is intended.
	if (get_use_deltas()) {
	  vector<HMMFloat>::const_iterator nmp = noise_mixture->get_marginals_begin();
	  vector<HMMFloat>::const_iterator smp =mix->get_marginals_begin();
	  
	  for(int j=d1; j<d2; ++j, ++nmp, ++nlp, ++smp, ++slp) { 
	    sprob[j]=mix->logT8_add(sprob[j], *slp+*nmp+weight+delta_score-total_delta_score);
	    nprob[j]=mix->logT8_add(nprob[j], *smp+*nlp+weight+delta_score-total_delta_score);
	    //	    	    sprob[j]=log_add(sprob[j], *slp+*nmp+weight+delta_score-total_delta_score);
	    //	    	    nprob[j]=log_add(nprob[j], *smp+*nlp+weight+delta_score-total_delta_score);
	  }
	  
	}
	
      }
    }
  }

  
  // mask:  soft value sprob / (sprob + nprob*nprob_scale); may be NaN (handled
  //        for the first 32 channels in the adaptation loop below).
  // maskb: hard decision, true where sprob exceeds the scaled nprob.
  vector<Float> mask(width,0.0);
  vector<bool> maskb(width,0);
  //  cerr << total_delta_score << " : ";
  for(int k=0; k<width; ++k) {
    //    cerr << sprob[k] << " (" << nprob[k] << ") ";
    mask[k]= sprob[k]/(sprob[k]+(nprob[k]*nprob_scale ));  
    maskb[k]= (sprob[k]>(nprob[k]*nprob_scale ));  

    //    mask[k]= (sprob[k]>(nprob[k]+nprob_scale ));  
    //    maskb[k]= (sprob[k]>(nprob[k]+nprob_scale ));  
  }   
  //  cerr << "\n";
  
  if (adaptive_param->get_value()==true)  {
    // Adapt means and variances for next frame
    vector<HMMFloat> expected = noise_mixture->get_expected_values(maskb);
    for (int i=0; i<32; i++) {
      // If noise observed then use observed value, else use expected value beneath mask
      HMMFloat x=(!maskb[i])?(*data)[i]:expected[i];
      // Replace NaN mask values with 0 so they are not sent to the output
      if (isnan(mask[i])) {
	mask[i]=0.0;
      }



      if (tracking_param->get_value()==true) {
	// Tracking style adaptation
	// (exponential forgetting with weight memory_factor on the old value)
	// NOTE(review): unlike the branch below there is no variance floor here.
	(*means)[i]= memory_factor * (*means)[i] + (1-memory_factor) * x;
	
	// The second moment and variance are only updated where maskb is false
	if (!maskb[i]) {
	  (*s)[i] = memory_factor * (*s)[i] + (1-memory_factor) * x * x;
	  (*variance)[i] = (*s)[i] - (*means)[i] * (*means)[i];
	}
      } else {
	// Refined estimation style adaptation
	// (running average weighted by the counts ncount / nvarcount[i])
	(*means)[i]= (ncount * (*means)[i] + x) / (ncount+1);
	
	// NOTE(review): mask[i] was already cleared of NaN above, so the
	// !isnan(mask[i]) test here is always true.
	if (!maskb[i] && !isnan(mask[i])) {
	  (*s)[i] = (nvarcount[i] * (*s)[i] + x * x) / (nvarcount[i]+1);
	  (*variance)[i] = nvarcount[i] * ((*s)[i] - (*means)[i] * (*means)[i])  / (nvarcount[i]-1);
	  if ((*variance)[i]<0.1) (*variance)[i]=0.1;    // Variance floor - prevents rounding errors driving variance -ve
	}
	
      }
    }

    // Advance the counts: ncount every frame, nvarcount[i] only for channels
    // where maskb is false on this frame.
    ++ncount;
    for (int i=0; i<32; i++) {
      if (!maskb[i]) 
	nvarcount[i]++;
    }

  }



  // Send update mask to output socket
  if ((*output_sockets)[2]->connected()) {
    (*output_sockets)[2]->put_vector(new vector<Float>(mask));  
  }

  // Send noise mean estimate to output socket
  if ((*output_sockets)[3]->connected()) {
    (*output_sockets)[3]->put_vector(new vector<Float>(*means));  
  }

  // Send noise variance estimate to output socket
  if ((*output_sockets)[4]->connected()) {
    (*output_sockets)[4]->put_vector(new vector<Float>(*variance));  
  }

  // In non-adaptive mode the statistics are re-read every frame, so release
  // this frame's copies; in adaptive mode they are kept for the next frame.
  if (adaptive_param->get_value()==false) {
    delete means;
    delete variance;
  } 

  // NOTE(review): fmask is not deleted here although data and the bounds
  // vectors are -- confirm whether process_frame() takes ownership of it.
  delete data;
  delete lower_bounds;
  delete upper_bounds;
  
}




// Return a newly allocated HMMMixturePMC through a HMMMixture pointer.
HMMMixture *PMC_HMMDecoderBlock::get_HMM_mixture_prototype() const {
  HMMMixture *prototype = new HMMMixturePMC();
  return prototype;
}
 /* End of ctk_HMM_decoder.cpp */
