/****************************************************************************/
/*									      */
/*	ctk_parse.cpp	    						      */
/*									      */
/*	Implementation for Parse - parses an HTK format grammar file	      */
/*									      */
/*	Author: Jon Barker, Sheffield University			      */
/*									      */
/*      CTK VERSION 1.3.5  Apr 22, 2007			      */
/*									      */
/******************************************************************************/

#include "ctk-config.h"

#include "ctk_parse.hh"

#include <list>
#include <string>
#include <fstream>

#include <functional>
#include <algorithm>

#include "ctk_local.hh"
#include "ctk_error.hh"
 
#include "ctk_function_classes.hh"

#define CTK_CHAR_BUFFER_SIZE 1024
#define CTK_MAX_FSG_EXPANSION_DEPTH 1000

/******************************************************************************/
/*									      */
/*	CLASS NAME: FiniteStateGrammar	         				      */
/*									      */
/******************************************************************************/

// Builds a grammar from the grammar description read from 'ifstr'.
//
// Each input line is handed to parseAssignment(). A line of the form
// 'name = expression' with a non-empty name and expression is stored as a
// named node; a line that is not an assignment is stored under the empty
// name. The first node with the empty name becomes the base node: its link
// nodes are expanded from the stored nodes, redundant brackets are removed
// and the result is printed to cout.
//
// Throws CTKError if
//   - a name is defined more than once,
//   - the input could not be read through to end-of-file (for example a line
//     of CTK_CHAR_BUFFER_SIZE or more characters, or a stream in a failed
//     state), or
//   - no node with the empty name was found.
FiniteStateGrammar::FiniteStateGrammar(std::istream &ifstr){

  char buffer[CTK_CHAR_BUFFER_SIZE];
  char name[CTK_CHAR_BUFFER_SIZE];
  char value[CTK_CHAR_BUFFER_SIZE];

  // Scratch pointer: 'vp=...' below yields an lvalue 'char *' for the node
  // constructor (presumably it takes a 'char *&' cursor - see header).
  char *vp;

  while (ifstr.getline(buffer,CTK_CHAR_BUFFER_SIZE,'\n')) {
    if (parseAssignment(buffer, name, value)) {
      if (strlen(name)!=0 && strlen(value)!=0) {
        if (find_if(nodeList.begin(), nodeList.end(), bind2nd(mem_fun(&GrammarNode::isNamed),name))==nodeList.end())
          nodeList.push_back(new GrammarSimpleFactorNode(vp=value, name));
        else {
          cerr << "Error: Multiple definitions for grammar item named: " << name << endl;
          throw(CTKError(__FILE__,__LINE__));
        }
      }
    } else nodeList.push_back(new GrammarSimpleFactorNode(vp=buffer, "\0"));
  }

  // getline() fails without reaching end-of-file when a line does not fit in
  // the buffer or the stream is unusable. Without this check the remainder
  // of the grammar would be dropped silently.
  if (!ifstr.eof()) {
    cerr << "Error: Failed to read grammar: line exceeds " << (CTK_CHAR_BUFFER_SIZE-1)
         << " characters or stream read error." << endl;
    throw(CTKError(__FILE__,__LINE__));
  }

  base_node = getNodeNamed("\0");

  if (base_node!=NULL) {
    base_node->expandNode(nodeList);

    base_node->simplifyBrackets();
    base_node->print(cout);
    cout << endl;
  } else {
    cerr << "Grammar file contains incomplete grammar description.\r\n";
    throw(CTKError(__FILE__,__LINE__));
  }
}


// Releases the base node of the grammar.
// NOTE(review): the remaining nodes that the constructor stored in nodeList
// are not deleted here - confirm whether that is intentional.
FiniteStateGrammar::~FiniteStateGrammar()
{
  delete base_node;
}


// Looks up a top-level grammar node by name.
// Returns the first entry of nodeList whose getname() equals 'aname',
// or NULL when there is no such entry.
GrammarNode* FiniteStateGrammar::getNodeNamed(const string &aname) {
  list<GrammarNode *>::iterator candidate=nodeList.begin();

  while (candidate!=nodeList.end()) {
    GrammarNode *node=*candidate;
    if (node->getname()==aname)
      return node;
    ++candidate;
  }

  return NULL;
}

// Constructs the decoder network that represents this grammar.
// The network is built inside 'network', between the pair of nodes given by
// 'node_pair', by a recursive descent of the grammar tree starting at the
// base node. Always returns CTK_SUCCESS.
CTKStatus FiniteStateGrammar::buildNetwork(Decoder &network, NetworkNodePair node_pair) {
  const bool join_to_end=true;

  base_node->expandNetwork(network, node_pair, join_to_end);

  return CTK_SUCCESS;
}

// Private methods

// Splits a line of the form 'name = value' into its two parts.
//
// 'name' receives the single whitespace-free token found before the first
// '=' (whitespace around it is dropped); 'value' receives everything that
// follows that '=' unchanged. No bounds checking is done, so both output
// buffers must be at least as large as 'line' including its terminator.
//
// Returns true if the line is an assignment. Returns false if the line ends
// before an '=' is seen or if more than one token precedes the '='; in that
// case 'name' is terminated after whatever was copied and 'value' is left
// untouched.
bool FiniteStateGrammar::parseAssignment(char *line, char *name, char *value) {
  const char *lp = line;
  bool seen_white_space=false, seen_non_white_space=false;

  for (; *lp!='='; ++lp) {
    if (*lp==0) {           // end of line reached without finding '='
      *name=0;
      return false;
    }

    // <cctype> functions require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour.
    if (isspace(static_cast<unsigned char>(*lp))) {
      seen_white_space=true;
      continue;
    }

    if (seen_white_space&&seen_non_white_space) {   // second token before '='
      *name=0;
      return false;
    }

    seen_non_white_space=true;
    *name++=*lp;
  }
  *name=0;
  ++lp;                     // step over the '='

  while (*lp!=0)
    *value++=*lp++;
  *value=0;

  return true;
}
  
  
/******************************************************************************/
/*									      */
/*	CLASS NAME: GrammarNode	               				      */
/*									      */
/******************************************************************************/

// Default constructor: applies the common initialisation done by init().
GrammarNode::GrammarNode()
{
  init();
}

// Constructs a node called 'aname' by parsing the expression at 'value'.
// The stop character is set to 0, so parsing runs to the end of the string;
// 'value' is advanced by parse() as the text is consumed.
// NOTE(review): if parse() is virtual, a call made from this constructor
// resolves to GrammarNode::parse rather than a derived override - confirm
// that this is what is wanted.
GrammarNode::GrammarNode(char* &value, const string &aname){
  init();

  set_stop_char(0);
  name=aname;

  parse(value);
}

// Common constructor initialisation: a new node starts out as a
// non-alternative node.
void GrammarNode::init()
{
  is_alternative=false;
}

// Copy constructor: copies the stop character, the alternative flag and the
// name, and fills nodeList with a clone() of every child of 'node'.
GrammarNode::GrammarNode(const GrammarNode &node): a_stop_char(node.a_stop_char), is_alternative(node.is_alternative) {

  name=node.name;

  list<GrammarNode *>::const_iterator child=node.nodeList.begin();
  while (child!=node.nodeList.end()) {
    GrammarNode *copy=(*child)->clone();
    nodeList.push_back(copy);
    ++child;
  }
}

// Destructor. Deliberately does nothing: the child nodes held in nodeList
// are NOT deleted here (the call that used to do so is disabled below).
GrammarNode::~GrammarNode()
{
  // sequence_delete(nodeList.begin(), nodeList.end());
}


void GrammarNode::simplifyBrackets() {
  deleteRedundantDoubleBrackets();
  deleteRedundantLeafBrackets();
}

// Condenses redundant double brackets e..g simplifies  (()) to ( ) 
void GrammarNode::deleteRedundantDoubleBrackets() {
  for (list<GrammarNode *>::const_iterator np=nodeList.begin(); np!=nodeList.end(); ++np) 
    (*np)->deleteRedundantDoubleBrackets();

  for (list<GrammarNode *>::iterator np=nodeList.begin(); np!=nodeList.end(); ++np) {
    if((*np)->isAFactorNode() && ((*np)->nodeList.size()==1) &&  (*((*np)->nodeList.begin()))->isAFactorNode()) {
      GrammarNode *n1=*np, *n2=*((*np)->nodeList.begin());
      GrammarNode *new_node;
      
      // Rules of factor combination...
      if (n1->isAnOptionalRepeatableFactorNode() || n2->isAnOptionalRepeatableFactorNode())
	new_node = new GrammarOptionalRepeatableFactorNode();
      else if (n1->isAnOptionalFactorNode() && n2->isARepeatableFactorNode())
	new_node = new GrammarOptionalRepeatableFactorNode();
      else if (n1->isARepeatableFactorNode() && n2-> isAnOptionalFactorNode())
	new_node = new GrammarOptionalRepeatableFactorNode();
      else if (n1->isARepeatableFactorNode() || n2->isARepeatableFactorNode())
	new_node = new GrammarRepeatableFactorNode();
      else if (n1->isAnOptionalFactorNode() || n2->isAnOptionalFactorNode())
	new_node = new GrammarOptionalFactorNode();
      else {
	new_node = new GrammarSimpleFactorNode();
      }
      
      new_node->grabNodesFrom(*((*np)->nodeList.begin()));
      new_node->setAlternative(n2->isAnAlternativeNode());
      
      delete (n1);
      delete (n2);
      *np=new_node;
    }
  }
    
}

// Removes simple factor brackets i.e. () if they contain only a single leaf node
// e.g  '(4)' is replaced with '4'
void GrammarNode::deleteRedundantLeafBrackets() {

  for (list<GrammarNode *>::const_iterator np=nodeList.begin(); np!=nodeList.end(); ++np) 
    (*np)->deleteRedundantLeafBrackets();

  for (list<GrammarNode *>::iterator np=nodeList.begin(); np!=nodeList.end(); ++np) {
    if((*np)->isASimpleFactorNode() && ((*np)->nodeList.size()==1) &&  (*((*np)->nodeList.begin()))->isALeafNode()) {
      GrammarNode *simple_factor_node=*np;
      GrammarNode *leaf_node=*((*np)->nodeList.begin());
     
      *np=leaf_node; // Replace the simple factor node with the leaf it contained
      simple_factor_node->removeChildren(); // Remove the leaf from the simple factor -don't delete them
      delete (simple_factor_node);  // Delete the simple factor
    }
  }
    
}


// protected and private methods

// Takes over all children of 'anode': they are appended, in order, to this
// node's nodeList and 'anode' is left with an empty nodeList.
void GrammarNode::grabNodesFrom(GrammarNode *anode) {
  nodeList.splice(nodeList.end(), anode->nodeList);
}

// Parses the expression at 'value' into child nodes of this node.
//
// Reading continues until the node's stop character is met. Whitespace is
// skipped, '|' marks the next node as an alternative, and any other
// character starts a child node whose type is chosen by that character:
//   '$' link node, '(' simple factor, '[' optional factor,
//   '{' optional repeatable factor, '<' repeatable factor,
//   anything else a leaf node.
// The child constructors receive 'value' and are expected to advance it past
// the text they consume. On return 'value' is positioned after the closing
// bracket if the stop character was one of '}', ']', '>' or ')'.
//
// Throws CTKError if the string ends before the stop character is found, or
// (via addNode) if serial and parallel factors are mixed.
void GrammarNode::parse(char *&value) {

  bool alternative=false;

  while (*value!= stop_char()) {
    if (*value==0) {
      cerr << "SYNTAX ERROR !" << endl;
      throw(CTKError(__FILE__,__LINE__));
    }

    // <cctype> functions require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour.
    if (isspace(static_cast<unsigned char>(*value))) {
      ++value;
      continue;
    }

    switch (*value) {
    case '|':
      alternative=true;
      ++value;
      break;
    case '$':
      addNode(new GrammarLinkNode(value, stop_char()), alternative);
      break;
    case '(':
      addNode(new GrammarSimpleFactorNode(value), alternative);
      break;
    case '[':
      addNode(new GrammarOptionalFactorNode(value), alternative);
      break;
    case '{':
      addNode(new GrammarOptionalRepeatableFactorNode(value), alternative);
      break;
    case '<':
      addNode(new GrammarRepeatableFactorNode(value), alternative);
      break;
    default:
      addNode(new GrammarLeafNode(value, stop_char()), alternative);
      break;
    }
  }

  // Step over the bracket that terminated this expression
  if (*value=='}' || *value==']' || *value=='>' || *value==')') ++value;
}

// Appends 'a_node' to this node's children.
//
// 'alternative' says whether the new child was introduced with '|'. Once
// the node already holds more than one child, the flag has to agree with the
// node's current alternative state, otherwise CTKError is thrown. The node's
// alternative state is then set from the flag and the caller's flag is reset
// to false ready for the next child.
void GrammarNode::addNode(GrammarNode *a_node, bool &alternative) {

  const bool mixed_factors = (nodeList.size()>1) && alternative!=isAnAlternativeNode();
  if (mixed_factors) {
    cerr << "Error: mixed serial and parallel factors in expression. Need more brackets." << endl;
    throw(CTKError(__FILE__,__LINE__));
  }

  setAlternative(alternative);
  alternative=false;

  nodeList.push_back(a_node);
}

// Replaces link nodes with the bits of grammar they represent.
//
// Every child of this node that is a link node is replaced by a clone of the
// first node in 'aNodeList' that has the same name; the replaced link node is
// deleted. The expansion then recurses into all children with 'depth'
// increased by one.
//
// Throws CTKError if 'depth' exceeds CTK_MAX_FSG_EXPANSION_DEPTH (which
// suggests a recursive definition) or if a link names a node that is not in
// 'aNodeList'.
void GrammarNode::expandNode(list<GrammarNode *> aNodeList, int depth) {

  if (depth>CTK_MAX_FSG_EXPANSION_DEPTH) {
    cerr << "Cannot expand grammar: Expansion depth exceeded. Recursive definition?" << endl;
    throw(CTKError(__FILE__,__LINE__));
  }

  for (list<GrammarNode *>::iterator np=nodeList.begin(); np!=nodeList.end(); ++np) {
    if (!(*np)->isALinkNode())
      continue;

    GrammarNode *link_node=*np;

    // Find the definition this link refers to
    list<GrammarNode *>::iterator definition=aNodeList.begin();
    while (definition!=aNodeList.end() && !((*definition)->getname()==link_node->getname()))
      ++definition;

    if (definition==aNodeList.end()) {
      cerr << "Cannot find definition of grammar item named: " << link_node->getname() << endl;
      throw(CTKError(__FILE__,__LINE__));
    }

    *np=(*definition)->clone();
    // The link node is no longer reachable once it has been replaced, so it
    // must be freed here or it is leaked.
    delete link_node;
  }

  // Recurse to next level
  for (list<GrammarNode *>::iterator np=nodeList.begin(); np!=nodeList.end(); ++np)
    (*np)->expandNode(aNodeList, depth+1);

}

// Reads a name from 'value' into this node's name.
//
// Characters are consumed up to, but not including, the first stop
// character, whitespace character or '|'. 'value' is left pointing at that
// terminating character. Throws CTKError if the end of the string is reached
// before one of those terminators.
void GrammarNode::parseName(char *&value) {
  name="\0";
  // isspace() requires a value representable as unsigned char; passing a
  // negative plain char is undefined behaviour.
  while (*value!=stop_char() && !isspace(static_cast<unsigned char>(*value)) && *value!='|') {
    if (*value==0) {
      cerr << "SYNTAX ERROR" << endl;
      throw(CTKError(__FILE__,__LINE__));
    }
    name+=*value++;
  }
}

// Prints this node to 'outfile' by forwarding to the four-argument print()
// with a space for each of the three character arguments.
void GrammarNode::print(ostream &outfile) {
  const char blank=' ';
  print(outfile, blank, blank, blank);
}


void GrammarNode::countNodes(int &n) {
  ++n;
  for (list<GrammarNode *>::iterator np=nodeList.begin(); np!=nodeList.end(); ++np) 
    (*np)->countNodes(n);
}

// Expands every child of this node into 'network', in order.
//
// Each child is given the node pair returned by the child before it. A child
// is asked to join to the end of the pair when 'join_to_end' is set, or when
// it is the last child in the list. Returns the node pair handed back by the
// final child ('node_pair' unchanged if there are no children).
NetworkNodePair GrammarNode::expandNetwork(Decoder &network, NetworkNodePair node_pair, bool join_to_end) {

  for (list<GrammarNode *>::const_iterator child=nodeList.begin(); child!=nodeList.end(); ++child) {
    GrammarNode *node=*child;
    const bool child_joins_end = join_to_end||(node==nodeList.back());
    node_pair=node->expandNetwork(network, node_pair, child_joins_end);
  }

  return node_pair;
}

/******************************************************************************/
/*									      */
/*	CLASS NAME: GrammarFactorNode	               				      */
/*									      */
/******************************************************************************/

// Destructor: nothing to release at this level.
GrammarFactorNode::~GrammarFactorNode()
{
}

// Expands this factor into 'network'.
//
// A new pair of network nodes is created for the factor, using its
// is_optional and is_repeatable flags, and the factor's children are then
// expanded inside that new pair. When 'join_to_end' is set the new pair is
// inserted between the start and end of 'node_pair'; otherwise it is
// inserted at 'node_pair.start' and the start is moved on to the end of the
// new pair, so that the next item is chained after this factor.
//
// Returns the node pair to use for the next insertion.
NetworkNodePair GrammarFactorNode::expandNetwork(Decoder &network, NetworkNodePair node_pair, bool join_to_end) {

  NetworkNodePair factor_pair;

  if (!join_to_end) {
    // Chained insertion:  O   ==>  0<--o  o
    factor_pair=network.insertNodePairAt(node_pair.start, is_optional, is_repeatable);
    node_pair.start=factor_pair.end;   // the next item in the chain starts where this factor ends
  } else {
    // Insertion spanning the pair:  O  O  ==>  O<--o  o<--O
    factor_pair=network.insertNodePairBetween(node_pair, is_optional, is_repeatable);
  }

  // Expand all children inside the new pair; whether this node is an
  // alternative node decides if each child joins to the end of that pair.
  GrammarNode::expandNetwork(network, factor_pair, isAnAlternativeNode());

  return node_pair;
}

/******************************************************************************/
/*									      */
/*	CLASS NAME: GrammarLinkNode	               				      */
/*									      */
/******************************************************************************/
// Parses a link: steps over the leading character (the '$' that introduces
// a link) and reads the name that follows it.
void GrammarLinkNode::parse(char *&value)
{
  ++value;
  parseName(value);
}


/******************************************************************************/
/*									      */
/*	CLASS NAME: GrammarLeafNode	               				      */
/*									      */
/******************************************************************************/

// Parses a leaf: the text at 'value' is read as this node's name.
void GrammarLeafNode::parse(char *&value)
{
  parseName(value);
}

// Not supported for leaf nodes: reports the error on cerr and always throws
// CTKError.
void GrammarLeafNode::setAlternative(bool)
{
  cerr << "Error: Cannot set alternative flag for a leaf node.\n\r";
  throw CTKError(__FILE__,__LINE__);
}

// Adds the HMM named by this leaf to 'network'.
//
// When 'join_to_end' is set the HMM is added between the start and end of
// 'node_pair' and the pair is returned unchanged. Otherwise the HMM is added
// at 'node_pair.start' and the start of the returned pair is replaced by the
// node that addHMMByNameAt() hands back.
NetworkNodePair GrammarLeafNode::expandNetwork(Decoder &network, NetworkNodePair node_pair, bool join_to_end) {

  if (join_to_end) {
    // O  O  ==>  O<--o  o<--O
    network.addHMMByNameBetween(getname(), node_pair.start, node_pair.end);
    return node_pair;
  }

  // O   ==>  0<--o  o
  node_pair.start=network.addHMMByNameAt(getname(), node_pair.start);
  return node_pair;
}

/* End of ctk_parse.cpp */
