Logo Search packages:      
Sourcecode: lttoolbox version File versions  Download package

FSTProcessor.H

/*
 * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */
#ifndef _FSTPROCESSOR_
#define _FSTPROCESSOR_

#include <cstdio>
#include <map>
#include <queue>
#include <set>
#include <string>

#include <lttoolbox/Alphabet.H>
#include <lttoolbox/Buffer.H>
#include <lttoolbox/Ltstr.H>
#include <lttoolbox/State.H>
#include <lttoolbox/TransExe.H>

using namespace std;
/**
 * Class that implements the FST-based modules of the system
 */
00038 class FSTProcessor
{
private:
  /**
   * Transducers in FSTP
   */
00044   map<string, TransExe, Ltstr> transducers;
  
  /**
   * Current state of lexical analysis
   */
00049   State current_state;
  
  /**
   * Initial state of every token
   */
00054   State initial_state;
  
  /**
   * Set of final states of incoditional sections in the dictionaries
   */
00059   set<Node *> inconditional;
  
  /**
   * Set of final states of standard sections in the dictionaries
   */
00064   set<Node *> standard;

  /**
   * Set of final states of postblank sections in the dictionaries
   */    
00069   set<Node *> postblank;
  
  /**
   * Merge of 'inconditional', 'standard' and 'postblank sets
   */
00074   set<Node *> all_finals;

  queue<string> blankqueue;
  set<unsigned short> alphabetic_chars;
  set<unsigned short> escaped_chars;
  Alphabet alphabet;
  Buffer<unsigned short> input_buffer; // 4 Kb
  Node root;
  bool outOfWord;
  
  void streamError();
  unsigned short readEscaped(FILE *input);
  string readFullBlock(FILE *input, char const delim1, char const delim2);
  bool isAlphabetic(unsigned short const c) const;
  bool isEscaped(unsigned short const c) const;

  unsigned short readAnalysis(FILE *input);
  unsigned short readGeneration(FILE *input, FILE *output);
  unsigned short readSAO(FILE *input);
  
  void flushBlanks(FILE *output);
  void calcInitial();
  void classifyFinals();
  void writeEscaped(string const &str, FILE *output);
  static bool endsWith(string const &a, string const &b);
  void printWord(string const &sf, string const &lf, FILE *output);
  void printSAOWord(string const &lf, FILE *output);
  void printUnknownWord(string const &sf, FILE *output);
  unsigned int lastBlank(string const &str);
  void printSpace(unsigned short const val, FILE *output);
  void skipUntil(FILE *input, FILE *output, int const character);

public:
  FSTProcessor();
  ~FSTProcessor();

  void initAnalysis();
  void initSAO(){initAnalysis();};
  void initGeneration();
  void initPostgeneration();
  void initBiltrans();
  
  void analysis(FILE *input = stdin, FILE *output = stdout);
  void generation(FILE *input = stdin, FILE *output = stdout, bool unknown_words = true);
  void postgeneration(FILE *input = stdin, FILE *output = stdout); 
  string biltrans(string const &input_word, bool with_delim = true);
  void SAO(FILE *input = stdin, FILE *output = stdout);  
  
  void load(FILE *input);

  bool valid() const;
};


#endif

Generated by  Doxygen 1.6.0   Back to index