Logo Search packages:      
Sourcecode: lttoolbox version File versions  Download package

tmx_compiler.h

/*
 * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */
#ifndef _TMXCOMPILER_
#define _TMXCOMPILER_

#include <lttoolbox/alphabet.h>
#include <lttoolbox/regexp_compiler.h>
#include <lttoolbox/entry_token.h>
#include <lttoolbox/ltstr.h>
#include <lttoolbox/transducer.h>

#include <map>
#include <string>
#include <set>
#include <libxml/xmlreader.h>
#include <iostream>

using namespace std;

/**
 * A compiler of dictionaries to letter transducers
 */
00039 class TMXCompiler
{
private:
  /**
   * The libxml2's XML reader
   */
00045   xmlTextReaderPtr reader;
  
  /**
   * Identifier of all the symbols during the compilation
   */
00050   Alphabet alphabet;

  /**
   * Main transducer representing the TM
   */  
00055   Transducer transducer;  

  /**
   * Origin language
   */
00060   wstring origin_language;

  /**
   * Meta language
   */
00065   wstring meta_language;
  
  /**
   * Method to parse an XML Node
   */
  void procNode();

  /**
   * Parse the &lt;tu&gt; element
   */
  void procTU();

  /**
   * Insert a tu into the transducer
   * @param origin left part
   * @param meta right part
   */
  void insertTU(vector<int> const &origin, vector<int> const &meta);

  /**
   * Gets an attribute value with their name and the current context
   * @param name the name of the attribute
   * @return the value of the attribute
   */
  wstring attrib(wstring const &name);

  /**
   * Skip all document #text nodes before "elem"
   * @param name the name of the node
   * @param elem the name of the expected node
   */
  void skip(wstring &name, wstring const &elem);

  /**
   * Skip all blank #text nodes before "name"
   * @param name the name of the node
   */
  void skipBlanks(wstring &name);
  
  /**
   * Force an element to be empty, and check for it
   * @param name the element 
   */
  void requireEmptyError(wstring const &name);

  /**
   * Force an attribute to be specified, amd check for it
   * @param value the value of the attribute
   * @param attrname the name of the attribute
   * @param elemname the parent of the attribute
   */
  void requireAttribute(wstring const &value, wstring const &attrname,
                        wstring const &elemname);

  /**
   * True if all the elements in the current node are blanks
   * @return true if all are blanks
   */
  bool allBlanks();

  wstring getTag(size_t const &val) const;
  void trim(vector<int> &v) const;
  void align(vector<int> &origin, vector<int> &meta);
  unsigned int numberLength(vector<int> &v, unsigned int const position) const;
  bool vectorcmp(vector<int> const &orig, unsigned int const begin_orig,
                       vector<int> const &meta, unsigned int const begin_meta,
                       unsigned const int length) const;
  void split(vector<int> const &v, vector<vector<int> > &sv, int const symbol) const;
  void align_blanks(vector<int> &o, vector<int> &m);
  vector<int> join(vector<vector<int> > const &v, int const s) const;

  static void printvector(vector<int> const &v, wostream &wos = std::wcout);  //eliminar este m├ętodo
  
public:

  /*
   * Constants to represent the element and the attributes of
   * translation memories in TMX format
   */
  static wstring const TMX_COMPILER_TMX_ELEM;
  static wstring const TMX_COMPILER_HEADER_ELEM;
  static wstring const TMX_COMPILER_BODY_ELEM;
  static wstring const TMX_COMPILER_TU_ELEM;
  static wstring const TMX_COMPILER_TUV_ELEM;
  static wstring const TMX_COMPILER_HI_ELEM;
  static wstring const TMX_COMPILER_PH_ELEM;
  static wstring const TMX_COMPILER_XMLLANG_ATTR;
  static wstring const TMX_COMPILER_SEG_ELEM;


  /**
   * Constructor
   */
  TMXCompiler();

  /**
   * Destructor
   */
  ~TMXCompiler();

  /**
   * Compile dictionary to letter transducers
   */
  void parse(string const &fichero, wstring const &lo, wstring const &lm);
  
  /**
   * Write the result of compilation 
   * @param fd the stream where write the result
   */
  void write(FILE *fd);
};


#endif

Generated by  Doxygen 1.6.0   Back to index