Logo Search packages:      
Sourcecode: lttoolbox version File versions  Download package

regexp_compiler.cc

/*
 * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */
#include <lttoolbox/regexp_compiler.h>

#include <cstdlib>
#include <iostream>

00024 RegexpCompiler::RegexpCompiler()
{
}

00028 RegexpCompiler::~RegexpCompiler()
{
  destroy();
}

00033 RegexpCompiler::RegexpCompiler(RegexpCompiler const &rec)
{
  copy(rec);
}

/**
 * Assignment operator.
 * @param rec the compiler whose state is taken over
 * @return a reference to this object
 */
RegexpCompiler &
RegexpCompiler::operator =(RegexpCompiler const &rec)
{
  // Self-assignment: nothing to release, nothing to duplicate.
  if(&rec == this)
  {
    return *this;
  }

  destroy();
  copy(rec);
  return *this;
}

void
00051 RegexpCompiler::copy(RegexpCompiler const &rec)
{
  token = rec.token;
  input = rec.input;
  transducer = rec.transducer;
  letter = rec.letter;
  alphabet = rec.alphabet;
  state = rec.state;
  letter = rec.letter;
  postop = rec.postop;
}

void
00064 RegexpCompiler::destroy()
{
}

bool 
00069 RegexpCompiler::isReserved(int const t)
{
  switch(t) 
  {
    case L'(':
    case L')':
    case L'[':
    case L']':
    case L'*':
    case L'?':
    case L'+':
    case L'-':
    case L'^':
    case L'\\':
    case L'|':
    case FIN_FICHERO:
      return true;
 
    default:
      return false;
  }
}

void
00093 RegexpCompiler::error()
{
  wcerr << L"Error parsing regexp" <<endl;
  exit(EXIT_FAILURE);
}

void 
00100 RegexpCompiler::errorConsuming(int const t)
{
  wcerr << L"Error parsing regexp" << endl;
  exit(EXIT_FAILURE);
}

void 
00107 RegexpCompiler::consume(int const t)
{
  if(token == t)
  {
    input = input.substr(1);
    if(input ==  L"")
    {
      token = FIN_FICHERO;
    }
    else
    {
      token = input[0];
    }
  }
  else
  {
    errorConsuming(t);
  }
}

void 
00128 RegexpCompiler::compile(wstring const &er)
{
  input = er;
  token = static_cast<int>(input[0]);
  state = transducer.getInitial();
  S();
  transducer.setFinal(state);
}

void 
00138 RegexpCompiler::S()
{
  if(token == L'(' || token == L'[' || !isReserved(token) || token == L'\\')
  {
    RExpr();
    Cola();
  }
  else
  {
    error();
  }
}

void 
00152 RegexpCompiler::RExpr()
{
  if(token == L'(' || token == L'[' || !isReserved(token) || token == L'\\')
  {
    Term();
    RExprp();
  }
  else
  {
    error();
  }
}

void 
00166 RegexpCompiler::Cola()
{
  if(token == FIN_FICHERO || token == L')')
  {
  }
  else if(token == L'|')
  {
    int e = state;
    state = transducer.getInitial();
    consume(L'|');
    RExpr();
    Cola();
   
    state = transducer.insertNewSingleTransduction((*alphabet)(0, 0), state);
    transducer.linkStates(e, state, (*alphabet)(0, 0));
  }
  else
  {
    error();
  }
}

void 
00189 RegexpCompiler::Term()
{
  if(!isReserved(token) || token == L'\\')
  {
    Transducer t;
    int e = t.getInitial();
    Letra();
    e = t.insertNewSingleTransduction((*alphabet)(letter, letter), e);
    t.setFinal(e);
    Postop();
    if(postop == L"*")
    {
      t.zeroOrMore((*alphabet)(0, 0));
    }
    else if(postop == L"+")
    {
      t.oneOrMore((*alphabet)(0, 0));
    }
    else if(postop == L"?")
    {
      t.optional((*alphabet)(0, 0));
    }

    postop = L"";
    state = transducer.insertTransducer(state, t, (*alphabet)(0, 0));
  }
  else if(token == L'(')
  {
    Transducer t = transducer;
    int e = state;
    transducer.clear();
    state = transducer.getInitial();
    consume(L'(');
    S();
    consume(L')');
    transducer.setFinal(state);
    Postop();
    if(postop == L"*")
    {
      transducer.zeroOrMore((*alphabet)(0, 0));
    }
    else if(postop == L"+")
    {
      transducer.oneOrMore((*alphabet)(0, 0));
    }
    else if(postop == L"?")
    {
      transducer.optional((*alphabet)(0, 0));
    }

    postop = L"";
    state = t.insertTransducer(e, transducer, (*alphabet)(0, 0));
    transducer = t;
  }
  else if(token == L'[')
  {
    consume(L'[');
    Esp();
  }
  else
  {
    error();
  }
}

void 
00255 RegexpCompiler::RExprp()
{
  if(token == L'(' || token == L'[' || !isReserved(token) || token == L'\\')
  {
    Term();
    RExprp();
  }
  else if(token == L'|' || token == FIN_FICHERO || token == L')')
  {
  }
  else
  {
    error();
  }
}

void
00272 RegexpCompiler::Letra()
{
  if(!isReserved(token))
  {
    letter = token;
    consume(token);
  }
  else if(token == L'\\')
  {
    consume(L'\\');
    letter = token;
    Reservado();
  }
  else
  {
    error();
  }
}

void
00292 RegexpCompiler::Postop()
{
  if(token == L'*')
  {
    consume(L'*');
    postop = L"*";
  }
  else if(token == L'?')
  {
    consume(L'?');
    postop = L"?";
  }
  else if(token == L'+')
  {
    consume(L'+');
    postop = L"+";
  }
  else if(token == L'(' || token == L'[' || !isReserved(token) || 
          token == L'\\' || token == L'|' ||  token == FIN_FICHERO || 
        token == L')')
  {
  }
  else
  {
    error();
  }
}

void
00321 RegexpCompiler::Esp()
{
  Transducer t;
  if(!isReserved(token) || token == L'\\' || token == L']')
  {
    Lista();
    consume(L']');
    Postop();

    for(set<int>::iterator it = brackets.begin(); 
        it != brackets.end(); it++)
    {
      int mystate = t.getInitial();
      mystate = t.insertNewSingleTransduction((*alphabet)(0, 0), mystate);
      mystate = t.insertNewSingleTransduction((*alphabet)(*it, *it), mystate);
      t.setFinal(mystate);
    }

    t.joinFinals((*alphabet)(0, 0));
  }
  else if(token == L'^')
  {
    consume(L'^');
    Lista();
    consume(L']');
    Postop();
   
    for(int i = 0; i < 256 ;i++)
    {
      if(brackets.find(i) == brackets.end())
      {
        int mystate = t.getInitial();
        mystate = t.insertNewSingleTransduction((*alphabet)(0, 0), mystate);
        mystate = t.insertNewSingleTransduction((*alphabet)(i, i), mystate);
      t.setFinal(mystate);
      }
    }
    
    t.joinFinals((*alphabet)(0, 0));
  }
  else
  {
    error();
  }

  if(postop == L"+")
  {
    t.oneOrMore((*alphabet)(0, 0));     
  }
  else if(postop == L"*")
  {
    t.zeroOrMore((*alphabet)(0, 0));
  }
  else if(postop == L"?")
  {
    t.optional((*alphabet)(0, 0));
  }
  brackets.clear();
  postop = L"";

  state = transducer.insertTransducer(state, t, (*alphabet)(0, 0));
}

void 
00385 RegexpCompiler::Lista()
{
  if(!isReserved(token) || token == L'\\')
  {
    Elem();
    Lista();
  }
  else if(token == L']')
  {
  }
  else
  {
    error();
  }
}

void 
00402 RegexpCompiler::Reservado()
{
  if(isReserved(token))
  {
    consume(token);
  }
  else
  {
    error();
  }
}

void 
00415 RegexpCompiler::Elem()
{
  if(!isReserved(token) || token == L'\\')
  {
    Letra();
    int rango1 = letter;
    ColaLetra();
    int rango2 = letter;

    if(rango1 > rango2)
    {
      error();
    }
    else
    {
      for(int i = rango1; i <= rango2; i++)
      {
        brackets.insert(i);
      }
    }
  }
  else
  {
    error();
  }
}

void
00443 RegexpCompiler::ColaLetra()
{
  if(token == L'-')
  {
    consume(L'-');
    Letra();
  }
  else if(!isReserved(token) || token == L'\\' || token == L']')
  {
  }
  else
  {
    error();
  }
}

void
00460 RegexpCompiler::setAlphabet(Alphabet *a)
{
  alphabet = a;
}

/**
 * Gives access to the transducer built by compile().
 * @return a reference to the internal transducer
 */
Transducer &
RegexpCompiler::getTransducer()
{
  return this->transducer;
}

void
00472 RegexpCompiler::initialize(Alphabet *a)
{
  setAlphabet(a);
  transducer.clear();
  brackets.clear();
  postop = L"";
}

Generated by  Doxygen 1.6.0   Back to index