#-------------------------------------------------------------------------
#ParserGen.py -- The parser generation routines.
#Compiler Generator Coco/R,
#Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
#extended by M. Loeberbauer & A. Woess, Univ. of Linz
#ported from Java to Python by Ronald Longo
#
#This program is free software; you can redistribute it and/or modify it
#under the terms of the GNU General Public License as published by the
#Free Software Foundation; either version 2, or (at your option) any
#later version.
#
#This program is distributed in the hope that it will be useful, but
#WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
#or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
#for more details.
#
#You should have received a copy of the GNU General Public License along
#with this program; if not, write to the Free Software Foundation, Inc.,
#59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
#As an exception, it is allowed to write an extension of Coco/R that is
#used as a plugin in non-free software.
#
#If not otherwise stated, any source code generated by Coco/R (other than
#Coco/R itself) does not fall under the GNU General Public License.
#-------------------------------------------------------------------------*/
import copy
import os
import os.path
import StringIO

from Errors import Errors
from Trace import Trace
from Core import Node
from Core import DFA
from Core import Symbol
from Core import Tab
from CodeGenerator import CodeGenerator


class MyLoopBreak( Exception ):
   """Exception subclass that adds no behaviour of its own to Exception."""


class ParserGen( object ):
   """Routines that write the generated parser ('Parser.py').

   All state is held in class attributes and every routine in this class
   is a static method, so the class serves as a namespace.
   """
   maxTerm =    3    # sets of up to maxTerm elements are enumerated (see GenCond)
   ls      = "\n"    # line separator placed before each generated error message

   tErr    =    0    # error codes: terminal expected
   altErr  =    1    #              no alternative matched ("invalid ...")
   syncErr =    2    #              symbol not expected at a synchronisation point

   usingPos =  None  # usingPos: Position   "using" definitions from the attributed grammar

   errorNr   = 0      # highest parser error number
   curSy     = None   # symbol whose production is currently generated
   err       = None   # generated parser error messages
   srcName   = ''     # name of attributed grammar file
   srcDir    = ''     # directory of attributed grammar file
   symSet    = [ ]    # condition sets, referenced by index from the generated
                      # code; symSet[0] is reserved for the union of SYNC sets

   codeGen   = CodeGenerator( )   # output writer shared by all routines
   
   @staticmethod
   def Overlaps( s1, s2 ):
      assert isinstance( s1, set )
      assert isinstance( s2, set )
      ln = len(s1)
      for i in xrange( 0, ln ):
         if (i in s1) and (i in s2):
            return True
      return False

   @staticmethod
   def GenErrorMsg( errTyp, sym ):
      """Append one numbered message entry to the ParserGen.err buffer.

      errTyp -- ParserGen.tErr, ParserGen.altErr or ParserGen.syncErr
      sym    -- the Symbol whose name appears in the message

      ParserGen.errorNr is incremented first and its new value becomes the
      number of the entry.
      """
      assert isinstance( errTyp, int )
      assert isinstance( sym, Symbol )
      ParserGen.errorNr += 1
      buf = ParserGen.err
      buf.write( ParserGen.ls + '      ' + str(ParserGen.errorNr) + ' : "' )
      if errTyp == ParserGen.tErr:
         # names that start with a double quote go through DFA.Escape
         if sym.name[0] == '"':
            label = DFA.Escape( sym.name )
         else:
            label = sym.name
         buf.write( str(label) + ' expected' )
      elif errTyp == ParserGen.altErr:
         buf.write( 'invalid ' + str(sym.name) )
      elif errTyp == ParserGen.syncErr:
         buf.write( 'this symbol not expected in ' + str(sym.name) )
      buf.write('",' )

   @staticmethod
   def NewCondSet( s ):
      assert isinstance( s, set )
      for i in xrange( 1, len(ParserGen.symSet) ):
        # skip symSet[0] (reserved for union of SYNC sets)
        if s == ParserGen.symSet[i]: #s.equals( ParserGen.symSet[i] ):
           return i
      ParserGen.symSet.append( copy.copy(s) ) 
      return len(ParserGen.symSet) - 1

   @staticmethod
   def GenCond( s, p ):
      """Write the boolean condition that guards node p.

      s -- set of terminal numbers that the condition accepts
      p -- the Node being guarded

      For a resolver node the source text at p.pos is copied.  Otherwise an
      empty set yields 'False', a set of at most maxTerm elements yields a
      chain of 'self.la.kind == ...' comparisons joined by ' or ', and a
      larger set yields a 'self.StartOf(n)' call on a condition set index.
      """
      assert isinstance( s, set  )
      assert isinstance( p, Node )
      out = ParserGen.codeGen
      if p.typ == Node.rslv:
         out.CopySourcePart( p.pos, 0 )
         return

      remaining = len(s)
      if remaining == 0:
         out.write( 'False' ) # should never happen
         return
      if remaining > ParserGen.maxTerm:
         out.write( 'self.StartOf(' + str(ParserGen.NewCondSet( s )) + ')' )
         return

      for term in Symbol.terminals:
         assert isinstance( term, Symbol )
         if term.n not in s:
            continue
         out.write( 'self.la.kind == ')
         ParserGen.PrintTermName( term )
         remaining -= 1
         # a separator is needed only while more members are still to come
         if remaining > 0:
            out.write( ' or ' )

   @staticmethod
   def GenCode( p, indent, isChecked ):
      """Emit parser statements for the chain of graph nodes starting at p.

      p         -- first Node of the chain, or None (nothing is emitted)
      indent    -- indentation level handed to codeGen.Indent( )
      isChecked -- set of terminal numbers; a terminal whose number is in
                   this set is consumed with self.Get( ) instead of
                   self.Expect(...).  The set is modified in place.

      The chain is followed through p.next and ends after a node whose
      'up' flag is set.
      """
      assert isinstance( indent, int )
      assert isinstance( isChecked, set )
      while p is not None:
         if p.typ == Node.nt:       # Non-Terminals
            ParserGen.codeGen.Indent( indent )
            if p.retVar is not None:
               ParserGen.codeGen.write( p.retVar + ' = ' )
            ParserGen.codeGen.write( 'self.' + p.sym.name + '(' )
            ParserGen.codeGen.CopySourcePart( p.pos, 0 )
            ParserGen.codeGen.write( ')\n' )
         elif p.typ == Node.t:      # Terminals
            ParserGen.codeGen.Indent( indent )
            if p.sym.n in isChecked:
               ParserGen.codeGen.write( 'self.Get( )\n' )
            else:
               ParserGen.codeGen.write( 'self.Expect(' )
               ParserGen.PrintTermName( p.sym )
               ParserGen.codeGen.write( ')\n' )
         elif p.typ == Node.wt:     # Weak terminals
            ParserGen.codeGen.Indent( indent )
            s1 = Tab.Expected( p.next, ParserGen.curSy )
            s1 |= Tab.allSyncSets
            ParserGen.codeGen.write( 'self.ExpectWeak(' )
            ParserGen.PrintTermName( p.sym )
            ParserGen.codeGen.write( ', ' + str(ParserGen.NewCondSet( s1 )) + ')\n' )
         elif p.typ == Node.any:
            ParserGen.codeGen.Indent( indent )
            ParserGen.codeGen.write( 'self.Get()\n' )
         elif p.typ == Node.eps:
            ParserGen.codeGen.Indent( indent )
            ParserGen.codeGen.write( 'pass\n' )
         elif p.typ == Node.rslv:
            pass   # Nothing to do
         elif p.typ == Node.sem:
            ParserGen.codeGen.CopySourcePart( p.pos, indent )
         elif p.typ == Node.sync:
            ParserGen.codeGen.Indent( indent)
            ParserGen.GenErrorMsg( ParserGen.syncErr, ParserGen.curSy )
            s1 = copy.copy(p.set)
            ParserGen.codeGen.write( 'while not (' )
            ParserGen.GenCond( s1,p )
            ParserGen.codeGen.write( '):\n' )
            ParserGen.codeGen.Indent( indent+1 )
            ParserGen.codeGen.write( 'self.SynErr(' + str(ParserGen.errorNr) + ')\n' )
            ParserGen.codeGen.Indent( indent+1 )
            ParserGen.codeGen.write( 'self.Get()\n' )
         elif p.typ == Node.alt:
            s1 = Tab.First( p )
            p2 = p
            # when the alternatives accept exactly what was already checked,
            # the last one can be an unconditional 'else' and no error
            # branch is needed
            equal = (s1 == isChecked)
            while p2 is not None:
               s1 = Tab.Expected( p2.sub, ParserGen.curSy )
               ParserGen.codeGen.Indent( indent )
               if p2 == p:
                  ParserGen.codeGen.write( 'if ' )
                  ParserGen.GenCond( s1, p2.sub )
                  ParserGen.codeGen.write( ':\n' )
               elif p2.down is None and equal:
                  ParserGen.codeGen.write( 'else:\n' )
               else:
                  ParserGen.codeGen.write( 'elif ' )
                  ParserGen.GenCond( s1, p2.sub )
                  ParserGen.codeGen.write( ':\n' )
               s1 |= isChecked
               ParserGen.GenCode( p2.sub, indent+1, s1 )
               p2 = p2.down
            if not equal:
               ParserGen.codeGen.Indent( indent )
               ParserGen.GenErrorMsg( ParserGen.altErr, ParserGen.curSy )
               ParserGen.codeGen.write( 'else:\n' )
               ParserGen.codeGen.Indent( indent+1 )
               ParserGen.codeGen.write( 'self.SynErr(' + str(ParserGen.errorNr) + ')\n' )
         elif p.typ == Node.iter:
            ParserGen.codeGen.Indent( indent )
            p2 = p.sub
            ParserGen.codeGen.write( 'while ' )
            if p2.typ == Node.wt:
               s1 = Tab.Expected( p2.next, ParserGen.curSy )
               s2 = Tab.Expected( p.next, ParserGen.curSy )
               ParserGen.codeGen.write( 'self.WeakSeparator(' )
               ParserGen.PrintTermName( p2.sym )
               ParserGen.codeGen.write( ', ' + str(ParserGen.NewCondSet(s1)) + ', ' + str(ParserGen.NewCondSet(s2)) + ')' )
               s1 = set( )
               if p2.up or p2.next is None:
                  p2 = None
               else:
                  p2 = p2.next
            else:
               s1 = Tab.First( p2 )
               ParserGen.GenCond( s1, p2 )
            ParserGen.codeGen.write( ':\n' )
            if p2 is None:
               # Nothing follows the weak separator, so the loop has no
               # statements; a Python 'while' needs at least one.
               ParserGen.codeGen.Indent( indent+1 )
               ParserGen.codeGen.write( 'pass\n' )
            else:
               ParserGen.GenCode( p2,indent+1, s1 )
            ParserGen.codeGen.write( '\n' )
         elif p.typ == Node.opt:
            s1 = Tab.First( p.sub )
            ParserGen.codeGen.Indent( indent )
            ParserGen.codeGen.write( 'if (' )
            ParserGen.GenCond( s1, p.sub )
            ParserGen.codeGen.write( '):\n' )
            ParserGen.GenCode( p.sub, indent+1, s1 )

         if p.typ != Node.eps and p.typ != Node.sem and p.typ != Node.sync:
            # Forget every checked terminal.  Discarding only the values
            # 0 .. len(isChecked)-1 would leave larger symbol numbers in
            # the set and turn a later Expect into an unchecked Get.
            isChecked.clear( )

         if p.up:
            break

         p = p.next

   @staticmethod
   def GenTokens( withNames ):
      """Write the token number constants.

      withNames -- when True, additionally write one 'symName = n' line
                   for every terminal and every pragma

      A '_name = n' line is always written for each terminal whose name
      starts with a letter.
      """
      assert isinstance( withNames, bool )
      emit = ParserGen.codeGen.write
      for term in Symbol.terminals:
         if not term.name[0].isalpha( ):
            continue
         emit( '   _' + term.name + ' = ' + str(term.n) + '\n' )
      if not withNames:
         return
      sections = ( ( '   # terminals\n', Symbol.terminals ),
                   ( '   # pragmas\n',   Symbol.pragmas   ) )
      for heading, group in sections:
         emit( heading )
         for named in group:
            emit( '   ' + named.symName + ' = ' + str(named.n) + '\n' )
      emit( '\n' )

   @staticmethod
   def GenPragmas( ):
      """Write one '_name = n' constant line for every pragma symbol."""
      emit = ParserGen.codeGen.write
      for pragma in Symbol.pragmas:
         pieces = [ '   _', str(pragma.name), ' = ', str(pragma.n), '\n' ]
         emit( ''.join( pieces ) )

   @staticmethod
   def GenCodePragmas( ):
      """Write, for every pragma, an 'if self.la.kind == ...:' test
      followed by the source text found at the pragma's semPos."""
      gen = ParserGen.codeGen
      for pragma in Symbol.pragmas:
         gen.write( 'if self.la.kind == ' )
         ParserGen.PrintTermName( pragma )
         gen.write( ':\n' )
         gen.CopySourcePart( pragma.semPos, 4, True )

   @staticmethod
   def GenProductions( ):
      """Write one parser method for every nonterminal symbol.

      ParserGen.curSy is set to the symbol being processed while its
      method is written.
      """
      gen = ParserGen.codeGen
      for nonterm in Symbol.nonterminals:
         ParserGen.curSy = nonterm

         # header: 'def Name( self[, attributes] ):'
         gen.write( '   def ' + nonterm.name + '( self' )
         hasAttrs = nonterm.attrPos is not None
         if hasAttrs:
            gen.write( ', ' )
         gen.CopySourcePart( nonterm.attrPos, 0 )
         gen.write( ' ):\n' )

         # body: the symbol's own source part, then code for its graph
         gen.CopySourcePart( nonterm.semPos, 2 )
         ParserGen.GenCode( nonterm.graph, 2, set( ) )

         # trailer: optional return statement and a separating blank line
         result = nonterm.retVar
         if result is not None:
            gen.write( '      return ' + result + '\n' )
         gen.write( '\n' )

   @staticmethod
   def InitSets( ):
      """Write every entry of ParserGen.symSet as one row of T/x flags.

      A row holds one flag per terminal ('T,' when the terminal's number
      is in the set, 'x,' otherwise), a blank after every fourth flag, and
      a closing 'x'.  All rows except the last end with a comma.
      """
      emit = ParserGen.codeGen.write
      lastRow = len(ParserGen.symSet) - 1
      for row, members in enumerate( ParserGen.symSet ):
         emit( '      [' )
         for pos, term in enumerate( Symbol.terminals ):
            if term.n in members:
               emit( 'T,' )
            else:
               emit( 'x,' )
            if (pos + 1) % 4 == 0:
               emit( ' ' )
         if row == lastRow:
            emit( 'x]\n' )
         else:
            emit( 'x],\n' )

   @staticmethod
   def Init( fn, dir ):
      assert isinstance( fn, str )
      assert isinstance( dir, str )
      ParserGen.srcName = fn
      ParserGen.srcDir  = dir
      ParserGen.errorNr = -1
      ParserGen.usingPos = None

   @staticmethod
   def WriteParser( withNames ):
      """Write 'Parser.py' from 'Parser.frame' and the grammar tables.

      withNames -- when True, Tab.AssignNames( ) is called first and
                   symbolic names are written for terminals and pragmas

      Tab.allSyncSets is appended to ParserGen.symSet before any other
      condition set is created, and ParserGen.err is replaced by a fresh
      buffer that collects the error messages written at the end.
      """
      assert isinstance( withNames, bool )
      assert isinstance( Tab.allSyncSets, set )
      ParserGen.symSet.append( Tab.allSyncSets )

      ParserGen.codeGen.openFiles( 'Parser.frame', ParserGen.srcName,
            'Parser.py', True )

      if withNames:
         Tab.AssignNames( )

      # one "... expected" message per terminal, numbered from errorNr + 1
      ParserGen.err = StringIO.StringIO( )
      for sym in Symbol.terminals:
         ParserGen.GenErrorMsg( ParserGen.tErr, sym )

      ParserGen.codeGen.CopyFramePart( '-->begin' )
      # identity test: None is a singleton and must not be compared with '!='
      if ParserGen.usingPos is not None:
         ParserGen.codeGen.write( '\n' )
         ParserGen.codeGen.CopySourcePart( ParserGen.usingPos, 0 )
      ParserGen.codeGen.CopyFramePart( '-->constants' )
      ParserGen.GenTokens( withNames )
      ParserGen.codeGen.write( '   maxT = ' + str(len(Symbol.terminals) - 1) + '\n')
      ParserGen.GenPragmas( )
      ParserGen.codeGen.CopyFramePart( '-->declarations' )
      ParserGen.codeGen.CopySourcePart( Tab.semDeclPos, 0 )
      ParserGen.codeGen.CopyFramePart( '-->pragmas' )
      ParserGen.GenCodePragmas( )
      ParserGen.codeGen.CopyFramePart( '-->productions' )
      ParserGen.GenProductions( )
      ParserGen.codeGen.CopyFramePart( '-->parseRoot' )
      # call of the start symbol's method, then the end-of-file check
      ParserGen.codeGen.write( Tab.gramSy.name + '()\n' )
      ParserGen.codeGen.write( '      self.Expect(' )
      ParserGen.PrintTermName( Tab.eofSy )
      ParserGen.codeGen.write( ')\n' )
      ParserGen.codeGen.CopyFramePart( '-->initialization' )
      ParserGen.InitSets( )
      ParserGen.codeGen.CopyFramePart( '-->errors' )
      ParserGen.codeGen.write( str(ParserGen.err.getvalue( )) )
      ParserGen.codeGen.CopyFramePart( '$$$' )
      ParserGen.codeGen.close( )

   @staticmethod
   def WriteStatistics( ):
      """Write the counts of terminals, symbols, nodes and sets to Trace."""
      numTerminals = len( Symbol.terminals )
      numSymbols   = numTerminals + len( Symbol.pragmas ) + len( Symbol.nonterminals )

      Trace.WriteLine( )
      for heading in ( 'Statistics:', '-----------' ):
         Trace.WriteLine( heading )
      Trace.WriteLine( )
      Trace.WriteLine( str(numTerminals) + ' terminals' )
      Trace.WriteLine( str(numSymbols) + ' symbols' )
      Trace.WriteLine( str(len(Node.nodes)) + ' nodes' )
      Trace.WriteLine( str(len(ParserGen.symSet)) + ' sets' )
      Trace.WriteLine( )

   @staticmethod
   def PrintTermName( sym ):
      """Write the reference to terminal sym used in generated code.

      sym -- the Symbol to reference

      Writes 'Scanner.<symName>' when the symbol has a symName, otherwise
      the symbol's number.
      """
      assert isinstance( sym, Symbol )
      label = sym.symName
      assert (label is None) or isinstance( label, (str,unicode) )
      out = ParserGen.codeGen
      if label is not None:
         out.write( 'Scanner.' )
         out.write( str(label) )
      else:
         out.write( str(sym.n) )
