GME  13
AParser.h
Go to the documentation of this file.
00001 /* ANTLRParser.h
00002  *
00003  * Define the generic ANTLRParser superclass, which is subclassed to
00004  * define an actual parser.
00005  *
00006  * Before entry into this file: ANTLRTokenType must be set.
00007  *
00008  * SOFTWARE RIGHTS
00009  *
00010  * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
00011  * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
00012  * company may do whatever they wish with source code distributed with
00013  * PCCTS or the code generated by PCCTS, including the incorporation of
00014  * PCCTS, or its output, into commerical software.
00015  *
00016  * We encourage users to develop software with PCCTS.  However, we do ask
00017  * that credit is given to us for developing PCCTS.  By "credit",
00018  * we mean that if you incorporate our source code into one of your
00019  * programs (commercial product, research project, or otherwise) that you
00020  * acknowledge this fact somewhere in the documentation, research report,
00021  * etc...  If you like PCCTS and have developed a nice tool with the
00022  * output, please mention that you developed it using PCCTS.  In
00023  * addition, we ask that this header remain intact in our source code.
00024  * As long as these guidelines are kept, we expect to continue enhancing
00025  * this system and expect to make other tools available as they are
00026  * completed.
00027  *
00028  * ANTLR 1.33
00029  * Terence Parr
00030  * Parr Research Corporation
00031  * with Purdue University and AHPCRC, University of Minnesota
00032  * 1989-1998
00033  */
00034 
00035 #ifndef APARSER_H_GATE
00036 #define APARSER_H_GATE
00037 
00038 #include <stdio.h>
00039 #include <setjmp.h>
00040 #include "config.h"
00041 #include ATOKEN_H
00042 #include ATOKENBUFFER_H
00043 /* Defining ZZCAN_GUESS also defines ZZINF_LOOK when it is not already defined. */
00044 #ifdef ZZCAN_GUESS
00045 #ifndef ZZINF_LOOK
00046 #define ZZINF_LOOK
00047 #endif
00048 #endif
00049 
00050 /* NLA: token_type entry at index lap masked with LLk-1 (equals lap % LLk only when LLk is a power of two -- TODO confirm) */
00051 #define NLA                     (token_type[lap&(LLk-1)])/* --> next LA */
00052 /* One word of a bit set; bsetsize in ANTLRParser is counted in these units. */
00053 typedef unsigned char SetWordType;
00054 
00055 /* Define external bit set stuff (for SetWordType): bits per word, and log2 of that (1<<3 == 8) */
00056 #define EXT_WORDSIZE    (sizeof(char)*8)
00057 #define EXT_LOGWORDSIZE 3
00058 
00059            /* s y n t a c t i c  p r e d i c a t e  s t u f f */
00060 /* Optional user hooks; both default to nothing unless defined before this header is included. */
00061 #ifndef zzUSER_GUESS_HOOK
00062 #define zzUSER_GUESS_HOOK(seqFrozen,zzrv)       /* expanded by zzGUESS right after its setjmp() */
00063 #endif
00064 
00065 #ifndef zzUSER_GUESS_DONE_HOOK
00066 #define zzUSER_GUESS_DONE_HOOK(seqFrozen)       /* expanded at the end of zzGUESS_DONE */
00067 #endif
00068 /* Struct wrapper around a jmp_buf; this is the type of the guess_start members declared in this file. */
00069 typedef struct _zzjmp_buf {
00070                         jmp_buf state;
00071                 } zzjmp_buf;
00072 /* Guess-mode macros. zzGUESS_BLOCK declares the locals (zzst, zzrv, _marker, zzGuessSeqFrozen) that zzGUESS and zzGUESS_DONE use. */
00073 /* these need to be macros not member functions: zzGUESS calls setjmp(), and a longjmp() back is only valid while the function that called setjmp() is still active */
00074 #define zzGUESS_BLOCK           ANTLRParserState zzst; int zzrv; int _marker; int zzGuessSeqFrozen;
00075 #define zzNON_GUESS_MODE        if ( !guessing )
00076 #define zzGUESS_FAIL            guess_fail();
00077 #define zzGUESS_DONE            {zzrv=1; inputTokens->rewind(_marker); guess_done(&zzst);zzUSER_GUESS_DONE_HOOK(zzGuessSeqFrozen) } /* rewind the token buffer to the mark, then hand the saved state to guess_done() */
00078 #define zzGUESS                         saveState(&zzst); \
00079                                                         guessing = 1; \
00080                             zzGuessSeqFrozen = ++zzGuessSeq; \
00081                                                         _marker = inputTokens->mark(); \
00082                                                         zzrv = setjmp(guess_start.state); \
00083                             zzUSER_GUESS_HOOK(zzGuessSeqFrozen,zzrv) \
00084                                                     if ( zzrv ) zzGUESS_DONE /* zzrv is nonzero when control returns here through the longjmp() in guess_fail() */
00085 /* Rule-trace support. zzTRACEdata (expanded inside zzRULE) is empty unless zzTRACE_RULES is defined. */
00086 #ifndef zzTRACE_RULES
00087 #define zzTRACEdata
00088 #else
00089 #ifndef zzTRACEdata
00090 #define zzTRACEdata     const ANTLRChar *zzTracePrevRuleName;
00091 #endif
00092 #endif
00093 /* zzTRACEIN saves traceCurrentRuleName then calls tracein(r); zzTRACEOUT calls traceout(r) then restores it. NOTE(review): both use zzTracePrevRuleName, which the default zzTRACEdata declares only under zzTRACE_RULES. */
00094 #ifndef zzTRACEIN
00095 #define zzTRACEIN(r)    zzTracePrevRuleName=traceCurrentRuleName;tracein(r);
00096 #endif
00097 #ifndef zzTRACEOUT
00098 #define zzTRACEOUT(r)   traceout(r);traceCurrentRuleName=zzTracePrevRuleName;
00099 #endif
00100 
00101                   /* a n t l r  p a r s e r  d e f */
00102 /* Parser-state record passed to ANTLRParser::saveState(), restoreState(), guess_done() and traceGuessDone(); zzGUESS_BLOCK declares one per guess. */
00103 struct ANTLRParserState {
00104         /* every field below has a same-named member in ANTLRParser */
00105         zzjmp_buf guess_start;
00106         int guessing;
00107 
00108         int inf_labase;
00109         int inf_last;
00110 
00111         int dirty;
00112 
00113     int             traceOptionValue;       // MR10
00114     int             traceGuessOptionValue;  // MR10
00115     const ANTLRChar *traceCurrentRuleName;  // MR10
00116     int             traceDepth;             // MR10
00117 
00118 };
00119 
00120 /* notes:
00121  *
00122  * multiple inheritance is a cool way to include what stuff is needed
00123  * in this structure (like guess stuff).  however, i'm not convinced that
00124  * multiple inheritance works correctly on all platforms.  not that
00125  * much space is used--just include all possibly useful members.
00126  *
00127  * the class should also be a template with arguments for the lookahead
00128  * depth and so on.  that way, more than one parser can be defined (as
00129  * each will probably have different lookahead requirements).  however,
00130  * am i sure that templates work?  no, i'm not sure.
00131  *
00132  * no attributes are maintained and, hence, the 'asp' variable is not
00133  * needed.  $i can still be referenced, but it refers to the token
00134  * associated with that rule element.  question: where are the tokens
00135  * stored if not on the software stack?  in local variables created
00136  * and assigned to by antlr.
00137  */
00138 class ANTLRParser {     // generic parser superclass; an actual parser is defined by subclassing it
00139 protected:
00140         /* class variables */
00141         static SetWordType bitmask[sizeof(SetWordType)*8];      // sizeof(SetWordType)*8 entries, i.e. one per bit of a set word
00142         static char eMsgBuffer[500];    // static 500-byte buffer shared by all parsers; presumably backs eMsg*() -- TODO confirm
00143 
00144 protected:
00145         int LLk;                                        // number of lookahead symbols (old LL_K)
00146         int demand_look;        // when nonzero, LA() indexes token_type from labase instead of lap
00147         ANTLRTokenType eofToken;                        // when do I stop during resynch()s
00148         int bsetsize;                                   // size of bitsets created by ANTLR in
00149                                                                         // units of SetWordType
00150 
00151         ANTLRTokenBuffer *inputTokens;  //place to get input tokens
00152 
00153         zzjmp_buf guess_start;          // where to jump back to upon failure
00154         int guessing;                           // if guessing (using (...)? predicate)
00155 
00156         // infinite lookahead stuff
00157         int can_use_inf_look;           // set by subclass (generated by ANTLR)
00158         int inf_lap;
00159         int inf_labase;
00160         int inf_last;
00161         int *_inf_line;
00162 
00163         ANTLRChar **token_tbl;          // pointer to table of token type strings
00164 
00165         int dirty;                                      // used during demand lookahead
00166 
00167         ANTLRTokenType *token_type;             // fast reference cache of token.getType()
00168 //      ANTLRLightweightToken **token;  // the token with all its attributes
00169         int lap;
00170         int labase;
00171 
00172 private:
00173         void fill_inf_look();
00174 
00175 protected:
00176         virtual void guess_fail() {                         // MR9 27-Sep-97 make virtual
00177         traceGuessFail();                               // MR10
00178         longjmp(guess_start.state, 1); }                // MR9: does not return; resumes at the setjmp() in zzGUESS with value 1
00179         virtual void guess_done(ANTLRParserState *st) {     // MR9 27-Sep-97 make virtual
00180          restoreState(st); }                            // MR9
00181         virtual int guess(ANTLRParserState *);              // MR9 27-Sep-97 make virtual
00182         void look(int);
00183     int _match(ANTLRTokenType, ANTLRChar **, ANTLRTokenType *,
00184                            _ANTLRTokenPtr *, SetWordType **);
00185     int _setmatch(SetWordType *, ANTLRChar **, ANTLRTokenType *,
00186                            _ANTLRTokenPtr *, SetWordType **);
00187     int _match_wsig(ANTLRTokenType);
00188     int _setmatch_wsig(SetWordType *);
00189     virtual void consume();
00190     void resynch(SetWordType *wd,SetWordType mask);
00191         void prime_lookahead();
00192         virtual void tracein(const ANTLRChar *r);              // MR10
00193         virtual void traceout(const ANTLRChar *r);             // MR10
00194         static unsigned MODWORD(unsigned x) {return x & (EXT_WORDSIZE-1);}      // x % EXT_WORDSIZE // MR9
00195         static unsigned DIVWORD(unsigned x) {return x >> EXT_LOGWORDSIZE;}      // x / EXT_WORDSIZE // MR9
00196         int set_deg(SetWordType *);
00197         int set_el(ANTLRTokenType, SetWordType *);
00198         virtual void edecode(SetWordType *);                            // MR1
00199         virtual void FAIL(int k, ...);                                      // MR1
00200     int                 traceOptionValue;                           // MR10
00201     int                 traceGuessOptionValue;                      // MR10
00202     const ANTLRChar     *traceCurrentRuleName;                      // MR10
00203     int                 traceDepth;                                 // MR10
00204     void                traceReset();                               // MR10
00205     virtual void        traceGuessFail();                           // MR10
00206     virtual void        traceGuessDone(const ANTLRParserState *);   // MR10
00207     int                 zzGuessSeq;                                 // MR10: pre-incremented by zzGUESS at the start of each guess
00208 
00209 public:
00210         ANTLRParser(ANTLRTokenBuffer *,
00211                                 int k=1,
00212                                 int use_inf_look=0,
00213                                 int demand_look=0,
00214                                 int bsetsize=1);
00215         virtual ~ANTLRParser();
00216 
00217         virtual void init();
00218         // LA(i): token type i-1 slots past the base index (labase when demand_look is set, else lap), masked with LLk-1
00219         ANTLRTokenType LA(int i)
00220         {
00221                 return demand_look ? token_type[(labase+(i)-1)&(LLk-1)] :
00222                                                         token_type[(lap+(i)-1)&(LLk-1)];
00223         }
00224         _ANTLRTokenPtr LT(int i);
00225 
00226         void setEofToken(ANTLRTokenType t)      { eofToken = t; }
00227 
00228         void noGarbageCollectTokens()   { inputTokens->noGarbageCollectTokens(); }      // forwards to the token buffer
00229         void garbageCollectTokens()             { inputTokens->garbageCollectTokens(); }        // forwards to the token buffer
00230 
00231     virtual void syn(_ANTLRTokenPtr tok, ANTLRChar *egroup,
00232                                          SetWordType *eset, ANTLRTokenType etok, int k);
00233         virtual void saveState(ANTLRParserState *);     // MR9 27-Sep-97 make virtual
00234         virtual void restoreState(ANTLRParserState *);  // MR9 27-Sep-97 make virtual
00235         // NOTE(review): the message parameters below are non-const char *, so string literals cannot be passed without a cast in C++11 and later
00236         virtual void panic(char *msg);
00237         static char *eMsgd(char *,int);
00238         static char *eMsg(char *,char *);
00239         static char *eMsg2(char *,char *,char *);
00240 
00241         void consumeUntil(SetWordType *st);
00242         void consumeUntilToken(int t);
00243 
00244         virtual int _setmatch_wdfltsig(SetWordType *tokensWanted,
00245                                          ANTLRTokenType tokenTypeOfSet,
00246                                          SetWordType *whatFollows);
00247         virtual int _match_wdfltsig(ANTLRTokenType tokenWanted,
00248                                          SetWordType *whatFollows);
00249         
00250         const ANTLRChar * parserTokenName(int tok);                     // MR1
00251 
00252     int                 traceOptionValueDefault;        // MR11
00253     int                 traceOption(int delta);         // MR11
00254     int                 traceGuessOption(int delta);    // MR11
00255 
00256 //  MR8  5-Aug-97   S.Bochnak@microtool.com.pl
00257 //  MR8             Move resynch static local variable
00258 //  MR8               to class instance (presumably resynchConsumed, tagged MR8 below)
00259 
00260     int                 syntaxErrCount;                      // MR12
00261     ANTLRTokenStream   *getLexer() const {                   // MR12: the token buffer's lexer, or 0 when inputTokens is null
00262       return inputTokens ? inputTokens->getLexer() : 0; }    // MR12
00263 protected:                                              // MR8
00264     int     resynchConsumed;                            // MR8
00265     char    *zzFAILtext; // workarea required by zzFAIL // MR9
00266 };
00267 
00268 /* Match macros. They reference names that must already be in scope where they expand: the zzRULE locals (zzMissText, zzMissTok, zzBadTok, zzMissSet), a _signal variable, and a 'fail' label or the named handler label. */
00269 #define zzmatch(_t)                                                     \
00270         if ( !_match((ANTLRTokenType)_t, &zzMissText, &zzMissTok, \
00271                                  (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet) ) goto fail;
00272 /* NOTE(review): the _wsig forms expand to an unbraced nested if/else; used as the body of an outer 'if' that has its own 'else', that 'else' would bind to the macro's first 'if'. */
00273 #define zzmatch_wsig(_t,handler)                                                \
00274         if ( !_match_wsig((ANTLRTokenType)_t) ) if ( guessing ) zzGUESS_FAIL else {_signal=MismatchedToken; goto handler;}
00275 
00276 #define zzsetmatch(_ts)                                                 \
00277         if ( !_setmatch(_ts, &zzMissText, &zzMissTok, \
00278                                  (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet) ) goto fail;
00279 
00280 #define zzsetmatch_wsig(_ts, handler)                           \
00281         if ( !_setmatch_wsig(_ts) ) if ( guessing ) zzGUESS_FAIL else {_signal=MismatchedToken; goto handler;}
00282 
00283 /* For the dflt signal matchers, a FALSE indicates that an error occurred
00284  * just like the other matchers, but in this case, the routine has already
00285  * recovered--we do NOT want to consume another token.  However, when
00286  * the match was successful, we do want to consume hence _signal=0 so that
00287  * a token is consumed by the "if (!_signal) consume(); _signal=NoSignal;"
00288  * preamble.
00289  */
00290 #define zzsetmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) \
00291         if ( !_setmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) ) \
00292                 _signal = MismatchedToken;
00293 
00294 #define zzmatch_wdfltsig(tokenWanted, whatFollows) \
00295         if ( !_match_wdfltsig(tokenWanted, whatFollows) ) _signal = MismatchedToken;
00296 
00297 
00298 //  MR1  10-Apr-97      zzfailed_pred() macro does not backtrack
00299 //  MR1                   in guess mode.
00300 //  MR1                 Identification and correction due to J. Lilley
00301 /* Default failed-predicate action (used only if zzfailed_pred is not already defined): while guessing, abandon the guess via zzGUESS_FAIL; otherwise print the predicate text and LT(1)'s line. NOTE(review): the message goes to stdout, not stderr -- confirm intended. */
00302 #ifndef zzfailed_pred
00303 #define zzfailed_pred(_p) \
00304   if (guessing) { \
00305     zzGUESS_FAIL; \
00306   } else { \
00307     fprintf(stdout,"line %d: semantic error; failed predicate: '%s'\n", \
00308         LT(1)->getLine(), _p); \
00309   }
00310 #endif
00311 /* zzRULE declares the locals that zzmatch/zzsetmatch pass to _match/_setmatch (zzMissText, zzMissTok, zzBadTok, zzMissSet), plus zzBadText, zzErrk, zzpf and whatever zzTRACEdata expands to. */
00312 #define zzRULE \
00313                 SetWordType *zzMissSet=NULL; ANTLRTokenType zzMissTok=(ANTLRTokenType)0;        \
00314                 _ANTLRTokenPtr zzBadTok=NULL; ANTLRChar *zzBadText=(ANTLRChar *)"";     \
00315                 int zzErrk=1,zzpf=0; \
00316         zzTRACEdata \
00317                 ANTLRChar *zzMissText=(ANTLRChar *)"";
00318 
00319 #endif /* APARSER_H_GATE -- the signal definitions that follow are outside the include guard */
00320 
00321         /* S t a n d a r d  E x c e p t i o n  S i g n a l s */
00322 /* Values assigned to _signal by the match macros and by the helper macros below. */
00323 #define NoSignal                        0
00324 #define MismatchedToken         1
00325 #define NoViableAlt                     2
00326 #define NoSemViableAlt          3
00327 
00328 /* MR7  Allow more control over signalling                                  */
00329 /*        by adding "Unwind" and "SetSignal"                                */
00330 /* The three helpers below write through *_retsignal, so a _retsignal pointer and a _signal variable must be in scope where they are used. */
00331 #define Unwind              4
00332 #define setSignal(newValue) *_retsignal=_signal=(newValue)  /* sets both _signal and *_retsignal */
00333 #define suppressSignal       *_retsignal=_signal=0  /* clears both; 0 is the value of NoSignal */
00334 #define exportSignal        *_retsignal=_signal  /* copies the current _signal out to *_retsignal */
00334 #define exportSignal        *_retsignal=_signal