GME
13
|
00001 /* ANTLRParser.h 00002 * 00003 * Define the generic ANTLRParser superclass, which is subclassed to 00004 * define an actual parser. 00005 * 00006 * Before entry into this file: ANTLRTokenType must be set. 00007 * 00008 * SOFTWARE RIGHTS 00009 * 00010 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool 00011 * Set (PCCTS) -- PCCTS is in the public domain. An individual or 00012 * company may do whatever they wish with source code distributed with 00013 * PCCTS or the code generated by PCCTS, including the incorporation of 00014 * PCCTS, or its output, into commerical software. 00015 * 00016 * We encourage users to develop software with PCCTS. However, we do ask 00017 * that credit is given to us for developing PCCTS. By "credit", 00018 * we mean that if you incorporate our source code into one of your 00019 * programs (commercial product, research project, or otherwise) that you 00020 * acknowledge this fact somewhere in the documentation, research report, 00021 * etc... If you like PCCTS and have developed a nice tool with the 00022 * output, please mention that you developed it using PCCTS. In 00023 * addition, we ask that this header remain intact in our source code. 00024 * As long as these guidelines are kept, we expect to continue enhancing 00025 * this system and expect to make other tools available as they are 00026 * completed. 00027 * 00028 * ANTLR 1.33 00029 * Terence Parr 00030 * Parr Research Corporation 00031 * with Purdue University and AHPCRC, University of Minnesota 00032 * 1989-1998 00033 */ 00034 00035 #ifndef APARSER_H_GATE 00036 #define APARSER_H_GATE 00037 00038 #include <stdio.h> 00039 #include <setjmp.h> 00040 #include "config.h" 00041 #include ATOKEN_H 00042 #include ATOKENBUFFER_H 00043 00044 #ifdef ZZCAN_GUESS 00045 #ifndef ZZINF_LOOK 00046 #define ZZINF_LOOK 00047 #endif 00048 #endif 00049 00050 00051 #define NLA (token_type[lap&(LLk-1)])/* --> next LA */ 00052 00053 typedef unsigned char SetWordType; 00054 00055 /* Define external bit set stuff (for SetWordType) */ 00056 #define EXT_WORDSIZE (sizeof(char)*8) 00057 #define EXT_LOGWORDSIZE 3 00058 00059 /* s y n t a c t i c p r e d i c a t e s t u f f */ 00060 00061 #ifndef zzUSER_GUESS_HOOK 00062 #define zzUSER_GUESS_HOOK(seqFrozen,zzrv) 00063 #endif 00064 00065 #ifndef zzUSER_GUESS_DONE_HOOK 00066 #define zzUSER_GUESS_DONE_HOOK(seqFrozen) 00067 #endif 00068 00069 typedef struct _zzjmp_buf { 00070 jmp_buf state; 00071 } zzjmp_buf; 00072 00073 /* these need to be macros not member functions */ 00074 #define zzGUESS_BLOCK ANTLRParserState zzst; int zzrv; int _marker; int zzGuessSeqFrozen; 00075 #define zzNON_GUESS_MODE if ( !guessing ) 00076 #define zzGUESS_FAIL guess_fail(); 00077 #define zzGUESS_DONE {zzrv=1; inputTokens->rewind(_marker); guess_done(&zzst);zzUSER_GUESS_DONE_HOOK(zzGuessSeqFrozen) } 00078 #define zzGUESS saveState(&zzst); \ 00079 guessing = 1; \ 00080 zzGuessSeqFrozen = ++zzGuessSeq; \ 00081 _marker = inputTokens->mark(); \ 00082 zzrv = setjmp(guess_start.state); \ 00083 zzUSER_GUESS_HOOK(zzGuessSeqFrozen,zzrv) \ 00084 if ( zzrv ) zzGUESS_DONE 00085 00086 #ifndef zzTRACE_RULES 00087 #define zzTRACEdata 00088 #else 00089 #ifndef zzTRACEdata 00090 #define zzTRACEdata const ANTLRChar *zzTracePrevRuleName; 00091 #endif 00092 #endif 00093 00094 #ifndef zzTRACEIN 00095 #define zzTRACEIN(r) zzTracePrevRuleName=traceCurrentRuleName;tracein(r); 00096 #endif 00097 #ifndef zzTRACEOUT 00098 #define zzTRACEOUT(r) traceout(r);traceCurrentRuleName=zzTracePrevRuleName; 00099 #endif 00100 00101 /* a n t l r p a r s e r d e f */ 00102 00103 struct ANTLRParserState { 00104 /* class variables */ 00105 zzjmp_buf guess_start; 00106 int guessing; 00107 00108 int inf_labase; 00109 int inf_last; 00110 00111 int dirty; 00112 00113 int traceOptionValue; // MR10 00114 int traceGuessOptionValue; // MR10 00115 const ANTLRChar *traceCurrentRuleName; // MR10 00116 int traceDepth; // MR10 00117 00118 }; 00119 00120 /* notes: 00121 * 00122 * multiple inheritance is a cool way to include what stuff is needed 00123 * in this structure (like guess stuff). however, i'm not convinced that 00124 * multiple inheritance works correctly on all platforms. not that 00125 * much space is used--just include all possibly useful members. 00126 * 00127 * the class should also be a template with arguments for the lookahead 00128 * depth and so on. that way, more than one parser can be defined (as 00129 * each will probably have different lookahead requirements). however, 00130 * am i sure that templates work? no, i'm not sure. 00131 * 00132 * no attributes are maintained and, hence, the 'asp' variable is not 00133 * needed. $i can still be referenced, but it refers to the token 00134 * associated with that rule element. question: where are the token's 00135 * stored if not on the software stack? in local variables created 00136 * and assigned to by antlr. 00137 */ 00138 class ANTLRParser { 00139 protected: 00140 /* class variables */ 00141 static SetWordType bitmask[sizeof(SetWordType)*8]; 00142 static char eMsgBuffer[500]; 00143 00144 protected: 00145 int LLk; // number of lookahead symbols (old LL_K) 00146 int demand_look; 00147 ANTLRTokenType eofToken; // when do I stop during resynch()s 00148 int bsetsize; // size of bitsets created by ANTLR in 00149 // units of SetWordType 00150 00151 ANTLRTokenBuffer *inputTokens; //place to get input tokens 00152 00153 zzjmp_buf guess_start; // where to jump back to upon failure 00154 int guessing; // if guessing (using (...)? predicate) 00155 00156 // infinite lookahead stuff 00157 int can_use_inf_look; // set by subclass (generated by ANTLR) 00158 int inf_lap; 00159 int inf_labase; 00160 int inf_last; 00161 int *_inf_line; 00162 00163 ANTLRChar **token_tbl; // pointer to table of token type strings 00164 00165 int dirty; // used during demand lookahead 00166 00167 ANTLRTokenType *token_type; // fast reference cache of token.getType() 00168 // ANTLRLightweightToken **token; // the token with all its attributes 00169 int lap; 00170 int labase; 00171 00172 private: 00173 void fill_inf_look(); 00174 00175 protected: 00176 virtual void guess_fail() { // MR9 27-Sep-97 make virtual 00177 traceGuessFail(); // MR10 00178 longjmp(guess_start.state, 1); } // MR9 00179 virtual void guess_done(ANTLRParserState *st) { // MR9 27-Sep-97 make virtual 00180 restoreState(st); } // MR9 00181 virtual int guess(ANTLRParserState *); // MR9 27-Sep-97 make virtual 00182 void look(int); 00183 int _match(ANTLRTokenType, ANTLRChar **, ANTLRTokenType *, 00184 _ANTLRTokenPtr *, SetWordType **); 00185 int _setmatch(SetWordType *, ANTLRChar **, ANTLRTokenType *, 00186 _ANTLRTokenPtr *, SetWordType **); 00187 int _match_wsig(ANTLRTokenType); 00188 int _setmatch_wsig(SetWordType *); 00189 virtual void consume(); 00190 void resynch(SetWordType *wd,SetWordType mask); 00191 void prime_lookahead(); 00192 virtual void tracein(const ANTLRChar *r); // MR10 00193 virtual void traceout(const ANTLRChar *r); // MR10 00194 static unsigned MODWORD(unsigned x) {return x & (EXT_WORDSIZE-1);} // x % EXT_WORDSIZE // MR9 00195 static unsigned DIVWORD(unsigned x) {return x >> EXT_LOGWORDSIZE;} // x / EXT_WORDSIZE // MR9 00196 int set_deg(SetWordType *); 00197 int set_el(ANTLRTokenType, SetWordType *); 00198 virtual void edecode(SetWordType *); // MR1 00199 virtual void FAIL(int k, ...); // MR1 00200 int traceOptionValue; // MR10 00201 int traceGuessOptionValue; // MR10 00202 const ANTLRChar *traceCurrentRuleName; // MR10 00203 int traceDepth; // MR10 00204 void traceReset(); // MR10 00205 virtual void traceGuessFail(); // MR10 00206 virtual void traceGuessDone(const ANTLRParserState *); // MR10 00207 int zzGuessSeq; // MR10 00208 00209 public: 00210 ANTLRParser(ANTLRTokenBuffer *, 00211 int k=1, 00212 int use_inf_look=0, 00213 int demand_look=0, 00214 int bsetsize=1); 00215 virtual ~ANTLRParser(); 00216 00217 virtual void init(); 00218 00219 ANTLRTokenType LA(int i) 00220 { 00221 return demand_look ? token_type[(labase+(i)-1)&(LLk-1)] : 00222 token_type[(lap+(i)-1)&(LLk-1)]; 00223 } 00224 _ANTLRTokenPtr LT(int i); 00225 00226 void setEofToken(ANTLRTokenType t) { eofToken = t; } 00227 00228 void noGarbageCollectTokens() { inputTokens->noGarbageCollectTokens(); } 00229 void garbageCollectTokens() { inputTokens->garbageCollectTokens(); } 00230 00231 virtual void syn(_ANTLRTokenPtr tok, ANTLRChar *egroup, 00232 SetWordType *eset, ANTLRTokenType etok, int k); 00233 virtual void saveState(ANTLRParserState *); // MR9 27-Sep-97 make virtual 00234 virtual void restoreState(ANTLRParserState *); // MR9 27-Sep-97 make virtual 00235 00236 virtual void panic(char *msg); 00237 static char *eMsgd(char *,int); 00238 static char *eMsg(char *,char *); 00239 static char *eMsg2(char *,char *,char *); 00240 00241 void consumeUntil(SetWordType *st); 00242 void consumeUntilToken(int t); 00243 00244 virtual int _setmatch_wdfltsig(SetWordType *tokensWanted, 00245 ANTLRTokenType tokenTypeOfSet, 00246 SetWordType *whatFollows); 00247 virtual int _match_wdfltsig(ANTLRTokenType tokenWanted, 00248 SetWordType *whatFollows); 00249 00250 const ANTLRChar * parserTokenName(int tok); // MR1 00251 00252 int traceOptionValueDefault; // MR11 00253 int traceOption(int delta); // MR11 00254 int traceGuessOption(int delta); // MR11 00255 00256 // MR8 5-Aug-97 S.Bochnak@microtool.com.pl 00257 // MR8 Move resynch static local variable 00258 // MR8 to class instance 00259 00260 int syntaxErrCount; // MR12 00261 ANTLRTokenStream *getLexer() const { // MR12 00262 return inputTokens ? inputTokens->getLexer() : 0; } // MR12 00263 protected: // MR8 00264 int resynchConsumed; // MR8 00265 char *zzFAILtext; // workarea required by zzFAIL // MR9 00266 }; 00267 00268 00269 #define zzmatch(_t) \ 00270 if ( !_match((ANTLRTokenType)_t, &zzMissText, &zzMissTok, \ 00271 (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet) ) goto fail; 00272 00273 #define zzmatch_wsig(_t,handler) \ 00274 if ( !_match_wsig((ANTLRTokenType)_t) ) if ( guessing ) zzGUESS_FAIL else {_signal=MismatchedToken; goto handler;} 00275 00276 #define zzsetmatch(_ts) \ 00277 if ( !_setmatch(_ts, &zzMissText, &zzMissTok, \ 00278 (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet) ) goto fail; 00279 00280 #define zzsetmatch_wsig(_ts, handler) \ 00281 if ( !_setmatch_wsig(_ts) ) if ( guessing ) zzGUESS_FAIL else {_signal=MismatchedToken; goto handler;} 00282 00283 /* For the dflt signal matchers, a FALSE indicates that an error occurred 00284 * just like the other matchers, but in this case, the routine has already 00285 * recovered--we do NOT want to consume another token. However, when 00286 * the match was successful, we do want to consume hence _signal=0 so that 00287 * a token is consumed by the "if (!_signal) consume(); _signal=NoSignal;" 00288 * preamble. 00289 */ 00290 #define zzsetmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) \ 00291 if ( !_setmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) ) \ 00292 _signal = MismatchedToken; 00293 00294 #define zzmatch_wdfltsig(tokenWanted, whatFollows) \ 00295 if ( !_match_wdfltsig(tokenWanted, whatFollows) ) _signal = MismatchedToken; 00296 00297 00298 // MR1 10-Apr-97 zzfailed_pred() macro does not backtrack 00299 // MR1 in guess mode. 00300 // MR1 Identification and correction due to J. Lilley 00301 00302 #ifndef zzfailed_pred 00303 #define zzfailed_pred(_p) \ 00304 if (guessing) { \ 00305 zzGUESS_FAIL; \ 00306 } else { \ 00307 fprintf(stdout,"line %d: semantic error; failed predicate: '%s'\n", \ 00308 LT(1)->getLine(), _p); \ 00309 } 00310 #endif 00311 00312 #define zzRULE \ 00313 SetWordType *zzMissSet=NULL; ANTLRTokenType zzMissTok=(ANTLRTokenType)0; \ 00314 _ANTLRTokenPtr zzBadTok=NULL; ANTLRChar *zzBadText=(ANTLRChar *)""; \ 00315 int zzErrk=1,zzpf=0; \ 00316 zzTRACEdata \ 00317 ANTLRChar *zzMissText=(ANTLRChar *)""; 00318 00319 #endif 00320 00321 /* S t a n d a r d E x c e p t i o n S i g n a l s */ 00322 00323 #define NoSignal 0 00324 #define MismatchedToken 1 00325 #define NoViableAlt 2 00326 #define NoSemViableAlt 3 00327 00328 /* MR7 Allow more control over signalling */ 00329 /* by adding "Unwind" and "SetSignal" */ 00330 00331 #define Unwind 4 00332 #define setSignal(newValue) *_retsignal=_signal=(newValue) 00333 #define suppressSignal *_retsignal=_signal=0 00334 #define exportSignal *_retsignal=_signal