GME
13
|
00001 /* ANTLRTokenBuffer.C 00002 * 00003 * SOFTWARE RIGHTS 00004 * 00005 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool 00006 * Set (PCCTS) -- PCCTS is in the public domain. An individual or 00007 * company may do whatever they wish with source code distributed with 00008 * PCCTS or the code generated by PCCTS, including the incorporation of 00009 * PCCTS, or its output, into commerical software. 00010 * 00011 * We encourage users to develop software with PCCTS. However, we do ask 00012 * that credit is given to us for developing PCCTS. By "credit", 00013 * we mean that if you incorporate our source code into one of your 00014 * programs (commercial product, research project, or otherwise) that you 00015 * acknowledge this fact somewhere in the documentation, research report, 00016 * etc... If you like PCCTS and have developed a nice tool with the 00017 * output, please mention that you developed it using PCCTS. In 00018 * addition, we ask that this header remain intact in our source code. 00019 * As long as these guidelines are kept, we expect to continue enhancing 00020 * this system and expect to make other tools available as they are 00021 * completed. 00022 * 00023 * ANTLR 1.33 00024 * Terence Parr 00025 * Parr Research Corporation 00026 * with Purdue University and AHPCRC, University of Minnesota 00027 * 1989-1998 00028 */ 00029 // See http://support.microsoft.com/kb/148652 00030 //#include "forcelib.h" 00031 #include "stdafx.h" 00032 typedef int ANTLRTokenType; // fool AToken.h into compiling 00033 00034 class ANTLRParser; /* MR1 */ 00035 00036 #define ANTLR_SUPPORT_CODE 00037 00038 #include "config.h" 00039 #include ATOKENBUFFER_H 00040 typedef ANTLRAbstractToken *_ANTLRTokenPtr; 00041 00042 #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) 00043 static unsigned char test[1000]; 00044 #endif 00045 00046 #ifdef DBG_REFCOUNTTOKEN 00047 int ANTLRCommonToken::ctor = 0; 00048 int ANTLRCommonToken::dtor = 0; 00049 #endif 00050 00051 ANTLRTokenBuffer:: 00052 ANTLRTokenBuffer(ANTLRTokenStream *_input, int _k, int _cs) 00053 { 00054 this->input = _input; 00055 this->k = _k; 00056 buffer_size = chunk_size = _cs; 00057 buffer = (_ANTLRTokenPtr *) 00058 calloc(chunk_size+1,sizeof(_ANTLRTokenPtr )); 00059 if ( buffer == NULL ) { 00060 panic("cannot alloc token buffer"); 00061 } 00062 buffer++; // leave the first elem empty so tp-1 is valid ptr 00063 00064 tp = &buffer[0]; 00065 last = tp-1; 00066 next = &buffer[0]; 00067 num_markers = 0; 00068 end_of_buffer = &buffer[buffer_size-1]; 00069 threshold = &buffer[(int)(buffer_size*(1.0/2.0))]; 00070 _deleteTokens = 1; // assume we delete tokens 00071 parser=NULL; // MR5 - uninitialized reference 00072 } 00073 00074 static void f() {;} 00075 ANTLRTokenBuffer:: 00076 ~ANTLRTokenBuffer() 00077 { 00078 f(); 00079 // Delete all remaining tokens (from 0..last inclusive) 00080 if ( _deleteTokens ) 00081 { 00082 _ANTLRTokenPtr *z; 00083 for (z=buffer; z<=last; z++) 00084 { 00085 (*z)->deref(); 00086 // z->deref(); 00087 #ifdef DBG_REFCOUNTTOKEN 00088 fprintf(stderr, "##########dtor: deleting token '%s' (ref %d)\n", 00089 ((ANTLRCommonToken *)*z)->getText(), (*z)->nref()); 00090 #endif 00091 if ( (*z)->nref()==0 ) 00092 { 00093 delete (*z); 00094 } 00095 } 00096 } 00097 00098 if ( buffer!=NULL ) free((char *)(buffer-1)); 00099 } 00100 00101 #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) 00102 #include <stdio.h> 00103 #endif 00104 00105 _ANTLRTokenPtr ANTLRTokenBuffer:: 00106 getToken() 00107 { 00108 if ( tp <= last ) // is there any buffered lookahead still to be read? 00109 { 00110 return *tp++; // read buffered lookahead 00111 } 00112 // out of buffered lookahead, get some more "real" 00113 // input from getANTLRToken() 00114 if ( num_markers==0 ) 00115 { 00116 if( next > threshold ) 00117 { 00118 #ifdef DBG_TBUF 00119 fprintf(stderr,"getToken: next > threshold (high water is %d)\n", threshold-buffer); 00120 #endif 00121 makeRoom(); 00122 } 00123 } 00124 else { 00125 if ( next > end_of_buffer ) 00126 { 00127 #ifdef DBG_TBUF 00128 fprintf(stderr,"getToken: next > end_of_buffer (size is %d)\n", buffer_size); 00129 #endif 00130 extendBuffer(); 00131 } 00132 } 00133 *next = getANTLRToken(); 00134 (*next)->ref(); // say we have a copy of this pointer in buffer 00135 last = next; 00136 next++; 00137 tp = last; 00138 return *tp++; 00139 } 00140 00141 void ANTLRTokenBuffer:: 00142 rewind(int pos) 00143 { 00144 #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) 00145 fprintf(stderr, "rewind(%d)[nm=%d,from=%d,%d.n=%d]\n", pos, num_markers, tp-buffer,pos,test[pos]); 00146 test[pos]--; 00147 #endif 00148 tp = &buffer[pos]; 00149 num_markers--; 00150 } 00151 00152 /* 00153 * This function is used to specify that the token pointers read 00154 * by the ANTLRTokenBuffer should be buffered up (to be reused later). 00155 */ 00156 int ANTLRTokenBuffer:: 00157 mark() 00158 { 00159 #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) 00160 test[tp-buffer]++; 00161 fprintf(stderr,"mark(%d)[nm=%d,%d.n=%d]\n",tp-buffer,num_markers+1,tp-buffer,test[tp-buffer]); 00162 #endif 00163 num_markers++; 00164 return tp - buffer; 00165 } 00166 00167 /* 00168 * returns the token pointer n positions ahead. 00169 * This implies that bufferedToken(1) gets the NEXT symbol of lookahead. 00170 * This is used in conjunction with the ANTLRParser lookahead buffer. 00171 * 00172 * No markers are set or anything. A bunch of input is buffered--that's all. 00173 * The tp pointer is left alone as the lookahead has not been advanced 00174 * with getToken(). The next call to getToken() will find a token 00175 * in the buffer and won't have to call getANTLRToken(). 00176 * 00177 * If this is called before a consume() is done, how_many_more_i_need is 00178 * set to 'n'. 00179 */ 00180 _ANTLRTokenPtr ANTLRTokenBuffer:: 00181 bufferedToken(int n) 00182 { 00183 // int how_many_more_i_need = (last-tp < 0) ? n : n-(last-tp)-1; 00184 int how_many_more_i_need = (tp > last) ? n : n-(last-tp)-1; 00185 // Make sure that at least n tokens are available in the buffer 00186 #ifdef DBG_TBUF 00187 fprintf(stderr, "bufferedToken(%d)\n", n); 00188 #endif 00189 for (int i=1; i<=how_many_more_i_need; i++) 00190 { 00191 if ( next > end_of_buffer ) // buffer overflow? 00192 { 00193 extendBuffer(); 00194 } 00195 *next = getANTLRToken(); 00196 (*next)->ref(); // say we have a copy of this pointer in buffer 00197 last = next; 00198 next++; 00199 } 00200 return tp[n - 1]; 00201 } 00202 00203 /* If no markers are set, the none of the input needs to be saved (except 00204 * for the lookahead Token pointers). We save only k-1 token pointers as 00205 * we are guaranteed to do a getANTLRToken() right after this because otherwise 00206 * we wouldn't have needed to extend the buffer. 00207 * 00208 * If there are markers in the buffer, we need to save things and so 00209 * extendBuffer() is called. 00210 */ 00211 void ANTLRTokenBuffer:: 00212 makeRoom() 00213 { 00214 #ifdef DBG_TBUF 00215 fprintf(stderr, "in makeRoom.................\n"); 00216 fprintf(stderr, "num_markers==%d\n", num_markers); 00217 #endif 00218 /* 00219 if ( num_markers == 0 ) 00220 { 00221 */ 00222 #ifdef DBG_TBUF 00223 fprintf(stderr, "moving lookahead and resetting next\n"); 00224 00225 _ANTLRTokenPtr *r; 00226 fprintf(stderr, "tbuf = ["); 00227 for (r=buffer; r<=last; r++) 00228 { 00229 if ( *r==NULL ) fprintf(stderr, " xxx"); 00230 else fprintf(stderr, " '%s'", ((ANTLRCommonToken *)*r)->getText()); 00231 } 00232 fprintf(stderr, " ]\n"); 00233 00234 fprintf(stderr, 00235 "before: tp=%d, last=%d, next=%d, threshold=%d\n",tp-buffer,last-buffer,next-buffer,threshold-buffer); 00236 #endif 00237 00238 // Delete all tokens from 0..last-(k-1) inclusive 00239 if ( _deleteTokens ) 00240 { 00241 _ANTLRTokenPtr *z; 00242 for (z=buffer; z<=last-(k-1); z++) 00243 { 00244 (*z)->deref(); 00245 // z->deref(); 00246 #ifdef DBG_REFCOUNTTOKEN 00247 fprintf(stderr, "##########makeRoom: deleting token '%s' (ref %d)\n", 00248 ((ANTLRCommonToken *)*z)->getText(), (*z)->nref()); 00249 #endif 00250 if ( (*z)->nref()==0 ) 00251 { 00252 delete (*z); 00253 } 00254 } 00255 } 00256 00257 // reset the buffer to initial conditions, but move k-1 symbols 00258 // to the beginning of buffer and put new input symbol at k 00259 _ANTLRTokenPtr *p = buffer, *q = last-(k-1)+1; 00260 // ANTLRAbstractToken **p = buffer, **q = end_of_buffer-(k-1)+1; 00261 #ifdef DBG_TBUF 00262 fprintf(stderr, "lookahead buffer = ["); 00263 #endif 00264 for (int i=1; i<=(k-1); i++) 00265 { 00266 *p++ = *q++; 00267 #ifdef DBG_TBUF 00268 fprintf(stderr, 00269 " '%s'", ((ANTLRCommonToken *)buffer[i-1])->getText()); 00270 #endif 00271 } 00272 #ifdef DBG_TBUF 00273 fprintf(stderr, " ]\n"); 00274 #endif 00275 next = &buffer[k-1]; 00276 tp = &buffer[k-1]; // tp points to what will be filled in next 00277 last = tp-1; 00278 #ifdef DBG_TBUF 00279 fprintf(stderr, 00280 "after: tp=%d, last=%d, next=%d\n", 00281 tp-buffer, last-buffer, next-buffer); 00282 #endif 00283 /* 00284 } 00285 else { 00286 extendBuffer(); 00287 } 00288 */ 00289 } 00290 00291 /* This function extends 'buffer' by chunk_size and returns with all 00292 * pointers at the same relative positions in the buffer (the buffer base 00293 * address could have changed in realloc()) except that 'next' comes 00294 * back set to where the next token should be stored. All other pointers 00295 * are untouched. 00296 */ 00297 void 00298 ANTLRTokenBuffer:: 00299 extendBuffer() 00300 { 00301 int save_last = last-buffer, save_tp = tp-buffer, save_next = next-buffer; 00302 #ifdef DBG_TBUF 00303 fprintf(stderr, "extending physical buffer\n"); 00304 #endif 00305 buffer_size += chunk_size; 00306 buffer = (_ANTLRTokenPtr *) 00307 realloc((char *)(buffer-1), 00308 (buffer_size+1)*sizeof(_ANTLRTokenPtr )); 00309 if ( buffer == NULL ) { 00310 panic("cannot alloc token buffer"); 00311 } 00312 buffer++; // leave the first elem empty so tp-1 is valid ptr 00313 00314 tp = buffer + save_tp; // put the pointers back to same relative position 00315 last = buffer + save_last; 00316 next = buffer + save_next; 00317 end_of_buffer = &buffer[buffer_size-1]; 00318 threshold = &buffer[(int)(buffer_size*(1.0/2.0))]; 00319 00320 /* 00321 // zero out new token ptrs so we'll know if something to delete in buffer 00322 ANTLRAbstractToken **p = end_of_buffer-chunk_size+1; 00323 for (; p<=end_of_buffer; p++) *p = NULL; 00324 */ 00325 } 00326 00327 ANTLRParser * ANTLRTokenBuffer:: // MR1 00328 setParser(ANTLRParser *p) { // MR1 00329 ANTLRParser *old=parser; // MR1 00330 parser=p; // MR1 00331 input->setParser(p); // MR1 00332 return old; // MR1 00333 } // MR1 00334 // MR1 00335 ANTLRParser * ANTLRTokenBuffer:: // MR1 00336 getParser() { // MR1 00337 return parser; // MR1 00338 } // MR1 00339 00340 /* to avoid having to link in another file just for the smart token ptr 00341 * stuff, we include it here. Ugh. 00342 */ 00343 #include ATOKPTR_C