GME  13
ATokenBuffer.cpp
Go to the documentation of this file.
00001 /* ANTLRTokenBuffer.C
00002  *
00003  * SOFTWARE RIGHTS
00004  *
00005  * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
00006  * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
00007  * company may do whatever they wish with source code distributed with
00008  * PCCTS or the code generated by PCCTS, including the incorporation of
00009  * PCCTS, or its output, into commerical software.
00010  *
00011  * We encourage users to develop software with PCCTS.  However, we do ask
00012  * that credit is given to us for developing PCCTS.  By "credit",
00013  * we mean that if you incorporate our source code into one of your
00014  * programs (commercial product, research project, or otherwise) that you
00015  * acknowledge this fact somewhere in the documentation, research report,
00016  * etc...  If you like PCCTS and have developed a nice tool with the
00017  * output, please mention that you developed it using PCCTS.  In
00018  * addition, we ask that this header remain intact in our source code.
00019  * As long as these guidelines are kept, we expect to continue enhancing
00020  * this system and expect to make other tools available as they are
00021  * completed.
00022  *
00023  * ANTLR 1.33
00024  * Terence Parr
00025  * Parr Research Corporation
00026  * with Purdue University and AHPCRC, University of Minnesota
00027  * 1989-1998
00028  */
00029 // See http://support.microsoft.com/kb/148652
00030 //#include "forcelib.h"
00031 #include "stdafx.h"
00032 typedef int ANTLRTokenType;     // fool AToken.h into compiling
00033 
00034 class ANTLRParser;                                      /* MR1 */
00035 
00036 #define ANTLR_SUPPORT_CODE
00037 
00038 #include "config.h"
00039 #include ATOKENBUFFER_H
00040 typedef ANTLRAbstractToken *_ANTLRTokenPtr;
00041 
// Debug-only file-scope state. Nothing here is compiled unless one of the
// DBG_* macros below is defined.

#if defined(DBG_TBUF_MARK_REW) || defined(DBG_TBUF)
// One counter per buffer index: the trace code in mark() increments the
// entry for the marked position and the trace code in rewind() decrements it.
static unsigned char test[1000];
#endif

#if defined(DBG_REFCOUNTTOKEN)
// Storage for ANTLRCommonToken's static counters, both starting at zero.
// NOTE(review): presumably constructor/destructor call tallies -- confirm
// against the class declaration.
int ANTLRCommonToken::dtor = 0;
int ANTLRCommonToken::ctor = 0;
#endif
00050 
00051 ANTLRTokenBuffer::
00052 ANTLRTokenBuffer(ANTLRTokenStream *_input, int _k, int _cs)
00053 {
00054         this->input = _input;
00055         this->k = _k;
00056         buffer_size = chunk_size = _cs;
00057         buffer = (_ANTLRTokenPtr *)
00058                          calloc(chunk_size+1,sizeof(_ANTLRTokenPtr ));
00059         if ( buffer == NULL ) {
00060                 panic("cannot alloc token buffer");
00061         }
00062         buffer++;                               // leave the first elem empty so tp-1 is valid ptr
00063 
00064         tp = &buffer[0];
00065         last = tp-1;
00066         next = &buffer[0];
00067         num_markers = 0;
00068         end_of_buffer = &buffer[buffer_size-1];
00069         threshold = &buffer[(int)(buffer_size*(1.0/2.0))];
00070         _deleteTokens = 1;      // assume we delete tokens
00071         parser=NULL;                            // MR5 - uninitialized reference
00072 }
00073 
// Does nothing; the destructor calls it first.
// NOTE(review): presumably a convenient place for a debugger breakpoint.
static void f()
{
}
00075 ANTLRTokenBuffer::
00076 ~ANTLRTokenBuffer()
00077 {
00078         f();
00079         // Delete all remaining tokens (from 0..last inclusive)
00080         if ( _deleteTokens )
00081         {
00082                 _ANTLRTokenPtr *z;
00083                 for (z=buffer; z<=last; z++)
00084                 {
00085                         (*z)->deref();
00086 //                      z->deref();
00087 #ifdef DBG_REFCOUNTTOKEN
00088                                         fprintf(stderr, "##########dtor: deleting token '%s' (ref %d)\n",
00089                                                         ((ANTLRCommonToken *)*z)->getText(), (*z)->nref());
00090 #endif
00091                         if ( (*z)->nref()==0 )
00092                         {
00093                                 delete (*z);
00094                         }
00095                 }
00096         }
00097 
00098         if ( buffer!=NULL ) free((char *)(buffer-1));
00099 }
00100 
00101 #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW)
00102 #include <stdio.h>
00103 #endif
00104 
00105 _ANTLRTokenPtr ANTLRTokenBuffer::
00106 getToken()
00107 {
00108         if ( tp <= last )       // is there any buffered lookahead still to be read?
00109         {
00110                 return *tp++;   // read buffered lookahead
00111         }
00112         // out of buffered lookahead, get some more "real"
00113         // input from getANTLRToken()
00114         if ( num_markers==0 )
00115         {
00116                 if( next > threshold )
00117                 {
00118 #ifdef DBG_TBUF
00119 fprintf(stderr,"getToken: next > threshold (high water is %d)\n", threshold-buffer);
00120 #endif
00121                         makeRoom();
00122                 }
00123         }
00124         else {
00125                 if ( next > end_of_buffer )
00126                 {
00127 #ifdef DBG_TBUF
00128 fprintf(stderr,"getToken: next > end_of_buffer (size is %d)\n", buffer_size);
00129 #endif
00130                         extendBuffer();
00131                 }
00132         }
00133         *next = getANTLRToken();
00134         (*next)->ref();                         // say we have a copy of this pointer in buffer
00135         last = next;
00136         next++;
00137         tp = last;
00138         return *tp++;
00139 }
00140 
00141 void ANTLRTokenBuffer::
00142 rewind(int pos)
00143 {
00144 #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW)
00145         fprintf(stderr, "rewind(%d)[nm=%d,from=%d,%d.n=%d]\n", pos, num_markers, tp-buffer,pos,test[pos]);
00146         test[pos]--;
00147 #endif
00148         tp = &buffer[pos];
00149         num_markers--;
00150 }
00151 
00152 /*
00153  * This function is used to specify that the token pointers read
00154  * by the ANTLRTokenBuffer should be buffered up (to be reused later).
00155  */
00156 int ANTLRTokenBuffer::
00157 mark()
00158 {
00159 #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW)
00160         test[tp-buffer]++;
00161         fprintf(stderr,"mark(%d)[nm=%d,%d.n=%d]\n",tp-buffer,num_markers+1,tp-buffer,test[tp-buffer]);
00162 #endif
00163         num_markers++;
00164         return tp - buffer;
00165 }
00166 
00167 /*
00168  * returns the token pointer n positions ahead.
00169  * This implies that bufferedToken(1) gets the NEXT symbol of lookahead.
00170  * This is used in conjunction with the ANTLRParser lookahead buffer.
00171  *
00172  * No markers are set or anything.  A bunch of input is buffered--that's all.
00173  * The tp pointer is left alone as the lookahead has not been advanced
00174  * with getToken().  The next call to getToken() will find a token
00175  * in the buffer and won't have to call getANTLRToken().
00176  *
00177  * If this is called before a consume() is done, how_many_more_i_need is
00178  * set to 'n'.
00179  */
00180 _ANTLRTokenPtr ANTLRTokenBuffer::
00181 bufferedToken(int n)
00182 {
00183 //      int how_many_more_i_need = (last-tp < 0) ? n : n-(last-tp)-1;
00184         int how_many_more_i_need = (tp > last) ? n : n-(last-tp)-1;
00185         // Make sure that at least n tokens are available in the buffer
00186 #ifdef DBG_TBUF
00187         fprintf(stderr, "bufferedToken(%d)\n", n);
00188 #endif
00189         for (int i=1; i<=how_many_more_i_need; i++)
00190         {
00191                 if ( next > end_of_buffer )     // buffer overflow?
00192                 {
00193                         extendBuffer();
00194                 }
00195                 *next = getANTLRToken();
00196                 (*next)->ref();         // say we have a copy of this pointer in buffer
00197                 last = next;
00198                 next++;
00199         }
00200         return tp[n - 1];
00201 }
00202 
00203 /* If no markers are set, the none of the input needs to be saved (except
00204  * for the lookahead Token pointers).  We save only k-1 token pointers as
00205  * we are guaranteed to do a getANTLRToken() right after this because otherwise
00206  * we wouldn't have needed to extend the buffer.
00207  *
00208  * If there are markers in the buffer, we need to save things and so
00209  * extendBuffer() is called.
00210  */
00211 void ANTLRTokenBuffer::
00212 makeRoom()
00213 {
00214 #ifdef DBG_TBUF
00215         fprintf(stderr, "in makeRoom.................\n");
00216         fprintf(stderr, "num_markers==%d\n", num_markers);
00217 #endif
00218 /*
00219         if ( num_markers == 0 )
00220         {
00221 */
00222 #ifdef DBG_TBUF
00223                 fprintf(stderr, "moving lookahead and resetting next\n");
00224 
00225                 _ANTLRTokenPtr *r;
00226                 fprintf(stderr, "tbuf = [");
00227                 for (r=buffer; r<=last; r++)
00228                 {
00229                         if ( *r==NULL ) fprintf(stderr, " xxx");
00230                         else fprintf(stderr, " '%s'", ((ANTLRCommonToken *)*r)->getText());
00231                 }
00232                 fprintf(stderr, " ]\n");
00233 
00234                 fprintf(stderr,
00235                 "before: tp=%d, last=%d, next=%d, threshold=%d\n",tp-buffer,last-buffer,next-buffer,threshold-buffer);
00236 #endif
00237 
00238                 // Delete all tokens from 0..last-(k-1) inclusive
00239                 if ( _deleteTokens )
00240                 {
00241                         _ANTLRTokenPtr *z;
00242                         for (z=buffer; z<=last-(k-1); z++)
00243                         {
00244                                 (*z)->deref();
00245 //                              z->deref();
00246 #ifdef DBG_REFCOUNTTOKEN
00247                                         fprintf(stderr, "##########makeRoom: deleting token '%s' (ref %d)\n",
00248                                                         ((ANTLRCommonToken *)*z)->getText(), (*z)->nref());
00249 #endif
00250                                 if ( (*z)->nref()==0 )
00251                                 {
00252                                         delete (*z);
00253                                 }
00254                         }
00255                 }
00256 
00257                 // reset the buffer to initial conditions, but move k-1 symbols
00258                 // to the beginning of buffer and put new input symbol at k
00259                 _ANTLRTokenPtr *p = buffer, *q = last-(k-1)+1;
00260 //              ANTLRAbstractToken **p = buffer, **q = end_of_buffer-(k-1)+1;
00261 #ifdef DBG_TBUF
00262                 fprintf(stderr, "lookahead buffer = [");
00263 #endif
00264                 for (int i=1; i<=(k-1); i++)
00265                 {
00266                         *p++ = *q++;
00267 #ifdef DBG_TBUF
00268                         fprintf(stderr,
00269                         " '%s'", ((ANTLRCommonToken *)buffer[i-1])->getText());
00270 #endif
00271                 }
00272 #ifdef DBG_TBUF
00273                 fprintf(stderr, " ]\n");
00274 #endif
00275                 next = &buffer[k-1];
00276                 tp = &buffer[k-1];      // tp points to what will be filled in next
00277                 last = tp-1;
00278 #ifdef DBG_TBUF
00279                 fprintf(stderr,
00280                 "after: tp=%d, last=%d, next=%d\n",
00281                 tp-buffer, last-buffer, next-buffer);
00282 #endif
00283 /*
00284         }
00285         else {
00286                 extendBuffer();
00287         }
00288 */
00289 }
00290 
00291 /* This function extends 'buffer' by chunk_size and returns with all
00292  * pointers at the same relative positions in the buffer (the buffer base
00293  * address could have changed in realloc()) except that 'next' comes
00294  * back set to where the next token should be stored.  All other pointers
00295  * are untouched.
00296  */
00297 void
00298 ANTLRTokenBuffer::
00299 extendBuffer()
00300 {
00301         int save_last = last-buffer, save_tp = tp-buffer, save_next = next-buffer;
00302 #ifdef DBG_TBUF
00303         fprintf(stderr, "extending physical buffer\n");
00304 #endif
00305         buffer_size += chunk_size;
00306         buffer = (_ANTLRTokenPtr *)
00307                 realloc((char *)(buffer-1),
00308                                 (buffer_size+1)*sizeof(_ANTLRTokenPtr ));
00309         if ( buffer == NULL ) {
00310                 panic("cannot alloc token buffer");
00311         }
00312         buffer++;                               // leave the first elem empty so tp-1 is valid ptr
00313 
00314         tp = buffer + save_tp;  // put the pointers back to same relative position
00315         last = buffer + save_last;
00316         next = buffer + save_next;
00317         end_of_buffer = &buffer[buffer_size-1];
00318         threshold = &buffer[(int)(buffer_size*(1.0/2.0))];
00319 
00320 /*
00321         // zero out new token ptrs so we'll know if something to delete in buffer
00322         ANTLRAbstractToken **p = end_of_buffer-chunk_size+1;
00323         for (; p<=end_of_buffer; p++) *p = NULL;
00324 */
00325 }
00326 
00327 ANTLRParser * ANTLRTokenBuffer::                                // MR1
00328 setParser(ANTLRParser *p) {                                     // MR1
00329   ANTLRParser   *old=parser;                                    // MR1
00330   parser=p;                                                     // MR1
00331   input->setParser(p);                                          // MR1
00332   return old;                                                   // MR1
00333 }                                                               // MR1
00334                                                                 // MR1
00335 ANTLRParser * ANTLRTokenBuffer::                                // MR1
00336 getParser() {                                                   // MR1
00337   return parser;                                                // MR1
00338 }                                                               // MR1
00339 
00340 /* to avoid having to link in another file just for the smart token ptr
00341  * stuff, we include it here.  Ugh.
00342  */
00343 #include ATOKPTR_C