C:/Users/Dennis/src/lang/russell.orig/src/pass1/scanner/scan.c

Go to the documentation of this file.
00001 #
00002 /*
00003  *  Russell scanner
00004  */
00005 
00006 #include    <stdio.h>
00007 #include    <ctype.h> 
00008 #include    "parm.h"
00009 #include    "scan.h"
00010 #include    "../parser/y.tab.h"
00011 
00012 #define STKSIZE     5       /* depth of scanner error recovery stack        */
00013 
00014 
00015 /*
00016  * code for nil pointer
00017  */
00018 #define     NIL     0
00019 
00020 
00021 extern boolean pflag;      /* input is preprocessor output */
00022 #define     ESCCHAR '#'    /* signals position in preprocessor output */
00023 
00024 
/*
 * One entry of a reserved-word table: a reserved spelling together
 * with the token code that reschk() returns when tokenbuf matches it.
 */
struct restab {
    char *rt_txt;   /* NUL-terminated spelling, compared with strcmp() */
    int rt_val;     /* token code returned for this spelling */
};
00029 
00030 /*
00031  * reserved word tables
00032  */
00033 
00034 struct restab residtab[];
00035 int nresids;
00036 
00037 struct restab resoptab[];
00038 int nresops;
00039 
00040 /*
00041  * table of character classes
00042  * and macro to find character class
00043  */
00044 
00045 int cctab[];
00046 
00047 # define CCLASS(c) ((c) == EOF ? EOFCC : cctab[c])
00048 
00049 
/*
 * global variables for communicating with yacc
 */

/* Real line number: yylex() bumps it on every newline and */
/* rdposition() overwrites it from a preprocessor position line. */
int yyline = 0;

/* Column number; nothing in the visible part of this file updates it. */
int yycolno = 0;

/* Current input file name, as set by rdposition(). */
char * yyinfnm;

/* Debug level: "$$+" inside a comment raises it, "$$-" lowers it. */
int yydebug;

/* Value attached to the token most recently scanned by yylex(). */
int yylval;
00061 
00062 /* declarations for table of virtual line numbers versus real line
00063  * numbers and filenames. The table is created by the scanner and
00064  * then used by later passes to convert a virtual line number
00065  * stored in the syntax tree to the real line number printed in
00066  * an error message.
00067  */
00068 typedef struct VrLine{
00069     int vr_vline,            /* virtual line number at which file */
00070                              /* change or line number jump occurred */
00071         vr_rline,            /* corresponding real line number */
00072         vr_fname;            /* string table index of filename */
00073     struct VrLine * vr_next; /* pointer to next record */  } vrline;
00074 
00075 vrline * vrtable = NIL,  /* pointers to first and last table entries */
00076        * vrtend  = NIL;
00077 
00078 int yyvline = 0;  /* curent virtual line number */
00079 
00080 static int scansavc = '\n';    /* n.b. preprocessor line number scan       */
00081                                /* routine only checks for '@' after '\n'   */
00082 
00083 static int scanstk[STKSIZE];
00084 static int stktop = -1;
00085 
00086 char tokenbuf[1000];  /* also used by some other routines as string buffer */
00087 /* static */ int tokenlgth;
00088 
00089 
00090 /*
00091  *      get next token --
00092  *      put it in tokenbuf.
00093  *              return token code.
00094  */
00095 
00096 yylex()
00097 {
00098 register int c;
00099 register cc;
00100 register char *p;
00101 int outtok;
00102 
00103     if (stktop >= 0) {
00104         outtok = scanstk[stktop--];
00105         goto out;
00106     }
00107 
00108 retry:
00109     c = scansavc;
00110     cc = CCLASS(c);
00111     yylval = 0; 
00112     p = tokenbuf;
00113 
00114     while( cc == WHTCC ) {
00115 
00116       if( c == '\n' ) {
00117         yyline++; yyvline++; 
00118         GETCHR(c);
00119         if( c == ESCCHAR && pflag) {
00120             rdposition();
00121             c = '\n';           /* Repeat check for ESCCHAR next time around */
00122             yyline--; yyvline--; /* Line number is correct for next line     */
00123         }
00124       } else {
00125         GETCHR(c);
00126       }
00127       cc = CCLASS(c);
00128     }
00129 
00130     switch(cc) {
00131 
00132     case LETCC:
00133         do {
00134             *p++ = c;
00135             GETCHR(c);
00136             cc = CCLASS(c);
00137         } while( (cc == LETCC) || (cc == DIGCC) );
00138         *p++ = 0;
00139         tokenlgth = p - tokenbuf;
00140         outtok = reschk(residtab,nresids,WORDID);
00141         break;
00142 
00143     case SQUCC: /* single quote */
00144       {
00145         boolean saw_quote = FALSE;
00146         for (;;) {
00147             if ( cc == EOFCC || c == '\n' ) {
00148                 yyperror("Unterminated quoted identifier");
00149                 break;
00150             }
00151             if (c == '\\') {
00152                 GETCHR(c);
00153                 switch(c) {
00154                     case 't': *p++ = '\t'; break;
00155                     case 'n': *p++ = '\n'; break;
00156                     case 'r': *p++ = '\r'; break;
00157                     default: *p++ = c;
00158                 }
00159             } else {
00160                 *p++ = c;
00161             }
00162             GETCHR(c);
00163             cc = CCLASS(c);
00164             if (saw_quote) {
00165                 if (cc == SQUCC) {
00166                     /* ignore this character and keep scanning */
00167                         GETCHR(c);
00168                         cc = CCLASS(c);
00169                 } else {
00170                     /* end of identifier */
00171                         break;
00172                 }
00173             }
00174             saw_quote = (cc == SQUCC);
00175         }
00176         *p++ = '\0';
00177         outtok = WORDID;
00178         yylval = stt_enter(tokenbuf,p-tokenbuf);
00179         break;
00180       }
00181 
00182     case DQUCC: /* double quote */
00183       {
00184         boolean saw_quote = FALSE;
00185         for (;;) {
00186             if (cc == EOFCC || c == '\n') {
00187                 yyperror("Unterminated string");
00188                 break;
00189             }
00190             if (c == '\\') {
00191                 GETCHR(c);
00192                 switch(c) {
00193                     case 't': *p++ = '\t'; break;
00194                     case 'n': *p++ = '\n'; break;
00195                     case 'r': *p++ = '\r'; break;
00196                     default: *p++ = c;
00197                 }
00198             } else {
00199                 *p++ = c;
00200             }
00201             GETCHR(c);
00202             cc = CCLASS(c);
00203             if (saw_quote) {
00204                 if (cc == DQUCC) {
00205                     /* ignore this character and keep scanning */
00206                     /* Note that the previous double quote was saved */
00207                         GETCHR(c);
00208                         cc = CCLASS(c);
00209                 } else {
00210                     /* end of string */
00211                         break;
00212                 }
00213             }
00214             saw_quote = (cc == DQUCC);
00215         }
00216         /* Delete trailing quote. */
00217             *(p - 1) = '\0'; 
00218         outtok = QSTRING;
00219         /* allocate a buffer for the string and return it */
00220             yylval = gc_malloc_atomic(p-tokenbuf-1);
00221             strcpy(yylval,&(tokenbuf[1])); /* skip leading quote */
00222         break;
00223       }
00224 
00225     case SEPCC:
00226         *p = scansavc = c;
00227         GETCHR(c);
00228         if( (scansavc == '(') && (c == '*') )
00229         /* process a comment */
00230             { int startline = yyline;
00231                              /* temporary line counter used in comments   */
00232                              /* so error message has a useful line number */
00233                              /* if EOF occurs inside a comment            */
00234               char * startfnm = yyinfnm;
00235               int cmtnest = 0;
00236 
00237               do {
00238                   switch( scansavc ) {
00239                       case EOF:
00240                           yyline = startline;
00241                           yyinfnm = startfnm;
00242                           goto retry; /* return an end of file */
00243                       case '\n':
00244                           yyline++; yyvline++; 
00245                           if( c == ESCCHAR && pflag ) {
00246                               rdposition();
00247                               c = '\n'; 
00248                                 /* Repeat check for ESCCHAR next time around */
00249                               yyline--; yyvline--;
00250                                 /* Line number is correct for next line      */
00251                           }
00252                           break;
00253                       case '*':
00254                           if( c == ')' )
00255                           cmtnest--;
00256                           break;
00257                       case '(':
00258                           if( c == '*' ) {
00259                               cmtnest++;
00260                               GETCHR(c);
00261                           }
00262                           break;
00263                       case '$':
00264                           if (c == '$') {
00265                             GETCHR(c);
00266                             switch( c ) {
00267                               case '+':
00268                                 yydebug++;
00269                                 break;
00270                               case '-':
00271                                 if(yydebug) yydebug--;
00272                                 break;
00273                             }
00274                           }
00275                           break;
00276                   }
00277                   scansavc = c;
00278                   GETCHR(c);
00279               } while( cmtnest > 0 );
00280               /* put2w( S_YYLINE, yyline ); */
00281               scansavc = c;
00282               goto retry;
00283             }
00284         outtok = *p++;
00285         break;
00286 
00287     case DIGCC:
00288         do {
00289             *p++ = c;
00290             GETCHR(c);
00291             cc = CCLASS(c);
00292         } while( cc == DIGCC || cc == LETCC );
00293         *p++ = '\0';
00294         outtok = UQSTRING; /* unquoted string */
00295         /* allocate buffer and return it, as for quoted strings */
00296             yylval = gc_malloc_atomic(p - tokenbuf);
00297             strcpy(yylval,tokenbuf);
00298         break;
00299 
00300     case OPRCC:
00301         do {
00302             *p++ = c;
00303             GETCHR(c);
00304             cc = CCLASS(c);
00305         } while( cc == OPRCC );
00306         *p++ = 0;
00307         tokenlgth = p - tokenbuf;
00308         outtok = reschk(resoptab,nresops,OPID);
00309         break;
00310 
00311     case EOFCC:
00312         scansavc = '\n';  /* Set things up for core image to be subsequently */
00313                           /* restarted */
00314         return(EOF);
00315 
00316     case BADCC:
00317         GETCHR(c);
00318         scansavc = c;
00319         goto retry;
00320 
00321     }
00322 
00323     scansavc = c;
00324 
00325 out: 
00326 
00327     return ( outtok );
00328 
00329 }
00330 
00331 /*
00332  * read current position ( line no, file name )
00333  *   up to and including newline character
00334  * This routine clobbers tokenbuf & tokenlgth; this shouldn't matter.
00335  */
00336 rdposition()
00337 {
00338 register c;
00339 register n = 0;
00340 register char *p;
00341 
00342     while( (GETCHR(c)) == ' ' ) ;
00343 
00344     for(;;) {
00345         if ( !isdigit(c) ) break;
00346         n = n * 10 + (c - '0');
00347         GETCHR(c);
00348     }
00349     yyline = n;
00350 
00351     for(;;) {
00352         if( c == EOF ) goto bad;
00353     if( c != ' ' ) break;
00354         GETCHR(c);
00355     }
00356     if( c != '"' ) goto bad;
00357     GETCHR(c);
00358 
00359     p = tokenbuf;
00360     for(;;) {
00361         if( c == EOF ) goto bad;
00362         if( c == '"') break;
00363         *p++ = c;
00364         GETCHR(c);
00365     }
00366     *p++ = 0;
00367     tokenlgth = p - tokenbuf;
00368     yyinfnm = (char *)stt_enter( tokenbuf, tokenlgth );
00369     addposition(yyinfnm, yyline);
00370 
00371     goto out;
00372 
00373   bad: 
00374     /* There was a syntax error in the line number specification. */
00375     /* This is either a preprocessor error or a bizarre input pgm */
00376       yyperror("Error in line number");
00377   out:
00378     /* Scan to the end of the line discarding any junk.  */
00379       while( (c != '\n') && (c != EOF) ) {
00380         GETCHR(c);
00381       }
00382 }
00383 
00384 /* add new record to vrtable, associating fn and ln with the current */
00385 /* value of yyvline                                                  */
00386 addposition(fn,ln)
00387 unsigned fn;  /* stt pointer */
00388 int ln;
00389 {   register vrline * p;
00390 
00391     p = (vrline *) gc_malloc(sizeof(vrline));
00392     if (vrtable == NIL) 
00393         vrtable = p;
00394     else
00395         (vrtend -> vr_next) = p;
00396     vrtend = p;
00397     (p -> vr_vline) = yyvline;
00398     (p -> vr_rline) = ln;
00399     (p -> vr_fname) = fn;
00400     (p -> vr_next ) = NIL;
00401 }
00402 
00403 /*
00404  * look up contents of token buf in a reserved-word table
00405  *   of nentries entries.
00406  *
00407  * return value if it is reserved,
00408  *   otherwise return default value,
00409  *     add token buf to string table,
00410  *       and set yylval to the string table pointer.
00411  */
00412 
00413 reschk( tbl, nentries, dflt )
00414 register struct restab *tbl;
00415 {
00416 register int m;
00417 register int l, r;
00418 register char * this_entry;
00419 
00420     l = 0; r = nentries-1;
00421     while( l < r ) {
00422       m = (l + r) / 2;
00423       this_entry = tbl[m].rt_txt;
00424       if( *this_entry < *tokenbuf ||
00425           (*this_entry == *tokenbuf && strcmp(this_entry, tokenbuf) < 0) )
00426         l = m + 1;
00427       else
00428         r = m;
00429     }
00430 
00431     if( strcmp(tbl[l].rt_txt,tokenbuf) == 0 )
00432       return( tbl[l].rt_val );
00433     else {
00434       yylval = stt_enter( tokenbuf, tokenlgth );
00435       return( dflt );
00436     }
00437 }
00438 
00439 
00440 /*
00441  * push a token back onto the input
00442  */
00443 
00444 yyunlex( tok )
00445 int tok;
00446 {
00447     if( stktop < (STKSIZE-1) )
00448     scanstk[++stktop] = tok;
00449     else
00450     yyperror("Compiler Error: scanner stack overflow");
00451 
00452 }
00453 

Generated on Fri Jan 25 10:39:46 2008 for russell by  doxygen 1.5.4