This grammar produces a scanner that will be able to recognize most C source files. Each symbol in the source file is identified and labeled according to its token in the grammar. The symbol and the line number on which it appears are reported as well.
/* A complete C block comment written as one pattern.  No rule in the rules
   section of this file references {comment}. */
comment "/*"([^*]|"*"+[^*/])*"*"*"*/"
/* Horizontal whitespace; newline is left out so it can be matched by its
   own rule. */
ws [ \t\v\r\f]+
/* A letter or underscore followed by letters, digits or underscores. */
identifier [A-Za-z_][A-Za-z0-9_]*
/* A single decimal digit; building block for the float patterns. */
dec [0-9]
/* Decimal integer: no leading zero. */
decimalint [1-9][0-9]*
/* Octal integer: a leading zero and octal digits (also matches a lone 0). */
octalint 0[0-7]*
/* Hexadecimal integer: 0x or 0X and at least one hex digit. */
hexint 0[xX][0-9a-fA-F]+
/* Any of the three integer forms; no suffix letters are accepted. */
integer ({decimalint})|({octalint})|({hexint})
/* Exponent part: e or E, optional sign, one or more digits. */
exp [Ee][+-]?{dec}+
/* Digits with a mandatory exponent and no decimal point. */
float1 {dec}+{exp}
/* Optional digits, a point, at least one digit, optional exponent. */
float2 {dec}*"."{dec}+({exp})?
/* At least one digit, a point, optional digits, optional exponent. */
float3 {dec}+"."{dec}*({exp})?
/* Any of the three floating-point forms. */
float ({float1})|({float2})|({float3})
/* A backslash followed by one of the listed escape letters. */
escapechar \\[ntvbrfa\\'"0]
/* Space through tilde, minus the single quote and the backslash. */
print1 [ -&(-[\]-~]
/* Space through tilde, minus the double quote and the backslash. */
print2 [ -!#-[\]-~]
/* Character constant: exactly one escape or one print1 character in
   single quotes. */
character '({escapechar}|{print1})'
/* String literal: any number of print2 characters or escapes in double
   quotes. */
string \"({print2}|{escapechar})*\"
/* Exclusive start condition entered when a block comment is opened. */
%x com
%{
/* printf() is used by the rule actions and by count(). */
#include <stdio.h>

/* Line currently being scanned; starts at 1 and is bumped on each newline. */
static int lineNo = 1;

/* Prints the "On line #<n>" prefix that precedes every reported token. */
void count(void);
%}
%%
    /* Block comments are skipped with the exclusive start condition com:
       the opener enters it, every character inside is discarded, newlines
       inside still advance lineNo, and the closer returns to the initial
       state.  The <com> prefixes are required: without them these rules
       would fire in the initial state and com would have no rules at all. */
"/*"            { BEGIN com; }
<com>.          { }
<com>"*/"       { BEGIN (0); }
<com>"\n"       { lineNo++; }
^#.* { }
"auto" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"break" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"case" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"char" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"const" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"continue" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"default" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"do" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"double" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"else" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"enum" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"extern" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"float" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"for" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"goto" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"if" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"int" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"long" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"register" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"return" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"short" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"signed" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"sizeof" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"static" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"struct" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"switch" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"typedef" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"union" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"unsigned" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"void" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"volatile" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
"while" { count(); printf("%-15.15s\t\t%s\n", "RESERVED", yytext); }
{ws} { }
{identifier} { count(); printf("%-15.15s\t\t%s\n", "IDENTIFIER", yytext); }
{integer} { count(); printf("%-15.15s\t\t%s\n", "INTEGER", yytext); }
{float} { count(); printf("%-15.15sFLOAT\t\t%s\n", "FLOAT", yytext); }
{character} { count(); printf("%-15.15s\t\t%s\n", "CHARACTER", yytext); }
{string} { count(); printf("%-15.15s\t\t%s\n", "STRING", yytext); }
"\n" { lineNo++; }
">>=" { count(); printf("%-15.15s\t\t%s\n", "RIGHT_ASSIGN", yytext); }
"<<=" { count(); printf("%-15.15s\t\t%s\n", "LEFT_ASSIGN", yytext); }
"+=" { count(); printf("%-15.15s\t\t%s\n", "ADD_ASSIGN)", yytext); }
"-=" { count(); printf("%-15.15s\t\t%s\n", "SUB_ASSIGN", yytext); }
"*=" { count(); printf("%-15.15s\t\t%s\n", "MUL_ASSIGN", yytext); }
"/=" { count(); printf("%-15.15s\t\t%s\n", "DIV_ASSIGN", yytext); }
"%=" { count(); printf("%-15.15s\t\t%s\n", "MOD_ASSIGN", yytext); }
"&=" { count(); printf("%-15.15s\t\t%s\n", "AND_ASSIGN", yytext); }
"^=" { count(); printf("%-15.15s\t\t%s\n", "XOR_ASSIGN", yytext); }
"|=" { count(); printf("%-15.15s\t\t%s\n", "OR_ASSIGN", yytext); }
">>" { count(); printf("%-15.15s\t\t%s\n", "RIGHT_OP", yytext); }
"<<" { count(); printf("%-15.15s\t\t%s\n", "LEFT_OP", yytext); }
"++" { count(); printf("%-15.15s\t\t%s\n", "INC_OP", yytext); }
"--" { count(); printf("%-15.15s\t\t%s\n", "DEC_OP", yytext); }
"->" { count(); printf("%-15.15s\t\t%s\n", "PTR_OP", yytext); }
"&&" { count(); printf("%-15.15s\t\t%s\n", "AND_OP", yytext); }
"||" { count(); printf("%-15.15s\t\t%s\n", "OR_OP", yytext); }
"<=" { count(); printf("%-15.15s\t\t%s\n", "LE_OP", yytext); }
">=" { count(); printf("%-15.15s\t\t%s\n", "GE_OP", yytext); }
"==" { count(); printf("%-15.15s\t\t%s\n", "EQ_OP", yytext); }
"!=" { count(); printf("%-15.15s\t\t%s\n", "NE_OP", yytext); }
";" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"{" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"}" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"," { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
":" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"=" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"(" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
")" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"[" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"]" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"." { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"&" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"!" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"~" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"-" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"+" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"*" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"/" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"%" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"<" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
">" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"^" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"|" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
"?" { count(); printf("%-15.15s\t\t%s\n", "PUNCTUATION", yytext); }
. { printf("%-15.15s%s\n", "LEXICAL ERROR", yytext); }
%%
/*
 * Program entry point: runs the generated scanner once.  yylex() returns
 * when the input is exhausted (yywrap() below reports that no further
 * input follows), after which the program exits successfully.
 */
int main(void)
{
    yylex();
    return 0;
}
/*
 * End-of-input hook for the scanner.  Always returns 1, i.e. there is no
 * further input to continue with.
 */
int yywrap()
{
    return 1;
}
/*
 * Writes the report prefix for a token: the text "On line #", the current
 * value of lineNo, and a tab.  No newline is emitted; the caller prints the
 * rest of the line.
 */
void count()
{
    static const char prefix[] = "On line #";

    printf("%s%d\t", prefix, lineNo);
}
Copyright (c) 1992-1998 David M Shean