Help me improve a lexical analyzer C++ code

Status
Not open for further replies.

neerom

Newbie level 1
Joined
Apr 29, 2006
Messages
1
Helped
0
Reputation
0
Reaction score
0
Trophy points
1,281
Activity points
1,406
lexical analyser

Will you please help me with this code? I'm not satisfied with it. It is written in C++, and I want to improve it. Please help me.

#include<iostream.h>
#include<conio.h>
#include<stdio.h>
#include<fstream.h>
#include<io.h>
#include<ctype.h>
#include<string.h>
#include<stdlib.h>

// Forward declarations for the scanner, the error log and the
// recursive-descent parser defined further down in this file.

// --- character input and tokenizer ---
void readch();
void scanner();
// NOTE(review): check() is declared here but no definition is visible in this file.
void check();
// --- error log: update(n) records error number n, printerror() prints the log ---
void update(int);
void printerror();
// --- parser entry point and statement parsers ---
void parser();
void whilest();
void forst();
void ifst();
void expression();
void statement();
void assignst();
//void printerror();
void inputst();
void outputst();
// --- expression grammar: expression -> subexpression -> term -> factor ---
void subexpression();
void term();
void factor();
// --- declarations and optional array subscripts ---
void declaration();
void variable();
// Token kinds produced by scanner() and consumed by the parser functions.
// Operators: inc (++), plus, dec1 (--), minus, divide, mul, semicol, lesseq,
// less, grteq, great, equal (==), assign (=), stbrt/endbrt ( ( ) ),
// stbrac/endbrac ( { } ), noteq, not, per (%), coma, and (&&), or (||),
// coutop (<<), cinop (>>), stsq/endsq ( [ ] ).
// Reserved words: ifsym, elsesym, intsym, fltsym, forsym, cinsym, charsym,
// coutsym, whilesym, stringsym, startsym ("program").
// Others: ident, intconst, charconst, stringconst, illeg.
// NOTE(review): not, and, or are alternative operator spellings in ISO C++,
// so this enum presumably only compiles with a pre-standard compiler —
// confirm the target toolchain before porting.
// NOTE(review): strconst, apos and illegst are not referenced by the code
// visible in this file.
enum basic_symbol{inc,plus,dec1,minus,divide,mul,semicol,lesseq,less,grteq,
great,equal,assign,stbrt,endbrt,stbrac,endbrac,noteq,not,
per,coma,and,or,ifsym,elsesym,intsym,fltsym,forsym,cinsym
,strconst,charsym,coutsym,whilesym,ident,intconst,charconst
,stringconst,apos,illeg,stringsym,startsym,illegst,coutop,
cinop,stsq,endsq};

// Kind of the token most recently recognised by scanner().
basic_symbol token;
// Token kind for each reserved word; entries are listed in the same order as
// the spellings in rwtable_string.
basic_symbol rwtable_token[11]={ifsym,forsym,intsym,cinsym,elsesym,charsym,coutsym,
whilesym,fltsym,stringsym,startsym};







// Reserved-word spellings, grouped by increasing length.
char rwtable_string[11][11]={"if","for","int","cin","else","char","cout","while"
,"float","string","program" };
// Current input character, filled in by readch().
char ch;
// Indexed by the length of a scanned name: entries [len] and [len+1] bound the
// range of reserved-word table rows that scanner() compares against. The table
// has nine entries, so lookups are only valid for lengths 0..7.
int guidtable[9]={0,0,0,1,4,7,9,10,11};
// Incremented by scanner() for every '\n' it skips; stored with each error.
int linenumber=0;
// Spelling of the number or identifier being scanned.
char name[50];
// Value of the last integer constant (atoi of name).
int intval;
// Character of the last character constant.
char cval;
// Contents of the last string constant.
char stval[50];
// Input file; opened in main() and read by readch().
ifstream f;
// NOTE(review): flag is not used by the code visible in this file.
int flag;
// Message text for each error number passed to update(); printerror() prints
// the row selected by the stored number. Rows 22..24 are unused and empty.
// Rows 9 and 10 name the operator each statement actually needs: outputst()
// reports 9 when the "<<" token (coutop) is missing and inputst() reports 10
// when the ">>" token (cinop) is missing.
char errortable [25][60]={"Error........starting brace missing {",
"Error........ending brace missing }",
"Error........start bracket missing (",
"Error........ending bracket missing )",
"Error........start symbol missing ",
"Error........Assign operator missing =",
"Error........Semicolon Missing ;",
"Error........Identifier missing ",
"Error........end array operator missing ]",
"Error........cout operator missing <<",
"Error........cin operator missing >>",
"Error........Extra Token ",
"Error........illegal Token ",
"Error........Wrong expression ",
"Error........No Initial Value Assigned ",
"Error........Inccremental Statement Missing",
"Error........No Conditional Statement ",
"Error........Multiple Declaration ",
"Error........Undefined Identifier ",
"Error........Array index out of bound ",
"Error........Wrong Array Index ",
"Error........Illegal Use of Array "
};



// File-level state shared between the parser and the symbol table (linklist).

// Number of entries inserted so far; incremented by linklist::insert().
int ident_in=0;
// Kind recorded for a symbol-table entry.
enum id {var,constant,fun};
// insert() records an entry as var when this is 0 and as fun otherwise.
// NOTE(review): the visible code only ever assigns 1 to it.
int var_flag=1; //it is used to check if there is a variable or function
// 1-based position at which insert() links the next node; advanced by insert().
int pos=1; //that will inc the no of position of node
// Name of the identifier currently being declared or checked.
char ident_n[30]; //name of the identifier
// NOTE(review): t is not used by the code visible in this file.
char t[6]; //type of the ident
// Integer value seen right after the last '[' (set in variable()); -1 until then.
int array_i=-1; // index of the array
// NOTE(review): id_temp is not used by the code visible in this file.
id id_temp; //
// Type keyword (intsym/charsym/stringsym) of the declaration being parsed.
basic_symbol id_kind; //use to store the type of identifier
// Block counter: incremented and decremented around the program body in parser().
int blk;
// Number of subscripts processed so far by variable(); checkBound() does
// nothing while it is still 0.
int array_insert=0;
// One symbol-table entry; entries form the singly linked list managed by
// linklist.
struct node
{
    char ident_name[30]; // identifier spelling, copied from ident_n
    char type[7];        // "int", "char" or "string"; seven bytes so that
                         // "string" fits together with its '\0' terminator
    int array_index;     // value of array_i recorded when the entry was created
    int isdec;           // set to 1 by linklist::insert()
    id i_id;             // var or fun, chosen from var_flag on insertion
    node *next;          // following entry; NULL at the end of the list
    int block;           // value of blk when the entry was created
};


// Symbol table kept as a singly linked list of node entries. The member
// functions take their inputs from file-level variables (ident_n, id_kind,
// array_i, pos, ...) rather than from parameters.
class linklist
{
private:
// First entry, or NULL while the list is empty.
node *head;
// Returns the node at the given 1-based position.
node *pointertopos(int);
// Number of entries in the list.
int count;
public:
linklist();
// Adds an entry for ident_n at position pos.
void insert();
// Records error 17 when ident_n is already present with the type in id_kind.
void multipleCheck();
// Records error 18 for uses of an identifier that has no entry.
void declared();
// Writes every entry to cout.
void print();
// Compares array_i with the size recorded for ident_n (errors 19 and 20).
void checkBound();

} ;


// Creates an empty symbol table: no first node and an entry count of zero.
linklist::linklist()
    : head(NULL), count(0)
{
}

// Adds a symbol-table entry for the identifier currently held in ident_n.
//
// All inputs come from file-level state: ident_n (name), id_kind (type
// keyword of the declaration), blk (block counter), array_i (recorded as the
// array size, -1 when none was seen), var_flag (0 -> var, otherwise fun) and
// pos (1-based position at which the node is linked in).
// On success ident_in, count and pos are incremented and ident_n is reset to
// " ". When pos is out of range "Wrong Pos" is printed and nothing changes.
void linklist::insert()
{
    if(pos<=0||pos>count+1)
    {
        cout<< "Wrong Pos"<<endl;
        return;
    }

    ident_in++;

    // Link the new node in first; filling it in is the same for every position.
    node *x=new node;
    if(pos==1)
    {
        x->next=head;
        head=x;
    }
    else
    {
        node *y=pointertopos(pos-1);
        x->next=y->next;
        y->next=x;
    }

    strcpy(x->ident_name,ident_n);

    // Start from an empty type so the field is never read uninitialized when
    // id_kind is not one of the three declarable types. node::type has to be
    // large enough for "string" plus its terminator (7 bytes).
    x->type[0]='\0';
    if(id_kind==intsym)
        strcpy(x->type,"int");
    if(id_kind==charsym)
        strcpy(x->type,"char");
    if(id_kind==stringsym)
        strcpy(x->type,"string");

    x->isdec=1;
    x->block=blk;

    // Always assigned: array_i is -1 while no subscript has been seen, which
    // gives non-array entries a defined value instead of garbage.
    x->array_index=array_i;

    // NOTE(review): var_flag is only ever set to 1 in the visible code, so
    // entries are currently always recorded as fun — confirm the intent.
    if(var_flag==0)
        x->i_id=var;
    else
        x->i_id=fun;

    count++;
    pos++;
    strcpy(ident_n," ");
}

// Returns the node at 1-based position pos (head for pos <= 1), or NULL when
// the list holds fewer than pos nodes.
node* linklist::pointertopos(int pos)
{
    node *temp=head;
    // Stop at the end of the list instead of walking through a NULL link.
    for(int c=1;c<pos&&temp!=NULL;c++)
        temp=temp->next;
    return temp;
}

// Checks the subscript value in array_i against the size recorded for the
// identifier named in ident_n.
//
// Does nothing until at least one subscript has been processed
// (array_insert > 0) or when ident_n has no entry. For the first entry with a
// matching name it records error 19 when array_i exceeds the recorded size
// and error 20 when array_i is negative.
// NOTE(review): a subscript equal to the recorded size is accepted
// (array_index >= array_i), which looks like an off-by-one for zero-based
// arrays — confirm the intended rule before tightening it.
void linklist::checkBound()
{
    if(array_insert<=0)
        return;

    // Walk the whole list; an empty list simply yields no iterations.
    for(node *temp=head;temp!=NULL;temp=temp->next)
    {
        if(strcmp(temp->ident_name,ident_n)!=0)
            continue;

        if(temp->array_index<array_i)
            update(19);
        else if(array_i<0)
            update(20);
        return; // only the first matching entry is consulted
    }
}

// Records error 18 ("Undefined Identifier") when the identifier named in
// ident_n has no declared entry in the list.
//
// Nothing is checked while the table is still empty (ident_in == 0). The
// error is recorded at most once per call, after the whole list has been
// searched, rather than once for every entry with a different name.
void linklist::declared()
{
    if(ident_in<=0)
        return;

    for(node *temp=head;temp!=NULL;temp=temp->next)
    {
        if(strcmp(temp->ident_name,ident_n)==0&&temp->isdec==1)
            return; // found a declaration, nothing to report
    }

    update(18);
}
void linklist::multipleCheck()
{
node * temp=head;

if(ident_in>0)
{
while(temp!=NULL)
{
if(strcmp(temp->ident_name,ident_n)==0)
{
if( (id_kind==intsym&&(strcmp(temp->type,"int")==0)) || (id_kind==charsym&&(strcmp(temp->type,"char")==0))||(id_kind==stringsym&&(strcmp(temp->type,"string")==0)))
update(17);
}
temp=temp->next;
}
}
}

// Writes every entry to cout, one field per line: name, type, recorded array
// size, and "variable" or "fun" depending on the entry kind.
void linklist::print()
{
    for(node *temp=head;temp!=NULL;temp=temp->next)
    {
        cout<<" "<<temp->ident_name<<endl;
        cout<<" "<<temp->type<<endl;
        cout<<" "<<temp->array_index<<endl;
        if(temp->i_id==var)
            cout<<" "<<"variable"<<endl;
        else
            cout<<" "<<"fun"<<endl;
    }
}


// The single symbol table used by scanner(), declaration() and variable().
linklist var_list;


// Program entry point: opens the source file, primes the one-character
// look-ahead and the first token, then runs the parser (which prints the
// error report itself).
//
// The file to analyse may be given as the first command-line argument; when
// it is omitted the original default "c:\abc.txt" is used.
// Returns 0 after a run and 1 when the file cannot be opened.
int main(int argc,char *argv[])
{
    const char *path="c:\\abc.txt";
    if(argc>1)
        path=argv[1];

    f.open(path,ios::in);

    if(!f)
    {
        cout<<"file not found";
        getch();
        return 1;
    }

    readch();   // load the first character into ch
    scanner();  // load the first token into token
    parser();
    //var_list.print();
    getch();
    return 0;
}

// Error log filled by update() and printed by printerror(). Entry i consists
// of errno[i] (row of errortable) and lineno[i] (value of linenumber when the
// error was recorded). Each array has room for 25 entries.
// NOTE(review): errno is also the name of a standard C library macro
// (<errno.h>); on toolchains where a header pulls that macro in, this member
// name presumably will not compile — confirm before porting.
struct error
{
int errno[25];
int lineno[25];
};
// Number of entries recorded so far.
int errorcount=0;
// The log itself.
error array;

// Records error number n together with the current line number.
//
// n is the row of errortable to print later. Once the log is full, further
// errors are dropped instead of being written past the end of the arrays.
void update(int n)
{
    const int capacity=sizeof(array.lineno)/sizeof(array.lineno[0]);
    if(errorcount>=capacity)
        return;

    array.errno[errorcount]=n;
    array.lineno[errorcount]=linenumber;
    errorcount++;
}

// Prints the compilation report to cout: either the number of recorded errors
// followed by one "<message> in line <n>" row per error, or a "no errors"
// line. The footer is printed in both cases.
void printerror()
{
    if(errorcount!=0)
    {
        const int capacity=sizeof(array.lineno)/sizeof(array.lineno[0]);
        const int messages=sizeof(errortable)/sizeof(errortable[0]);
        // Never read more entries than the log can hold.
        const int shown=errorcount<capacity?errorcount:capacity;

        cout<<"\n\nFile compiled Total number of errors are : "<<errorcount<<endl;
        cout<<"\t\t*****************************************\n\n";
        for(int i=0;i<shown;i++)
        {
            // Each entry pairs an errortable row with the line it occurred on.
            const int temp=array.errno[i];
            if(temp<0||temp>=messages)
                continue; // no message text exists for this number
            cout<<errortable[temp]<<" in line "<<array.lineno[i]<<endl;
        }
    }
    else
        cout<<"\n\n\nFile Compiled No Errors"<<endl;

    cout<<"\n\t\t*****************************************\n"<<endl;
    cout<<"\t\t\t\t\t\t(c) DIOS Compiler"<<endl;
    cout<<"\t\t\t\t\t\tdios@hotmail.com"<<endl;
}

// Reads the next character of the input file f into ch.
//
// When nothing can be read (end of file or a read error) ch is set to '\0'
// so that callers looping on the value of ch do not keep seeing the last
// character of the file forever.
void readch()
{
    if(!f.get(ch))
        ch='\0';
}

int allow_checking=0;
void scanner()
{
int a;
if(ch==eof(a))
token=illeg;
while(ch==' '||ch=='\t'||ch=='\n')
if(ch=='\n')
{
linenumber++;
readch();
}
else
readch();

//for a digit
if(isdigit(ch))
{
int i=0;
name=ch;
i++;
readch();
while(isdigit(ch))
{
name=ch;
readch();
i++;
}
name='\0';
intval=atoi(name);
token=intconst;

}
else
{
// for an identifier
if(isalpha(ch)||ch=='_')
{
int index=0;
name[index]=ch;
index++;
readch();
while(isalpha(ch)||isdigit(ch)||(ch=='_'))
{
name[index]=ch;
readch();
index++;
}
name[index]='\0';
token=ident;
int no=strlen(name);
for( int i=guidtable[no] ; i<guidtable[no+1] ; i++)
if(strcmp(name,rwtable_string)==0)
{token=rwtable_token;
break;}
if(token==ident)
{

strcpy(ident_n,name);
if(ident_in>0&&allow_checking==0)
var_list.declared();
else
allow_checking=0;
//else
//update(18);
}
}


else{
//for operators

switch(ch)
{

case '[':
readch();
token=stsq;
break;
case ']':
readch();
token=endsq;
break;
case '"':

readch();
int i=0;
token=stringconst;
while(ch!='"')
{
stval=ch;
readch();
i++;
if(ch=='\n')
{
token=illeg;
update(12);
break;
}
}
stval='\0';
readch();
break;
case '+':

readch();
if(ch=='+')
{
token=inc;
readch();
}
else
token=plus;
break;

case '\'':
token=charconst;
readch();
cval=ch;
readch();
while(ch!='\'')
{
token=illeg;
update(12);
readch();
}
readch();
break;
case '-':

readch();
if(ch=='-')
token=dec1;

else
token=minus;
break;

case '/':
readch();
if(ch=='/')
{ while(ch!='\n')
{
readch();
}
scanner();
}
else
token=divide;
break;
case '*':

token=mul;
readch();
break;
case ';':

token=semicol;
readch();
break;
case '<':

readch();
token=less;
if(ch=='=')
{
token=lesseq;
readch();
}

if(ch=='<')
{
token=coutop;
readch();
}

break;
case '>':

readch();
token=great;
if(ch=='=')
{
token=grteq;
readch();
}
if(ch=='>')
{
token=cinop;
readch();
}
break;
case '=':

readch();
token=assign;
if(ch=='=')
{
token=equal;
readch();
}
//token=assign;
break;
case '(':

token=stbrt;
readch();
break;
case ')':

token=endbrt;
readch();
break;
case '{':

token=stbrac;
readch();
break;
case '}':

token=endbrac;
readch();
break;
case '!':

readch();
if(ch=='=')
token=noteq;
else
token=not;
break;

case '%':

token=per;
readch();
break;

case ',':

token=coma;
readch();
break;

case '&':

readch();
if(ch=='&')
token=and;
break;

case '|':

readch();
if(ch=='|')
token=or;
break;
case '@':
case '?':
case '$':
case '~':
case '#':
case '^':
token=illeg;
update(12);
readch();
scanner();
break;
}
}
}

}
// Incremented for every declaration statement seen by parser(); variable()
// inserts an array entry only while the value is exactly 1 and then resets
// it to 0.
// NOTE(review): because it is incremented rather than set, it reaches 1 only
// for the first declaration after a reset — confirm the intended protocol.
int allow_inserting_of_array=0;

// Parses a whole program: startsym '(' ')' '{' { declaration | statement } '}'
// and then prints the error report.
//
// Expects token to hold the first token of the input. Every missing element
// is recorded through update(): 4 start symbol, 2 '(', 3 ')', 0 '{', 1 '}'.
void parser()
{
    if(token==startsym)
        scanner();
    else
        update(4);

    if(token==stbrt)
        scanner();
    else
        update(2);

    if(token==endbrt)
        scanner();
    else
        update(3);

    blk++;

    if(token==stbrac)
        scanner();
    else
        update(0); // the opening brace of the program body is missing

    while(token==intsym||token==charsym||token==stringsym||token==forsym
          ||token==ifsym||token==whilesym||token==cinsym||token==coutsym
          ||token==ident)
    {
        // A type keyword starts a declaration.
        if(token==intsym||token==charsym||token==stringsym)
        {
            allow_checking=1;            // the next name is being declared
            allow_inserting_of_array++;
            id_kind=token;               // remembered for the symbol table
            scanner();
            declaration();
        }

        statement();
    }

    // The loop stops on the first token that cannot start a statement, which
    // for a well-formed program is the closing brace itself, so it is checked
    // here without reading any further.
    if(token!=endbrac)
        update(1);

    blk--;
    printerror();
}


int arraychk=0;
void declaration()
{


if(token==ident)
{
strcpy(ident_n,name); //use for semantic
scanner();
if(token==stbrt)
var_flag=1;
variable();
}
else
{
update(14);
}
var_list.multipleCheck();
if(arraychk==0)
var_list.insert();
if(token==coma)
{
scanner();
if(token==ident)
{
scanner();
variable();
if(token==coma)
{
scanner();
if(token==ident) scanner();
variable();
if(token==coma){
scanner();
if(token==ident) {scanner();variable();}
else
update(7);
}
else
update(7); }
}
else
update(7);
}



if(token==semicol)
scanner();
else
{
update(6);
}


}


// Parses an optional subscript "[ expr ]" that follows an identifier which
// the caller has already consumed. Without a '[' nothing happens.
//
// Side effects when a subscript is present: arraychk becomes 1, array_i takes
// the current intval, the bound is checked, the entry is inserted when
// allow_inserting_of_array is exactly 1, and array_insert is incremented.
// Errors: 21 when the subscript starts with neither an identifier nor an
// integer constant, 8 when ']' is missing.
void variable()
{
    if(token!=stsq)
        return;

    arraychk=1;
    scanner();
    array_i=intval;
    var_list.checkBound();

    const int startsIndex=(token==ident||token==intconst);
    if(!startsIndex)
        update(21);
    else
        subexpression();

    if(arraychk==1&&allow_inserting_of_array==1)
    {
        var_list.insert();
        allow_inserting_of_array=0;
    }

    if(token!=endsq)
        update(8);
    else
        scanner(); // move past ']'

    array_insert++;
}

// Parses one statement, chosen by the current token: an assignment or
// "ident++" / "ident--", if, for, while, cout, cin, or a braced block.
//
// The tests below are independent (not an else-if chain), so after one form
// has been handled a later test can still match the token that follows it.
// Errors: 1 when a block is not closed by '}', 6 when the ';' after an
// increment/decrement, cout or cin statement is missing.
void statement()
{
    // 1 when the form parsed last still needs its ';' consumed here;
    // assignst() consumes its own.
    int needsSemicolon=0;

    if(token==ident)
    {
        scanner();
        if(token!=inc&&token!=dec1)
            assignst();
        else
        {
            scanner();
            needsSemicolon=1;
        }
    }

    if(token==ifsym)
    {
        scanner();
        ifst();
        needsSemicolon=0;
    }

    if(token==forsym)
    {
        scanner();
        forst();
        needsSemicolon=0;
    }

    if(token==whilesym)
    {
        scanner();
        whilest();
        needsSemicolon=0;
    }

    if(token==coutsym)
    {
        scanner();
        outputst();
        needsSemicolon=1;
    }

    if(token==cinsym)
    {
        scanner();
        inputst();
        needsSemicolon=1;
    }

    if(token==stbrac)
    {
        scanner();

        // Block body: declarations and statements until something else shows up.
        for(;;)
        {
            const int isType=(token==intsym||token==charsym||token==stringsym);
            const int isStatementStart=(token==ident||token==ifsym||token==forsym
                                        ||token==whilesym||token==cinsym||token==coutsym);
            if(!isType&&!isStatementStart)
                break;

            if(isType)
            {
                scanner();
                declaration();
            }
            statement();
        }

        if(token!=endbrac)
            update(1);
        else
            scanner();
    }

    if(needsSemicolon==1)
    {
        if(token!=semicol)
            update(6);
        else
            scanner();
    }
}



// Parses the remainder of an assignment whose target identifier has already
// been consumed: [subscript] '=' expression ';'.
// Errors: 5 when '=' is missing, 6 when ';' is missing.
void assignst()
{
    variable(); // optional subscript on the target

    if(token!=assign)
        update(5);
    else
        scanner();

    expression();

    if(token!=semicol)
        update(6);
    else
        scanner();
}


// Parses a for statement after the "for" keyword has been consumed:
// '(' init condition ';' step ')' statement.
//
// init is an identifier (optionally followed by an assignment, whose ';' is
// consumed by assignst()) or an integer constant; step is an identifier
// followed by an assignment or ++/--, or an integer constant.
// Errors: 2 missing '(', 14 missing init, 16 missing condition, 6 missing ';',
// 15 missing step, 3 missing ')'.
void forst()
{
    if(token!=stbrt)
        update(2);
    else
        scanner();

    // Initialisation part.
    if(token==ident)
    {
        scanner();
        if(token==assign)
            assignst();
    }
    else if(token==intconst)
        scanner();
    else
        update(14);

    // Condition part.
    if(token!=ident&&token!=intconst)
        update(16);
    else
        expression();

    if(token!=semicol)
        update(6);
    else
        scanner();

    // Step part.
    if(token==ident)
    {
        scanner();
        if(token==assign)
            assignst();
        else if(token==inc||token==dec1)
            scanner();
    }
    else if(token==intconst)
        scanner();
    else
        update(15);

    if(token!=endbrt)
        update(3);
    else
        scanner();

    statement(); // loop body
}


// Parses a while statement after the "while" keyword has been consumed:
// '(' expression ')' statement.
//
// When '(' is not the current token one more token is read: if that one is
// '(' the skipped token is reported as extra (error 11), otherwise the
// bracket is reported as missing (error 2). Error 3 is recorded when ')' is
// missing.
void whilest()
{
    if(token!=stbrt)
    {
        scanner();
        if(token!=stbrt)
            update(2);
        else
        {
            update(11);
            scanner();
        }
    }
    else
        scanner();

    expression();

    if(token!=endbrt)
        update(3);
    else
        scanner();

    statement(); // loop body
}

// Parses the remainder of an output statement after "cout" has been consumed:
// the coutop token followed by a string constant or an expression.
// Error 9 is recorded when the operator is missing.
void outputst()
{
    if(token!=coutop)
        update(9);
    else
        scanner();

    if(token!=stringconst)
        expression();
    else
        scanner();
}

// Parses the remainder of an input statement after "cin" has been consumed:
// the cinop token followed by an identifier with an optional subscript.
// Errors: 10 when the operator is missing, 7 when the identifier is missing.
void inputst()
{
    if(token!=cinop)
        update(10);
    else
        scanner();

    if(token!=ident)
    {
        update(7);
        return;
    }

    scanner();
    variable();
}

// Parses subexpression { relop subexpression } with an optional trailing
// ++ or --, where relop is one of < <= > >= == !=.
// Error 13 is recorded when a relational operator is not followed by an
// identifier, an integer constant or a character constant.
void expression()
{
    subexpression();

    for(;;)
    {
        const int isRelop=(token==less||token==lesseq||token==great
                           ||token==grteq||token==equal||token==noteq);
        if(!isRelop)
            break;

        scanner();
        if(token!=ident&&token!=intconst&&token!=charconst)
            update(13);
        else
            subexpression();
    }

    if(token==inc||token==dec1)
        scanner();
}

// Parses term { (plus | minus | or) term }.
void subexpression()
{
    term();

    for(;;)
    {
        if(token!=plus&&token!=minus&&token!=or)
            break;
        scanner();
        term();
    }
}

void term()
{

factor();

while(token==mul||token==divide||token==and||token==per)
{
scanner();
factor();
}

}


// Parses one operand: an identifier with an optional subscript, a string,
// character or integer constant, a parenthesised expression, or a negated
// factor.
//
// The four tests are independent rather than alternatives, so each one is
// applied in turn to whatever token the previous one left behind.
// Error 3 is recorded when a parenthesised expression is not closed by ')'.
void factor()
{
    if(token==ident)
    {
        scanner();
        variable();
    }

    const int isConstant=(token==stringconst||token==charconst||token==intconst);
    if(isConstant)
        scanner();

    if(token==stbrt)
    {
        scanner();
        expression();
        if(token!=endbrt)
            update(3);
        else
            scanner();
    }

    if(token==not)
    {
        scanner();
        factor();
    }
}


// Parses an if statement after the "if" keyword has been consumed:
// '(' expression ')' statement [declaration] [ else statement [declaration] ].
// A declaration that directly follows either branch is parsed here as well.
// Errors: 2 when '(' is missing, 3 when ')' is missing.
void ifst()
{
    if(token!=stbrt)
        update(2);
    else
        scanner();

    expression();

    if(token!=endbrt)
        update(3);
    else
        scanner();

    // "then" branch.
    statement();
    if(token==intsym||token==charsym||token==stringsym)
    {
        scanner();
        declaration();
    }

    if(token!=elsesym)
        return;

    // "else" branch.
    scanner();
    statement();
    if(token==intsym||token==charsym||token==stringsym)
    {
        scanner();
        declaration();
    }
}
 

Re: lexical analyser

IMO!

It seems to me that you are expecting people to do the work for you. If you want to modify the code for greater efficiency, you first need to understand it well. Once you do, read as many articles on the subject as you can; then, and only then, return to the problem, and your question will be answered.

There’s a book on my shelf called "Modern Compiler Implementation in C" (ISBN 0-521-58653-4), which contains information about lexical analyzers and may help you better understand your problem. I’m sure there’s also lots of free content on Google on this subject.
 

Status
Not open for further replies.
Cookies are required to use this site. You must accept them to continue using the site. Learn more…