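// Simple lexical analyzer: reads input.txt line by line, writes the tokens to output.txt and invalid characters to error.txt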
#include <conio.h>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
ifstream ifs("input.txt");
ofstream ofs("output.txt");
ofstream efs("error.txt");
class token{ //THIS CLASS CONATINS TOKEN
private:
string ttype;
string tvalue;
public:
token(){
ttype = "error";
tvalue = "\0";
}
void settoken(string type, string val){
ttype = type;
tvalue = val;
}
string gettype(){
return ttype;
}
void print(){
cout << "\ntoken found :" << "\t" << tvalue << "\t" << "Token Type:" << "\t" << ttype << "\n";
ofs << "\ntoken found :" << "\t" << tvalue << "\t" << "Token Type:" << "\t" << ttype << "\n";
}
};
class lexical{ //THIS CLASS CONTAINS ALL FUNCTIONALITIES TO BE PERFORMED
private:
token tokn[100]; //ARRAY CONTAINING ALL TOKENS
string table1[100]; //HASH TABLE ARRAY
string ttypes[6]; //BASIC TOKEN TYPES
int ind ;
public:
lexical(){
ind=0;
ttypes[0] = "Keyword" ;
ttypes[1] = "Comments" ;
ttypes[2] = "Operators" ;
ttypes[3] = "Number" ;
ttypes[4] = "ID" ;
ttypes[5] = "Braket" ;
}
void setchunk(string ch){ //BASIC FUNCTION THAT CREATES TOKEN
int i = 0,j,tf=0;
static int f = 0; //FLAG FOR MULTI LINE COMMENTS END
static int ln = 0; //LINE NO
ln++;
string str = ""; //IT CONTAINS ANY ID OR ERROR
while (i < ch.size()){ //THIS LOOP CHECKS ALL TOKEN TYPES EXCEPT ID BECAUSE ID CONFLICTS WITH KEYWORD
if (f != 1){
str = "";
tf = 0;
for (; i < ch.size(); i++)
{
if (iskeyword(str)){
tf = 1; //this variable is used for token indexing problems
str = "";
break;
}
if (iscomment(&ch[i], &i, &f)){
tf = 1;
break;
}
else if (isoperator(&ch[i], &i)){
tf = 1;
break;
}
if (isbraket(&ch[i])){
tf = 1;
i++;
break;
}
else if (isnumber(&ch[i], &i)){
tf = 1;
break;
}
else if (ch[i] == ' ' || ch[i] == '\t'){
i++;
break;
}
str = str + ch[i];
}
j = 0;
while (1){ //THIS LOOP CHECKS AND SAPARATES THE ID AND ERROR
if (isID(str, &j,tf)){}
if (str != "" &&str[j] != '\n'&&j != str.size()){ //CHECKS FOR ERROR
efs << "\n" << str[j] << "\t invalid char at Line no :" << "\t" << ln << "\n"; //put error in error file
j++;
}
else
break; //EXITS WHILE LOOP WHEN STRING IS EMPTY
}
}
else{ //ELSE FOR MULTI LINE COMMENTS FLAG
if (ch[i] == '*'&&ch[i + 1] == '/'){
f=iscomment(&ch[i], &i, &f);
f = 0;
i++;
}
i++;
}
}
}
int hash(string st){ //SIMPLE HASH FUNCTION
int sum = 0,i;
for (i = 0; i < st.size(); i++){
sum = sum + st[i];
}
i=(sum % 100);
return i;
}
bool iscomment(char *ch, int *ij, int *f){ //CHECK FOR COMMENT SINGLE LINE + MULTI LINE
string st = "";
int i = 0;
int state = 0;
if (*f == 1){
state = 6;
}
while (1){
switch (state){
case 0:
if (ch[i] == '/'){
i++;
state = 1;
}
else
return false;
break;
case 1:
if (ch[i] == '/'){
i++;
state = 2;
}
else if (ch[i] == '*'){
state = 4;
i++;
}
else
return false;
break;
case 2:
while (ch[i] != '\n'){
i++;
}
if (ch[i] == '\n'){
i++;
state = 3;
}
else
return false;
break;
case 3:
tokn[ind].settoken("Comments ", "//--------");
ind++;
*ij = *ij + i;
return true;
break;
case 4:
while (ch[i] != '*' && ch[i] != '\n'){
i++;
}
if (ch[i] == '*'){
i++;
state = 5;
}
else if (ch[i] == '\n'){
*f = 1;
*ij = *ij + i;
return true;
}
else
return false;
break;
case 5:
if (ch[i] == '/'){
i++;
state = 6;
}
else
state = 4;
break;
case 6:
tokn[ind].settoken("Comments ML", "/*--------*/");
ind++;
*ij = *ij + i;
return true;
break;
}
}
}
void keytbl(){ //ADDS ALL KEYWORDS TO HASH TABLE
string keyword[] = { "CONST", "FLOAT", "INT", "DEAN", "BREAK", "CONTINUE", "ELSE", "FOR", "SWITCH", "VOID", "CASE", "ENUM", "SIZEOF", "TYPEDEF", "CHAR", "DO", "IF", "RETURN", "UNION", "WHILE", "UCP", "HOD", "and", "or" };
int j = 0;
for (int i = 0; i < 24; i++){
j = hash(keyword[i]);
while (table1[j] != "\0"){
j++;
}
table1[j] = keyword[i];
}
}
bool isoperator(char *ch, int *ij){ //CHECKS FOR OPERATOR MATCH
string st, c;
int j = 0, f = 0;
string operators[] = { ">>>", "<<<", "|&","&|", "++", "--", "&&", "!=", "<>", ":=", "==", "*", "+", "/", "<", ">", "-", "+=","-=", ";","||" };
for (int i = 0; i < 21; i++){
st = operators[i];
for (int im=0; im < st.size(); im++){
if (ch[im] != st[im]){
f = 0;
break;
}
else
f = 1;
}
if (f == 1){
tokn[ind].settoken("operator", st);
ind++;
*ij = *ij + st.size();
return true;
}
}
return false;
}
bool isbraket(char *ch){ //CHECKS BRACKET
string st = "";
st = st + *ch;
char brakets[] = { '{', '}', '(', ')', '[', ']' ,'"','"' };
for (int i = 0; i < 8; i++){
if (brakets[i] == *ch){
tokn[ind].settoken("Braket", st);
ind++;
return true;
}
}
return false;
}
bool isnumber(char *ch, int *ij){ //CHECKS FOR NUMBER
int i = 0, state = 0;
string st = "";
while (1){
switch (state){
case 0:
if (isdigit(ch[i])){
st = st + ch[i];
i++;
state = 2;
}
else if (ch[i] == '.')
{
st = st + ch[i];
i++;
state = 1;
}
else
return false;
break;
case 1:
while (isdigit(ch[i])){
st = st + ch[i];
i++;
}
if (ch[i] == '.')
{
return false;
}
else
state = 5;
break;
case 2:
while (isdigit(ch[i])){
st = st + ch[i];
i++;
}
if (ch[i] == '.')
{
st = st + ch[i];
i++;
state = 3;
}
else
state = 5;
break;
case 3:
if (isdigit(ch[i]))
{
st = st + ch[i];
i++;
state = 4;
}
else
return false;
break;
case 4:
while (isdigit(ch[i])){
st = st + ch[i];
i++;
}
if (ch[i] == '.')
{
return false;
}
else
state = 5;
break;
case 5:
tokn[ind].settoken("Number", st);
ind++;
*ij = *ij + st.size();
return true;
break;
}
}
}
void printtkn(int *n){ //PRINTS ALL TOKENS IN ONE LINE
int i = *n+1;
if (i==1){ i = i - 1; }
for (; i < (ind ); i++){
tokn[i].print();
}
*n = i-1;
cout << "\n\n";
}
int isID(string ch, int *j,int tf){ //CHECKS ID MATCH
int i = *j, state = 1;
bool f = 0;
int dc=0; //Checks if atleast one digit is present in the ID
string st = "";
while (1){
switch (state){
case 1:
if (isletter(ch[i])){
st = st + ch[i];
i++;
state = 2;
}
else if (ch[i] == '_'){
st = st + ch[i];
i++;
state = 3;
}
else if(isdigit(ch[i]))
{
dc=1;
st = st + ch[i];
i++;
state = 2;
}
else{
*j = i;
return false;
}
break;
case 2:
while (isletter(ch[i]) || isdigit(ch[i])){
if(isdigit(ch[i]))
{
dc=1;
}
st = st + ch[i];
i++;
}
if (ch[i] == '_'){
st = st + ch[i];
i++;
state == 3;
}
else{
state = 4;
break;
}
case 3:
if (isletter(ch[i]) || isdigit(ch[i])){
if(isdigit(ch[i]))
{
dc=1;
}
st = st + ch[i];
i++;
state = 2;
}
else if (ch[i] == '_'){
*j = i;
return false;
}
else
state = 4;
break;
case 4:
int check=ch.size();
if(ch[check] == '_'|| dc==0)
{
*j = i;
return false;
}
else
{
if (tf == 1){
tokn[ind] = tokn[ind - 1];
tokn[ind - 1].settoken("ID", st);
}
else
tokn[ind].settoken("ID", st);
ind++;
*j = i;
return true;
}
}
}
}
bool isletter(char ch){
if ((('z' >= ch) && ('a' <= ch)) || (('Z' >= ch) && ('A' <= ch))){
return true;
}
return false;
}
bool isdigit(char ch){
char number[10] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
for (int v = 0; v < 10; v++){
if (ch == number[v]){
return true;
}
}
return false;
}
bool iskeyword(string ch){ //CHECKS FOR KEYWORD
string st, c;
if (ch != ""){
int j = hash(ch);
for (int i = 0; i < 4; i++){
st = table1[j];
if (ch == st){
tokn[ind].settoken("Keyword", st);
ind++;
return true;
}
j++;
}
}
return false;
}
};
void main()
{
lexical le;
static int n = 0; //for line by line token printing
int line = 0;
string strline;
le.keytbl(); //add all keywords in hash table
while (!ifs.eof())
{
getline(ifs, strline, '\n'); //reading one line of file
line++;
ofs << "---------\n";
ofs << line << "\n"; //printing line no
strline = strline + "\n";
if (strline[0] != '\n'){ //checking that line isn,t empty
cout << strline << "\n";
le.setchunk(strline);
le.printtkn(&n);
}
}
}