473,836 Members | 1,510 Online
Bytes | Software Development & Data Engineering Community
+ Post

Home Posts Topics Members FAQ

A simple parser

Hi guys

I have written this small parser to print out the functions defined in a
C file. This is an example of parsing in C, that I want to add to my
tutorial. Comments (and bug reports) are welcome.

-------------------------------------------------------------cut here

/* A simple scanner that will take a file of C source code and
print the names of all functions therein, in the following format:
"Function XXXX found line dddd .... ddddd"
Algorithm: it scans for a closing parenthesis immediately followed by
an opening brace. Comments may appear between the closing
parenthesis and the opening brace, but no other characters besides white
space. Functions must be defined with prototype syntax; K & R syntax
is not supported.
*/
#include <stdio.h>
// Size of the identifier buffer. ReadId stores at most MAXID-1
// characters plus the terminating zero. Sorry Java guys...
#define MAXID 1024
static char IdBuffer[MAXID]; // Last identifier read outside all braces and parentheses (the candidate function name)
static int line = 1; // Current input line; Fgetc bumps it on every '\n'. We start at line 1

// Wrapper around fgetc() that keeps the global line counter current:
// each newline character read advances "line" by one.
// Returns whatever fgetc() returned (a character or EOF).
static int Fgetc(FILE *f)
{
    int ch = fgetc(f);

    line += (ch == '\n');
    return ch;
}

// Return 1 if c may appear in a C identifier, 0 otherwise.
// Letters and '_' are always accepted. Digits are accepted only when
// start is 0: pass a non-zero start to test the FIRST character of an
// identifier (which may not be a digit), and 0 for the following ones.
// The range tests assume the letters are contiguous in the execution
// character set, as they are in ASCII.
static int IsIdentifier(int c,int start)
{
    if (c >= 'a' && c <= 'z')
        return 1;
    if (c >= 'A' && c <= 'Z')
        return 1;
    if (c == '_')
        return 1;
    // Digits are legal everywhere except in the first position.
    if (start == 0 && c >= '0' && c <= '9')
        return 1;
    return 0;
}

// Just prints the function name
static int PrintFunction(F ILE *f)
{
printf("Functio n %s found line %d ...",IdBuffer,l ine);
return Fgetc(f);
}

// Reads an identifier into IdBuffer.
//   c - first character of the identifier (already read by the caller)
//   f - stream to read the remaining characters from
// Characters beyond MAXID-1 are read and discarded, so an overlong
// identifier is truncated instead of being re-scanned as a second one.
// Returns the first character that is not part of the identifier,
// or EOF if the input ends.
static int ReadId(char c,FILE *f)
{
    int i = 0;
    int ch; // must be an int: a char cannot tell EOF apart from a data byte

    IdBuffer[i++] = c;
    for (;;) {
        ch = Fgetc(f);
        if (ch == EOF || !IsIdentifier(ch,0))
            break;
        if (i < MAXID-1)
            IdBuffer[i++] = (char)ch;
    }
    IdBuffer[i] = 0;
    return ch;
}
// Skips a string literal. The caller has already consumed the opening
// '"'. A backslash escapes the character after it, so \" does not end
// the string.
// Returns the character following the closing quote, or EOF if the
// input ends first.
static int ParseString(FILE *f)
{
    int c = Fgetc(f);
    while (c != EOF && c != '"') {
        if (c == '\\')
            c = Fgetc(f); // step onto the escaped character
        if (c != EOF)
            c = Fgetc(f);
    }
    if (c == '"')
        c = Fgetc(f);
    return c;
}

// Skips a block comment. The caller has already consumed the
// opening "/*".
// Returns the character following the closing "*/", or EOF if the
// input ends inside the comment.
static int ParseComment(FILE *f)
{
    int c = Fgetc(f);
    while (c != EOF) {
        if (c != '*') {
            c = Fgetc(f);
            continue;
        }
        // Saw a '*': the comment ends only if '/' comes right after it.
        // Otherwise the character just read is examined again, so a run
        // such as "**/" still terminates the comment.
        c = Fgetc(f);
        if (c == '/')
            return Fgetc(f);
    }
    return EOF;
}
// Skips a "//" comment up to and including the end of line. The caller
// has already consumed the "//". A backslash swallows the character
// after it, so a backslash-newline continues the comment on the next line.
// Returns the first character after the terminating newline, or EOF.
static int ParseCppComment(FILE *f)
{
    int ch = Fgetc(f);

    for (;;) {
        if (ch == EOF)
            return EOF;
        if (ch == '\n')
            return Fgetc(f);
        if (ch == '\\') {
            ch = Fgetc(f);
            if (ch == EOF)
                return EOF;
        }
        ch = Fgetc(f);
    }
}

// Skips white space and comments.
//   c - the current character (already read by the caller)
//   f - stream to continue reading from
// Every character <= ' ' counts as white space. Both "/* */" and "//"
// comments are skipped, including one that starts at c itself.
// Returns the first character that is neither white space nor part of
// a comment, or EOF. EOF is tested explicitly because it is negative
// and would otherwise satisfy the "<= ' '" white-space test forever.
// A lone '/' that does not start a comment is consumed and the
// character after it is returned.
static int SkipWhiteSpace(int c,FILE *f) {
    for (;;) {
        if (c == EOF)
            return EOF;
        if (c <= ' ') {
            c = Fgetc(f);
            continue;
        }
        if (c != '/')
            return c;
        c = Fgetc(f);
        if (c == '*')
            c = ParseComment(f);
        else if (c == '/')
            c = ParseCppComment(f);
        else
            return c;
    }
}

// Skips a character constant. The caller has already consumed the
// opening single quote. A backslash swallows the character after it,
// so '\'' is handled.
// Returns the character following the closing quote, or EOF if the
// input ends first.
static int ParseQuotedChar(FILE *f)
{
    int ch = Fgetc(f);

    for (;;) {
        if (ch == EOF)
            return EOF;
        if (ch == '\'')
            return Fgetc(f);
        if (ch == '\\') {
            ch = Fgetc(f);
            if (ch == EOF)
                return EOF;
        }
        ch = Fgetc(f);
    }
}
// Scans the C source file named by argv[1] and prints one line per
// function definition: "Function NAME found line START ... END".
// A definition is recognised when a ')' outside all braces and
// parentheses is followed (ignoring white space and comments) by '{'.
// Returns 0 on success, 1 if no file name was given, 2 if the file
// cannot be opened.
int main(int argc,char *argv[])
{
    if (argc == 1) {
        printf("Usage: %s <file.c>\n",argv[0]);
        return 1;
    }
    FILE *f = fopen(argv[1],"r");
    if (f == NULL) {
        printf("Can't find %s\n",argv[1]);
        return 2;
    }
    int c = Fgetc(f);
    int level = 0;      // current '{' nesting depth
    int parenlevel = 0; // current '(' nesting depth
    int inFunction = 0; // set from a reported function's '{' to its '}'
    while (c != EOF) {
        // Note that each of the switches must advance the
        // character read so that we avoid an infinite loop.
        switch (c) {
        case '"':
            c = ParseString(f);
            break;
        case '/':
            c = Fgetc(f);
            if (c == '*')
                c = ParseComment(f);
            else if (c == '/')
                c = ParseCppComment(f);
            break;
        case '\'':
            c = ParseQuotedChar(f);
            break;
        case '{':
            level++;
            c = Fgetc(f);
            break;
        case '}':
            // Closing the outermost brace of a reported function:
            // finish its report line with the end line number.
            if (level == 1 && inFunction) {
                printf(" %d\n",line);
                inFunction = 0;
            }
            if (level > 0)
                level--;
            c = Fgetc(f);
            break;
        case '(':
            parenlevel++;
            c = Fgetc(f);
            break;
        case ')':
            if (parenlevel > 0)
                parenlevel--;
            c = Fgetc(f);
            // A ')' at file scope followed by '{' starts a function body.
            if ((parenlevel|level) == 0) {
                c = SkipWhiteSpace(c,f);
                if (c == '{') {
                    level++;
                    inFunction = 1;
                    c = PrintFunction(f);
                }
            }
            break;
        default:
            // Only identifiers at file scope can be function names.
            if ((level | parenlevel) == 0 &&
                IsIdentifier(c,1))
                c = ReadId(c,f);
            else
                c = Fgetc(f);
            break;
        }
    }
    fclose(f);
    return 0;
}
Oct 14 '06
121 6571
On Sun, 15 Oct 2006 22:49:22 +0000, in comp.lang.c , Richard
Heathfield <in*****@invalid.invalid> wrote:
>my first reaction (to an article asking for code crits) is to run the code
through a compiler - and I might not even read it first, especially if it's
long. The resulting compiler diagnostics give me a place to start the crit.
That's fine. But if you plan to do that, you need to accept that C99 is
around, that people use its features, and that you can't play Canute
and insist they stop. Therefore you have to eliminate C99/C89 issues
from your response, either by manually tuning them out or by using a
compiler that supports a reasonable subset of C99. Frankly it's stupid
and arrogant of you, not to say a bloody waste of bandwidth, to deny
the existence of C99.

--
Mark McIntyre

"Debugging is twice as hard as writing the code in the first place.
Therefore, if you write the code as cleverly as possible, you are,
by definition, not smart enough to debug it."
--Brian Kernighan
Oct 17 '06 #91
On Mon, 16 Oct 2006 02:01:57 +0000, in comp.lang.c , Richard
Heathfield <in*****@invalid.invalid> wrote:
>Keith Thompson said:
>>
Which explains your mistake, sir.

I remain to be convinced that it was a mistake.
That's obvious, but frankly, that's part of the problem - that you're
not prepared to step away from the fight and look at the issue
dispassionately.

I strongly suggest you step back and take stock.
--
Mark McIntyre

"Debugging is twice as hard as writing the code in the first place.
Therefore, if you write the code as cleverly as possible, you are,
by definition, not smart enough to debug it."
--Brian Kernighan
Oct 17 '06 #92
On Mon, 16 Oct 2006 03:55:37 +0000, in comp.lang.c , Richard
Heathfield <in*****@invalid.invalid> wrote:
>Keith Thompson said:

<snip>
gratuitous snip of the actual point of the post. You're behaving like
a very stupid person.
--
Mark McIntyre

"Debugging is twice as hard as writing the code in the first place.
Therefore, if you write the code as cleverly as possible, you are,
by definition, not smart enough to debug it."
--Brian Kernighan
Oct 17 '06 #93
On Mon, 16 Oct 2006 14:47:59 +0000, in comp.lang.c , Richard
Heathfield <in*****@invalid.invalid> wrote:
>Tak-Shing Chan said:
>On Mon, 16 Oct 2006, Richard Heathfield wrote:
>>Tak-Shing Chan said:

I believe that you are confusing portability with backward
compatibility.

Your beliefs about my intent are always amusing.

Pray tell, where did I say /anything/ about your intent?

Whatever.
You know what? I've had enough of you. You've behaved atrociously in
this thread, childish and bullying by turns, rude and petty and silly.
Welcome to my killfile.

--
Mark McIntyre

"Debugging is twice as hard as writing the code in the first place.
Therefore, if you write the code as cleverly as possible, you are,
by definition, not smart enough to debug it."
--Brian Kernighan
Oct 17 '06 #94
Mark McIntyre <ma**********@spamcop.net> writes:
On Sun, 15 Oct 2006 23:36:08 +0000, in comp.lang.c , Richard
Heathfield <in*****@invalid.invalid> wrote:
>>Richard said:
[..]
>>Err, for the C99 features I use (primarily localised declarations),
gcc. I thought I mentioned that?

In order to get gcc to support those features, one must invoke it in a
non-conforming mode.

This is false and you know it.
No, he's right on this point.

gcc has a mode in which it's reasonably (but not perfectly) conforming
to C90. It has no mode in which it's as conforming to C99.

gcc supports "//" comments and mixed declarations and statements
either as gcc-specific extensions, or as part of its incomplete C99
support. The only way to get gcc to accept those features
(non-standard in C90, standard in C99) is to invoke it in a mode that
does not conform to any standard. Such a mode causes it to accept
*other* extensions as well; in other words, it will then fail to
diagnose a number of non-C90 constructs.

If gcc allowed you to accept "//" comments and mixed declarations and
statements while still rejecting all other non-C90 extensions, it
might be a different story. If there were a list of C99 features that
are supported by all current C compilers (a least common denominator
bigger than strict C90 but smaller than full C99), *and* a way to
invoke gcc so that it accepts those features and no others, then it
might be possible to write portable code that uses some of C99. But,
as far as I know, gcc's command-line options do not allow that kind of
fine-grained control.

Many (most?) other C compilers also support some C99 features, but not
necessarily the same set that gcc supports. If I compile my code with
gcc, using a mode in which it accepts "//" comments, it will fail to
diagnose accidental use of C99 features that might not be supported by
some other compiler. The *only* reliable way to guarantee maximal
portability is to invoke gcc in strict C90-conforming mode; any code
that passes that (barring compiler bugs) should be portable to any
C90-conforming implementation. (Well, that's not *quite* true; I
don't think gcc will complain about "int i = 33000;".)

Is there a table somewhere showing exactly which C99 features are
supported by which implementations? I know that such a list exists
for gcc, but a table for multiple implementations could give us a
better idea of what's really portable. And if such a table existed
and were generally known, it just might advance the goal of C99
conformance across the board.

--
Keith Thompson (The_Other_Keith) ks***@mib.org <http://www.ghoti.net/~kst>
San Diego Supercomputer Center <* <http://users.sdsc.edu/~kst>
We must do something. This is something. Therefore, we must do this.
Oct 17 '06 #95
Mark McIntyre <ma**********@spamcop.net> writes:
On Mon, 16 Oct 2006 14:47:59 +0000, in comp.lang.c , Richard
Heathfield <in*****@invalid.invalid> wrote:
>>Tak-Shing Chan said:
>>On Mon, 16 Oct 2006, Richard Heathfield wrote:

Tak-Shing Chan said:

I believe that you are confusing portability with backward
compatibility.

Your beliefs about my intent are always amusing.

Pray tell, where did I say /anything/ about your intent?

Whatever.

You know what? I've had enough of you. You've behaved atrociously in
this thread, childish and bullying by turns, rude and petty and silly.
Welcome to my killfile.
That's entirely up to you, of course, but you might consider just
killing this thread and moving on. Richard has made, and continues to
make, a great contribution to this newsgroup. If you killfile him
over his behavior in this one thread, it will be, in my opinion, your
loss.

--
Keith Thompson (The_Other_Keith) ks***@mib.org <http://www.ghoti.net/~kst>
San Diego Supercomputer Center <* <http://users.sdsc.edu/~kst>
We must do something. This is something. Therefore, we must do this.
Oct 17 '06 #96
jacob navia wrote:
>>Are you sure that's strictly conforming?

From n1124 6.9.1p6:

"If the declarator includes an identifier list, each declaration in the
declaration list shall have at least one declarator, those declarators
shall declare only identifiers from the identifier list, and every
identifier in the identifier list shall be declared."

"e[sizeof (enum e { e1 = 3 })]" is a declarator, and declares two
identifiers not in the identifier list, does it not?


Hehe, you're right, which makes several other compilers not conforming
in this respect 8-) I suspect the paragraph in question was not
intended to apply to those identifiers, but I could be wrong.

To my chagrin, I should probably fix my testsuite.

Error tb.c: 2 declared parameter 'e1' is missing

Phew !!!!

:-)
Rereading, I'm inclined back to my original reading. "those
declarators" pretty clearly doesn't cover e1 here - e1
is not declared in the "declaration-list", it's declared in an
expression embedded in such a declaration.

e1 definitely isn't declared as a parameter.
Oct 17 '06 #97
Keith Thompson wrote:
But that's not what I asked about. I asked if there are any *major
areas* in which gcc fails to conform to C90.
I answered - the whole area of constant expressions. It's probably
the hardest part of the standard to get right and definitely the
weakest area of most compilers. I think only EDG gets it right.
Oct 17 '06 #98
On Tue, 17 Oct 2006 21:09:45 GMT, in comp.lang.c , Keith Thompson
<ks***@mib.org> wrote:
>Mark McIntyre <ma**********@spamcop.net> writes:
>On Sun, 15 Oct 2006 23:36:08 +0000, in comp.lang.c , Richard
Heathfield <in*****@invalid.invalid> wrote:
>>>Richard said:
[..]
>>>Err, for the C99 features I use (primarily localised declarations),
gcc. I thought I mentioned that?

In order to get gcc to support those features, one must invoke it in a
non-conforming mode.

This is false and you know it.

No, he's right on this point.
No he's not.
>gcc has a mode in which it's reasonably (but not perfectly) conforming
to C90. It has no mode in which it's as conforming to C99.
I disagree, but unlike RJH I have no intention of wasting my time
prolonging this thread further.
--
Mark McIntyre

"Debugging is twice as hard as writing the code in the first place.
Therefore, if you write the code as cleverly as possible, you are,
by definition, not smart enough to debug it."
--Brian Kernighan
Oct 17 '06 #99
On Tue, 17 Oct 2006 21:15:10 GMT, in comp.lang.c , Keith Thompson
<ks***@mib.org> wrote:
>If you killfile him
over his behavior in this one thread, it will be, in my opinion, your
loss.
Indeed it is, but I have begun to plonk for stupidity and rudeness,
rather than inaccuracy, hence he falls into the bucket whereas jacob
doesn't (yet) as he's inaccurate more than he's rude and so I feel he
needs to be corrected. Richard is currently stupid and rude more than
he's inaccurate, and so he fits the bill.... :-(

--
Mark McIntyre

"Debugging is twice as hard as writing the code in the first place.
Therefore, if you write the code as cleverly as possible, you are,
by definition, not smart enough to debug it."
--Brian Kernighan
Oct 17 '06 #100

This thread has been closed and replies have been disabled. Please start a new discussion.

Similar topics

3
2220
by: Kenneth Downs | last post by:
Well, I'm coming to the end of a large and exhausting project, done in my new favorite language PHP, and its time for a diversion. I'm wondering if anyone has experience with writing simple parsers. I've never done it myself, but I know they are not as mysterious as they may seem, it's a matter of finding the tools. The idea is to take something like CSS format, except that it allows nesting, and turn it into associative arrays, such...
13
2302
by: Paulo Pinto | last post by:
Hi, does anyone know of a Python package that is able to load XML like the XML::Simple Perl package does? For those that don't know it, this package maps the XML file to a dictionary.
4
2455
by: Leif K-Brooks | last post by:
I'm writing a site with mod_python which will have, among other things, forums. I want to allow users to use some HTML (<em>, <strong>, <p>, etc.) on the forums, but I don't want to allow bad elements and attributes (onclick, <script>, etc.). I would also like to do basic validation (no overlapping elements like <strong><em>foo</em></strong>, no missing end tags). I'm not asking anyone to write a script for me, but does anyone have general...
8
6508
by: Dan | last post by:
Using XML::Simple in perl is extremely slow to parse big XML files (can be up to 250M, taking ~1h). How can I increase my performance / reduce my memory usage? Is SAX the way forward?
4
11450
by: Greg B | last post by:
Well since getopt() doesn't seem to be compatible with Windows, and the free implementation of it for Windows that I found still had some annoying restrictions, I thought I'd whip up a simple parser myself. Just wanted to see if anyone could provide me with some constructive criticism :) any feedback would be greatly appreciated ----------------------------------------------------------------------------- #include "stdio.h" #include...
1
3079
by: steve smith | last post by:
Hi I have just downloaded the Borland C# Builder and the Microsoft .Net framework SDK v1.1 from the Borland website, and I am trying to get a simple program to run, however I keep getting errors, any ideas why this might be happening? Program I am running is: namespace ExamProblem { using System;
26
495
by: jacob navia | last post by:
Summary: I have changed (as proposed by Chuck) the code to use isalpha() instead of (c>='a' && c <= 'z') etc. I agree that EBCDIC exists :-) I eliminated the goto statement, obviously it is better in a tutorial to stick to structured programming whenever possible...
4
2751
by: =?Utf-8?B?SmFu?= | last post by:
In my application the user can configure automation-scripts by inserting different "actions" into a "procedure". These different procedure- and action-objects are all translated into C# code before execution. One "action" type is an expression-evaluator. At the moment the expression the user writes into the action is just inserted into the generated C# code unchanged. The problem is the variables in my system and in the "procedures";...
11
1352
by: Stef Mientki | last post by:
hello, I need to translate the following string a = '(0, 0, 0, 255), (192, 192, 192, 255), True, 8' into the following list or tuple b = Is there a simple way to to this. (Not needed now, but might need it in the future: even deeper nested
7
1198
by: bvdp | last post by:
Is there a simple/safe expression evaluator I can use in a python program. I just want to pass along a string in the form "1 + 44 / 3" or perhaps "1 + (-4.3*5)" and get a numeric result. I can do this with eval() but I really don't want to subject my users to the problems with that method. In this use I don't need python to worry about complex numbers, variables or anything else. Just do the math on a set of values. Would eval() with...
0
9814
marktang
by: marktang | last post by:
ONU (Optical Network Unit) is one of the key components for providing high-speed Internet services. Its primary function is to act as an endpoint device located at the user's premises. However, people are often confused as to whether an ONU can Work As a Router. In this blog post, we’ll explore What is ONU, What Is Router, ONU & Router’s main usage, and What is the difference between ONU and Router. Let’s take a closer look ! Part I. Meaning of...
0
10838
Oralloy
by: Oralloy | last post by:
Hello folks, I am unable to find appropriate documentation on the type promotion of bit-fields when using the generalised comparison operator "<=>". The problem is that using the GNU compilers, it seems that the internal comparison operator "<=>" tries to promote arguments from unsigned to signed. This is as boiled down as I can make it. Here is my compilation command: g++-12 -std=c++20 -Wnarrowing bit_field.cpp Here is the code in...
0
10544
jinu1996
by: jinu1996 | last post by:
In today's digital age, having a compelling online presence is paramount for businesses aiming to thrive in a competitive landscape. At the heart of this digital strategy lies an intricately woven tapestry of website design and digital marketing. It's not merely about having a website; it's about crafting an immersive digital experience that captivates audiences and drives business growth. The Art of Business Website Design Your website is...
1
10585
by: Hystou | last post by:
Overview: Windows 11 and 10 have less user interface control over operating system update behaviour than previous versions of Windows. In Windows 11 and 10, there is no way to turn off the Windows Update option using the Control Panel or Settings app; it automatically checks for updates and installs any it finds, whether you like it or not. For most users, this new feature is actually very convenient. If you want to control the update process,...
1
7788
isladogs
by: isladogs | last post by:
The next Access Europe User Group meeting will be on Wednesday 1 May 2024 starting at 18:00 UK time (6PM UTC+1) and finishing by 19:30 (7.30PM). In this session, we are pleased to welcome a new presenter, Adolph Dupré who will be discussing some powerful techniques for using class modules. He will explain when you may want to use classes instead of User Defined Types (UDT). For example, to manage the data in unbound forms. Adolph will...
0
6977
by: conductexam | last post by:
I have .net C# application in which I am extracting data from word file and save it in database particularly. To store word all data as it is I am converting the whole word file firstly in HTML and then checking html paragraph one by one. At the time of converting from word file to html my equations which are in the word document file was convert into image. Globals.ThisAddIn.Application.ActiveDocument.Select();...
0
5821
by: adsilva | last post by:
A Windows Forms form does not have the event Unload, like VB6. What one acts like?
2
4010
muto222
by: muto222 | last post by:
How can i add a mobile payment intergratation into php mysql website.
3
3111
bsmnconsultancy
by: bsmnconsultancy | last post by:
In today's digital era, a well-designed website is crucial for businesses looking to succeed. Whether you're a small business owner or a large corporation in Toronto, having a strong online presence can significantly impact your brand's success. BSMN Consultancy, a leader in Website Development in Toronto offers valuable insights into creating effective websites that not only look great but also perform exceptionally well. In this comprehensive...

By using Bytes.com and its services, you agree to our Privacy Policy and Terms of Use.

To disable or enable advertisements and analytics tracking please visit the manage ads & tracking page.