473,890 Members | 1,393 Online
Bytes | Software Development & Data Engineering Community
+ Post

Home Posts Topics Members FAQ

A simple parser

Hi guys

I have written this small parser to print out the functions defined in a
C file. This is an example of parsing in C, that I want to add to my
tutorial. Comments (and bug reports) are welcome.

-------------------------------------------------------------cut here

/* A simple scanner that takes a file of C source code and
prints the names of all functions defined therein, in the following format:
"Function XXXX found line dddd ... ddddd"
Algorithm: it scans for a closing parenthesis followed by an opening
brace. White space and comments may appear between the closing
paren and the opening brace, but no other characters.
Functions must be defined with prototype syntax; K & R style
definitions are not supported.
*/
#include <stdio.h>
#define MAXID 1024 // Size of IdBuffer: longest identifier we support, plus
// the terminating NUL. Sorry, Java guys...
static char IdBuffer[MAXID]; // Last identifier read by ReadId(); printed as the function name
static int line = 1; // Current input line number; counting starts at 1

// Wrapper around fgetc() that keeps the global line counter current:
// every newline handed back to the caller advances `line` by one.
// Returns the character read, or EOF.
static int Fgetc(FILE *f)
{
    int ch = fgetc(f);

    line += (ch == '\n');
    return ch;
}

// Returns 1 if c may appear in a C identifier, 0 otherwise.
// When start is non-zero, c is tested as the FIRST character of an
// identifier, so decimal digits are rejected; when start is zero,
// digits are accepted as well. Letters and '_' are always accepted;
// every other value (including EOF) yields 0.
static int IsIdentifier(int c, int start)
{
    if (c >= 'a' && c <= 'z')
        return 1;
    if (c >= 'A' && c <= 'Z')
        return 1;
    if (c == '_')
        return 1;
    // Digits are legal only after the first character.
    if (start == 0 && c >= '0' && c <= '9')
        return 1;
    return 0;
}

// Just prints the function name
static int PrintFunction(F ILE *f)
{
printf("Functio n %s found line %d ...",IdBuffer,l ine);
return Fgetc(f);
}

// Reads a global identifier into our name buffer (IdBuffer).
// c is the identifier's first character, already consumed by the caller;
// the remaining characters are read from f. At most MAXID-1 characters
// are stored. Excess characters of an over-long identifier are consumed
// and dropped, so the tail is not mistaken for a fresh identifier.
// Returns the first character after the identifier, or EOF.
static int ReadId(char c, FILE *f)
{
    int i = 1;
    int next; // must be int: a char cannot hold EOF distinctly from 0xFF

    IdBuffer[0] = c;
    for (;;) {
        next = Fgetc(f);
        if (next == EOF || !IsIdentifier(next, 0))
            break;
        if (i < MAXID - 1)
            IdBuffer[i++] = (char)next;
    }
    IdBuffer[i] = 0;
    return next;
}
// Skips the rest of a string literal. Reading starts just after the
// opening '"' and stops at the first '"' that is not preceded by a
// backslash.
// Returns the character following the closing quote, or EOF if the
// input ends inside the string.
static int ParseString(FILE *f)
{
    int c = Fgetc(f);

    while (c != EOF && c != '"') {
        if (c == '\\')
            c = Fgetc(f); // step onto the escaped character so it is skipped below
        if (c != EOF)
            c = Fgetc(f);
    }
    if (c == '"')
        c = Fgetc(f);
    return c;
}

// Skips the rest of a block comment. Reading starts just after the
// opening "/*" and continues up to and including the closing "*/".
// Returns the character following the comment, or EOF if the input
// ends before the comment is closed.
static int ParseComment(FILE *f)
{
    int c = Fgetc(f);

    for (;;) {
        // Advance to the next '*'.
        while (c != '*') {
            c = Fgetc(f);
            if (c == EOF)
                return EOF;
        }
        // A '*' only ends the comment when a '/' follows immediately.
        // Otherwise rescan from the character just read, which may
        // itself be a '*' (as in "**/").
        c = Fgetc(f);
        if (c == '/')
            return Fgetc(f);
    }
}
// Skips the rest of a "//" comment, up to and including the newline
// that ends it. A backslash causes the character after it to be
// skipped without being examined, so a backslash-newline pair does
// not end the comment.
// Returns the character following the terminating newline, or EOF.
static int ParseCppComment(FILE *f)
{
    int ch;

    for (ch = Fgetc(f); ch != EOF; ch = Fgetc(f)) {
        if (ch == '\n')
            return Fgetc(f);
        if (ch == '\\') {
            ch = Fgetc(f);
            if (ch == EOF)
                break;
        }
    }
    return ch;
}

// Skips white space and comments.
// c is the character most recently read from f; scanning starts with it.
// Any run of white space, block comments and "//" comments is skipped,
// including a comment that starts at c itself or directly after another
// comment.
// Returns the first character that is neither white space nor part of
// a comment, or EOF if the input ends first. If a '/' turns out not to
// start a comment, the character following that '/' is returned.
static int SkipWhiteSpace(int c, FILE *f)
{
    for (;;) {
        // EOF is negative, so it must be tested before the "<= ' '"
        // check; otherwise the loop would never terminate at end of file.
        if (c == EOF)
            return EOF;
        if (c <= ' ') {
            c = Fgetc(f);
            continue;
        }
        if (c != '/')
            return c;
        c = Fgetc(f);
        if (c == '*')
            c = ParseComment(f);
        else if (c == '/')
            c = ParseCppComment(f);
        else
            return c; // lone '/': not a comment, stop skipping
    }
}

// Skips the rest of a character constant. Reading starts just after
// the opening single quote and stops at the first single quote that is
// not preceded by a backslash.
// Returns the character following the closing quote, or EOF if the
// input ends first.
static int ParseQuotedChar(FILE *f)
{
    int ch;

    for (ch = Fgetc(f); ch != EOF; ch = Fgetc(f)) {
        if (ch == '\'')
            return Fgetc(f);
        if (ch == '\\') {
            ch = Fgetc(f);
            if (ch == EOF)
                break;
        }
    }
    return ch;
}
// Entry point. Scans the C source file named by argv[1] and reports
// every function definition found, printing the name, the line where
// the body opens and the line where it closes.
// A function is recognised when, outside all braces and parentheses,
// a ')' is followed (after optional white space and comments) by '{'.
// The name reported is the last identifier read outside all braces
// and parentheses.
// Returns 0 on success, 1 if no file name was given, 2 if the file
// could not be opened.
int main(int argc, char *argv[])
{
    if (argc < 2) {
        // argv[0] may be absent when argc is 0.
        printf("Usage: %s <file.c>\n", argc > 0 ? argv[0] : "scanner");
        return 1;
    }
    FILE *f = fopen(argv[1], "r");
    if (f == NULL) {
        printf("Can't find %s\n", argv[1]);
        return 2;
    }
    int c = Fgetc(f);
    int level = 0;      // current '{' nesting depth
    int parenlevel = 0; // current '(' nesting depth
    int inFunction = 0; // non-zero while inside a reported function body
    while (c != EOF) {
        // Note that each of the cases must advance the
        // character read so that we avoid an infinite loop.
        switch (c) {
        case '"':
            c = ParseString(f);
            break;
        case '/':
            c = Fgetc(f);
            if (c == '*')
                c = ParseComment(f);
            else if (c == '/')
                c = ParseCppComment(f);
            // Otherwise it was a plain '/'; c already holds the next
            // character and is handled on the next iteration.
            break;
        case '\'':
            c = ParseQuotedChar(f);
            break;
        case '{':
            level++;
            c = Fgetc(f);
            break;
        case '}':
            // Closing the outermost brace of a reported function:
            // complete the output line with the end line number.
            if (level == 1 && inFunction) {
                printf(" %d\n", line);
                inFunction = 0;
            }
            if (level > 0)
                level--;
            c = Fgetc(f);
            break;
        case '(':
            parenlevel++;
            c = Fgetc(f);
            break;
        case ')':
            if (parenlevel > 0)
                parenlevel--;
            c = Fgetc(f);
            // Only a ')' outside all braces and parentheses can end a
            // function's parameter list.
            if ((parenlevel | level) == 0) {
                c = SkipWhiteSpace(c, f);
                if (c == '{') {
                    level++;
                    inFunction = 1;
                    c = PrintFunction(f);
                }
            }
            break;
        default:
            // Remember identifiers seen outside braces and parentheses;
            // the last one before "(...) {" is the function name.
            if ((level | parenlevel) == 0 && IsIdentifier(c, 1))
                c = ReadId(c, f);
            else
                c = Fgetc(f);
            break;
        }
    }
    fclose(f);
    return 0;
}
Oct 14 '06
121 6602
zebedee <ze*****@zebedee.net> writes:
Keith Thompson wrote:
>But that's not what I asked about. I asked if there are any *major
areas* in which gcc fails to conform to C90.

I answered - the whole area of constant expressions. It's probably
the hardest part of the standard to get right and definitely the
weakest area of most compilers. I think only EDG gets it right.
Then we disagree on whether that's a "major area". To be precise,
constant expressions are certainly a major area of C, but I believe
that gcc works correctly with *most* constant expressions. There are
bugs, of course.

--
Keith Thompson (The_Other_Keith) ks***@mib.org <http://www.ghoti.net/~kst>
San Diego Supercomputer Center <* <http://users.sdsc.edu/~kst>
We must do something. This is something. Therefore, we must do this.
Oct 17 '06 #101
Mark McIntyre said:
You know what? I've had enough of you. You've behaved atrociously in
this thread, childish and bullying by turns, rude and petty and silly.
I don't know where you're getting that from. My responses in this thread
have been consistent with pretty well all my responses in this newsgroup
over the last seven years or so. I'm sick and tired of Tak-Shing's
fine-toothed thread-stretcher bickering over non-C-related minutiae, and I
figured that an argument about the meaning of "intent" could only head in
the same direction, which is why I dismissed it so off-handedly.
Welcome to my killfile.
<shrug> What you read is your decision. What I say is mine.

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
Oct 18 '06 #102
Mark McIntyre said:
On Sun, 15 Oct 2006 23:36:08 +0000, in comp.lang.c, Richard
Heathfield <in*****@invalid.invalid> wrote:
>>Richard said:
>>Richard Heathfield <in*****@invalid.invalid> writes:
Yes, but where are the compilers?
Err, for the C99 features I use (primarily localised declarations),
gcc. I thought I mentioned that?

In order to get gcc to support those features, one must invoke it in a
non-conforming mode.

This is false and you know it.
If I knew it to be false, I would not have claimed it to be true. Not only
do I not know it to be false, but I believe it to be true. Keith Thompson
has already given the reason, so I won't repeat it here.
In order to get an obsolete version of gcc to support those features,
you have to turn off C90 compatibility. Whoopy doo.
I'm using the gcc that ships with the OS I'm using. It's a working system,
which I'm reluctant to change for as long as it suits my needs. I'm not
interested in whether other people consider it obsolete, but only in
whether it does what I need it to do from a conformance perspective. It
does, and that's what matters.
Stop this Richard, you're making a tit of yourself.
It is not wise to insult people merely for disagreeing with you.

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
Oct 18 '06 #103
Mark McIntyre said:

<snip>
Portability? Then stick to pre-ANSI, there's still some compilers
around that require it.
C90 offers much better coverage than K&R C. When C99 offers wider coverage
than C90, nobody will be more pleased than I (if only because it means
these silly little discussions will end at last).
[...] you're being a fool.
For disagreeing with you? I Don't Think So (tm).

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
Oct 18 '06 #104
Harald van D?k said:
Richard Heathfield wrote:
>Harald van D?k said:
> If you would also like an example of
strictly conforming C90 code being rejected, see
http://gcc.gnu.org/PR19977.

What's "strictly conforming" about integer overflow?

There is no integer overflow, since the code is never executed. This is
allowed in strictly conforming programs.
What makes you think the code is never executed? The code is merely a single
translation unit. The implementation cannot know whether the code will be
executed. It can certainly tell that the initialisation will result in
integer overflow, however, and it is perfectly within its rights to
diagnose this.

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
Oct 18 '06 #105
Mark McIntyre said:
On Mon, 16 Oct 2006 02:01:57 +0000, in comp.lang.c, Richard
Heathfield <in*****@invalid.invalid> wrote:
>>Keith Thompson said:
>>>
Which explains your mistake, sir.

I remain to be convinced that it was a mistake.

That's obvious, but frankly, that's part of the problem - that you're
not prepared to step away from the fight and look at the issue
dispassionately.
I *have* looked at the whole C99 issue dispassionately. Otherwise, I
wouldn't bother "fighting" about it.
I strongly suggest you step back and take stock.
I took stock already. All present and correct. You?

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
Oct 18 '06 #106
Mark McIntyre said:
On Sun, 15 Oct 2006 22:49:22 +0000, in comp.lang.c, Richard
Heathfield <in*****@invalid.invalid> wrote:
>>my first reaction (to an article asking for code crits) is to run the code
through a compiler - and I might not even read it first, especially if
it's long. The resulting compiler diagnostics give me a place to start the
crit.

Thats fine. But if you plan to do that, you need to accept that C99 is
around,
Where?
that people use its features, and that you can't play Canute
and insist they stop.
On the contrary, I'll be delighted when they can start.
Therefore you have to eliminate C99/C89 issues
from your response, either by manually tuning them out or by using a
compiler that supports a reasonable subset of C99.
No, I don't *have* to do that at all. If I use a compiler that supports a
reasonable non-C90 subset of C99 and take advantage of the features
thereof, my code is suddenly not portable to any compiler that uses some
other reasonable non-C90 subset of C99 that does not wholly encompass my
compiler's reasonable non-C90 subset. Is this not blindingly obvious?

And is this not a sufficiently dangerous danger that it should be pointed
out to those who use C99isms? After all, we quite often warn people against
much less likely dangers.

Frankly it's stupid
and arrogant of you, not to say a bloody waste of bandwidth, to deny
the existence of C89.
I have never done so as far as I am aware. I fully agree that C89 exists. I
will even go so far as to agree that C99 exists. The existence of one, or
two, or even six (ish) implementations is not sufficient, however, and to
call people stupid and arrogant for pointing out the facts is ill-advised.

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
Oct 18 '06 #107
Richard Heathfield <in*****@invalid.invalid> writes:
Harald van D?k said:
>Richard Heathfield wrote:
>>Harald van D?k said:
>> If you would also like an example of
strictly conforming C90 code being rejected, see
http://gcc.gnu.org/PR19977.

What's "strictly conforming" about integer overflow?

There is no integer overflow, since the code is never executed. This is
allowed in strictly conforming programs.

What makes you think the code is never executed? The code is merely a single
translation unit. The implementation cannot know whether the code will be
executed. It can certainly tell that the initialisation will result in
integer overflow, however, and it is perfectly within its rights to
diagnose this.
Of course, but it's not within its rights to fail to translate it (in
a conforming mode).

--
Keith Thompson (The_Other_Keith) ks***@mib.org <http://www.ghoti.net/~kst>
San Diego Supercomputer Center <* <http://users.sdsc.edu/~kst>
We must do something. This is something. Therefore, we must do this.
Oct 18 '06 #108
Mark McIntyre said:
On Sun, 15 Oct 2006 21:02:04 +0000, in comp.lang.c, Richard
Heathfield <in*****@invalid.invalid> wrote:
>>>
Did you spot something in Jacob's code that was not C99-conforming?

No. Did you spot something in the code that *needed* a C99 feature, a

Lets not wander off track here. We're talking about whether /you/
found something that wasn't conforming.
Are we? I'm not. Which leaves you talking to whom, about what, exactly?

<snip>

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
Oct 18 '06 #109
Keith Thompson said:
Richard Heathfield <in*****@invalid.invalid> writes:
>Harald van D?k said:
>>Richard Heathfield wrote:
Harald van D?k said:

If you would also like an example of
strictly conforming C90 code being rejected, see
http://gcc.gnu.org/PR19977.

What's "strictly conforming" about integer overflow?

There is no integer overflow, since the code is never executed. This is
allowed in strictly conforming programs.

What makes you think the code is never executed? The code is merely a
single translation unit. The implementation cannot know whether the code
will be executed. It can certainly tell that the initialisation will
result in integer overflow, however, and it is perfectly within its
rights to diagnose this.

Of course, but it's not within its rights to fail to translate it (in
a conforming mode).
C&V, please. I know of no requirement on implementations to translate *any*
program (except the one that has a brazillion nested foobars in it, of
course), let alone one that is manifestly incorrect.

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
Oct 18 '06 #110

This thread has been closed and replies have been disabled. Please start a new discussion.

Similar topics

3
2223
by: Kenneth Downs | last post by:
Well, I'm coming to the end of a large and exhausting project, done in my new favorite language PHP, and its time for a diversion. I'm wondering if anyone has experience with writing simple parsers. I've never done it myself, but I know they are not as mysterious as they may seem, it's a matter of finding the tools. The idea is to take something like CSS format, except that it allows nesting, and turn it into associative arrays, such...
13
2303
by: Paulo Pinto | last post by:
Hi, does anyone know of a Python package that is able to load XML like the XML::Simple Perl package does? For those that don't know it, this package maps the XML file to a dictionary.
4
2457
by: Leif K-Brooks | last post by:
I'm writing a site with mod_python which will have, among other things, forums. I want to allow users to use some HTML (<em>, <strong>, <p>, etc.) on the forums, but I don't want to allow bad elements and attributes (onclick, <script>, etc.). I would also like to do basic validation (no overlapping elements like <strong><em>foo</em></strong>, no missing end tags). I'm not asking anyone to write a script for me, but does anyone have general...
8
6508
by: Dan | last post by:
Using XML::Simple in Perl is extremely slow when parsing big XML files (can be up to 250M, taking ~1h). How can I increase my performance / reduce my memory usage? Is SAX the way forward?
4
11451
by: Greg B | last post by:
Well since getopt() doesn't seem to be compatible with Windows, and the free implementation of it for Windows that I found still had some annoying restrictions, I thought I'd whip up a simple parser myself. Just wanted to see if anyone could provide me with some constructive criticism :) any feedback would be greatly appreciated ----------------------------------------------------------------------------- #include "stdio.h" #include...
1
3081
by: steve smith | last post by:
Hi, I have just downloaded the Borland C# Builder and the Microsoft .NET Framework SDK v1.1 from the Borland website, and I am trying to get a simple program to run; however, I keep getting errors. Any ideas why this might be happening? The program I am running is: namespace ExamProblem { using System;
26
495
by: jacob navia | last post by:
Summary: I have changed (as proposed by Chuck) the code to use isalpha() instead of (c>='a' && c <= 'z') etc. I agree that EBCDIC exists :-) I eliminated the goto statement, obviously it is better in a tutorial to stick to structured programming whenever possible...
4
2751
by: =?Utf-8?B?SmFu?= | last post by:
In my application the user can configure automation-scripts by inserting different "actions" into a "procedure". These different procedure- and action-objects are all translated into C# code before execution. One "action" type is an expression-evaluator. At the moment the expression the user writes into the action is just inserted into the generated C# code unchanged. The problem is the variables in my system and in the "procedures";...
11
1355
by: Stef Mientki | last post by:
hello, I need to translate the following string a = '(0, 0, 0, 255), (192, 192, 192, 255), True, 8' into the following list or tuple b = Is there a simple way to to this. (Not needed now, but might need it in the future: even deeper nested
7
1204
by: bvdp | last post by:
Is there a simple/safe expression evaluator I can use in a python program. I just want to pass along a string in the form "1 + 44 / 3" or perhaps "1 + (-4.3*5)" and get a numeric result. I can do this with eval() but I really don't want to subject my users to the problems with that method. In this use I don't need python to worry about complex numbers, variables or anything else. Just do the math on a set of values. Would eval() with...
0
9826
by: Hystou | last post by:
Most computers default to English, but sometimes we require a different language, especially when relocating. Forgot to request a specific language before your computer shipped? No problem! You can effortlessly switch the default language on Windows 10 without reinstalling. I'll walk you through it. First, let's disable language synchronization. With a Microsoft account, language settings sync across devices. To prevent any complications,...
0
11234
Oralloy
by: Oralloy | last post by:
Hello folks, I am unable to find appropriate documentation on the type promotion of bit-fields when using the generalised comparison operator "<=>". The problem is that using the GNU compilers, it seems that the internal comparison operator "<=>" tries to promote arguments from unsigned to signed. This is as boiled down as I can make it. Here is my compilation command: g++-12 -std=c++20 -Wnarrowing bit_field.cpp Here is the code in...
1
10925
by: Hystou | last post by:
Overview: Windows 11 and 10 have less user interface control over operating system update behaviour than previous versions of Windows. In Windows 11 and 10, there is no way to turn off the Windows Update option using the Control Panel or Settings app; it automatically checks for updates and installs any it finds, whether you like it or not. For most users, this new feature is actually very convenient. If you want to control the update process,...
0
9640
agi2029
by: agi2029 | last post by:
Let's talk about the concept of autonomous AI software engineers and no-code agents. These AIs are designed to manage the entire lifecycle of a software development project—planning, coding, testing, and deployment—without human intervention. Imagine an AI that can take a project description, break it down, write the code, debug it, and then launch it, all on its own.... Now, this would greatly impact the work of software developers. The idea...
0
7171
by: conductexam | last post by:
I have .net C# application in which I am extracting data from word file and save it in database particularly. To store word all data as it is I am converting the whole word file firstly in HTML and then checking html paragraph one by one. At the time of converting from word file to html my equations which are in the word document file was convert into image. Globals.ThisAddIn.Application.ActiveDocument.Select();...
0
6058
by: adsilva | last post by:
A Windows Forms form does not have the event Unload, like VB6. What one acts like?
1
4682
by: 6302768590 | last post by:
Hai team i want code for transfer the data from one system to another through IP address by using C# our system has to for every 5mins then we have to update the data what the data is updated we have to send another system
2
4276
muto222
by: muto222 | last post by:
How can i add a mobile payment intergratation into php mysql website.
3
3282
bsmnconsultancy
by: bsmnconsultancy | last post by:
In today's digital era, a well-designed website is crucial for businesses looking to succeed. Whether you're a small business owner or a large corporation in Toronto, having a strong online presence can significantly impact your brand's success. BSMN Consultancy, a leader in Website Development in Toronto offers valuable insights into creating effective websites that not only look great but also perform exceptionally well. In this comprehensive...

By using Bytes.com and its services, you agree to our Privacy Policy and Terms of Use.

To disable or enable advertisements and analytics tracking please visit the manage ads & tracking page.