473,883 Members | 1,663 Online
Bytes | Software Development & Data Engineering Community
+ Post

Home Posts Topics Members FAQ

A simple parser

Hi guys

I have written this small parser to print out the functions defined in a
C file. This is an example of parsing in C, that I want to add to my
tutorial. Comments (and bug reports) are welcome.

-------------------------------------------------------------cut here

/* A simple scanner that will take a file of C source code and
print the names of all functions therein, in the following format:
"Function XXXX found line dddd .... ddddd"
Algorithm: it scans for a closing parenthesis immediately followed by
an opening brace. Comments may appear between the closing parenthesis
and the opening brace, but no other characters besides white space.
Functions must be defined with prototype syntax; K & R style
definitions are not supported.
*/
#include <stdio.h>
#define MAXID 1024 // Longest Identifier we support. Sorry
// Java guys...
static char IdBuffer[MAXID]; // Buffer for remembering the function name
static int line = 1; // We start at line 1

// Thin wrapper around fgetc() that keeps the global line counter
// current: each newline read from the stream increments "line".
// Returns the character read, or EOF.
static int Fgetc(FILE *f)
{
    int ch = fgetc(f);

    line += (ch == '\n');
    return ch;
}

// Returns 1 if c may appear in a C identifier, 0 otherwise.
// When "start" is nonzero, c is tested as the FIRST character of an
// identifier, so digits are rejected; when "start" is zero, digits
// are accepted as well.
static int IsIdentifier(int c, int start)
{
    if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')
        return 1;
    // Digits are legal everywhere except in the leading position.
    if (start == 0 && c >= '0' && c <= '9')
        return 1;
    return 0;
}

// Just prints the function name
static int PrintFunction(F ILE *f)
{
printf("Functio n %s found line %d ...",IdBuffer,l ine);
return Fgetc(f);
}

// Reads an identifier into IdBuffer.
// "c" is the identifier's first character, already consumed by the
// caller; the remaining characters are read from f. At most MAXID-1
// characters are stored; the excess of an over-long identifier is
// consumed but dropped, so the stored name is truncated.
// Returns the first character that is not part of the identifier,
// or EOF if the input ends.
static int ReadId(char c, FILE *f)
{
    // The value returned by Fgetc() is kept in an int. Storing it in a
    // char would make EOF indistinguishable from a valid byte (or never
    // equal to EOF at all where plain char is unsigned).
    int next = c;
    int i = 1;

    IdBuffer[0] = c;
    for (;;) {
        next = Fgetc(f);
        if (next == EOF || !IsIdentifier(next, 0))
            break;
        if (i < MAXID - 1)
            IdBuffer[i++] = (char)next;
    }
    IdBuffer[i] = '\0';
    return next;
}
// Skips a string literal. The opening '"' has already been consumed by
// the caller. A backslash makes the following character be skipped as
// well, so an escaped quote does not terminate the string.
// Returns the character following the closing '"', or EOF if the input
// ends first.
static int ParseString(FILE *f)
{
    int c = Fgetc(f);

    while (c != EOF && c != '"') {
        if (c == '\\')
            c = Fgetc(f);   // step onto the escaped character
        if (c != EOF)
            c = Fgetc(f);
    }
    if (c == '"')
        c = Fgetc(f);
    return c;
}

// Skips a block comment. The opening slash-star has already been
// consumed by the caller.
// Returns the character following the closing star-slash, or EOF if
// the input ends before the comment is closed.
static int ParseComment(FILE *f)
{
    int c = Fgetc(f);

    for (;;) {
        // Advance to the next '*'.
        while (c != '*') {
            if (c == EOF)
                return EOF;
            c = Fgetc(f);
        }
        // A '*' only closes the comment when a '/' follows directly.
        // Otherwise keep scanning from the character just read (it may
        // itself be another '*').
        c = Fgetc(f);
        if (c == '/')
            return Fgetc(f);
    }
}
// Skips a "//" comment up to and including the end of the line.
// A backslash causes the character after it to be skipped too, so a
// backslash-newline pair continues the comment on the next line.
// Returns the first character after the terminating newline, or EOF.
static int ParseCppComment(FILE *f)
{
    int ch;

    for (ch = Fgetc(f); ch != EOF && ch != '\n'; ) {
        if (ch == '\\')
            ch = Fgetc(f);
        if (ch != EOF)
            ch = Fgetc(f);
    }
    return (ch == '\n') ? Fgetc(f) : ch;
}

// Skips white space and comments, starting with the already-read
// character c.
// Returns the first character that is neither white space nor part of
// a comment, or EOF if the input ends first. A '/' that does not start
// a comment is consumed and the character after it is returned.
static int SkipWhiteSpace(int c, FILE *f)
{
    for (;;) {
        // EOF is negative, so it has to be tested before the "<= ' '"
        // white-space test or the loop would never terminate.
        if (c == EOF)
            return EOF;
        if (c <= ' ') {
            c = Fgetc(f);
            continue;
        }
        if (c != '/')
            return c;
        // Possible comment start: look at the character after the '/'.
        c = Fgetc(f);
        if (c == '*')
            c = ParseComment(f);
        else if (c == '/')
            c = ParseCppComment(f);
        else
            return c;
    }
}

// Skips a character constant. The opening single quote has already
// been consumed by the caller. A backslash makes the following
// character be skipped as well, so '\'' is handled.
// Returns the character following the closing quote, or EOF if the
// input ends first.
static int ParseQuotedChar(FILE *f)
{
    int ch = Fgetc(f);

    for (;;) {
        if (ch == EOF)
            return ch;
        if (ch == '\'')
            return Fgetc(f);
        if (ch == '\\')
            ch = Fgetc(f);
        if (ch != EOF)
            ch = Fgetc(f);
    }
}
// Entry point. Takes the name of a C source file as first argument and
// prints one line per function definition found in it.
// Exit status: 0 on success, 1 if no file name was given, 2 if the
// file could not be opened.
int main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <file.c>\n", argc > 0 ? argv[0] : "scanner");
        return 1;
    }
    FILE *f = fopen(argv[1], "r");
    if (f == NULL) {
        fprintf(stderr, "Can't find %s\n", argv[1]);
        return 2;
    }

    int c = Fgetc(f);
    int level = 0;       // current '{' nesting depth
    int parenlevel = 0;  // current '(' nesting depth
    int inFunction = 0;  // nonzero between a reported '{' and its '}'

    while (c != EOF) {
        // Every case must advance c, otherwise the loop never ends.
        switch (c) {
        case '"':
            c = ParseString(f);
            break;
        case '/':
            c = Fgetc(f);
            if (c == '*')
                c = ParseComment(f);
            else if (c == '/')
                c = ParseCppComment(f);
            break;
        case '\'':
            c = ParseQuotedChar(f);
            break;
        case '{':
            level++;
            c = Fgetc(f);
            break;
        case '}':
            // Closing brace of a function body: finish the report line
            // started by PrintFunction() with the ending line number.
            if (level == 1 && inFunction) {
                printf(" %d\n", line);
                inFunction = 0;
            }
            if (level > 0)
                level--;
            c = Fgetc(f);
            break;
        case '(':
            parenlevel++;
            c = Fgetc(f);
            break;
        case ')':
            if (parenlevel > 0)
                parenlevel--;
            c = Fgetc(f);
            // A ')' at file scope followed (after white space and
            // comments) by '{' is taken as the start of a function body.
            if ((parenlevel | level) == 0) {
                c = SkipWhiteSpace(c, f);
                if (c == '{') {
                    level++;
                    inFunction = 1;
                    c = PrintFunction(f);
                }
            }
            break;
        default:
            // Remember identifiers seen at file scope; the last one read
            // before the parameter list is reported as the function name.
            if ((level | parenlevel) == 0 && IsIdentifier(c, 1))
                c = ReadId((char)c, f);
            else
                c = Fgetc(f);
            break;
        }
    }
    fclose(f);
    return 0;
}
Oct 14 '06
121 6580
Richard Heathfield <in*****@invalid.invalid> writes:
Mark McIntyre said:
>On Mon, 16 Oct 2006 02:01:57 +0000, in comp.lang.c , Richard
Heathfield <in*****@invalid.invalid> wrote:
>>>Keith Thompson said:
Which explains your mistake, sir.

I remain to be convinced that it was a mistake.

That's obvious, but frankly, that's part of the problem - that you're
not prepared to step away from the fight and look at the issue
dispassionately.

I *have* looked at the whole C99 issue dispassionately. Otherwise, I
wouldn't bother "fighting" about it.
I suggest that this isn't just about the C99 issue.
>I strongly suggest you step back and take stock.

I took stock already. All present and correct. You?
Without reference to Mark's recent postings in this thread, let me
explain why I personally had a bit of a problem with your followup
near the beginning of this brouhaha. I'll probably drop the subject
after this, unless you care to discuss it further.

jacob navia posted a chunk of code. It was, as far as I can tell,
valid C99. It was, as far as I can tell, valid C90 with the exception
of its use of "//" comments and of mixed declarations and statements.

Your response was to post the error messages produced by your compiler
(gcc 2.whatever in C90 conforming mode).

In my opinion, it should have been obvious to you that the code wasn't
intended to be C90, and that it was probably intended to be valid C99.
C99 code is clearly topical in this newsgroup, and would be even if
there were *no* conforming C99 compilers, or even if no compilers
implemented any C99 features (other than the ones already in C90).

You demonstrated that a conforming C90 compiler becomes confused when
confronted with "//" comments (unless it specifically recognizes them
for the purpose of diagnosing them, which yours doesn't). This is, I
believe, well known and not particularly interesting.

You acted like someone who doesn't even know about "//" comments,
which I'm certain is not the case.

You could have pointed out that "//" comments are ill-advised in
Usenet articles, and that mixing declarations and statements makes the
code less portable than it could be; that would have been a
contribution to the discussion. You knew, or should have known, the
actual issue with the code, but rather than saying so, you posted
something that appeared to be mere snark. Whether you intended it
that way is another question. Whether you were influenced by the
identity of the previous poster is yet another question, one on which
I will not speculate out loud.

(And a series of overreactions led to a fairly pointless flame war,
but I'm not commenting on that.)

Your contributions to this newsgroup over the years have been
invaluable, more than enough, IMHO, to earn you a pass for the
occasional lapse.

--
Keith Thompson (The_Other_Keith) ks***@mib.org <http://www.ghoti.net/~kst>
San Diego Supercomputer Center <* <http://users.sdsc.edu/~kst>
We must do something. This is something. Therefore, we must do this.
Oct 18 '06 #111
Richard Heathfield <in*****@invalid.invalid> writes:
Keith Thompson said:
>Richard Heathfield <in*****@invalid.invalid> writes:
>>Harald van Dijk said:
Richard Heathfield wrote:
Harald van Dijk said:

If you would also like an example of
strictly conforming C90 code being rejected, see
http://gcc.gnu.org/PR19977.
>
What's "strictly conforming" about integer overflow?

There is no integer overflow, since the code is never executed. This is
allowed in strictly conforming programs.

What makes you think the code is never executed? The code is merely a
single translation unit. The implementation cannot know whether the code
will be executed. It can certainly tell that the initialisation will
result in integer overflow, however, and it is perfectly within its
rights to diagnose this.

Of course, but it's not within its rights to fail to translate it (in
a conforming mode).

C&V, please. I know of no requirement on implementations to translate *any*
program (except the one that has a brazillion nested foobars in it, of
course), let alone one that is manifestly incorrect.
Sure, any program other than the "one instance of every one of the
following" cited in the translation limits clause of the standard,
could hit some translation limit and therefore fail to compile.

Here's the code fragment from PR19977:
=============== =============== ==
#include <limits.h>

void
f (void)
{
int c = INT_MAX + 1;
}
=============== =============== ==

Let's embed this in a complete program:
=============== =============== ==
#include <limits.h>

void f(void)
{
int c = INT_MAX + 1;
}

int main(void)
{
return 0;
}
=============== =============== ==

I believe this program is strictly conforming.

--
Keith Thompson (The_Other_Keith) ks***@mib.org <http://www.ghoti.net/~kst>
San Diego Supercomputer Center <* <http://users.sdsc.edu/~kst>
We must do something. This is something. Therefore, we must do this.
Oct 18 '06 #112
Keith Thompson said:
Without reference to Mark's recent postings in this thread, let me
explain why I personally had a bit of a problem with your followup
near the beginning of this brouhaha. I'll probably drop the subject
after this, unless you care to discuss it further.
Probably not. I think we're probably both sick of it already, aren't we?
jacob navia posted a chunk of code. It was, as far as I can tell,
valid C99. It was, as far as I can tell, valid C90 with the exception
of its use of "//" comments and of mixed declarations and statements.
That's quite an "except", but okay, I believe you.
Your response was to post the error messages produced by your compiler
(gcc 2.whatever in C90 conforming mode).
Right. The message being "my C90 compiler doesn't like this code". And I
don't suppose yours does, either.
In my opinion, it should have been obvious to you that the code wasn't
intended to be C90,
It was obvious to my compiler, at any rate. :-)
and that it was probably intended to be valid C99.
Fine. When the world and his dog get a C compiler, the code will become
relevant, at which point I'll take a closer look.
C99 code is clearly topical in this newsgroup,
Undoubtedly. I am not disputing the topicality of the OP's code.

<snip>
Your contributions to this newsgroup over the years have been
invaluable, more than enough, IMHO, to earn you a pass for the
occasional lapse.
Thanks for the compliment, but I'm still not convinced that this /is/ a
lapse. If you examine my contributions to this newsgroup over the years,
you will find that I generally admit it when I'm wrong. If I could see how
I were in the wrong here, fine - but try as I might, I can't see what's
wrong with pointing out the non-portability of code posted on the
comp.lang.c newsgroup. You've done it yourself, dozens if not hundreds of
times.

???

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
Oct 18 '06 #113
Keith Thompson said:

<snip>
Here's the code fragment from PR19977:
<snip>
#include <limits.h>

void f(void)
{
int c = INT_MAX + 1;
}

int main(void)
{
return 0;
}
=============== =============== ==

I believe this program is strictly conforming.
I'm not sure about that. I've never been very convinced by these "this bit
is never called, so it doesn't count" arguments. But what I /am/ sure about
is that either - as in your harness, for example - f() is never called, in
which case it's dead code which should be removed, or it *is* called, in
which case it's broken code which should be fixed. Either way, if gcc
swears at it, that's a Good Thing, IMHO, and certainly no barrier to
portability. If you want it to compile under gcc, fix the integer overflow
problem.

Or, of course, you could simply use the version of gcc I use, which compiles
the code, your harness and all, just fine (although it does issue a
diagnostic message, which it is within its rights to do).

Bug? What bug?

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
Oct 18 '06 #114
Richard Heathfield <in*****@invalid.invalid> writes:
[...]
Thanks for the compliment, but I'm still not convinced that this /is/ a
lapse. If you examine my contributions to this newsgroup over the years,
you will find that I generally admit it when I'm wrong. If I could see how
I were in the wrong here, fine - but try as I might, I can't see what's
wrong with pointing out the non-portability of code posted on the
comp.lang.c newsgroup. You've done it yourself, dozens if not hundreds of
times.
Yes, but when I point out that a piece of code is non-portable, I
explain *why* (and, usually, so do you).

You didn't point out that the code was non-portable. You posted a
bunch of compiler error messages, implying that the code was full of
syntax errors (which it really *wasn't*), and you made no further
comment at all. Your reply, taken by itself, was indistinguishable
from one from someone who had never heard of "//" comments at all,
didn't recognize them in the posted source code, and honestly thought
they were nothing more than syntax errors. Perhaps you were playing
dumb to make a point; if so, the point didn't come across very well in
this case.

Your response was, as in the old Microsoft joke about the hot air
balloon, technically correct but not useful.

--
Keith Thompson (The_Other_Keith) ks***@mib.org <http://www.ghoti.net/~kst>
San Diego Supercomputer Center <* <http://users.sdsc.edu/~kst>
We must do something. This is something. Therefore, we must do this.
Oct 18 '06 #115
Keith Thompson said:
Your response was, as in the old Microsoft joke about the hot air
balloon, technically correct but not useful.
It was IBM when I heard it. Plus ça change, plus c'est la même chose.

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
Oct 18 '06 #116
Richard Heathfield wrote:
Keith Thompson said:

<snip>
Here's the code fragment from PR19977:

<snip>
#include <limits.h>

void f(void)
{
int c = INT_MAX + 1;
}

int main(void)
{
return 0;
}
=============== =============== ==

I believe this program is strictly conforming.

I'm not sure about that. I've never been very convinced by these "this bit
is never called, so it doesn't count" arguments.
http://open-std.org/JTC1/SC22/WG14/www/docs/dr_109.html
But what I /am/ sure about
is that either - as in your harness, for example - f() is never called, in
which case it's dead code which should be removed, or it *is* called, in
which case it's broken code which should be fixed. Either way, if gcc
swears at it, that's a Good Thing, IMHO, and certainly no barrier to
portability. If you want it to compile under gcc, fix the integer overflow
problem.

Or, of course, you could simply use the version of gcc I use, which compiles
the code, your harness and all, just fine (although it does issue a
diagnostic message, which it is within its rights to do).

Bug? What bug?
To be sure, did you use both the -ansi and -pedantic-errors options?
Versions as old as 3.2.3 are listed in the "known to fail" list for
that bug. If your version is older and accepts it, fair enough. If it
doesn't, the fact that other "intended to be conforming" modes are
available is not really relevant, unless you don't consider problems
that are only exposed with optimisations enabled as bugs either.

Oct 18 '06 #117
Harald van Dijk wrote:
Richard Heathfield wrote:
Keith Thompson said:

<snip>
Here's the code fragment from PR19977:
<snip>
#include <limits.h>
>
void f(void)
{
int c = INT_MAX + 1;
}
>
int main(void)
{
return 0;
}
=============== =============== ==
>
I believe this program is strictly conforming.
I'm not sure about that. I've never been very convinced by these "this bit
is never called, so it doesn't count" arguments.

http://open-std.org/JTC1/SC22/WG14/www/docs/dr_109.html
But what I /am/ sure about
is that either - as in your harness, for example - f() is never called, in
which case it's dead code which should be removed, or it *is* called, in
which case it's broken code which should be fixed. Either way, if gcc
swears at it, that's a Good Thing, IMHO, and certainly no barrier to
portability. If you want it to compile under gcc, fix the integer overflow
problem.

Or, of course, you could simply use the version of gcc I use, which compiles
the code, your harness and all, just fine (although it does issue a
diagnostic message, which it is within its rights to do).

Bug? What bug?

To be sure, did you use both the -ansi and -pedantic-errors options?
Versions as old as 3.2.3 are listed in the "known to fail" list for
That should be "as old as 2.95.3", of course, sorry.
that bug. If your version is older and accepts it, fair enough. If it
doesn't, the fact that other "intended to be conforming" modes are
available is not really relevant, unless you don't consider problems
that are only exposed with optimisations enabled as bugs either.
Oct 18 '06 #118
Harald van Dijk said:
Richard Heathfield wrote:
>Keith Thompson said:
I believe this program is strictly conforming.

I'm not sure about that. I've never been very convinced by these "this
bit is never called, so it doesn't count" arguments.

http://open-std.org/JTC1/SC22/WG14/www/docs/dr_109.html
Fair enough. I'm still not *very* convinced, but I'm a lot more convinced
than I was. :-)
If your version is older and accepts it, fair enough.
It is, and it does.

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
Oct 18 '06 #119
Richard Heathfield posted:
>Stop this Richard, you're making a tit of yourself.

It is not wise to insult people merely for disagreeing with you.

Mark McIntyre is an thorough-bred asshole, plain and simple. I've yet to hear
him utter one pleasant syllable.

--

Frederick Gotham
Oct 18 '06 #120

This thread has been closed and replies have been disabled. Please start a new discussion.

Similar topics

3
2222
by: Kenneth Downs | last post by:
Well, I'm coming to the end of a large and exhausting project, done in my new favorite language PHP, and its time for a diversion. I'm wondering if anyone has experience with writing simple parsers. I've never done it myself, but I know they are not as mysterious as they may seem, it's a matter of finding the tools. The idea is to take something like CSS format, except that it allows nesting, and turn it into associative arrays, such...
13
2302
by: Paulo Pinto | last post by:
Hi, does anyone know of a Python package that is able to load XML like the XML::Simple Perl package does? For those that don't know it, this package maps the XML file to a dictionary.
4
2457
by: Leif K-Brooks | last post by:
I'm writing a site with mod_python which will have, among other things, forums. I want to allow users to use some HTML (<em>, <strong>, <p>, etc.) on the forums, but I don't want to allow bad elements and attributes (onclick, <script>, etc.). I would also like to do basic validation (no overlapping elements like <strong><em>foo</em></strong>, no missing end tags). I'm not asking anyone to write a script for me, but does anyone have general...
8
6508
by: Dan | last post by:
Using XML::Simple in perl is extreemly slow to parse big XML files (can be up to 250M, taking ~1h). How can I increase my performance / reduce my memory usage? Is SAX the way forward?
4
11451
by: Greg B | last post by:
Well since getopt() doesn't seem to be compatible with Windows, and the free implementation of it for Windows that I found still had some annoying restrictions, I thought I'd whip up a simple parser myself. Just wanted to see if anyone could provide me with some constructive criticism :) any feedback would be greatly appreciated ----------------------------------------------------------------------------- #include "stdio.h" #include...
1
3081
by: steve smith | last post by:
Hi I have just downloaded the Borland C# Builder and the Micorsoft ..Net framework SDK v1.1 from the borland webist, and i am trying to get a simple program to run, however I keep getting errors, any ideas why this might be happening? Program i am running is: namespace ExamProblem { using System;
26
495
by: jacob navia | last post by:
Summary: I have changed (as proposed by Chuck) the code to use isalpha() instead of (c>='a' && c <= 'z') etc. I agree that EBCDIC exists :-) I eliminated the goto statement, obviously it is better in a tutorial to stick to structured programming whenever possible...
4
2751
by: =?Utf-8?B?SmFu?= | last post by:
In my application the user can configure automation-scripts by inserting different "actions" into a "procedure". These different procedure- and action-objects are all translated into C# code before execution. One "action" type is an expression-evaluator. At the moment the expression the user writes into the action is just inserted into the generated C# code unchanged. The problem is the variables in my system and in the "procedures";...
11
1355
by: Stef Mientki | last post by:
hello, I need to translate the following string a = '(0, 0, 0, 255), (192, 192, 192, 255), True, 8' into the following list or tuple b = Is there a simple way to to this. (Not needed now, but might need it in the future: even deeper nested
7
1202
by: bvdp | last post by:
Is there a simple/safe expression evaluator I can use in a python program. I just want to pass along a string in the form "1 + 44 / 3" or perhaps "1 + (-4.3*5)" and get a numeric result. I can do this with eval() but I really don't want to subject my users to the problems with that method. In this use I don't need python to worry about complex numbers, variables or anything else. Just do the math on a set of values. Would eval() with...
0
9933
marktang
by: marktang | last post by:
ONU (Optical Network Unit) is one of the key components for providing high-speed Internet services. Its primary function is to act as an endpoint device located at the user's premises. However, people are often confused as to whether an ONU can Work As a Router. In this blog post, we'll explore What is ONU, What Is Router, ONU & Router's main usage, and What is the difference between ONU and Router. Let's take a closer look! Part I. Meaning of...
0
11121
Oralloy
by: Oralloy | last post by:
Hello folks, I am unable to find appropriate documentation on the type promotion of bit-fields when using the generalised comparison operator "<=>". The problem is that using the GNU compilers, it seems that the internal comparison operator "<=>" tries to promote arguments from unsigned to signed. This is as boiled down as I can make it. Here is my compilation command: g++-12 -std=c++20 -Wnarrowing bit_field.cpp Here is the code in...
1
10836
by: Hystou | last post by:
Overview: Windows 11 and 10 have less user interface control over operating system update behaviour than previous versions of Windows. In Windows 11 and 10, there is no way to turn off the Windows Update option using the Control Panel or Settings app; it automatically checks for updates and installs any it finds, whether you like it or not. For most users, this new feature is actually very convenient. If you want to control the update process,...
0
9564
agi2029
by: agi2029 | last post by:
Let's talk about the concept of autonomous AI software engineers and no-code agents. These AIs are designed to manage the entire lifecycle of a software development project—planning, coding, testing, and deployment—without human intervention. Imagine an AI that can take a project description, break it down, write the code, debug it, and then launch it, all on its own.... Now, this would greatly impact the work of software developers. The idea...
1
7960
isladogs
by: isladogs | last post by:
The next Access Europe User Group meeting will be on Wednesday 1 May 2024 starting at 18:00 UK time (6PM UTC+1) and finishing by 19:30 (7.30PM). In this session, we are pleased to welcome a new presenter, Adolph Dupr who will be discussing some powerful techniques for using class modules. He will explain when you may want to use classes instead of User Defined Types (UDT). For example, to manage the data in unbound forms. Adolph will...
0
7114
by: conductexam | last post by:
I have .net C# application in which I am extracting data from word file and save it in database particularly. To store word all data as it is I am converting the whole word file firstly in HTML and then checking html paragraph one by one. At the time of converting from word file to html my equations which are in the word document file was convert into image. Globals.ThisAddIn.Application.ActiveDocument.Select();...
0
5982
by: adsilva | last post by:
A Windows Forms form does not have the event Unload, like VB6. What one acts like?
1
4606
by: 6302768590 | last post by:
Hai team i want code for transfer the data from one system to another through IP address by using C# our system has to for every 5mins then we have to update the data what the data is updated we have to send another system
2
4210
muto222
by: muto222 | last post by:
How can i add a mobile payment intergratation into php mysql website.

By using Bytes.com and its services, you agree to our Privacy Policy and Terms of Use.

To disable or enable advertisements and analytics tracking please visit the manage ads & tracking page.