473,836 Members | 1,509 Online
Bytes | Software Development & Data Engineering Community
+ Post

Home Posts Topics Members FAQ

A simple parser

Hi guys

I have written this small parser to print out the functions defined in a
C file. This is an example of parsing in C, that I want to add to my
tutorial. Comments (and bug reports) are welcome.

-------------------------------------------------------------cut here

/* A simple scanner that will take a file of C source code and
print the names of all functions therein, in the following format:
"Function XXXX found line dddd .... ddddd"
Algorithm: it scans for a closing parenthesis followed immediately
by an opening brace. Comments can appear between the closing
paren and the opening brace, but no other characters besides white
space. Functions must use prototype syntax; K & R style definitions
are not supported.
*/
#include <stdio.h>
#define MAXID 1024 // Longest Identifier we support. Sorry
// Java guys...
static char IdBuffer[MAXID]; // Buffer for remembering the function name
static int line = 1; // We start at line 1

// Wrapper around fgetc() that keeps the global line counter current:
// each newline read from the stream adds one to "line".
// Returns whatever fgetc() returned (a character or EOF).
static int Fgetc(FILE *f)
{
    int ch = fgetc(f);

    line += (ch == '\n');
    return ch;
}

// Returns 1 if c may appear in a C identifier, 0 otherwise.
// Letters and '_' are always accepted. Digits are accepted only when
// "start" is zero, i.e. when c is NOT the first character of the
// identifier (an identifier cannot begin with a digit).
// Any other value, including EOF, yields 0.
static int IsIdentifier(int c, int start)
{
    if (c >= 'a' && c <= 'z')
        return 1;
    if (c >= 'A' && c <= 'Z')
        return 1;
    if (start == 0 && c >= '0' && c <= '9')
        return 1;
    if (c == '_')
        return 1;
    return 0;
}

// Just prints the function name
static int PrintFunction(F ILE *f)
{
printf("Functio n %s found line %d ...",IdBuffer,l ine);
return Fgetc(f);
}

// Reads a global identifier into IdBuffer.
// "c" is the identifier's first character, already read by the caller;
// the remaining characters are read from f.
// At most MAXID-1 characters are stored; the rest of an over-long
// identifier is consumed and dropped so that its tail is not later
// mistaken for a new identifier.
// Returns the first character that is not part of the identifier
// (possibly EOF).
static int ReadId(char c, FILE *f)
{
    int i = 1;
    int ch;     // must be int: a char cannot represent EOF distinctly

    IdBuffer[0] = c;
    for (;;) {
        ch = Fgetc(f);
        if (ch == EOF || !IsIdentifier(ch, 0))
            break;
        if (i < MAXID - 1)
            IdBuffer[i++] = (char)ch;
    }
    IdBuffer[i] = 0;
    return ch;
}
// Skips a string literal. Called after the opening '"' has been read.
// A backslash causes the following character to be skipped too, so an
// escaped quote (\") does not end the string.
// Returns the character following the closing quote, or EOF if the
// input ends inside the string.
static int ParseString(FILE *f)
{
    int c = Fgetc(f);

    while (c != EOF && c != '"') {
        if (c == '\\')
            c = Fgetc(f);   // step onto the escaped character
        if (c != EOF)
            c = Fgetc(f);
    }
    if (c == '"')
        c = Fgetc(f);
    return c;
}

// Skips a block comment. Called after the opening "/*" has been read.
// Returns the character following the closing "*/", or EOF if the
// input ends inside the comment.
static int ParseComment(FILE *f)
{
    int c = Fgetc(f);

    while (c != EOF) {
        if (c == '*') {
            // Possible end of comment. If the next char is not '/',
            // it is re-examined by the loop (it may itself be a '*').
            c = Fgetc(f);
            if (c == '/')
                return Fgetc(f);
        }
        else {
            c = Fgetc(f);
        }
    }
    return EOF;
}
// Skips a "//" comment. Called after the two slashes have been read.
// The character after a backslash is skipped, so a backslash-newline
// pair does not end the comment.
// Returns the character following the terminating newline, or EOF.
static int ParseCppComment(FILE *f)
{
    int ch;

    for (ch = Fgetc(f); ch != EOF && ch != '\n'; ) {
        if (ch == '\\')
            ch = Fgetc(f);
        if (ch != EOF)
            ch = Fgetc(f);
    }
    return (ch == '\n') ? Fgetc(f) : ch;
}

// Skips white space and comments.
// "c" is the current (already read) character. Characters <= ' ' are
// treated as white space; "/* ... */" and "// ..." comments are skipped
// wherever they occur, including directly after the starting position
// or directly after another comment.
// Returns the first character that is neither white space nor part of a
// comment, or EOF at end of input. A lone '/' that does not start a
// comment is consumed and the character after it is returned.
static int SkipWhiteSpace(int c, FILE *f)
{
    for (;;) {
        if (c == EOF)
            return EOF;     // EOF is negative: never treat it as white space
        if (c <= ' ') {
            c = Fgetc(f);
            continue;
        }
        if (c != '/')
            return c;
        c = Fgetc(f);
        if (c == '*')
            c = ParseComment(f);
        else if (c == '/')
            c = ParseCppComment(f);
        else
            return c;
    }
}

// Skips a character constant. Called after the opening single quote
// has been read. The character after a backslash is skipped, so '\''
// is handled.
// Returns the character following the closing quote, or EOF if the
// input ends first.
static int ParseQuotedChar(FILE *f)
{
    int ch = Fgetc(f);

    for (;;) {
        if (ch == EOF)
            return ch;
        if (ch == '\'')
            return Fgetc(f);
        if (ch == '\\') {
            ch = Fgetc(f);
            if (ch == EOF)
                return ch;
        }
        ch = Fgetc(f);
    }
}
// Scans the C source file named by argv[1] and prints one line per
// function definition: "Function <name> found line <start> ... <end>".
// Exit status: 0 on success, 1 when no file name was given, 2 when the
// file cannot be opened.
int main(int argc,char *argv[])
{
    if (argc < 2) {
        // argv[0] may be absent when argc is 0
        fprintf(stderr, "Usage: %s <file.c>\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "scanner");
        return 1;
    }
    FILE *f = fopen(argv[1],"r");
    if (f == NULL) {
        fprintf(stderr, "Can't find %s\n",argv[1]);
        return 2;
    }
    int c = Fgetc(f);
    int level = 0;          // current brace nesting depth
    int parenlevel = 0;     // current parenthesis nesting depth
    int inFunction = 0;     // non-zero while inside a reported function body
    while (c != EOF) {
        // Note that each of the switches must advance the
        // character read so that we avoid an infinite loop.
        switch (c) {
        case '"':
            c = ParseString(f);
            break;
        case '/':
            c = Fgetc(f);
            if (c == '*')
                c = ParseComment(f);
            else if (c == '/')
                c = ParseCppComment(f);
            break;
        case '\'':
            c = ParseQuotedChar(f);
            break;
        case '{':
            level++;
            c = Fgetc(f);
            break;
        case '}':
            // Closing the outermost brace of a function body: finish
            // the report line with the current line number.
            if (level == 1 && inFunction) {
                printf(" %d\n",line);
                inFunction = 0;
            }
            if (level > 0)
                level--;
            c = Fgetc(f);
            break;
        case '(':
            parenlevel++;
            c = Fgetc(f);
            break;
        case ')':
            if (parenlevel > 0)
                parenlevel--;
            c = Fgetc(f);
            // A ')' at global level followed (after optional white
            // space and comments) by '{' starts a function body.
            if (parenlevel == 0 && level == 0) {
                c = SkipWhiteSpace(c,f);
                if (c == '{') {
                    level++;
                    inFunction = 1;
                    c = PrintFunction(f);
                }
            }
            break;
        default:
            // Only identifiers at global level are remembered; the
            // last one seen before the parameter list is the name.
            if (level == 0 && parenlevel == 0 &&
                IsIdentifier(c, 1))
                c = ReadId(c,f);
            else c = Fgetc(f);
        }
    }
    fclose(f);
    return 0;
}
Oct 14 '06
121 6570
Richard said:
Richard Heathfield <in*****@invalid.invalid> writes:
>Richard said:
>>Richard Heathfield <in*****@invalid.invalid> writes:
<snip>
>>>Look, if there were some compelling reason for using C99 features,
okay, fair enough: "sorry for leaving Richard H (and most of the
conforming world) behind but these C99 features are just too useful to
ignore, and if that means a portability loss, so be it". But that does
not appear to be the case here.

This is not about C90 vs C99. This is about "works everywhere" vs
"works almost nowhere, unless you use a non-conforming compiler".

But since gcc supports a subset, and a useful subset, its hardly "almost
nowhere" is it?

When I invoke my gcc implementation in conforming mode, it (correctly)
diagnoses single-line comments, mixed code/decls, etc. If you are asking
me to turn off conforming mode, the answer is "No".

Which conforming mode?
I only have two - K&R C and C90 - and of the two I choose C90. (No surprise
there.)
I have the following command line options: Pretty strict it is too.

CFLAGS=-std=c99 -pedantic-errors -Wall -pthread -g $(DEBUGFLAGS)
Note that gcc, despite its std=c99 switch, is not a conforming C99 compiler.
Nor does it have a conforming C99 libc.

FYI my command line switches to gcc are:

CFLAGS=-W -Wall -ansi -pedantic -Wformat-nonliteral -Wcast-align
-Wpointer-arith -Wbad-function-cast -Wmissing-prototypes
-Wstrict-prototypes -Wmissing-declarations -Winline -Wundef
-Wnested-externs -Wcast-qual -Wshadow -Wconversion -Wwrite-strings
-Wno-conversion -ffloat-store -O2
Nothing particularly evil. OK, we know our target OS.

These features don't suddenly make it weaker C code - this is
comp.lang.c and C99 is the C language too.
Yes, but where are the compilers?
Like it, or more probably in your case, not.
I'm not against C99. I'm against non-portability. But wait...!

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
Oct 15 '06 #41
Richard Heathfield <in*****@invalid.invalid> writes:

My stuff tends to be platform-independent (as far as it can be). Which
explains mine. And it, too, is perfectly valid.
Yes. Agreed. But it doesn't make it valid for you to criticise someone whose
target compiler does support C99 and who wishes to use those features.

And that was why there were a few "err, hang on there" replies.

Oct 15 '06 #42
Richard Heathfield <in*****@invalid.invalid> writes:
>
Yes, but where are the compilers?
Err, for the C99 features I use (primarily localised declarations),
gcc. I thought I mentioned that?
>Like it, or more probably in your case, not.

I'm not against C99. I'm against non-portability. But wait...!
Not all C code is intended to be portable. And C99 is a developing
standard ....

Oct 15 '06 #43
Richard said:
Richard Heathfield <in*****@invalid.invalid> writes:

>My stuff tends to be platform-independent (as far as it can be). Which
explains mine. And it, too, is perfectly valid.

Yes. Agreed. But it doesn't make it valid for you to criticise someone
whose target compiler does support C99
It does? What makes you think so? As far as I'm aware, Mr Navia uses
lcc-win32 and, sometimes, gcc. Neither of these is C99-conforming.

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
Oct 15 '06 #44
Richard said:
Richard Heathfield <in*****@invalid.invalid> writes:
>>
Yes, but where are the compilers?

Err, for the C99 features I use (primarily localised declarations),
gcc. I thought I mentioned that?
In order to get gcc to support those features, one must invoke it in a
non-conforming mode. I thought I mentioned that?
>>Like it, or more probably in your case, not.

I'm not against C99. I'm against non-portability. But wait...!

Not all C code is intended to be portable.
Certainly true. That is why we have platform-specific newsgroups, such as
comp.os.ms-windows.programmer.win32, comp.unix.programmer,
comp.os.msdos.programmer, and even comp.compilers.lcc - and
platform-specific code can be profitably discussed in such newsgroups.
And C99 is a developing standard ....
If that is so, then it makes it all the harder to write conforming code in
it, let alone get that code to work as portably as C90 code.

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
Oct 15 '06 #45
Richard Heathfield wrote:
>>>
foo.c:12: parse error before `/'
foo.c:17: stray '\' in program
[51 lines deleted]
>>make: *** [foo.o] Error 1

<shrug> I figured it had to be something like that. So - does anyone have a
conforming C99 compiler that we can use to test Mr Navia's code? No? Oh
well.
You're just trolling now. This is about as useful as asking
why someone is left-shifting cout.

Besides, you have admitted on other threads that it's OK for a
C90 compiler to have one or two conformance problems in deep
dark corners that nobody uses, as long as it compiles 'normal'
conforming programs correctly.

Any compiler that claims any sort of C99 progress will
compile Navia's code. It's not as if // comments and
declarations after statements, are any sort of esoteric
language features. In fact, many C90 compilers support
those constructs as extensions anyway.

Oct 15 '06 #46
Old Wolf said:
Richard Heathfield wrote:
>>>>
foo.c:12: parse error before `/'
foo.c:17: stray '\' in program
[51 lines deleted]
make: *** [foo.o] Error 1

<shrug> I figured it had to be something like that. So - does anyone have
a conforming C99 compiler that we can use to test Mr Navia's code? No? Oh
well.

You're just trolling now.
Nope.
This is about as useful as asking
why someone is left-shifting cout.
And, in its way, such a question is useful, insofar as it draws attention
(admittedly in a somewhat sideways manner) to the fact that C++ is not C,
and should be discussed in a C++ group rather than a C group.

Now, I fully accept that Mr Navia's code is /topical/ here in clc. There is
no question about that. My point is only that he has made his code
unnecessarily difficult to test because he has introduced C99isms
gratuitously. He is perfectly within his rights to do that and still remain
topical. Nevertheless, making code gratuitously difficult to compile is not
a useful strategy. It's akin to posting obfuscated code (with which a
compiler will have no trouble at all, but to which clcers will still object
as it is gratuitously difficult to read - even though it might well be a
strictly conforming C90 program!).
Besides, you have admitted on other threads that it's OK for a
C90 compiler to have one or two conformance problems in deep
dark corners that nobody uses, as long as it compiles 'normal'
conforming programs correctly.
I'm not saying I approve of such conformance problems, of course, but yes,
we live with what we've got. What we *haven't* got is a plethora of
compilers that even /claim/ conformance to C99. We have a small handful, of
which gcc is not one.
Any compiler that claims any sort of C99 progress will
compile Navia's code. It's not as if // comments and
declarations after statements, are any sort of esoteric
language features. In fact, many C90 compilers support
those constructs as extensions anyway.
To enable those extensions in C90 compilers requires invoking them in
non-conforming mode, does it not?

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
Oct 15 '06 #47
Richard <rg****@gmail.com> writes:
Richard Heathfield <in*****@invalid.invalid> writes:
[...]
>When I invoke my gcc implementation in conforming mode, it (correctly)
diagnoses single-line comments, mixed code/decls, etc. If you are asking me
to turn off conforming mode, the answer is "No".

Which conforming mode?

I have the following command line options: Pretty strict it is too.

CFLAGS=-std=c99 -pedantic-errors -Wall -pthread -g $(DEBUGFLAGS)
That does not conform to any standard. It fails to diagnose some
things that are syntax errors in C90, and fails to implement some
features of C99.

--
Keith Thompson (The_Other_Keith) ks***@mib.org <http://www.ghoti.net/~kst>
San Diego Supercomputer Center <* <http://users.sdsc.edu/~kst>
We must do something. This is something. Therefore, we must do this.
Oct 16 '06 #48
In article <45***********************@news.orange.fr> jacob navia <ja***@jacob.remcomp.fr> writes:
Jean-Marc Bourguet wrote:
....
>>That doesn't mean such machines don't exist. I've spent several years
working on such machines. Look up "EBCDIC" in Google.
<snip>

Also BCD, as used on a 14xx series system. (came before "EBCDIC") :-)
BCD is worse: not only are they not contiguous, they aren't even in order.

Well, EBCDIC was a 7 bit code, used for punched cards. The eighth bit
was there to signal the card reader that a character was in that column.
Very wrong. EBCDIC is an 8 bit code, and never has been a 7-bit code. It
was not only used for punched cards, but also on papertape, internally,
and whatever (note the 'IC': Internal Code'). (Although I have never seen
punched card equipment that actually did do full EBCDIC.)
Using only 7 bits, the codes are continuous.
Wrong. There are gaps between 'I' and 'J', and between 'R' and 'S'.
The closing curly brace is in the order between 'I' and 'J' and the
backslash between 'R' and 'S'. When you go to 7 bits (and hence do
use BCD instead of EBCDIC), you will find symbols like '-', '/', '+',
'&' and quite some others (depending on the version of BCD used) in
the range of letters.
--
dik t. winter, cwi, kruislaan 413, 1098 sj amsterdam, nederland, +31205924131
home: bovenover 215, 1025 jn amsterdam, nederland; http://www.cwi.nl/~dik/
Oct 16 '06 #49
Richard Heathfield <in*****@invalid.invalid> writes:
Ian Collins said:
>I thought your original repose was a little melodramatic, considering
C99 is (a) standard C, the topic of this group.

Well, I suppose it was, but it wasn't intended to be. (Can melodrama be
accidental? I don't know.) Anyway, the point is this: that, quite often, my
first reaction (to an article asking for code crits) is to run the code
through a compiler - and I might not even read it first, especially if it's
long. The resulting compiler diagnostics give me a place to start the crit.

And that's what I did this time. If the OP had been someone with a track
record for being reasonable and rational and logical, I might have looked
more closely at the source after seeing how many diagnostics it generated,
and realised at that point that it used C99 features. But since it was only
our resident "all the world's a Win32 box running lcc-win32"-er, I was not
highly motivated to investigate the source of the errors.
Which explains your mistake, sir.

--
Keith Thompson (The_Other_Keith) ks***@mib.org <http://www.ghoti.net/~kst>
San Diego Supercomputer Center <* <http://users.sdsc.edu/~kst>
We must do something. This is something. Therefore, we must do this.
Oct 16 '06 #50

This thread has been closed and replies have been disabled. Please start a new discussion.

Similar topics

3
2219
by: Kenneth Downs | last post by:
Well, I'm coming to the end of a large and exhausting project, done in my new favorite language PHP, and its time for a diversion. I'm wondering if anyone has experience with writing simple parsers. I've never done it myself, but I know they are not as mysterious as they may seem, it's a matter of finding the tools. The idea is to take something like CSS format, except that it allows nesting, and turn it into associative arrays, such...
13
2302
by: Paulo Pinto | last post by:
Hi, does anyone know of a Python package that is able to load XML like the XML::Simple Perl package does? For those that don't know it, this package maps the XML file to a dictionary.
4
2455
by: Leif K-Brooks | last post by:
I'm writing a site with mod_python which will have, among other things, forums. I want to allow users to use some HTML (<em>, <strong>, <p>, etc.) on the forums, but I don't want to allow bad elements and attributes (onclick, <script>, etc.). I would also like to do basic validation (no overlapping elements like <strong><em>foo</em></strong>, no missing end tags). I'm not asking anyone to write a script for me, but does anyone have general...
8
6508
by: Dan | last post by:
Using XML::Simple in perl is extreemly slow to parse big XML files (can be up to 250M, taking ~1h). How can I increase my performance / reduce my memory usage? Is SAX the way forward?
4
11450
by: Greg B | last post by:
Well since getopt() doesn't seem to be compatible with Windows, and the free implementation of it for Windows that I found still had some annoying restrictions, I thought I'd whip up a simple parser myself. Just wanted to see if anyone could provide me with some constructive criticism :) any feedback would be greatly appreciated ----------------------------------------------------------------------------- #include "stdio.h" #include...
1
3079
by: steve smith | last post by:
Hi I have just downloaded the Borland C# Builder and the Micorsoft ..Net framework SDK v1.1 from the borland webist, and i am trying to get a simple program to run, however I keep getting errors, any ideas why this might be happening? Program i am running is: namespace ExamProblem { using System;
26
495
by: jacob navia | last post by:
Summary: I have changed (as proposed by Chuck) the code to use isalpha() instead of (c>='a' && c <= 'z') etc. I agree that EBCDIC exists :-) I eliminated the goto statement, obviously it is better in a tutorial to stick to structured programming whenever possible...
4
2751
by: =?Utf-8?B?SmFu?= | last post by:
In my application the user can configure automation-scripts by inserting different "actions" into a "procedure". These different procedure- and action-objects are all translated into C# code before execution. One "action" type is an expression-evaluator. At the moment the expression the user writes into the action is just inserted into the generated C# code unchanged. The problem is the variables in my system and in the "procedures";...
11
1352
by: Stef Mientki | last post by:
hello, I need to translate the following string a = '(0, 0, 0, 255), (192, 192, 192, 255), True, 8' into the following list or tuple b = Is there a simple way to to this. (Not needed now, but might need it in the future: even deeper nested
7
1198
by: bvdp | last post by:
Is there a simple/safe expression evaluator I can use in a python program. I just want to pass along a string in the form "1 + 44 / 3" or perhaps "1 + (-4.3*5)" and get a numeric result. I can do this with eval() but I really don't want to subject my users to the problems with that method. In this use I don't need python to worry about complex numbers, variables or anything else. Just do the math on a set of values. Would eval() with...
0
10834
Oralloy
by: Oralloy | last post by:
Hello folks, I am unable to find appropriate documentation on the type promotion of bit-fields when using the generalised comparison operator "<=>". The problem is that using the GNU compilers, it seems that the internal comparison operator "<=>" tries to promote arguments from unsigned to signed. This is as boiled down as I can make it. Here is my compilation command: g++-12 -std=c++20 -Wnarrowing bit_field.cpp Here is the code in...
1
10584
by: Hystou | last post by:
Overview: Windows 11 and 10 have less user interface control over operating system update behaviour than previous versions of Windows. In Windows 11 and 10, there is no way to turn off the Windows Update option using the Control Panel or Settings app; it automatically checks for updates and installs any it finds, whether you like it or not. For most users, this new feature is actually very convenient. If you want to control the update process,...
1
7782
isladogs
by: isladogs | last post by:
The next Access Europe User Group meeting will be on Wednesday 1 May 2024 starting at 18:00 UK time (6PM UTC+1) and finishing by 19:30 (7.30PM). In this session, we are pleased to welcome a new presenter, Adolph Dupré who will be discussing some powerful techniques for using class modules. He will explain when you may want to use classes instead of User Defined Types (UDT). For example, to manage the data in unbound forms. Adolph will...
0
6976
by: conductexam | last post by:
I have .net C# application in which I am extracting data from word file and save it in database particularly. To store word all data as it is I am converting the whole word file firstly in HTML and then checking html paragraph one by one. At the time of converting from word file to html my equations which are in the word document file was convert into image. Globals.ThisAddIn.Application.ActiveDocument.Select();...
0
5645
by: TSSRALBI | last post by:
Hello I'm a network technician in training and I need your help. I am currently learning how to create and manage the different types of VPNs and I have a question about LAN-to-LAN VPNs. The last exercise I practiced was to create a LAN-to-LAN VPN between two Pfsense firewalls, by using IPSEC protocols. I succeeded, with both firewalls in the same network. But I'm wondering if it's possible to do the same thing, with 2 Pfsense firewalls...
0
5817
by: adsilva | last post by:
A Windows Forms form does not have the event Unload, like VB6. What one acts like?
1
4446
by: 6302768590 | last post by:
Hai team i want code for transfer the data from one system to another through IP address by using C# our system has to for every 5mins then we have to update the data what the data is updated we have to send another system
2
4006
muto222
by: muto222 | last post by:
How can i add a mobile payment intergratation into php mysql website.
3
3108
bsmnconsultancy
by: bsmnconsultancy | last post by:
In today's digital era, a well-designed website is crucial for businesses looking to succeed. Whether you're a small business owner or a large corporation in Toronto, having a strong online presence can significantly impact your brand's success. BSMN Consultancy, a leader in Website Development in Toronto offers valuable insights into creating effective websites that not only look great but also perform exceptionally well. In this comprehensive...

By using Bytes.com and its services, you agree to our Privacy Policy and Terms of Use.

To disable or enable advertisements and analytics tracking please visit the manage ads & tracking page.