473,320 Members | 1,936 Online
Bytes | Software Development & Data Engineering Community
Post Job

Home Posts Topics Members FAQ

Join Bytes to post your question to a community of 473,320 software developers and data experts.

fslurp() critique

Hi, here's an implementation of file slurping in C, like @lines = <FILE> in perl
or $lines = file("/path/to/file") in php. There's also a main() testing it out.
All suggestions are very welcome! Thank you.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TEST_MAIN

#define CHUNK 4096

#define SLURP_TEXT 1
#define SLURP_BINARY 2

/* returns NULL terminated array of strings from text file, and array's size */
char **fslurp(const char *fname, int *);

/* copies file to memory, returns size in memory and pointer to beginning */
unsigned char *fslurpb(const char *fname, size_t *size);

/* frees whatever fslurp() returned */
void ffree(char **list);

/* frees whatever fslurpb() returned */
void ffreeb(unsigned char *mem);
static void *slurp(const char *, int, size_t *);
char **fslurp(const char *fname, int *numread)
{
char **retval = 0;
char *mem = 0, *p;
size_t offt = 0;
size_t num_lines = 0, softlim = 1;

mem = slurp(fname, SLURP_TEXT, &offt);

/* got the file, now set up the pointers */
for (p = mem; p < mem + offt; ) {
char *q;

if (num_lines + 1 == softlim) {
char **tmp = realloc(retval, (softlim += 5) * sizeof *retval);
if (!tmp)
goto nomem;
retval = tmp;
}

retval[num_lines++] = p;
if (!(q = memchr(p, '\n', mem + offt - p - 1)))
q = mem + offt;
*q = 0;
p = q + 1;
}

retval[num_lines] = 0; /* we got room for an extra 0 */
goto exit;

nomem:
free(retval);
free(mem);
retval = 0;
exit:
if (numread)
*numread = num_lines;
return retval;
}

unsigned char *fslurpb(const char *fname, size_t *size)
{
return slurp(fname, SLURP_BINARY, size);
}

void ffree(char **list)
{
if (list) {
free(list[0]);
free(list);
}
}

void ffreeb(unsigned char *mem)
{
free(mem);
}

static void *slurp(const char *fname, int mode, size_t *numread)
{
unsigned char *mem = 0;
size_t offt = 0, size = 0;
const char *realmode;
FILE *fp;

switch (mode) {
case SLURP_TEXT:
realmode = "r";
break;
case SLURP_BINARY:
default:
realmode = "rb";
break;
}

if ((fp = fopen(fname, realmode)) == 0)
goto exit;

/* slurp the file in */
for (;;) {
void *p;
size_t read;

if (!(p = realloc(mem, size += CHUNK)))
goto nomem;
mem = p;
read = fread(mem + offt, 1, CHUNK, fp);
offt += read;
if (read < CHUNK)
goto exit;
}

nomem:
free(mem);
mem = 0;
exit:
if (fp)
fclose(fp);
*numread = offt;
return mem;
}

#ifdef TEST_MAIN

/* really lame */
static int is_unix(void)
{
int retval = 0;
FILE *fp = fopen("/bin/sh", "r");
if (fp) {
retval = 1;
fclose(fp);
} else {
retval = (getenv("LOGNAME") != 0);
}
return retval;
}

static void list_dump(char **list)
{
if (!list) {
fputs("(null)", stderr);
} else {
int n;
for (n = 1 ; *list; list++, n++) {
fprintf(stderr, "%3d) %s\n", n, *list);
}
}
}

int main(int argc, char **argv)
{
/* test text slurp */
{
char **lines = fslurp(is_unix() ? "/etc/services" : "c:/boot.ini", 0);
list_dump(lines);
ffree(lines);
}

/* test binary slurp */
{
const char *fname = argv[0];
size_t num;
void *mem = fslurpb(fname, &num);
if (!mem) {
char buf[FILENAME_MAX + 32];
sprintf(buf, "%s could not be read", fname);
perror(buf);
} else {
fprintf(stderr, "Read %s into memory, size: %lu\n", fname,
(unsigned long) num);
ffreeb(mem);
}
}

getchar();
return 0;
}
#endif /* TEST_MAIN */

Nov 13 '05 #1
6 1704
"rihad" <ri***@mail.ru> wrote in message
news:b9********************************@4ax.com...
Hi, here's an implementation of file slurping in C, like @lines = <FILE> in perl or $lines = file("/path/to/file") in php. There's also a main() testing it out. All suggestions are very welcome! Thank you.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TEST_MAIN

#define CHUNK 4096

#define SLURP_TEXT 1
#define SLURP_BINARY 2

/* returns NULL terminated array of strings from text file, and array's size */ char **fslurp(const char *fname, int *);

/* copies file to memory, returns size in memory and pointer to beginning */ unsigned char *fslurpb(const char *fname, size_t *size);
Why not work with file handles, this will also clean up your code a bit and
is more consistent with likewise functions.

static void *slurp(const char *, int, size_t *);

Prototyping all functions could be nicer, even if you are not
forward-referencing them. This may also be a good place to put your
comments. Of course eventually the other function prototypes will be put in
an .h file.
char **fslurp(const char *fname, int *numread)
{
[code snipped]
if (num_lines + 1 == softlim) {
char **tmp = realloc(retval, (softlim += 5) * sizeof *retval);
if (!tmp)
goto nomem;
retval = tmp;
}
This does appear as unstructured code to me. Altough I have nothing against
goto, IMHO it's unproperly used here.

if (!tmp) {
free(retval);
retval = NULL; /* or (void*)0, or just 0 as you use */
free(mem);
/* *numread = 0; <- if you really want to, but on error
nothing has to be defined as long as
you properly document this */
break;
}

In this case the labels are dropped, as well as the "goto exit;".
retval[num_lines] = 0; /* we got room for an extra 0 */
You would have to precede this with "if ( retval != NULL )" or something
similar.
static void *slurp(const char *fname, int mode, size_t *numread)
{
unsigned char *mem = 0;
size_t offt = 0, size = 0;
const char *realmode;
FILE *fp;

switch (mode) {
case SLURP_TEXT:
realmode = "r";
break;
case SLURP_BINARY:
default:
realmode = "rb";
break;
}
This may be more scalable, but this may be more readable:

if ( mode == SLURP_TEXT )
realmode = "r";
else
realmode = "rb";

Then again, if you'd to work with filehandles, this wouldn't be an issue, as
well as all the error checking and having to close the file (and checking
whether it should be closed).

if ((fp = fopen(fname, realmode)) == 0)
goto exit;
why not:

if ( ... )
return ( NULL );

/* slurp the file in */
for (;;) {
I would use a while loop here, which reads until EOF, saves you another goto
void *p;
size_t read;

if (!(p = realloc(mem, size += CHUNK)))
goto nomem;
Why don't you check the filesize (fstat is available for quite some
compilers), so you can make an educated guess on the size and always enlarge
it if need be.
mem = p;
read = fread(mem + offt, 1, CHUNK, fp);
offt += read;
if (read < CHUNK)
goto exit;
Same thing on the goto as before
}

nomem:
free(mem);
mem = 0;
exit:
if (fp)
fclose(fp);
*numread = offt;
return mem;
}

[code snipped]

int main(int argc, char **argv)
{
/* test text slurp */
{
char **lines = fslurp(is_unix() ? "/etc/services" : "c:/boot.ini",

0);

I don't have a boot.ini - MSDOS.SYS should be available on all Windows
systems (in text-format, it's binary on pre Windows, try autoexec.bat on
true DOS systems, it will almost always have some content) - but what about
Mac (pre OS X) users? Why not take it from the command line, like for the
binary slurp?

I think it's a nice initiative. Good luck,

--
Martijn
http://www.sereneconcepts.nl

Nov 13 '05 #2
On Mon, 29 Sep 2003 11:40:30 +0200, "Martijn"
<su*********************@hotNOFILTERmail.com> wrote:
/* copies file to memory, returns size in memory and pointer to beginning*/
unsigned char *fslurpb(const char *fname, size_t *size);


Why not work with file handles, this will also clean up your code a bit and
is more consistent with likewise functions.


OTOH it makes usage a bit simpler, though not as flexible. It's just a matter of
moving things around and extending the interface to allow both options.
char **fslurp(const char *fname, int *numread)
{


[code snipped]
if (num_lines + 1 == softlim) {
char **tmp = realloc(retval, (softlim += 5) * sizeof *retval);
if (!tmp)
goto nomem;
retval = tmp;
}


This does appear as unstructured code to me. Altough I have nothing against
goto, IMHO it's unproperly used here.


I'd have to disagree: IMO when goto labels are constrained to this special usage
(as a function "trailer") they are quite useful and almost always make the code
simpler. Without such "generic" trailers, you would inevitably either have to
duplicate code (or hide it behind a macro) or scatter "special cases" all over
the place and thus making it a tiny bit less scalable, two alternatives I'm
trying to avoid. Besides, I like the idea of a function having a single exit
point.

if (!tmp) {
free(retval);
retval = NULL; /* or (void*)0, or just 0 as you use */
free(mem);
/* *numread = 0; <- if you really want to, but on error
nothing has to be defined as long as
you properly document this */
break;
}

In this case the labels are dropped, as well as the "goto exit;".
Why not make this special treatment generic for the long run? If there were
another error condition, you would still have to do proper cleanup yet again.
retval[num_lines] = 0; /* we got room for an extra 0 */
You would have to precede this with "if ( retval != NULL )" or something
similar.


Ah yes, to protect against an empty file.

/* slurp the file in */
for (;;) {


I would use a while loop here, which reads until EOF, saves you another goto


Yeah, but that would force me move size_t read outside the loop and away into
the outer block. Besides, gotos are strictly following the already-seen usage
pattern.
void *p;
size_t read;

if (!(p = realloc(mem, size += CHUNK)))
goto nomem;


Why don't you check the filesize (fstat is available for quite some
compilers), so you can make an educated guess on the size and always enlarge
it if need be.


I can't find fstat() in my copy of n869.txt. Is it nonstandard?

Nov 13 '05 #3
"rihad" <ri***@mail.ru> wrote in message
news:8n********************************@4ax.com...
I can't find fstat() in my copy of n869.txt. Is it nonstandard?


No it's not standard. Nor are 'file handles'.

-Mike
Nov 13 '05 #4
rihad wrote:
Why don't you check the filesize (fstat is available for quite some
compilers), so you can make an educated guess on the size and always
enlarge it if need be.


I can't find fstat() in my copy of n869.txt. Is it nonstandard?


As far as I know, it's not - it's POSIX if I'm correct, but I have seen
several compilers that support it. Maybe stat is available. Anyway, it is
just a suggestion. You could always use a macro, that is defined as a
function that return the proper value if such a function is available, or as
0 (or CHUNK) if none is available.

--
Martijn
http://www.sereneconcepts.nl
Nov 13 '05 #5
Martijn wrote:

rihad wrote:
Why don't you check the filesize (fstat is available for quite some
compilers), so you can make an educated guess on the size and always
enlarge it if need be.


I can't find fstat() in my copy of n869.txt. Is it nonstandard?


As far as I know, it's not - it's POSIX if I'm correct,


On clc, "standard" means "the C standard".
POSIX is great, but it's off topic here.

--
pete
Nov 13 '05 #6
>>> I can't find fstat() in my copy of n869.txt. Is it nonstandard?

As far as I know, it's not - it's POSIX if I'm correct,
^^^^^^^^^^^^^^^^^^^^^^^^^^
On clc, "standard" means "the C standard".
POSIX is great, but it's off topic here.


I know - I concurred it's not part of the standard, thus rendering it off
topic -, in both mails the bit concerning fstat was only minor. In the
remainder of the last post I tried to offer a portable (and standard
compliant) way.

Well, hopefully _somebody_ benefits from this all...

--
Martijn
http://www.sereneconcepts.nl
Nov 13 '05 #7

This thread has been closed and replies have been disabled. Please start a new discussion.

Similar topics

3
by: Saqib Ali | last post by:
Hello All, I m not sure if this is the right place to ask for a critique. If not please refer me to another group. Thanks. I would a criqtique of the following website:...
19
by: TC | last post by:
Are there any good sites or forums for a web critique? I went to alt.html.critique and it's pretty dead.
9
by: bowsayge | last post by:
Inspired by fb, Bowsayge decided to write a decimal integer to binary string converter. Perhaps some of the experienced C programmers here can critique it. It allocates probably way too much...
188
by: christopher diggins | last post by:
I have posted a C# critique at http://www.heron-language.com/c-sharp-critique.html. To summarize I bring up the following issues : - unsafe code - attributes - garbage collection -...
39
by: Eric | last post by:
There is a VB.NET critique on the following page: http://www.vb7-critique.741.com/ for those who are interested. Feel free to take a look and share your thoughts. Cheers, Eric. Ps: for those...
0
by: ryjfgjl | last post by:
ExcelToDatabase: batch import excel into database automatically...
0
by: jfyes | last post by:
As a hardware engineer, after seeing that CEIWEI recently released a new tool for Modbus RTU Over TCP/UDP filtering and monitoring, I actively went to its official website to take a look. It turned...
0
by: ArrayDB | last post by:
The error message I've encountered is; ERROR:root:Error generating model response: exception: access violation writing 0x0000000000005140, which seems to be indicative of an access violation...
1
by: PapaRatzi | last post by:
Hello, I am teaching myself MS Access forms design and Visual Basic. I've created a table to capture a list of Top 30 singles and forms to capture new entries. The final step is a form (unbound)...
1
by: CloudSolutions | last post by:
Introduction: For many beginners and individual users, requiring a credit card and email registration may pose a barrier when starting to use cloud servers. However, some cloud server providers now...
1
by: Defcon1945 | last post by:
I'm trying to learn Python using Pycharm but import shutil doesn't work
1
by: Shællîpôpï 09 | last post by:
If u are using a keypad phone, how do u turn on JavaScript, to access features like WhatsApp, Facebook, Instagram....
0
by: af34tf | last post by:
Hi Guys, I have a domain whose name is BytesLimited.com, and I want to sell it. Does anyone know about platforms that allow me to list my domain in auction for free. Thank you
0
by: Faith0G | last post by:
I am starting a new it consulting business and it's been a while since I setup a new website. Is wordpress still the best web based software for hosting a 5 page website? The webpages will be...

By using Bytes.com and it's services, you agree to our Privacy Policy and Terms of Use.

To disable or enable advertisements and analytics tracking please visit the manage ads & tracking page.