Hi, here's an implementation of file slurping in C, like @lines = <FILE> in perl
or $lines = file("/path/to/file") in php. There's also a main() testing it out.
All suggestions are very welcome! Thank you.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define TEST_MAIN
#define CHUNK 4096
#define SLURP_TEXT 1
#define SLURP_BINARY 2
/* returns NULL terminated array of strings from text file, and array's size */
char **fslurp(const char *fname, int *);
/* copies file to memory, returns size in memory and pointer to beginning */
unsigned char *fslurpb(const char *fname, size_t *size);
/* frees whatever fslurp() returned */
void ffree(char **list);
/* frees whatever fslurpb() returned */
void ffreeb(unsigned char *mem);
static void *slurp(const char *, int, size_t *);
char **fslurp(const char *fname, int *numread)
{
char **retval = 0;
char *mem = 0, *p;
size_t offt = 0;
size_t num_lines = 0, softlim = 1;
mem = slurp(fname, SLURP_TEXT, &offt);
/* got the file, now set up the pointers */
for (p = mem; p < mem + offt; ) {
char *q;
if (num_lines + 1 == softlim) {
char **tmp = realloc(retval, (softlim += 5) * sizeof *retval);
if (!tmp)
goto nomem;
retval = tmp;
}
retval[num_lines++] = p;
if (!(q = memchr(p, '\n', mem + offt - p - 1)))
q = mem + offt;
*q = 0;
p = q + 1;
}
retval[num_lines] = 0; /* we got room for an extra 0 */
goto exit;
nomem:
free(retval);
free(mem);
retval = 0;
exit:
if (numread)
*numread = num_lines;
return retval;
}
unsigned char *fslurpb(const char *fname, size_t *size)
{
return slurp(fname, SLURP_BINARY, size);
}
void ffree(char **list)
{
if (list) {
free(list[0]);
free(list);
}
}
void ffreeb(unsigned char *mem)
{
free(mem);
}
static void *slurp(const char *fname, int mode, size_t *numread)
{
unsigned char *mem = 0;
size_t offt = 0, size = 0;
const char *realmode;
FILE *fp;
switch (mode) {
case SLURP_TEXT:
realmode = "r";
break;
case SLURP_BINARY:
default:
realmode = "rb";
break;
}
if ((fp = fopen(fname, realmode)) == 0)
goto exit;
/* slurp the file in */
for (;;) {
void *p;
size_t read;
if (!(p = realloc(mem, size += CHUNK)))
goto nomem;
mem = p;
read = fread(mem + offt, 1, CHUNK, fp);
offt += read;
if (read < CHUNK)
goto exit;
}
nomem:
free(mem);
mem = 0;
exit:
if (fp)
fclose(fp);
*numread = offt;
return mem;
}
#ifdef TEST_MAIN
/* really lame */
static int is_unix(void)
{
int retval = 0;
FILE *fp = fopen("/bin/sh", "r");
if (fp) {
retval = 1;
fclose(fp);
} else {
retval = (getenv("LOGNAME") != 0);
}
return retval;
}
static void list_dump(char **list)
{
if (!list) {
fputs("(null)", stderr);
} else {
int n;
for (n = 1 ; *list; list++, n++) {
fprintf(stderr, "%3d) %s\n", n, *list);
}
}
}
int main(int argc, char **argv)
{
/* test text slurp */
{
char **lines = fslurp(is_unix() ? "/etc/services" : "c:/boot.ini", 0);
list_dump(lines);
ffree(lines);
}
/* test binary slurp */
{
const char *fname = argv[0];
size_t num;
void *mem = fslurpb(fname, &num);
if (!mem) {
char buf[FILENAME_MAX + 32];
sprintf(buf, "%s could not be read", fname);
perror(buf);
} else {
fprintf(stderr, "Read %s into memory, size: %lu\n", fname,
(unsigned long) num);
ffreeb(mem);
}
}
getchar();
return 0;
}
#endif /* TEST_MAIN */ 6 1704
"rihad" <ri***@mail.ru> wrote in message
news:b9********************************@4ax.com... Hi, here's an implementation of file slurping in C, like @lines = <FILE>
in perl or $lines = file("/path/to/file") in php. There's also a main() testing it
out. All suggestions are very welcome! Thank you.
#include <stdio.h> #include <stdlib.h> #include <string.h>
#define TEST_MAIN
#define CHUNK 4096
#define SLURP_TEXT 1 #define SLURP_BINARY 2
/* returns NULL terminated array of strings from text file, and array's
size */ char **fslurp(const char *fname, int *);
/* copies file to memory, returns size in memory and pointer to beginning
*/ unsigned char *fslurpb(const char *fname, size_t *size);
Why not work with file handles, this will also clean up your code a bit and
is more consistent with likewise functions. static void *slurp(const char *, int, size_t *);
Prototyping all functions could be nicer, even if you are not
forward-referencing them. This may also be a good place to put your
comments. Of course eventually the other function prototypes will be put in
an .h file.
char **fslurp(const char *fname, int *numread) {
[code snipped]
if (num_lines + 1 == softlim) { char **tmp = realloc(retval, (softlim += 5) * sizeof *retval); if (!tmp) goto nomem; retval = tmp; }
This does appear as unstructured code to me. Altough I have nothing against
goto, IMHO it's unproperly used here.
if (!tmp) {
free(retval);
retval = NULL; /* or (void*)0, or just 0 as you use */
free(mem);
/* *numread = 0; <- if you really want to, but on error
nothing has to be defined as long as
you properly document this */
break;
}
In this case the labels are dropped, as well as the "goto exit;".
retval[num_lines] = 0; /* we got room for an extra 0 */
You would have to precede this with "if ( retval != NULL )" or something
similar.
static void *slurp(const char *fname, int mode, size_t *numread) { unsigned char *mem = 0; size_t offt = 0, size = 0; const char *realmode; FILE *fp;
switch (mode) { case SLURP_TEXT: realmode = "r"; break; case SLURP_BINARY: default: realmode = "rb"; break; }
This may be more scalable, but this may be more readable:
if ( mode == SLURP_TEXT )
realmode = "r";
else
realmode = "rb";
Then again, if you'd to work with filehandles, this wouldn't be an issue, as
well as all the error checking and having to close the file (and checking
whether it should be closed). if ((fp = fopen(fname, realmode)) == 0) goto exit;
why not:
if ( ... )
return ( NULL ); /* slurp the file in */ for (;;) {
I would use a while loop here, which reads until EOF, saves you another goto
void *p; size_t read;
if (!(p = realloc(mem, size += CHUNK))) goto nomem;
Why don't you check the filesize (fstat is available for quite some
compilers), so you can make an educated guess on the size and always enlarge
it if need be.
mem = p; read = fread(mem + offt, 1, CHUNK, fp); offt += read; if (read < CHUNK) goto exit;
Same thing on the goto as before
}
nomem: free(mem); mem = 0; exit: if (fp) fclose(fp); *numread = offt; return mem; }
[code snipped] int main(int argc, char **argv) { /* test text slurp */ { char **lines = fslurp(is_unix() ? "/etc/services" : "c:/boot.ini",
0);
I don't have a boot.ini - MSDOS.SYS should be available on all Windows
systems (in text-format, it's binary on pre Windows, try autoexec.bat on
true DOS systems, it will almost always have some content) - but what about
Mac (pre OS X) users? Why not take it from the command line, like for the
binary slurp?
I think it's a nice initiative. Good luck,
--
Martijn
http://www.sereneconcepts.nl
On Mon, 29 Sep 2003 11:40:30 +0200, "Martijn"
<su*********************@hotNOFILTERmail.com> wrote: /* copies file to memory, returns size in memory and pointer to beginning*/ unsigned char *fslurpb(const char *fname, size_t *size);
Why not work with file handles, this will also clean up your code a bit and is more consistent with likewise functions.
OTOH it makes usage a bit simpler, though not as flexible. It's just a matter of
moving things around and extending the interface to allow both options. char **fslurp(const char *fname, int *numread) {
[code snipped]
if (num_lines + 1 == softlim) { char **tmp = realloc(retval, (softlim += 5) * sizeof *retval); if (!tmp) goto nomem; retval = tmp; }
This does appear as unstructured code to me. Altough I have nothing against goto, IMHO it's unproperly used here.
I'd have to disagree: IMO when goto labels are constrained to this special usage
(as a function "trailer") they are quite useful and almost always make the code
simpler. Without such "generic" trailers, you would inevitably either have to
duplicate code (or hide it behind a macro) or scatter "special cases" all over
the place and thus making it a tiny bit less scalable, two alternatives I'm
trying to avoid. Besides, I like the idea of a function having a single exit
point.
if (!tmp) { free(retval); retval = NULL; /* or (void*)0, or just 0 as you use */ free(mem); /* *numread = 0; <- if you really want to, but on error nothing has to be defined as long as you properly document this */ break; }
In this case the labels are dropped, as well as the "goto exit;".
Why not make this special treatment generic for the long run? If there were
another error condition, you would still have to do proper cleanup yet again. retval[num_lines] = 0; /* we got room for an extra 0 */ You would have to precede this with "if ( retval != NULL )" or something similar.
Ah yes, to protect against an empty file. /* slurp the file in */ for (;;) {
I would use a while loop here, which reads until EOF, saves you another goto
Yeah, but that would force me move size_t read outside the loop and away into
the outer block. Besides, gotos are strictly following the already-seen usage
pattern. void *p; size_t read;
if (!(p = realloc(mem, size += CHUNK))) goto nomem;
Why don't you check the filesize (fstat is available for quite some compilers), so you can make an educated guess on the size and always enlarge it if need be.
I can't find fstat() in my copy of n869.txt. Is it nonstandard?
"rihad" <ri***@mail.ru> wrote in message
news:8n********************************@4ax.com... I can't find fstat() in my copy of n869.txt. Is it nonstandard?
No it's not standard. Nor are 'file handles'.
-Mike
rihad wrote: Why don't you check the filesize (fstat is available for quite some compilers), so you can make an educated guess on the size and always enlarge it if need be.
I can't find fstat() in my copy of n869.txt. Is it nonstandard?
As far as I know, it's not - it's POSIX if I'm correct, but I have seen
several compilers that support it. Maybe stat is available. Anyway, it is
just a suggestion. You could always use a macro, that is defined as a
function that return the proper value if such a function is available, or as
0 (or CHUNK) if none is available.
--
Martijn http://www.sereneconcepts.nl
Martijn wrote: rihad wrote: Why don't you check the filesize (fstat is available for quite some compilers), so you can make an educated guess on the size and always enlarge it if need be.
I can't find fstat() in my copy of n869.txt. Is it nonstandard?
As far as I know, it's not - it's POSIX if I'm correct,
On clc, "standard" means "the C standard".
POSIX is great, but it's off topic here.
--
pete
>>> I can't find fstat() in my copy of n869.txt. Is it nonstandard? As far as I know, it's not - it's POSIX if I'm correct,
^^^^^^^^^^^^^^^^^^^^^^^^^^
On clc, "standard" means "the C standard". POSIX is great, but it's off topic here.
I know - I concurred it's not part of the standard, thus rendering it off
topic -, in both mails the bit concerning fstat was only minor. In the
remainder of the last post I tried to offer a portable (and standard
compliant) way.
Well, hopefully _somebody_ benefits from this all...
--
Martijn http://www.sereneconcepts.nl This thread has been closed and replies have been disabled. Please start a new discussion. Similar topics
by: Saqib Ali |
last post by:
Hello All,
I m not sure if this is the right place to ask for a critique. If not
please refer me to another group. Thanks.
I would a criqtique of the following website:...
|
by: TC |
last post by:
Are there any good sites or forums for a web critique? I went to
alt.html.critique and it's pretty dead.
|
by: bowsayge |
last post by:
Inspired by fb, Bowsayge decided to write a decimal integer
to binary string converter. Perhaps some of the experienced
C programmers here can critique it. It allocates probably
way too much...
|
by: christopher diggins |
last post by:
I have posted a C# critique at
http://www.heron-language.com/c-sharp-critique.html. To summarize I bring up
the following issues :
- unsafe code
- attributes
- garbage collection
-...
|
by: Eric |
last post by:
There is a VB.NET critique on the following page:
http://www.vb7-critique.741.com/
for those who are interested. Feel free to take a look and share your
thoughts.
Cheers, Eric.
Ps: for those...
|
by: ryjfgjl |
last post by:
ExcelToDatabase: batch import excel into database automatically...
|
by: jfyes |
last post by:
As a hardware engineer, after seeing that CEIWEI recently released a new tool for Modbus RTU Over TCP/UDP filtering and monitoring, I actively went to its official website to take a look. It turned...
|
by: ArrayDB |
last post by:
The error message I've encountered is; ERROR:root:Error generating model response: exception: access violation writing 0x0000000000005140, which seems to be indicative of an access violation...
|
by: PapaRatzi |
last post by:
Hello,
I am teaching myself MS Access forms design and Visual Basic. I've created a table to capture a list of Top 30 singles and forms to capture new entries. The final step is a form (unbound)...
|
by: CloudSolutions |
last post by:
Introduction:
For many beginners and individual users, requiring a credit card and email registration may pose a barrier when starting to use cloud servers. However, some cloud server providers now...
|
by: Defcon1945 |
last post by:
I'm trying to learn Python using Pycharm but import shutil doesn't work
|
by: Shællîpôpï 09 |
last post by:
If u are using a keypad phone, how do u turn on JavaScript, to access features like WhatsApp, Facebook, Instagram....
|
by: af34tf |
last post by:
Hi Guys, I have a domain whose name is BytesLimited.com, and I want to sell it. Does anyone know about platforms that allow me to list my domain in auction for free. Thank you
|
by: Faith0G |
last post by:
I am starting a new it consulting business and it's been a while since I setup a new website. Is wordpress still the best web based software for hosting a 5 page website? The webpages will be...
| |