default schrieb:
Hello All.
I was looking around for a function like strtok() which would tokenize
on
the complete list of delimiters, rather than tokenize on *any* of the
delimiters
in the group. I ended up just rolling a function. Thought I would
post it here
for discussion.
Thanks.
---------------------------------------------------------
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
//
// pass a buffer, a substring, and a current
// position to start looking within the buffer
//
Apart from these comments, your code is C90 or C99.
With these comments, your code is only C99 -- which is rather
unusual and unnecessary.
Even if you intended C99, then // comments are dangerous
for usenet messages because line breaks may change the source's
meaning.
To the beef:
Your comment mostly states the obvious but does not say what
the function does or what it returns
char *mstrtok (char *buf, char *delim, int curpos)
Neither buf nor delim are changed, so make them const char*.
The curpos _after_ searching for the delim string may be
of some interest.
By allowing
int *pCurrpos
or
size_t *pCurrpos
to be adjusted by mstrtok(), you do not have to calculate the
information twice.
{
char *srcp; // src pointer to start searching from
char *ptr; // return val from strstr()
char *freeret; // malloc() this if something to return
int len = 0; // holds length of word found between delims
int malsize = 0; // size of buffer space to malloc (len + 1)
If malsize only ever holds len+1, then it is rather unnecessary.
>
// get a starting point by adding the src addr and curent search
position
Here the line broke and your code became uncompileable.
srcp = buf;
srcp += curpos;
Nice, but why not write
srcp = &buf[curpos];
// make sure src ptr is inside the buffer space
if (srcp <= (buf + strlen (buf)))
Why not use
if (curpos <= strlen(buf))
instead?
{
// find the next delim occurance in the srcp
Useless commment
ptr = strstr (srcp, delim);
// was delim found ?
if (ptr)
{
// adjust by subtracting the source address from the ptr
address
len = (ptr - srcp);
}
else
{
// if not there, then len is the end of string minus the
current src address
Useless comment
printf (" debug.no strstr()\n");
len = (buf + strlen (buf)) - srcp;
why not use
len = strlen(buf) - curpos;
instead?
}
// setup malloc buffer size and make room for NULL 0 at end of
string
malsize = len + 1;
freeret = malloc (malsize);
// did malloc fail?
if (freeret)
{
memset (freeret, 0x0, malsize);
You could calloc() instead or even manually add the string terminator.
strncpy (freeret, srcp, len);
printf (" len|%s %d\n", freeret, len);
}
else
{
// error - malloc failed. should we exit(1) here?
printf ("**error mstrtok(): unable to malloc %d bytes\n",
len);
Error output ought to go to stderr().
}
}
else
{
// did not find another substring
printf (" debug.nosub\n") ;
freeret = NULL;
It is IMO clearer to initialise freeret to NULL at the
beginning.
}
return (freeret);
}
//
// pass a string to the mstrtok function and print out results
//
int main (void)
{
char buf[] = "foo:.:bar:.:b: az:.:STARToiow: :.ii:::eeerrEND ";
char delim[] = ":.:";
If you do not want to change buf or delim, use const char instead.
int cur = 0;
char *mptr;
// can loop forever, look for NULL mptr from mstrtok()
do
{
// call mstrtok with buffer, delimiter and current position
// for looking in the string
mptr = mstrtok (buf, delim, cur);
// was a pointer returned?
if (mptr)
{
// keep track of current pos for next call.
cur += strlen (mptr) + strlen (delim);
// do something with newly malloc'd mptr and then free it
printf ("debug.mptr|%s \n", mptr);
free (mptr);
Note: With free(mptr), the bit pattern in mptr (its representation)
may become a trap representation. Using mptr from here on may have
bad consequences.
}
else
{
// a NULL pointer was returned. no other substrings to find
printf ("debug.mptr|nu ll\n");
}
// debug
printf ("debug.cur|%d\ n", cur);
}
while (mptr != NULL);
Such as here.
>
return (0);
return is not a function, so return 0 suffices.
}
The same programme, using size_t instead of int and containing
the above corrections:
,---
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*
* Extract string between &buf[curpos] and first occurrence of
* delim or end of string; return extracted string in separately
* malloc()ed storage; return NULL on malloc() failure or curpos
* strlen(buf)
*/
static char *mstrtok (const char *buf,
const char *delim,
size_t curpos)
{
char *freeret = NULL; /* malloc() this if something to return */
size_t buf_strlen = strlen (buf);
/* make sure src ptr is inside the buffer space*/
if (curpos <= buf_strlen)
{
const char *srcp = &buf[curpos];
/* find the next delim occurance in the srcp*/
char *ptr = strstr (srcp, delim);
size_t len = 0; /* holds length of word found between delims */
/* was delim found ?*/
if (NULL != ptr)
{
len = (ptr - srcp);
}
else
{
fprintf (stderr, " debug.no strstr()\n");
len = buf_strlen - curpos;
}
freeret = malloc(len + 1);
if (NULL != freeret)
{
strncpy (freeret, srcp, len);
freeret[len] = '\0';
printf (" len|%s %lu\n", freeret, (unsigned long) len);
}
else
{
/* error - malloc failed. should we exit(1) here?*/
fprintf (stderr, "**error mstrtok():"
" unable to malloc %lu bytes\n",
(unsigned long) len);
}
}
else
{
/* did not find another substring */
fprintf (stderr, " debug.nosub\n") ;
}
return (freeret);
}
/* */
/* pass a string to the mstrtok function and print out results */
/* */
int main (void)
{
const char buf[] = "foo:.:bar:.:b: az:.:STARToiow: :.ii:::eeerrEND ";
const char delim[] = ":.:";
size_t delim_strlen = strlen(delim);
size_t cur = 0;
char *mptr = NULL;
/* can loop forever, look for NULL mptr from mstrtok() */
do
{
free (mptr);
mptr = mstrtok (buf, delim, cur);
/* was a pointer returned? */
if (NULL != mptr)
{
/* keep track of current pos for next call. */
cur += strlen (mptr) + delim_strlen;
/* do something with newly malloc'd mptr and then free it */
printf ("debug.mptr|%s \n", mptr);
}
else
{
/* a NULL pointer was returned. no other substrings to find */
printf ("debug.mptr|nu ll\n");
}
/* debug */
printf ("debug.cur|%lu \n", (unsigned long)cur);
}
while (mptr != NULL);
return (0);
}
`---
Note: free(NULL); is well defined (and has no effect).
Note that if we changed that to
static char *mstrtok (const char *buf,
const char *delim,
size_t *pCurrpos);
then we could change the the do--while loop to
do
{
free (mptr);
mptr = mstrtok (buf, delim, &cur);
/* was a pointer returned? */
if (NULL != mptr)
{
/* do something with newly malloc'd mptr and then free it */
printf ("debug.mptr|%s \n", mptr);
}
....
Tests for corner cases you omitted:
":.:"
":.:.:"
"foo:.::.:b ar"
These are important and should be considered.
Cheers
Michael
--
E-Mail: Mine is an /at/ gmx /dot/ de address.