Joerg Schoen wrote:
spike wrote:
Im writing a program to search for a string in a binary file.
And it works. The problem is: It is sooo slow! how can i make
it faster? It takes 27 seconds just to search a 5 meg file.
I guess it has something to do with the strequal() function...
First of all: Your strequal function should be named differently,
since the standard reserves any name with "str..." for itself.
Secondly, it is basically a slow implementation of "strcmp"
.... snip ... fgetpos(fp, &pos);
pos = pos-(STRSIZE-1);
fsetpos(fp, &pos);
That is what really kills you! Read 4 bytes, check them, rewind 3
bytes and start again.
Find something better here!
Here's a revision to what I published to the OP's original
request. He must have not seen that. Some slight changes have
been made to ensure that we can have binary file access, which is
not necessarily possible via stdin. This never backtracks.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
/* The difference between a binary and a text file, on read,
is the conversion of end-of-line delimiters. What those
delimiters are does not affect the action. There is also
a potential problem with 0x1a EOF markers in DOS/Windoze.
This is a version of Knuth-Morris-Pratt algorithm. The
point of using this is to avoid any backtracking in file
reading, and thus avoiding any use of buffer arrays.
*/
size_t chrcount; /* debuggery, count of input chars, zeroed */
/* --------------------- */
/* Almost straight out of Sedgewick */
/* The next array indicates what index in id should next be
compared to the current char. Once the (lgh - 1)th char
has been successfully compared, the id has been found.
The array is formed by comparing id to itself. */
void initnext(int *next, const char *id, int lgh)
{
int i, j;
assert(lgh > 0);
next[0] = -1; i = 0; j = -1;
while (i < lgh) {
while ((j >= 0) && (id[i] != id[j])) j = next[j];
i++; j++;
next[i] = j;
}
#if (0)
for (i = 0; i < lgh; i++)
printf("id[%d] = '%c' next[%d] = %d\n",
i, id[i], i, next[i]);
#endif
} /* initnext */
/* --------------------- */
/* reads f without rewinding until either EOF or *marker
has been found. Returns EOF if not found. At exit the
last matching char has been read, and no further. */
int kmpffind(const char *marker, int lgh, int *next, FILE *f)
{
int j; /* char position in marker to check */
int ch; /* current char */
assert(lgh > 0);
j = 0;
while ((j < lgh) && (EOF != (ch = getc(f)))) {
chrcount++;
while ((j >= 0) && (ch != marker[j])) j = next[j];
j++;
}
return ch;
}
/* --------------------- */
int binfsrch(const char *marker, FILE *f)
{
int *next;
int lgh;
int ch;
int items; /* count of markers found */
if (!(next = malloc(strlen(marker) * sizeof *next))) {
puts("No memory");
exit(EXIT_FAILURE);
}
else {
lgh = strlen(marker);
initnext(next, marker, lgh);
items = 0;
while (EOF != kmpffind(marker, lgh, next, f)) {
/* found it, dump the following printable chars */
items++;
printf("%d %s : \"", items, marker);
while (isprint(ch = getc(f))) {
chrcount++;
putchar(ch);
}
puts("\"");
if (EOF == ch) break;
else chrcount++;
}
return items;
}
} /* binfsrch */
/* --------------------- */
int main(int argc, char **argv)
{
FILE *f;
f = stdin;
if (3 == argc) {
if (!(f = fopen(argv[2], "rb"))) {
printf("Can't open %s\n", argv[2]);
exit(EXIT_FAILURE);
}
argc--;
}
if (2 != argc) {
puts("Usage: binfsrch name [binaryfile]");
puts(" (file defaults to stdin text mode)");
}
else if (binfsrch(argv[1], f)) {
printf("\"%s\" : found\n", argv[1]);
}
else printf("\"%s\" : not found\n", argv[1]);
printf("%lu chars\n", (unsigned long)chrcount);
return 0;
} /* main binfsrch */
--
Chuck F (cb********@yahoo.com) (cb********@worldnet.att.net)
Available for consulting/temporary embedded and systems.
<http://cbfalconer.home.att.net> USE worldnet address!