By using this site, you agree to our updated Privacy Policy and our Terms of Use. Manage your Cookies Settings.
454,499 Members | 1,751 Online
Bytes IT Community
+ Ask a Question
Need help? Post your question and get tips & solutions from a community of 454,499 IT Pros & Developers. It's quick & easy.

C program to count occurrences of substrings in strings

P: n/a
JD
Hi guys

I'm trying to write a program that counts the occurrences of HTML tags
in a text file. This is what I have so far:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MB 1048576

int CountString(char *, char *);

/*
 * Count opening and closing table, tr and td tags in the text file named
 * by argv[1] and print one running total per tag as "count[i] = n".
 *
 * Opening tags are searched for without their closing '>' so that tags
 * carrying attributes (e.g. <table width="100%">) are still matched.
 *
 * Returns EXIT_SUCCESS once the totals have been printed, and EXIT_FAILURE
 * when no file name was given, the file could not be opened, or a read
 * error occurred.
 */
int main(int argc, char **argv)
{
    /* static: a 1 MiB automatic array can exhaust the default stack. */
    static char buf[MB];
    /* Index i of this table corresponds to count[i] in the output. */
    static char *const tags[] = {
        "<table", "</table>", "<tr", "</tr>", "<td", "</td>"
    };
    enum { NTAGS = sizeof tags / sizeof tags[0] };
    int count[NTAGS] = {0};
    FILE *f;
    char *name;
    char *p;
    int i;

    if (argc < 2) {
        fprintf(stderr, "You need to specify a file on the command line\n");
        return EXIT_FAILURE;
    }

    name = argv[1];

    if ((f = fopen(name, "r")) == NULL) {
        fprintf(stderr, "Couldn't open '%s' for reading!\n", name);
        return EXIT_FAILURE;
    }

    /*
     * The file is processed one line at a time. A line longer than MB - 1
     * characters is delivered by fgets in several pieces, so a tag that
     * straddles such a split is not counted.
     */
    while (fgets(buf, MB, f) != NULL) {
        /* Drop the trailing newline, if this piece has one. */
        if ((p = strrchr(buf, '\n')) != NULL) { *p = '\0'; }

        for (i = 0; i < NTAGS; i++) {
            count[i] += CountString(buf, tags[i]);
        }
    }

    /* fgets returns NULL for both end-of-file and error; tell them apart. */
    if (ferror(f)) {
        fprintf(stderr, "Error while reading '%s'\n", name);
        fclose(f);
        return EXIT_FAILURE;
    }

    for (i = 0; i < NTAGS; i++) {
        printf("count[%d] = %d\n", i, count[i]);
    }

    fclose(f);
    return EXIT_SUCCESS;
}

/*
 * Intended to return the number of times str occurs in buf.
 *
 * NOTE(review): this is the version as posted in the question and it does
 * not do that. The comparison inside the loop always looks at the start of
 * buf, so the result is either 0 (buf does not begin with str) or the
 * number of loop iterations (buf begins with str).
 */
int CountString(char *buf, char *str)
{
int length = strlen(str);
char *p = buf;
int count = 0;

/* Step p through every position that still has `length` characters left. */
while (strlen(p) >= length) {
/* BUG: compares at buf, which never moves; p is advanced but never examined. */
if (strncmp(buf, str, length) == 0) { count++; }
p++;
}

return count;
}

If I run it on this test page:
<html>
<head>
<title>Test</title>
</head>
<body>

<table width="100%" border="1" cellspacing="0" cellpadding="0">
<tr>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
</table>
</body>
</html>

It gives:

count[0] = 59
count[1] = 1
count[2] = 0
count[3] = 0
count[4] = 0
count[5] = 0

Which is clearly not correct. Can anyone give me any pointers as to what
I'm doing wrong?

Thanks
Nov 15 '05 #1
Share this Question
Share on Google+
1 Reply


P: n/a
JD wrote:
/*
 * Intended to return the number of times str occurs in buf.
 *
 * NOTE(review): this is the quoted version from the question and it does
 * not do that. The comparison inside the loop always looks at the start of
 * buf, so the result is either 0 (buf does not begin with str) or the
 * number of loop iterations (buf begins with str).
 */
int CountString(char *buf, char *str)
{
int length = strlen(str);
char *p = buf;
int count = 0;

/* Step p through every position that still has `length` characters left. */
while (strlen(p) >= length) {
/* BUG: compares at buf, which never moves; p is advanced but never examined. */
if (strncmp(buf, str, length) == 0) { count++; }
p++;
}

return count;
}

If I run it on this test page:
<html>
<head>
<title>Test</title>
</head>
<body>

<table width="100%" border="1" cellspacing="0" cellpadding="0">
<tr>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
</table>
</body>
</html>

It gives:

count[0] = 59
count[1] = 1
count[2] = 0
count[3] = 0
count[4] = 0
count[5] = 0

Which is clearly not correct. Can anyone give me any pointers as to what
I'm doing wrong?

Thanks


You might want to try:

/*
 * Count the occurrences of str in buf.
 *
 * Every starting position in buf is tested, so overlapping occurrences are
 * each counted (e.g. "aa" occurs 3 times in "aaaa").
 *
 * buf: NUL-terminated string to search.
 * str: NUL-terminated pattern to look for.
 *
 * Returns the number of positions in buf at which str begins. An empty
 * pattern, or a pattern longer than buf, yields 0.
 */
int CountString(char *buf, char *str)
{
    size_t length = strlen(str);
    /* Characters left from p to the end of buf; kept in step with p so
       that strlen is not re-run on every iteration. */
    size_t remaining = strlen(buf);
    char *p = buf;
    int count = 0;

    /* With an empty pattern the loop condition below could never become
       false and p would run off the end of buf. */
    if (length == 0) { return 0; }

    while (remaining >= length) {
        /* Compare at p, the moving position, not at buf, which stays put. */
        if (strncmp(p, str, length) == 0) { count++; }
        p++;
        remaining--;
    }

    return count;
}

You only advanced 'p'; 'buf' remains at the same location in the
string, so strncmp kept comparing against the start of the line.
I personally prefer:

/* Same scan using buf itself as the moving position: buf is the function's
   own copy of the pointer, so advancing it does not affect the caller. */
while (strlen(buf) >= length) {
if (strncmp(buf, str, length) == 0) { count++; }
buf++;
}

The 'p' pointer is not strictly necessary.

Nov 15 '05 #2

This discussion thread is closed

Replies have been disabled for this discussion.