I just whipped up this code in C# using regular expressions.
I found 1037 instances of "Relf" in Cola.txt in 15 milliseconds:
#region Using directives
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
#endregion
namespace ConsoleApplication1
{
    /// <summary>
    /// Console benchmark: counts the regular-expression matches of "Relf" in a
    /// text file and reports how long the search took.
    /// </summary>
    public class FileClass
    {
        /// <summary>
        /// Reads the sample file, counts the matches of the pattern "Relf", then prints
        /// the match count followed by the elapsed search time in whole milliseconds.
        /// </summary>
        public static void Main()
        {
            String s = ReadFromFile("c:\\sample\\Cola.txt");
            Regex rex = new Regex("Relf");

            // Stopwatch is a dedicated high-resolution elapsed-time timer; DateTime.Now
            // is a wall clock whose resolution is too coarse for a search this short.
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
            // Reading Count forces the whole input to be scanned inside the timed region.
            int icount = rex.Matches(s).Count;
            timer.Stop();

            Console.WriteLine("number of relfs " + icount);
            // ElapsedMilliseconds is the total elapsed time. The previous
            // TimeSpan.Milliseconds was only the 0-999 component of the duration.
            Console.WriteLine(timer.ElapsedMilliseconds);
        }

        /// <summary>
        /// Returns the entire content of a text file as one string.
        /// </summary>
        /// <param name="filename">Path of the file to read.</param>
        /// <returns>The complete text of the file.</returns>
        static String ReadFromFile(string filename)
        {
            // The using statement closes the reader even when ReadToEnd throws.
            using (StreamReader reader = File.OpenText(filename))
            {
                return reader.ReadToEnd();
            }
        }
    }
}
Jeff_Relf wrote:
Jeff_Relf ( 30, 10.28 P, A Individual.NET, X ),
Hi Kelsey. Oops, my last attempt only searched ten thousand words;
this time I'm searching 100 thousand words.
I'm using Cola.TXT as my test file, which is about 4 megs,
and contains over 2,000 of the most recent articles.
My web page is down right now... too much traffic ?
But when it comes up, I hope to put a more recent Cola.TXT here:
( I say, hope, because I'm using NoCharge.COM, free dialup )
http://www.Cotse.NET/users/jeffrelf/Cola.TXT
Only the space character and everything below it ( ASCII 32 and lower ) were considered whitespace.
Here were my latest results:
Searching the first one hundred thousand words in Cola.TXT.
.00167 Seconds to find 12 occurences of Relf .
.00089 Seconds to find 0 occurences of sesquipedalian .
My machine is very slow too, the L2 cache is only 128 KB,
with 256 MB of DRAM and a 40 gig hard disk.
When my web page comes up I'll put the .EXE and .CPP files here:
http://www.Cotse.NET/users/jeffrelf/Search.EXE
http://www.Cotse.NET/users/jeffrelf/Search.CPP
Not including my standard macros, but including the timing code,
this was my main(): <<
// Benchmark driver: reads Cola.TXT into memory, walks it with the tokenizing
// macros, then times one counting pass per search pattern and writes the
// report to Results.TXT.
// NOTE(review): Quad, Secnd, Secnd_Dub, Secs, LnT, LnP, GoToDark, Loop, LoopXx,
// Inc, Next_Tok, Eq, J and LLL come from the author's macro header, which is not
// shown here; every remark about them below is an assumption — TODO confirm.
main() {
  // Presumably Secs divides the performance counter by Secnd_Dub to get seconds.
  QueryPerformanceFrequency( ( Quad * ) & Secnd ); Secnd_Dub = Secnd ;
  FILE * fp = fopen( "Cola.TXT", "r" );
  // Fixed: the message used to name "Word.TXT" although the file opened is Cola.TXT.
  if ( ! fp ) { printf("_ Cola.TXT _ Can't be Read." ); return ; }
  // One byte more than the file length, reserved for the terminating 0 stored below.
  int Sz = filelength( fileno( fp ) ) + 1 ; LnT Ln = { 0 };
  { LnP B = ( LnP ) malloc( Sz );
    // Fixed: the allocation was used without checking that it succeeded.
    if ( ! B ) { fclose( fp ); printf("_ Cola.TXT _ Out of memory." ); return ; }
    // Fixed: read at most Sz - 1 bytes so that B [ Sz ] below can never land
    // one past the end of the allocation. Sz becomes the number of bytes read.
    Sz = fread( B, 1, Sz - 1, fp );
    fclose( fp ); fp = fopen( "Results.TXT", "w" );
    // Fixed: every later fprintf writes to fp, so stop if the report can't be created.
    if ( ! fp ) { printf("_ Results.TXT _ Can't be Written." ); return ; }
    // B is not freed on purpose: Ln receives pointers ( P ) into it, and Ln is
    // still walked by the search loop after this scope ends.
    // Ch and Ch2 are not used directly here; presumably GoToDark / Next_Tok use them.
    B [ Sz ] = 0 ; LnP P = B ; int Ch = * P --, Ch2 ;
    GoToDark
    // Presumably stores the start of each word in Ln, up to 100000 words,
    // leaving early once the terminating 0 has been reached.
    Loop( 100000 ) { Inc ( Ln ) = P ; if ( ! Ch ) break ; Next_Tok }
    // Presumably J is the Loop counter and LLL its limit, so this branch
    // means the file held fewer words than the benchmark needs.
    if ( J < LLL ) {
      fprintf( fp, "Found %d thousand words, needed 100.", J / 1000 );
      fclose( fp ); return ; } }
  fprintf( fp
    , "Searching the first one hundred thousand words in Cola.TXT.\n\n");
  LnP Pats [] = { "Relf", "sesquipedalian" };
  // One timed pass per pattern; presumably LoopXx ( Ln ) visits every stored word as P.
  Loop( sizeof Pats / sizeof * Pats ) { LnP Pat = Pats [ J ];
    double Mark = Secs ;
    int Cnt = 0 ; LoopXx ( Ln ) if ( Eq( P, Pat ) ) Cnt ++ ;
    double Dur = Secs - Mark ;
    char SecStr [ 99 ] ; sprintf( SecStr, "%1.5f" , Dur );
    // Adding ( * SecStr == '0' ) skips a leading zero, so 0.00167 prints as .00167
    fprintf( fp, "%s Seconds to find %d occurences of %s .\n"
      , SecStr + ( * SecStr == '0' ), Cnt, Pat ); } fclose( fp ); } >>