473,780 Members | 2,243 Online
Bytes | Software Development & Data Engineering Community
+ Post

Home Posts Topics Members FAQ

word count problem

3 New Member
Can anyone help me add a function that will count the occurrences of each word in an input file? Here's the code I have so far; it counts the number of characters, words, and lines, but I also need the number of occurrences of each word.

Expand|Select|Wrap|Line Numbers
  1. #include <fstream>  
  2. #include <iostream> 
  3. #include <string>   
  4. #include <cstdlib> 
  5.  
  6. using namespace std; 
  7.  
  8.  
  9.  
  10. string getInputFileName(); // a function to prompt for the complete file name
  11.  
  12. int numCharsInFile( ifstream &in, int &numLines ); // a function to count the
  13.                                                    //    number of characters and
  14.                                                    //    lines in a text file
  15.  
  16. int numWordsInFile( ifstream &in, int &numWords ); // a function to count words in file
  17.  
  18.  
  19.  
  20.  main ()
  21. {
  22.  
  23.     char c;
  24.   int nLines,          // number of lines in the text file
  25.       nChars,          // number of characters in the text file
  26.       avgCharsPerLine, // average number of characters per line
  27.       nWords;          // number of words in the text file
  28.  
  29.  
  30.   ifstream inFile; // handle for the input text file
  31.  
  32.   string fileName; // complete file name including the path
  33.  
  34.   fileName = getInputFileName(); // prompt and obtain the full file name
  35.  
  36.   inFile.open(fileName.c_str()); // try to open the file
  37.  
  38.   if( !inFile.is_open() )    // test for unsuccessfull file opening
  39.    {
  40.      cerr << "Cannot open file: " << fileName << endl << endl;
  41.      exit (0);
  42.    }
  43.  
  44.  
  45.   nChars = numCharsInFile( inFile, nLines ); // determine the number of lines
  46.                                             //    and characters in the file
  47.   nWords = numWordsInFile( inFile, nWords); // determine the number of words
  48.  
  49.   avgCharsPerLine = nChars / nLines;
  50.  
  51.  
  52.   cout << "The number of characters in the file: " << fileName
  53.        << " is = " << nChars << endl << endl;
  54.  
  55.   cout << "The number of lines in the file: " << fileName
  56.        << " is = " << nLines << endl << endl;
  57.  
  58.  
  59.   cout << "The number of Words in the file: " << fileName
  60.        << " is = " << nWords << endl << endl;
  61.  
  62.   cout << "The average number of characters per line in the text file: "
  63.        << fileName << " is: " << avgCharsPerLine << endl << endl;
  64.     cin>>c;
  65.   inFile.close(); // close the input file
  66.  
  67. }
  68.  
  69.  
  70.  
  // Prompts on standard output for the fully qualified name of the input
  // file and reads the answer from standard input.
  //
  // The whole input line is taken, so names and paths containing blanks
  // are accepted. Leading whitespace, and trailing blanks, tabs and
  // carriage returns, are removed.
  //
  // Returns the name that was entered; the string is empty when no input
  // could be read.
  std::string getInputFileName()
  {
      std::cout << "Please enter the fully qualified name of the " << std::endl
                << "input text file (i.e. including the path): ";

      std::string fName; // fully qualified name of the file

      // std::ws discards leading whitespace (including a newline left over
      // from earlier input); getline then reads up to the end of the line
      std::getline(std::cin >> std::ws, fName);

      // drop trailing blanks, tabs and a stray '\r'; when nothing but such
      // characters is present, npos + 1 wraps to 0 and the string is cleared
      fName.erase(fName.find_last_not_of(" \t\r") + 1);

      std::cout << std::endl;

      return fName;
  }
  82.  
  83.  
  84.  
  85.  
  86.  
  // Counts the characters and the lines of an open text file, reading from
  // the stream's current position up to the end of the file.
  //
  // in        the input stream; it is left at end-of-file
  // numLines  receives the number of lines: one per '\n', plus one more for
  //           a final line that is not terminated by '\n'. An empty file is
  //           reported as one (empty) line, so the value is never less
  //           than 1 and is safe to divide by.
  //
  // Returns the number of characters that are neither a blank ' ' nor a
  // newline '\n' (tabs and all other characters are counted).
  int numCharsInFile( std::ifstream &in, int &numLines )
  {
      int numChars = 0;             // characters other than ' ' and '\n'
      int newlineCount = 0;         // number of '\n' seen
      bool endsWithNewline = false; // was the last character read a '\n'?

      char ch; // character holder

      // get() also delivers whitespace, i.e. blanks, tabs and newlines
      while (in.get(ch))
      {
          endsWithNewline = (ch == '\n');

          if (ch == '\n')
              ++newlineCount;
          else if (ch != ' ')
              ++numChars;
      }

      // A trailing '\n' terminates the last line rather than starting a new
      // one, so the extra line is added only when the text does not end with
      // a newline (adding it unconditionally over-counts such files by one).
      numLines = newlineCount;
      if (!endsWithNewline)
          ++numLines;

      return numChars;
  }
  112.  
  113.  
  114.  
  115.  
  // Counts the words in an open text file. A word is a maximal run of
  // non-whitespace characters, so consecutive blanks, blank lines, tabs and
  // a trailing newline do not produce phantom words, and an empty file
  // contains zero words.
  //
  // in      the input stream; its error state is cleared and it is rewound
  //         to the beginning before counting, so it may already have been
  //         read to end-of-file. It is left at end-of-file afterwards.
  // nWords  receives the same count that is returned
  //
  // Returns the number of words in the file.
  int numWordsInFile( std::ifstream &in, int &nWords)
  {
      // a previous full read leaves eof/fail set; reset and start over
      in.clear();
      in.seekg(0, std::ios_base::beg);

      int numWords = 0;

      // operator>> skips leading whitespace and extracts one
      // whitespace-delimited token per call
      std::string word;
      while (in >> word)
          ++numWords;

      nWords = numWords;
      return numWords;
  }
Nov 5 '07 #1
3 2710
scruggsy
147 New Member
Can anyone help me add a function that will count the occurrences of each word in an input file? Here's the code I have so far; it counts the number of characters, words, and lines, but I also need the number of occurrences of each word.
I'm not going to write the code, but think about it: If you're going to count the occurrence of each distinct word, you'll need to remember those words. So as you read words in, you'll need to store them so that subsequent words can be compared to them. How you store them is up to you, as is how you compare them. STL containers can be a big help there. Take a look at std::set if you're not familiar with it; it's a container which can't hold duplicate elements, which lets you easily determine if a word occurs more than once in the file. Another good way to do this might be to just store each word as it is written, then sort the words in alphabetical order: recurring words will appear next to each other, making it easy to count them.
Nov 5 '07 #2
Laharl
849 Recognized Expert Contributor
std::map would probably be better than std::set, since that way you can map strings (words) to integers (frequency counts).
Nov 5 '07 #3
weaknessforcats
9,208 Recognized Expert Moderator Expert
Also keep in mind that the >> operator stops on whitespace. You can fetch one word by:
Expand|Select|Wrap|Line Numbers
  1. sting str;
  2. fileName >> str;
  3.  
Also, you are not required to declare your variables at the beginning of each function. It looks like you have a C background and are just starting out on C++.
Nov 6 '07 #4

Sign in to post your reply or Sign up for a free account.

Similar topics

5
13573
by: The Roys | last post by:
Hi Im doing something wrong in quitting the Word.Application in my VB program. I have General Declarations Dim AppWord As Word.Application Form_Load() Set AppWord = CreateObject("Word.Application")
5
7652
by: jester.dev | last post by:
Hello, I'm learning Python from Python Bible, and having some problems with this code below. When I run it, I get nothing. It should open the file poem.txt (which exists in the current directory) and count number of times any given word appears in the text. #!/usr/bin/python
3
8819
by: agent mike | last post by:
I am trying to count words in a text file. I am using the following code: in_stream.get(c); if(c == ' ' || c == '.' || c == ',') word_count++; and the word count is too low. If I include " .... || c == '\n' the word count is too high as it counts returns of blank lines as a word.
5
12872
by: STeve | last post by:
Hey guys, I currently have a 100 page word document filled with various "articles". These articles are delimited by the Style of the text (IE. Heading 1 for the various titles) These articles will then be converted into HTML and saved. I want to write a parser through vb.net that uses the word object model and was wondering how this could be achieved? The problem i am running into is that i can not test whether the selected text is...
2
13534
by: Mikey | last post by:
Sample VB .NET source code to create mailing labels or customized letters using MS Word MailMerge This VB .NET source code will start MS Word and call methods and set properties in MS Word to execute a MailMerge to create mailing labels or customized letters. A label name known to MS Word MailMerge mailing label wizard may be used or a template file containing the field names Line1 thru Line5 for each record to be printed. If a...
1
7652
by: Adam Faulkner via DotNetMonster.com | last post by:
I had a problem before extracting pages from an existing word document and then inserting the content into a new word document. The following code below works with Microsoft Word 2000 Function ParseWordDoc(ByVal Filename As String) As String Dim sNewFileName As String Dim WordApp As Word.Application = New Word.Application Dim BaseDoc As Word.Document Dim DestDoc As Word.Document
1
4217
by: vmoreau | last post by:
I have a text and I need to find a Word that are not enclosed in paranthesis. Can it be done with a regex? Is someone could help me? I am not familar with regex... Example looking for WORD: (there is a WORD in ( my string WORD )) and * WORD * to (find WORD) and * WORD * Should give me the to word between star (star ar not part of string)
4
12442
by: etuncer | last post by:
Hello All, I have Access 2003, and am trying to build a database for my small company. I want to be able to create a word document based on the data entered through a form. the real question is this: can Access create the document and place it as an OLE object to the relevant table? Any help is greatly appreciated. Ricky
2
4965
by: beanie | last post by:
i am a beginer in c programming and i am trying to Create a Concordance of Word Count for a Text File but my code is not working.pls can anyone helpme out.here is my code: #include <stdio.h> #include <ctype.h> #include <string.h> #include <stdlib.h> struct word { struct word *left; /* tree to the left */ struct word *right; /* tree to the right */ char *WORD;
6
3391
by: boyindie86 | last post by:
Hi I have been fighting with this lump of code for the last week what I am trying to do is that I am passing words into passages of texts, and I want the system to go and find exact word matches only and place square brackets around them e.g word = car PASSAGE: the red car required a lot of care to prevent a scar I only want it to place a square bracket around the word car and ignore the car in "Care" and "scar"
0
10306
Oralloy
by: Oralloy | last post by:
Hello folks, I am unable to find appropriate documentation on the type promotion of bit-fields when using the generalised comparison operator "<=>". The problem is that using the GNU compilers, it seems that the internal comparison operator "<=>" tries to promote arguments from unsigned to signed. This is as boiled down as I can make it. Here is my compilation command: g++-12 -std=c++20 -Wnarrowing bit_field.cpp Here is the code in...
0
10139
jinu1996
by: jinu1996 | last post by:
In today's digital age, having a compelling online presence is paramount for businesses aiming to thrive in a competitive landscape. At the heart of this digital strategy lies an intricately woven tapestry of website design and digital marketing. It's not merely about having a website; it's about crafting an immersive digital experience that captivates audiences and drives business growth. The Art of Business Website Design Your website is...
1
10075
by: Hystou | last post by:
Overview: Windows 11 and 10 have less user interface control over operating system update behaviour than previous versions of Windows. In Windows 11 and 10, there is no way to turn off the Windows Update option using the Control Panel or Settings app; it automatically checks for updates and installs any it finds, whether you like it or not. For most users, this new feature is actually very convenient. If you want to control the update process,...
0
8961
agi2029
by: agi2029 | last post by:
Let's talk about the concept of autonomous AI software engineers and no-code agents. These AIs are designed to manage the entire lifecycle of a software development project—planning, coding, testing, and deployment—without human intervention. Imagine an AI that can take a project description, break it down, write the code, debug it, and then launch it, all on its own.... Now, this would greatly impact the work of software developers. The idea...
1
7485
isladogs
by: isladogs | last post by:
The next Access Europe User Group meeting will be on Wednesday 1 May 2024 starting at 18:00 UK time (6PM UTC+1) and finishing by 19:30 (7.30PM). In this session, we are pleased to welcome a new presenter, Adolph Dupré who will be discussing some powerful techniques for using class modules. He will explain when you may want to use classes instead of User Defined Types (UDT). For example, to manage the data in unbound forms. Adolph will...
0
6727
by: conductexam | last post by:
I have .net C# application in which I am extracting data from word file and save it in database particularly. To store word all data as it is I am converting the whole word file firstly in HTML and then checking html paragraph one by one. At the time of converting from word file to html my equations which are in the word document file was convert into image. Globals.ThisAddIn.Application.ActiveDocument.Select();...
1
4037
by: 6302768590 | last post by:
Hai team i want code for transfer the data from one system to another through IP address by using C# our system has to for every 5mins then we have to update the data what the data is updated we have to send another system
2
3632
muto222
by: muto222 | last post by:
How can i add a mobile payment intergratation into php mysql website.
3
2869
bsmnconsultancy
by: bsmnconsultancy | last post by:
In today's digital era, a well-designed website is crucial for businesses looking to succeed. Whether you're a small business owner or a large corporation in Toronto, having a strong online presence can significantly impact your brand's success. BSMN Consultancy, a leader in Website Development in Toronto offers valuable insights into creating effective websites that not only look great but also perform exceptionally well. In this comprehensive...

By using Bytes.com and its services, you agree to our Privacy Policy and Terms of Use.

To disable or enable advertisements and analytics tracking please visit the manage ads & tracking page.