By using this site, you agree to our updated Privacy Policy and our Terms of Use. Manage your Cookies Settings.
454,280 Members | 1,601 Online
Bytes IT Community
+ Ask a Question
Need help? Post your question and get tips & solutions from a community of 454,280 IT Pros & Developers. It's quick & easy.

Perl code for parsing a text file and output a text file

P: 18
Hi,
This code is in Perl (just a trial, not tested) to parse a text file and output to another file.
It is used to delete lines that are not required and output lines that the user wants, to a new file.

The problems are:
1. This script is giving blank lines in the new file.
2. I have to remove a whole delimiter (#if FOR_LAB) from the text file.
3. The tool should be able to parse the file and remove FOR_LAB’ed code without affecting the surrounding code, i.e.
I have to remove the whole sequence of lines:

Expand|Select|Wrap|Line Numbers
  1. #if FOR_LAB
  2.    DWORD dwSize = file.getFileSize();
  3.    DWORD dwMinFileSize = sizeof(CVTableCfgFileHeader) +
  4.             sizeof(CVTableCfgFileFracHdr) + sizeof(CVTableCfgFileSigDesc);
  5.       if (dwSize < dwMinFileSize)
  6.       {
  7.          Error.Log(__FILE__, "CVT config file %s is too short.", pszFileName);
  8.          return false;     // need at least one signal description to be useful
  9.       }
  10. #endif
not just the first one.



Code:

Expand|Select|Wrap|Line Numbers
  1. #!/usr/local/bin/perl
  2.  
  3. #program to read cvtablebuild.cpp
  4. #and write to cvtablebuild_final.cpp
  5. #
  6.  
  7. $file = '/users/aj/files/cvtablebuild.cpp';
  8. open(INFO, $file);                                   #opens file cvtablebuild.cpp
  9. open(DATA, ">cvtablebuild_final.cpp");     #file to write data to
  10. @lines = <INFO>;                                  #assigns lines to array
  11.  
  12. foreach $line (@lines) #go through each line in file
  13. {
  14.     if ($line ^#if FOR_LAB)
  15.     {
  16.         $line = ~ s/$line//;
  17.     }
  18.  
  19.     DATA == $line;
  20.     print DATA "\n";
  21. }
  22.  
  23. close(INFO);                                             #closes file
  24. close(DATA);
Thank you in advance for any kind of help... :):)


Regards,
Anubhav Jhamb
Feb 21 '08 #1
Share this Question
Share on Google+
13 Replies


P: 89
Expand|Select|Wrap|Line Numbers
  1. #!/usr/bin/perl
  2.  
  3. $file = 'read';
  4. $wrt = 'write';
  5. open(info, $file);
  6. open(data, ">>$wrt");
  7.  
  8. @lines = <info>;
  9.  
  10. foreach $lin (@lines){
  11. #...................................correct the search condition
  12. if($lin =~ /^#FOR_LAB/){
  13. #..................................make the line blank
  14. $lin = "";
  15. }
  16.  
  17. print data "$lin";
  18. }

regards
rohit
Feb 21 '08 #2

nithinpes
Expert 100+
P: 410
Hi,
This code is in Perl (just a trial, not tested) to parse a text file and output to another file.
It is used to delete lines that are not required and output lines that the user wants, to a new file.
The problems are:
1. This script is giving blank lines in the new file.
2. I have to remove a whole delimiter (#if FOR_LAB) from the text file.
3. The tool should be able to parse the file and remove FOR_LAB’ed code without affecting the surrounding code, i.e.
I have to remove the whole sequence of lines:

#if FOR_LAB
DWORD dwSize = file.getFileSize();
DWORD dwMinFileSize = sizeof(CVTableCfgFileHeader) +
sizeof(CVTableCfgFileFracHdr) + sizeof(CVTableCfgFileSigDesc);
if (dwSize < dwMinFileSize)
{
Error.Log(__FILE__, "CVT config file %s is too short.", pszFileName);
return false; // need at least one signal description to be useful
}
#endif
not just the first 1..



Code:

#!/usr/local/bin/perl

#program to read cvtablebuild.cpp
#and write to cvtablebuild_final.cpp
#

$file = '/users/aj/files/cvtablebuild.cpp';
open(INFO, $file); #opens file cvtablebuild.cpp
open(DATA, ">cvtablebuild_final.cpp"); #file to write data to
@lines = <INFO>; #assigns lines to array

foreach $line (@lines) #go through each line in file
{
if ($line ^#if FOR_LAB)
{
$line = ~ s/$line//;
}

DATA == $line;
print DATA "\n";
}

close(INFO); #closes file
close(DATA);

Thankq in advance, for any kind of help... :):)


Regards,
Anubhav Jhamb
If you want to skip blank lines, simply do not print them. Also, you need not print a "\n" after each line, because you are not removing the newlines after reading the input (with chomp); printing an extra "\n" is what adds the unnecessary blank lines.
See the modified foreach loop:
Expand|Select|Wrap|Line Numbers
  1. # Write every line of @lines to DATA except the ones that start with '#if FOR_LAB'.
  2. foreach $line (@lines)
  3. {
  4.     print DATA $line unless ($line =~ /^#if FOR_LAB/);
  5. }
  6.  
This will remove lines beginning with #if FOR_LAB. If your requirement is to remove all lines from '#if FOR_LAB' to '#endif', use the following method:

Expand|Select|Wrap|Line Numbers
  1. # Write @lines to DATA, leaving out every region that runs from a line
  2. # starting with '#if FOR_LAB' through the next line containing '#endif'.
  3. foreach $line (@lines)
  4. {
  5.     # The opening line switches skipping on and is itself dropped.
  6.     if ($line =~ /^#if FOR_LAB/)
  7.     {
  8.         $i = 1;
  9.         next;
  10.     }
  11.  
  12.     # While skipping, every line is dropped; the line holding '#endif' is
  13.     # dropped too and switches skipping back off.
  14.     if ($i == 1)
  15.     {
  16.         $i = 0 if ($line =~ /#endif/);
  17.         next;
  18.     }
  19.  
  20.     print DATA $line;
  21. }
  22.  
Feb 21 '08 #3

KevinADC
Expert 2.5K+
P: 4,059
Expand|Select|Wrap|Line Numbers
  1. #!/usr/bin/perl
  2.  
  3. $file = 'read';
  4. $wrt = 'write';
  5. open(info, $file);
  6. open(data, ">>$wrt");
  7.  
  8. @lines = <info>;
  9.  
  10. foreach $lin (@lines){
  11. #...................................correct the search condition
  12. if($lin =~ /^#FOR_LAB/){
  13. #..................................make the line blank
  14. $lin = "";
  15. }
  16.  
  17. print data "$lin";
  18. }

regards
rohit

You should read the question more carefully.
Feb 21 '08 #4

P: 18
Nithinpes,
Thanx for that loop code, I was just looking for that kind of job to be done. :)

Rohit,
Well, thank you too for correcting the search condition. Now I know about it. :)
Feb 22 '08 #5

P: 18
One more thing I have to look into:
The code should be able to do this for multiple files and directories, recursively.

But I would like to see it working for a single file first, before going further.
I will try writing some code by myself. If I get stuck, I'll post...

Anubhav Jhamb
Feb 22 '08 #6

P: 89
You should read the question more carefully.
What did you find wrong there?
Feb 22 '08 #7

KevinADC
Expert 2.5K+
P: 4,059
What wrong you find there.

I have to remove whole of the sequence of lines:

not just the first 1..
Feb 22 '08 #8

P: 89
I have to remove whole of the sequence of lines:

not just the first 1..
Your question is not clear to me.
write the original text and the modified text.
Feb 25 '08 #9

KevinADC
Expert 2.5K+
P: 4,059
Your question is not clear to me.
write the original text and the modified text.
I did not write a question. It really does not matter anyway, nithinpes already posted a working solution to the original question.
Feb 25 '08 #10

P: 18
Sorry for the repetition of the same post.
Please ignore this post... :):)
Feb 25 '08 #11

P: 18
Your question is not clear to me.
write the original text and the modified text.
Rohit,
KevinADC was just telling you to add lines in the loop to make up the task of removing all the sequence of lines in FOR_LAB'ed code. :):)

For instance the Input File content is as below:

Expand|Select|Wrap|Line Numbers
  1. if (file.open(pszFileName))
  2.    {
  3.                         pby = (char*)file.getBufferPointer();
  4. #if FOR_LAB
  5.    DWORD dwSize = file.getFileSize();
  6.    DWORD dwMinFileSize = sizeof(CVTableCfgFileHeader) +
  7.             sizeof(CVTableCfgFileFracHdr) + sizeof(CVTableCfgFileSigDesc);
  8.       if (dwSize < dwMinFileSize)
  9.       {
  10.          Error.Log(__FILE__, "CVT config file %s is too short.", pszFileName);
  11.          return false;     // need at least one signal description to be useful
  12.       }
  13. #endif
  14.  
  15.       pFileHdr = (CVTableCfgFileHeader*)pby;
  16.       pby += sizeof(CVTableCfgFileHeader);
  17. #if FOR_LAB
  18.       if (strcmp(pFileHdr->szSignature, "CVT-CFG") != 0)
  19.       {
  20.          Error.Log(__FILE__, "CVT cfg file %s has wrong signature.", pszFileName);
  21.          return false;
  22.       }
  23.       if (pFileHdr->version != iCVTCFGVERSION)
  24.       {
  25.          Error.Log(__FILE__, "Wrong version of CVT cfg. Found %d expecting %d in %s",
  26.             pFileHdr->version, iCVTCFGVERSION, pszFileName);
  27.          return false;
  28.       }
  29.       if (pFileHdr->iNumFrac > CVTableIndex::iMAX_FRACTIONS)
  30.       {
  31.          Error.Log(__FILE__, "CVT cfg file %s has too many fractions", pszFileName);
  32.          return false;
  33.       }
  34. #endif
  35.          // --- ok... ready to build the whole table ---
  36.  
  37.    CVTableIndex *pIdx = m_hTblIdx.Init(CVTableIndex::pszTBLIDXNAME, 
  38.                                        SharedMemObjHandle<CVTableIndex>::omCREATE_RO);
  39. #if FOR_LAB
  40.       if (pIdx == NULL)
  41.       {
  42.          Error.Log(__FILE__, "Unable to create CVT table index.");
  43.          return false;
  44.       }
  45. #endif
  46.  
  47.    int iTotalSignalCt = 0;
  48.       for (int iFrac=0; iFrac < pFileHdr->iNumFrac; ++iFrac)
  49.       {
  50.          pFracHdr = (CVTableCfgFileFracHdr*)pby;
  51.          pby += sizeof(CVTableCfgFileFracHdr);
  52.  
  53. #if FOR_LAB
  54.          if (*pFracHdr->szFracName == 0 ||
  55.               pFracHdr->iNumSigs > 5000    )
  56.          {
  57.             Error.Log(__FILE__, "CVT cfg file '%s' fraction header %d malformed.", 
  58.                       pszFileName, iFrac);
  59.             break;
  60.          }
  61. #endif

After parsing, the output file should be devoid of FOR_LAB code:

Expand|Select|Wrap|Line Numbers
  1. if (file.open(pszFileName))
  2.    {
  3.                         pby = (char*)file.getBufferPointer();
  4.  
  5.       pFileHdr = (CVTableCfgFileHeader*)pby;
  6.       pby += sizeof(CVTableCfgFileHeader);
  7.          // --- ok... ready to build the whole table ---
  8.  
  9.    CVTableIndex *pIdx = m_hTblIdx.Init(CVTableIndex::pszTBLIDXNAME, 
  10.                                        SharedMemObjHandle<CVTableIndex>::omCREATE_RO);
  11.  
  12.    int iTotalSignalCt = 0;
  13.       for (int iFrac=0; iFrac < pFileHdr->iNumFrac; ++iFrac)
  14.       {
  15.          pFracHdr = (CVTableCfgFileFracHdr*)pby;
  16.          pby += sizeof(CVTableCfgFileFracHdr);
-- Jhamb
Feb 25 '08 #12

P: 89
OK, thanks. As it is a hashed (#) statement, I thought you needed to remove only the hashed statement itself...

#if FOR_LAB... this one, which repeats any number of times throughout the code.

OK. Anyway, thanks to everyone.
Feb 25 '08 #13

P: 18
Hey all,
I am done with the complete task..! :)

Code file name: perl_code.pl
Expand|Select|Wrap|Line Numbers
  1. # perl_code.pl - copy each input file to "Final_<file>", leaving out every
  2. # region that runs from a line matching START to the next line matching END
  3. # (the START and END lines themselves are dropped as well).
  4. #
  5. # Usage: perl perl_code.pl START END file [file ...]
  6. #   START, END  Perl regular expressions tested against each input line
  7. #   file ...    files to filter; the result is written to "Final_" . file
  8. use strict;
  9. use warnings;
  10.  
  11. die "Usage: $0 START END file [file ...]\n" if @ARGV < 2;
  12.  
  13. my $Start_Str = shift @ARGV;                  # to pass starting string
  14. my $End_Str   = shift @ARGV;                  # to pass ending string
  15.  
  16. # Compile the patterns once; they do not change between lines or files.
  17. my $start_re = qr/$Start_Str/;
  18. my $end_re   = qr/$End_Str/;
  19.  
  20. foreach my $file (@ARGV)                      # to repeat the task for each file
  21. {
  22.    # Open the input first, so an unreadable file does not leave an empty
  23.    # Final_ file behind. Three-argument open keeps characters such as
  24.    # '|' or '>' in a file name from being read as an open mode.
  25.    my $in;
  26.    unless (open($in, '<', $file)) {
  27.       warn "Cannot read $file: $!\n";
  28.       next;
  29.    }
  30.  
  31.    my $filename = "Final_".$file;             # file to write data to
  32.    my $out;
  33.    unless (open($out, '>', $filename)) {
  34.       warn "Cannot write $filename: $!\n";
  35.       close($in);
  36.       next;
  37.    }
  38.  
  39.    my $Flag = 0;                              # 1 while inside a START..END region
  40.    while (my $line = <$in>) {                 # read line by line, no slurping
  41.       if ($Flag == 1 || $line =~ $start_re) {
  42.          # Inside a region (or on its START line): drop the line. END is
  43.          # tested on the START line too, so a one-line region closes at once.
  44.          $Flag = ($line =~ $end_re) ? 0 : 1;
  45.          next;
  46.       }
  47.       print {$out} $line;
  48.    }
  49.  
  50.    # Close per file, so handles are not left open across iterations and
  51.    # write errors are reported for the file they belong to.
  52.    close($in);
  53.    close($out) or warn "Error while writing $filename: $!\n";
  54. }
  55. print "Parsing is completed";
  56.  
This is a command-line script; it can be used with any start and end strings and with any number of files, without affecting the surrounding code.

I tried writing the following lines in command prompt:

C:\>PERL perl_code.pl "#if FOR_LAB" endif cvtablebuild.cpp 1.txt
Parsing is completed

Or you can try it with any other input:

C:\>PERL perl_code.pl ANYTHING_ELSE endif cvtablebuild.cpp 1.txt
Parsing is completed

Output files are created as Final_cvtablebuild.cpp and Final_1.txt

1.txt:
Expand|Select|Wrap|Line Numbers
  1.    if (file.open(pszFileName))
  2.    {
  3.                         pby = (char*)file.getBufferPointer();
  4. #if FOR_LAB
  5.    DWORD dwSize = file.getFileSize();
  6.    DWORD dwMinFileSize = sizeof(CVTableCfgFileHeader) +
  7.             sizeof(CVTableCfgFileFracHdr) + sizeof(CVTableCfgFileSigDesc);
  8.       if (dwSize < dwMinFileSize)
  9.       {
  10.          Error.Log(__FILE__, "CVT config file %s is too short.", pszFileName);
  11.          return false;     // need at least one signal description to be useful
  12.       }
  13. #endif
  14.  
  15.       pFileHdr = (CVTableCfgFileHeader*)pby;
  16.       pby += sizeof(CVTableCfgFileHeader);
  17. #if FOR_LAB
  18.       if (strcmp(pFileHdr->szSignature, "CVT-CFG") != 0)
  19.       {
  20.          Error.Log(__FILE__, "CVT cfg file %s has wrong signature.", pszFileName);
  21.          return false;
  22.       }
  23.       if (pFileHdr->version != iCVTCFGVERSION)
  24.       {
  25.          Error.Log(__FILE__, "Wrong version of CVT cfg. Found %d expecting %d in %s",
  26.             pFileHdr->version, iCVTCFGVERSION, pszFileName);
  27.          return false;
  28.       }
  29.       if (pFileHdr->iNumFrac > CVTableIndex::iMAX_FRACTIONS)
  30.       {
  31.          Error.Log(__FILE__, "CVT cfg file %s has too many fractions", pszFileName);
  32.          return false;
  33.       }
  34. #endif
  35.          // --- ok... ready to build the whole table ---
  36.  
  37.    CVTableIndex *pIdx = m_hTblIdx.Init(CVTableIndex::pszTBLIDXNAME, 
  38.                                        SharedMemObjHandle<CVTableIndex>::omCREATE_RO);
  39. #if FOR_LAB
  40.       if (pIdx == NULL)
  41.       {
  42.          Error.Log(__FILE__, "Unable to create CVT table index.");
  43.          return false;
  44.       }
  45. #endif
  46.  
  47.    int iTotalSignalCt = 0;
  48.       for (int iFrac=0; iFrac < pFileHdr->iNumFrac; ++iFrac)
  49.       {
  50.          pFracHdr = (CVTableCfgFileFracHdr*)pby;
  51.          pby += sizeof(CVTableCfgFileFracHdr);
  52.  
  53. #if FOR_LAB
  54.          if (*pFracHdr->szFracName == 0 ||
  55.               pFracHdr->iNumSigs > 5000    )
  56.          {
  57.             Error.Log(__FILE__, "CVT cfg file '%s' fraction header %d malformed.", 
  58.                       pszFileName, iFrac);
  59.             break;
  60.          }
  61. #endif
  62.  
Final_1.txt:
Expand|Select|Wrap|Line Numbers
  1.    if (file.open(pszFileName))
  2.    {
  3.                         pby = (char*)file.getBufferPointer();
  4.  
  5.       pFileHdr = (CVTableCfgFileHeader*)pby;
  6.       pby += sizeof(CVTableCfgFileHeader);
  7.          // --- ok... ready to build the whole table ---
  8.  
  9.    CVTableIndex *pIdx = m_hTblIdx.Init(CVTableIndex::pszTBLIDXNAME, 
  10.                                        SharedMemObjHandle<CVTableIndex>::omCREATE_RO);
  11.  
  12.    int iTotalSignalCt = 0;
  13.       for (int iFrac=0; iFrac < pFileHdr->iNumFrac; ++iFrac)
  14.       {
  15.          pFracHdr = (CVTableCfgFileFracHdr*)pby;
  16.          pby += sizeof(CVTableCfgFileFracHdr);
  17.  
Thanks to everyone who helped.. :):)
Please let me know if you find something more to add in the code.
Attached Files
File Type: txt 1.txt (2.0 KB, 419 views)
File Type: txt Final_1.txt (612 Bytes, 410 views)
File Type: txt perl code.txt (926 Bytes, 445 views)
Feb 26 '08 #14

Post your reply

Sign in to post your reply or Sign up for a free account.