By using this site, you agree to our updated Privacy Policy and our Terms of Use. Manage your Cookies Settings.
437,693 Members | 1,972 Online
Bytes IT Community
+ Ask a Question
Need help? Post your question and get tips & solutions from a community of 437,693 IT Pros & Developers. It's quick & easy.

Sort program runs but is slow

P: 1
What it does:

It runs through a file, sorts it, and splits it alphabetically into files that each contain 200 words or fewer.

In this example I'm using the dict file, and I use it several times, so please excuse the redundancy; I didn't want to include the actual source files.

It works, but it takes a very long time.

Can someone help me get it working faster? Thanks much.

Gzip'd file: http://www.mediafire.com/download.php?zykjlzmktjn


Code:

Expand|Select|Wrap|Line Numbers
  1. #!/usr/bin/perl -w
  2.  
  3. use Text::CSV;
  4. use File::Copy;
  5. use File::stat;
  6.  
  7. use POSIX qw(strftime);
  8.  
  9. my $green_dir  = "tmp/list/green";
  10. my $file_green = "tmp/all_green.txt";
  11.  
  12. my $blue_dir  = "tmp/list/blue";
  13. my $file_blue = "tmp/all_blue.txt";
  14.  
  15. my $green_blue_dir  = "tmp/list/green_blue";
  16. my $file_green_blue = "tmp/all_green_blue.txt";
  17.  
  18. my $allelse_dir  = "tmp/list/allelse";
  19. my $file_allelse = "tmp/allelse.txt";
  20.  
  21. my $all_dir  = "tmp/list/all";
  22. my $file_all = "tmp/all.txt";
  23.  
  24. my $max = 200;
  25.  
  26. $cnt  = 0;
  27. $cnt2 = 2;
  28.  
  29. sub rem_green {
  30.     $buf = "rm -f $green_dir/*";
  31.     system($buf);
  32. }
  33.  
  34. sub rem_blue {
  35.     $buf = "rm -f $blue_dir/*";
  36.     system($buf);
  37. }
  38.  
  39. sub rem_green_blue {
  40.     $buf = "rm -f $green_blue_dir/*";
  41.     system($buf);
  42. }
  43.  
  44. sub rem_allelse {
  45.     $buf = "rm -f $allelse_dir/*";
  46.     system($buf);
  47. }
  48.  
  49. sub rem_all {
  50.     $buf = "rm -f $all_dir/*";
  51.     system($buf);
  52. }
  53.  
  54. @files = (
  55.     'A', 'C', 'B', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
  56.     'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
  57.     'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
  58. );
  59.  
  60. @fl = (
  61.     'A', 'C', 'B', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
  62.     'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
  63.     'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
  64. );
  65.  
  66. if ( $ARGV[0] eq "-o" ) {
  67.     $file_name = $file_blue;
  68.     $file_dir  = $blue_dir;
  69.     rem_blue();
  70. }
  71.  
  72. if ( $ARGV[0] eq "-p" ) {
  73.     $file_name = $file_green;
  74.     $file_dir  = $green_dir;
  75.     rem_green();
  76. }
  77.  
  78. if ( $ARGV[0] eq "-po" ) {
  79.     $file_name = $file_green_blue;
  80.     $file_dir  = $green_blue_dir;
  81.     rem_green_blue();
  82. }
  83.  
  84. if ( $ARGV[0] eq "-a" ) {
  85.     $file_name = $file_all;
  86.     $file_dir  = $all_dir;
  87.     rem_all();
  88. }
  89.  
  90. if ( $ARGV[0] eq "-ae" ) {
  91.     $file_name = $file_allelse;
  92.     $file_dir  = $allelse_dir;
  93.     rem_allelse();
  94. }
  95.  
  96. $files_cnt = 0;
  97. $fl_cnt    = 0;
  98. foreach (@files) {
  99.     $file = $_;
  100.     $file .= "_1.txt";
  101.     chomp($file);
  102.     unlink("$file_dir/$file");
  103.     open( IN, "<", "$file_name" );
  104.     while (<IN>) {
  105.         $word = $_;
  106.         chomp($word);
  107.         foreach (@fl) {
  108.             $fl = $_;
  109.             chomp($fl);
  110.             if ( $word =~ /^[$fl]/ && $file =~ /^[$fl]/ ) {
  111.                 if ( $cnt == $max ) {
  112.                     $file =~ s/_.*//;
  113.                     $file .= "_$cnt2.txt";
  114.                     $cnt2++;
  115.                     $cnt = 0;
  116.                     unlink("$file_dir/$file");
  117.                 }
  118.                                 print "$file_dir $file\n";
  119.                 open( OUT, ">>", "$file_dir/$file" ) or die $!;
  120.                 print OUT "$word\n";
  121.                 close(OUT);
  122.                 $cnt++;
  123.                 $fl_cnt++;
  124.             }
  125.             $fl_cnt = 0;
  126.         }
  127.         $files_cnt++;
  128.     }
  129.     $files_cnt = 0;
  130.     $cnt       = 0;
  131.     $cnt2      = 2;
  132. }
  133. close(IN);
May 9 '10 #1
Share this question for a faster answer!
Share on Google+

Post your reply

Sign in to post your reply or Sign up for a free account.