By using this site, you agree to our updated Privacy Policy and our Terms of Use. Manage your Cookies Settings.
424,647 Members | 1,604 Online
Bytes IT Community
+ Ask a Question
Need help? Post your question and get tips & solutions from a community of 424,647 IT Pros & Developers. It's quick & easy.

Arraylist.remove problem?? Please help.

P: n/a
I have a loop that is set to run as long as the arraylist is > 0.
at the beginning of this loop I grab the first object and then
remove it. I then go into another loop that checks to see if there
are more objects that match the first object that i grabbed. If
they match then I put them in an array. I would like to remove each
match from the arraylist as I find them to speed things up and so
that they don't get checked again. If I try to do that it seems to
mess everything up. Please can someone help me. Also they are not
exact matches that I am doing...they are objects and I do a regex on
the first object that i grab and then check each one to see if it
contains the new regexed value in a varaible of each object.
--
----------------------------------------------
Posted with NewsLeecher v3.0 Final
* Binary Usenet Leeching Made Easy
* http://www.newsleecher.com/?usenet
----------------------------------------------

May 31 '06 #1
Share this Question
Share on Google+
31 Replies


P: n/a

Ex*******@extremest.com wrote:
I have a loop that is set to run as long as the arraylist is > 0.
at the beginning of this loop I grab the first object and then
remove it. I then go into another loop that checks to see if there
are more objects that match the first object that i grabbed. If
they match then I put them in an array. I would like to remove each
match from the arraylist as I find them to speed things up and so
that they don't get checked again. If I try to do that it seems to
mess everything up. Please can someone help me. Also they are not
exact matches that I am doing...they are objects and I do a regex on
the first object that i grab and then check each one to see if it
contains the new regexed value in a varaible of each object.
--


It sounds like a fairly small stretch of code that is the problem.
Could you extract it into a small, stand-alone program that
demonstrates the problem and then post the whole thing here, so that we
can paste it into VS and try it for ourselves?

May 31 '06 #2

P: n/a
ok I am new to c# so take it easy on me....lol..
here is the whole prog. It is a console app so is not to big...only
wrote it today so it is still pretty sloppy...if there is a better
way of doing anything in it please let me know.
using System;
using System.Collections;
using System.Text;
using MySql.Data;
using System.Text.RegularExpressions;

namespace createfiles
{
class Program
{
static MySql.Data.MySqlClient.MySqlConnection conn
= new MySql.Data.MySqlClient.MySqlConnection();
static MySql.Data.MySqlClient.MySqlCommand cmd =
new MySql.Data.MySqlClient.MySqlCommand();
static string myConnectionString = "server=
127.0.0.1;uid=root;pwd=password;database=test;";
static ArrayList master;
static string group;
static string table;
static string[] groups = {
"alt.binaries.games.xbox", "alt.binaries.games.xbox360",
"alt.binaries.vcd" };
static Regex reg = new Regex("\\.");
static Regex seg = new Regex("\\([0-9]*/[0-9].*
\\)",RegexOptions.IgnoreCase);
struct Header
{
public string numb;
public string subject;
public string date;
public string from;
public string msg_id;
public string bytes;
}

static void Main(string[] args)
{
for (int x = 2; x < groups.Length; x++)
{
table = reg.Replace(groups[x], "");
group = groups[x];
getheaders();
Console.WriteLine("Have this many headers
{0}", master.Count);
Header one = (Header)master[0];
Console.WriteLine("first one {0} {1}",
one.numb, one.subject);
find();
master.Clear();
}

}
static void getheaders()
{
conn.ConnectionString = myConnectionString;
conn.Open();
cmd.Connection = conn;
cmd.CommandText = "select * from " + table + "
where subject like '%(%/%)%' order by subject desc";
MySql.Data.MySqlClient.MySqlDataReader reader;
reader = cmd.ExecuteReader();
Header h = new Header();
master = new ArrayList();
while (reader.Read())
{
h.numb = reader.GetValue(0).ToString();
h.subject = reader.GetValue(1).ToString();
h.from = reader.GetValue(2).ToString();
h.date = reader.GetValue(3).ToString();
h.msg_id = reader.GetValue(4).ToString();
h.bytes = reader.GetValue(5).ToString();
master.Add(h);
}
reader.Close();
conn.Close();

}
static void find()
{
int foundm = 0;
while (master.Count > 0)
{
Header start = (Header)master[0];
master.RemoveAt(0);
Match m = seg.Match(start.subject);
string segsplit = m.ToString();
segsplit = segsplit.Replace("(", "");
segsplit = segsplit.Replace(")", "");
string[] segments = segsplit.Split('/');
int max = int.Parse(segments[1]);
max += 1;
int counter = 1;
Header[] found = new Header[max];
int index = int.Parse(segments[0]);
int temp = master.Count;
if (index < max)
{
found[index] = start;
for (int x = 0; x < master.Count; x++)
{
Header test = (Header)master[x];
string testsubject = seg.Replace
(start.subject, "");
if (test.subject.Contains
(testsubject))
{
//master.Remove(test);
Match t = seg.Match
(test.subject);
string tsplit = t.ToString();
string tsegsplit =
tsplit.Replace("(", "");
tsegsplit = tsegsplit.Replace
(")", "");
string[] tsegments =
tsegsplit.Split('/');
index = int.Parse(tsegments
[0]);
//Console.WriteLine(counter);
if (index < max)
{
found[index] = test;
counter++;
}
}

}
//Console.WriteLine("counter = {0}",
counter);
int testmax = max-1;
if (counter == testmax)
{
foundm++;
for (int t = 1; t < found.Length;
t++)
{
Console.WriteLine("We Have a
Match {0}", found[t].subject);
}
}
}
}
}

}
}
--
----------------------------------------------
Posted with NewsLeecher v3.0 Final
* Binary Usenet Leeching Made Easy
* http://www.newsleecher.com/?usenet
----------------------------------------------

May 31 '06 #3

P: n/a
I can't see anything specifically wrong with your code. The RemoveAt(0)
should remove one item each time around the major loop, so the array
should eventually become empty.

I would recommend running your program under the debugger and watch
what it does. It is highly likely that you'll see the mistake quite
quickly.

Unfortunately I can't test it, as I don't have MySql installed.

I did some rearrangement to make it easier to read / debug. Caveat: I
can't compile this code because I don't have MySql, so I make no
guarantees that it will compile / run. I also changed your Header
struct to a class. I see no advantage in using "struct" for Header.
Following is the rearranged code.

using System;
using System.Collections;
using System.Text;
using MySql.Data;
using System.Text.RegularExpressions;
namespace createfiles
{
class Program
{
static string[] groups = {
"alt.binaries.games.xbox", "alt.binaries.games.xbox360",
"alt.binaries.vcd" };
static Regex reg = new Regex("\\.");
static Regex seg = new
Regex("\\([0-9]*/[0-9].*\\)",RegexOptions.IgnoreCase);
class Header
{
public string numb;
public string subject;
public string date;
public string from;
public string msg_id;
public string bytes;
}
static void Main(string[] args)
{
for (int x = 2; x < groups.Length; x++)
{
string table = reg.Replace(groups[x], "");
ArrayList master = getheaders(table);
Console.WriteLine("Have this many headers {0}", master.Count);
Header one = (Header)master[0];
Console.WriteLine("first one {0} {1}", one.numb, one.subject);
find(master);
}
}
static ArrayList getheaders(string table)
{
MySql.Data.MySqlClient.MySqlConnection conn
= new MySql.Data.MySqlClient.MySqlConnection();
MySql.Data.MySqlClient.MySqlCommand cmd =
new MySql.Data.MySqlClient.MySqlCommand();
string myConnectionString =
"server=127.0.0.1;uid=root;pwd=password;database=t est;";
conn.ConnectionString = myConnectionString;
conn.Open();
cmd.Connection = conn;
cmd.CommandText = "select * from " + table + " where subject like
'%(%/%)%' order by subject desc";
MySql.Data.MySqlClient.MySqlDataReader reader;
reader = cmd.ExecuteReader();
Header h = new Header();
ArrayList master = new ArrayList();
while (reader.Read())
{
h.numb = reader.GetValue(0).ToString();
h.subject = reader.GetValue(1).ToString();
h.from = reader.GetValue(2).ToString();
h.date = reader.GetValue(3).ToString();
h.msg_id = reader.GetValue(4).ToString();
h.bytes = reader.GetValue(5).ToString();
master.Add(h);
}
reader.Close();
conn.Close();
return master;
}
static void find(ArrayList master)
{
int foundm = 0;
while (master.Count > 0)
{
Header start = (Header)master[0];
master.RemoveAt(0);
int index;
int max;
ExtractMessageNumber(start.subject, out index, out max);
max += 1;
int counter = 1;
Header[] found = new Header[max];
int temp = master.Count;
if (index < max)
{
found[index] = start;
string testsubject = ExtractMainSubject(start.subject);
for (int x = 0; x < master.Count; x++)
{
Header test = (Header)master[x];
if (test.subject.Contains(testsubject))
{
//master.Remove(test);
int testIndex;
int testMax;
ExtractMessageNumber(test.subject, out testIndex, out testMax);
//Console.WriteLine(counter);
if (testIndex < max)
{
found[testIndex] = test;
counter++;
}
}
}
//Console.WriteLine("counter = {0}", counter);
int testmax = max-1;
if (counter == testmax)
{
foundm++;
for (int t = 1; t < found.Length; t++)
{
Console.WriteLine("We Have a Match {0}", found[t].subject);
}
}
}
}
}

private void ExtractMessageNumber(string subject, out int number, out
int max)
{
Match m = seg.Match(subject);
string segsplit = m.ToString().Replace("(", "").Replace(")", "");
string[] segments = segsplit.Split('/');
number = int.Parse(segments[0]);
max = int.Parse(segments[1]);
}

private string ExtractMainSubject(string subject)
{
return seg.Replace(subject, "");
}
}
}

May 31 '06 #4

P: n/a
thanks a lot for taking the time to help me. I will try it all out
when I get off work. I have managed to fi the remove problem. the
first ione was not the issue it was in the for loop to remove the
matches. I have another post going taht is for optimizing the code.
No one has been able to give me an example of what I need to do so
thank you very much for taking the time too.
Bruce Wood wrote:
I can't see anything specifically wrong with your code. The RemoveAt(0)
should remove one item each time around the major loop, so the array
should eventually become empty.

I would recommend running your program under the debugger and watch
what it does. It is highly likely that you'll see the mistake quite
quickly.

Unfortunately I can't test it, as I don't have MySql installed.

I did some rearrangement to make it easier to read / debug. Caveat: I
can't compile this code because I don't have MySql, so I make no
guarantees that it will compile / run. I also changed your Header
struct to a class. I see no advantage in using "struct" for Header.
Following is the rearranged code.

using System;
using System.Collections;
using System.Text;
using MySql.Data;
using System.Text.RegularExpressions;
namespace createfiles
{
class Program
{
static string[] groups = {
"alt.binaries.games.xbox", "alt.binaries.games.xbox360",
"alt.binaries.vcd" };
static Regex reg = new Regex("\\.");
static Regex seg = new
Regex("\\([0-9]*/[0-9].*\\)",RegexOptions.IgnoreCase);
class Header
{
public string numb;
public string subject;
public string date;
public string from;
public string msg_id;
public string bytes;
}
static void Main(string[] args)
{
for (int x = 2; x < groups.Length; x++)
{
string table = reg.Replace(groups[x], "");
ArrayList master = getheaders(table);
Console.WriteLine("Have this many headers {0}", master.Count);
Header one = (Header)master[0];
Console.WriteLine("first one {0} {1}", one.numb, one.subject);
find(master);
}
}
static ArrayList getheaders(string table)
{
MySql.Data.MySqlClient.MySqlConnection conn
= new MySql.Data.MySqlClient.MySqlConnection();
MySql.Data.MySqlClient.MySqlCommand cmd =
new MySql.Data.MySqlClient.MySqlCommand();
string myConnectionString =
"server=127.0.0.1;uid=root;pwd=password;database=t est;";
conn.ConnectionString = myConnectionString;
conn.Open();
cmd.Connection = conn;
cmd.CommandText = "select * from " + table + " where subject like
'%(%/%)%' order by subject desc";
MySql.Data.MySqlClient.MySqlDataReader reader;
reader = cmd.ExecuteReader();
Header h = new Header();
ArrayList master = new ArrayList();
while (reader.Read())
{
h.numb = reader.GetValue(0).ToString();
h.subject = reader.GetValue(1).ToString();
h.from = reader.GetValue(2).ToString();
h.date = reader.GetValue(3).ToString();
h.msg_id = reader.GetValue(4).ToString();
h.bytes = reader.GetValue(5).ToString();
master.Add(h);
}
reader.Close();
conn.Close();
return master;
}
static void find(ArrayList master)
{
int foundm = 0;
while (master.Count > 0)
{
Header start = (Header)master[0];
master.RemoveAt(0);
int index;
int max;
ExtractMessageNumber(start.subject, out index, out max);
max += 1;
int counter = 1;
Header[] found = new Header[max];
int temp = master.Count;
if (index < max)
{
found[index] = start;
string testsubject = ExtractMainSubject(start.subject);
for (int x = 0; x < master.Count; x++)
{
Header test = (Header)master[x];
if (test.subject.Contains(testsubject))
{
//master.Remove(test);
int testIndex;
int testMax;
ExtractMessageNumber(test.subject, out testIndex, out testMax);
//Console.WriteLine(counter);
if (testIndex < max)
{
found[testIndex] = test;
counter++;
}
}
}
//Console.WriteLine("counter = {0}", counter);
int testmax = max-1;
if (counter == testmax)
{
foundm++;
for (int t = 1; t < found.Length; t++)
{
Console.WriteLine("We Have a Match {0}", found[t].subject);
}
}
}
}
}

private void ExtractMessageNumber(string subject, out int number, out
int max)
{
Match m = seg.Match(subject);
string segsplit = m.ToString().Replace("(", "").Replace(")", "");
string[] segments = segsplit.Split('/');
number = int.Parse(segments[0]);
max = int.Parse(segments[1]);
}

private string ExtractMainSubject(string subject)
{
return seg.Replace(subject, "");
}
}
}


May 31 '06 #5

P: n/a
ok problem...my compiler says this

Error 1 An object reference is required for the nonstatic field,
method, or property 'createfiles.Program.ExtractMessageNumber(string,
out int, out int)' C:\Documents and Settings\Extremest\My
Documents\Visual Studio
2005\Projects\createfiles\createfiles\Program.cs 79 5 createfiles

I am not sure what to do...cause i have never used a function like that.

May 31 '06 #6

P: n/a
ok I went back to how mine was and I took and replaced the struct with
class..which breaks it for some reason and I have also added the 2
replace sequences into the one like you had....giong to go through the
rest of what you have and see what else I can do.

May 31 '06 #7

P: n/a
ok this is what I have right now. This is working it is just slow. I
have a php script that i can run to do this and it is way faster than
this thing is and it is also doing the update db and the insert of the
total file parts at the end that this thing don't even do yet.
Please I need some help to get this sucker to wrok faster. I tried to
change the struct to a class. If I do I don't get any matches anymore
and also the first header that it grabs is different all of a sudden
with less headers also. So for some reason either my code is just not
setup for it to be a class or something.
using System;
using System.Collections;
using System.Text;
using MySql.Data;
using System.Text.RegularExpressions;
namespace createfiles
{
class Program
{
static MySql.Data.MySqlClient.MySqlConnection conn = new
MySql.Data.MySqlClient.MySqlConnection();
static MySql.Data.MySqlClient.MySqlCommand cmd = new
MySql.Data.MySqlClient.MySqlCommand();
static string myConnectionString =
"server=127.0.0.1;uid=root;pwd=password;database=t est;";
static ArrayList master;
static string group;
static string table;
static string[] groups = { "alt.binaries.games.xbox",
"alt.binaries.games.xbox360", "alt.binaries.vcd" };
static Regex reg = new Regex("\\.");
static Regex seg = new
Regex("\\([0-9]*/[0-9]*\\)",RegexOptions.IgnoreCase);
struct Header
{
public string numb;
public string subject;
public string date;
public string from;
public string msg_id;
public string bytes;
}
static void Main(string[] args)
{
for (int x = 1; x < 2; x++)
{
table = reg.Replace(groups[x], "");
group = groups[x];
getheaders();
Console.WriteLine("Have this many headers {0}",
master.Count);
Header one = (Header)master[0];
Console.WriteLine("first one {0} {1}", one.numb,
one.subject);
find();
master.Clear();
}
}
static void getheaders()
{
conn.ConnectionString = myConnectionString;
conn.Open();
cmd.Connection = conn;
cmd.CommandText = "select * from " + table + " where
subject like '%(%/%)%'";
MySql.Data.MySqlClient.MySqlDataReader reader;
reader = cmd.ExecuteReader();
Header h = new Header();
master = new ArrayList();
while (reader.Read())
{
h.numb = reader.GetValue(0).ToString();
h.subject = reader.GetValue(1).ToString();
h.from = reader.GetValue(2).ToString();
h.date = reader.GetValue(3).ToString();
h.msg_id = reader.GetValue(4).ToString();
h.bytes = reader.GetValue(5).ToString();
master.Add(h);
}
reader.Close();
conn.Close();
}
static void find()
{
while (master.Count > 0)
{
Header start = (Header)master[0];
master.RemoveAt(0);
Match m = seg.Match(start.subject);
string segsplit = m.ToString();
segsplit = segsplit.Replace("(", "").Replace(")", "");
string[] segments = segsplit.Split('/');
int max = int.Parse(segments[1]);
max += 1;
int counter = 1;
Header[] found = new Header[max];
string testsubject = seg.Replace(start.subject, "");
int index = int.Parse(segments[0]);
int temp = master.Count;
if (index < max)
{
found[index] = start;
for (int x = 0; x < master.Count; x++)
{
Header test = (Header)master[x];
if (test.subject.Contains(testsubject))
{
//master.Remove(test);
master.RemoveAt(x);
x = x - 1;
Match t = seg.Match(test.subject);
string tsplit = t.ToString();
string tsegsplit = tsplit.Replace("(",
"").Replace(")", "");
string[] tsegments = tsegsplit.Split('/');
index = int.Parse(tsegments[0]);
//Console.WriteLine(counter);
if (index < max)
{
found[index] = test;
counter++;
}
}
}
//Console.WriteLine("counter = {0}", counter);
int testmax = max-1;
if (counter == testmax)
{
master.TrimToSize();
Console.WriteLine("We Have a Match {0}",
found[1].subject);
}
}
}
}
}
}

May 31 '06 #8

P: n/a

<dn**********@charter.net> wrote...
I tried to change the struct to a class.
If I do I don't get any matches anymore
and also the first header that it grabs
is different all of a sudden
with less headers also. So for some
reason either my code is just not
setup for it to be a class or something.
There's a significant difference between using a struct and a class.

As instances of a struct are value types, they get copied into whatever
container you put them, while when using an instance of a class, only the
reference to the instance gets copied.

If you use class instead of struct...

[snip]

class Header
{
public string numb;
public string subject;
public string date;
public string from;
public string msg_id;
public string bytes;
}

[snip]

....you also need to make sure that you put *separate* instances of it into
the list, or else you're just modifying the single instance. This means that
you should instantiate the Header within the loop instead, when using a
class.
static void getheaders()
{
[snip]
Header h = new Header();
master = new ArrayList();
while (reader.Read())
{
h.numb = reader.GetValue(0).ToString();
h.subject = reader.GetValue(1).ToString();
h.from = reader.GetValue(2).ToString();
h.date = reader.GetValue(3).ToString();
h.msg_id = reader.GetValue(4).ToString();
h.bytes = reader.GetValue(5).ToString();
master.Add(h);
}
reader.Close();
conn.Close();
}


....should then be...

static void getheaders()
{

[snip]

master = new ArrayList();
while (reader.Read())
{
Header h = new Header(); // <-- Here instead...
h.numb = reader.GetValue(0).ToString();
h.subject = reader.GetValue(1).ToString();
h.from = reader.GetValue(2).ToString();
h.date = reader.GetValue(3).ToString();
h.msg_id = reader.GetValue(4).ToString();
h.bytes = reader.GetValue(5).ToString();
master.Add(h);
}
reader.Close();
conn.Close();
}
----------------------------

I believe that's why you got a mismatch of the number of headers, when you
were trying to have Header as a class instead of struct.

// Bjorn A
May 31 '06 #9

P: n/a
ok that did work thanks. Would there be a speed gain to have it as a
class instead of a structure?

May 31 '06 #10

P: n/a
Extremest wrote:
ok that did work thanks. Would there be a speed gain to have it as a
class instead of a structure?


In your case, perhaps a slight gain, but nothing significant.

Really, in order to find out where the speed problems are you should
use a profiler to see what is taking up the time. However, since I
can't run it, and so can't profile it, I'll have to guess.

The most important thing to remember about speed is that most
performance gains are to be had at the design level, not by tweaking
code. In your other thread, Nicholas Paldino gave you some suggestions
about how to speed up the code by using regular string functions
instead of Regex. Here I'll talk about larger scale design issues.

I would take a hard look at your whole loop-within-loop algorithm. Look
at what happens to, say, a message that is near the end of your list,
but is only (1/1): you'll be unpacking its subject line over and over
again as you loop through each message looking for its other variants.
Yes, the whole Regex thing may be slowing you down, but doing it over
and over and over again when you don't have to is probably killing you.

You solve this by making "Header" smarter. Instead of just using it as
a dumping bin for a bunch of data, turn it into a real class, and start
using more sophisticated techniques with it. To start with, give it a
constructor and make the fields private, accessible via properties.
Then, in the constructor, pick apart the subject line into the main
subject and the message number / max. This way this string processing
happens _exactly once_ for each message: when you "construct" it, not
every time you want to make a comparison.

class Header
{
private string numb;
private string subject;
private int messageNumber;
private int maxMessages;
private string realSubject;
private string date;
private string from;
private string msg_id;
private string bytes;

public Header(string numb, string subject, string date, string
from, string msg_id, string bytes)
{
this.numb = numb;
this.subject = subject;
this.date = date;
this.from = from;
this.msg_id = msg_id;
this.bytes = bytes;

// Now pull apart the message title... move the
ExtractMessageNumber and ExtractMainSubject methods into the Header
class, too!
ExtractMessageNumber(this.subject, out this.messageNumber, out
this.maxMessages);
this.realSubject = ExtractMainSubject(this.subject);
}

public string Number
{
get { return this.numb; }
}

public string Subject
{
get { return this.subject; }
}

public int MessageNumber
{
get { return this.messageNumber; }
}

// ... etc. I'll leave the rest for you to do
}

As noted, you have to move the "private void ExtractMessageNumber"
method and the "private string ExtractMainSubject" method inside the
Header class. However, the advantage you get is that now in your main
routine you just say, for example,

start.MessageNumber

and you get the message number instantly.

Now, I said that you should take a look at your algorithm. What you
really want to do is group Header objects by their main titles, so why
not make a class to hold groups of Headers? It's just a special type of
ArrayList:

class RelatedHeaders : IEnumerable
{
private string _realSubject = null;
private ArrayList _list = new ArrayList();

public RelatedHeaders(Header firstHeader)
{
this._realSubject = firstHeader.RealSubject;
this._list.Add(newHeader);
}

public void Add(Header newHeader)
{
if (newHeader.RealSubject != this._realSubject)
{
throw new InvalidArgumentException(String.Format("New
header has subject '{0}', but should be '{1}'.", newHeader.RealSubject,
this._realSubject), "newHeader");
}
this._list.Add(newHeader);
}

public void GetEnumerator()
{
return this._list.GetEnumerator();
}
}

Now, the cute trick with this collection of related headers is that you
can hash it in a hash table. Because the collection knows that all
Headers in it have the same "real subject", you can hash them and look
them up. Now your algorithm changes to this:

Hashtable master = new Hashtable();
while (reader.Read())
{
Header head = new Header(reader.GetValue(0).ToString(),
reader.GetValue(1).ToString(), ... );
RelatedHeaders group = (RelatedHeaders)master[head.RealSubject];
if (group == null)
{
group = new RelatedHeaders(head);
master.Add(head.RealSubject, group);
}
else
{
group.Add(head);
}
}

The only thing you lose here is sort order, so when you want the
results out you have to use the hash table's .Values.CopyTo(array)
method to copy the groups to an array and then sort by title, or date,
or whatever.

However, I predict that this approach will be far faster than searching
through the array over and over. Even just making the Header class
smarter so that the subject is dissected only once will produce a
performance gain.

Remember: most gains are at the design level, not at the code tweaking
level. Good luck.

Jun 1 '06 #11

P: n/a
ok I am trying to implement at least the class for now. Only problem I
have is that c# 2.0 I guess is what i am using don't except the
functions that you have with the in and out stuff. Can you please help
me here.

Jun 1 '06 #12

P: n/a
Dude you are the man. I put in the new class like yea said and made a
couple of other adjustments and it is a whole lot faster now. I will
see if I can figure out this group thing. I really am lost on that.
The class wasn't that bad. I have used them in c++ and a little in
php. The group will take some work.

Jun 1 '06 #13

P: n/a
it is running pretty good now I have put in the insert and delete setup
or the db into it...obvioulsy it slowed it down quite a bit but is
still running faster now then it was before with out them. So thank
you very much. Will see what i can do with the group thing.

Jun 1 '06 #14

P: n/a

Extremest wrote:
it is running pretty good now I have put in the insert and delete setup
or the db into it...obvioulsy it slowed it down quite a bit but is
still running faster now then it was before with out them. So thank
you very much. Will see what i can do with the group thing.


It probably doesn't help that in my haste I wrote:

public void GetEnumerator()
{
return this._list.GetEnumerator();
}

when I should have written:

public IEnumerator GetEnumerator()
{
return this._list.GetEnumerator();
}

Jun 1 '06 #15

P: n/a
I have a question for yea. I am redoing my header prog that gets the
headers to the way this is that way when I pull from the db I already
have the min and max. Question is some headers don't have what the
extractmessagenumbers function does. I want to catch that when it does
it and destroy it then....How can I do that.

Jun 1 '06 #16

P: n/a

Extremest wrote:
I have a question for yea. I am redoing my header prog that gets the
headers to the way this is that way when I pull from the db I already
have the min and max. Question is some headers don't have what the
extractmessagenumbers function does. I want to catch that when it does
it and destroy it then....How can I do that.


I would change the Extract method to look like this:

private bool ExtractMessageNumber(string subject, out
int number, out
int max)
{
Match m = seg.Match(subject);
if (m.Success)
{
string segsplit = m.ToString().Replace("(",
"").Replace(")", "");
string[] segments = segsplit.Split('/');
number = int.Parse(segments[0]);
max = int.Parse(segments[1]);
return true;
}
else
{
number = 0;
max = 0;
return false;
}
}

Now, in your constructor, when you call ExtractMessageNumber, you can
do something like this:

if (!ExtractMessageNumber(this.subject, out this.messageNumber, out
this.maxMessages))
{
throw new ArgumentException(String.Format("Message subject
'{0}' does not contain a message number and a maximum.", subject),
"subject");
}

Then, when you create the headers, you could trap the exceptions:

Header head;
try
{
head = new Header(... the usual stuff ...);
master.Add(head);
}
catch (ArgumentException)
{ }

This will attempt to create a new header, but if the subject is
malformed it will fail with an exception, which you trap and ignore. If
the subject is well-formed, then you'll get a header back and you then
add it to the master list.

The only drawback with this method is that there is a performance
penalty every time that you throw an exception. So, if you have large
numbers of malformed headers then you'll notice a performance hit.
However, assuming that there aren't too many malformed headers, it
should be faster than the alternative.

The alternative is to parse each subject twice: once to see if you
should bother constructing a header, and then once inside the
constructor. So, you could make ExtractMessageNumber public, and do
this:

int num;
int max;
if (ExtractMessageNumber(subject, out num, out max))
{
Header head = new Header(... the usual stuff ...);
master.Add(head);
}

This parses every header twice, but avoids the overhead of an exception
for each invalid header.

I predict that the first method will be more performant, because the
proportion of invalid headers is probably quite small. Of course,
you're best to try both methods and see which one runs faster.

Jun 1 '06 #17

P: n/a
sorry have been on my honeymoon. I am wanting to try and do your
grouping thing. What I was thinking was a hashtable to arraylist that
would hold some class that is a group. then go through adding to it.
Not sure how to do this and have been looking at what you have put in
above but some of it is new to me and i don't quite understand it.

Jun 14 '06 #18

P: n/a
also forgot was going to have the group class set so that the instead
of an arraylist use an array. Figured that would be faster and then on
creation of it have it check the header going in for the size that the
array needs to be.

Jun 14 '06 #19

P: n/a
Why don't you post the code you have at the moment and maybe I can help
you add the Hashtable.

Congratulations, by the way. Where did you go on your honeymoon?

Extremest wrote:
sorry have been on my honeymoon. I am wanting to try and do your
grouping thing. What I was thinking was a hashtable to arraylist that
would hold some class that is a group. then go through adding to it.
Not sure how to do this and have been looking at what you have put in
above but some of it is new to me and i don't quite understand it.


Jun 14 '06 #20

P: n/a
We went to Florida for about 10 days. Pretty much went all over the
state from beach to beach. Was very relaxing hated to have to come
back to work. Here is my code so far. If you can just help point me
in the right direction I should be able to figure it out. I am usually
a pretty quick learner. I know that the big slow down for it is the
loop. so if the group thing can become a reality I know it will speed
it up big time. Thanks for any help you can give and for what help you
have already given.

using System;
using System.Collections;
using System.Text;
using MySql.Data;
using System.Text.RegularExpressions;

namespace createfiles
{
class Program
{
static ArrayList master;
static string group;
static string table;
static bool prepared = false;
static int numbfound = 1;
static MySql.Data.MySqlClient.MySqlConnection conn = new
MySql.Data.MySqlClient.MySqlConnection();
static MySql.Data.MySqlClient.MySqlCommand cmd = new
MySql.Data.MySqlClient.MySqlCommand();
static MySql.Data.MySqlClient.MySqlConnection connu = new
MySql.Data.MySqlClient.MySqlConnection();
static MySql.Data.MySqlClient.MySqlCommand cmdu = new
MySql.Data.MySqlClient.MySqlCommand();
static string myConnectionString =
"server=127.0.0.1;uid=root;pwd=password;database=t est;";
static string[] groups = {"alt.binaries.games.xbox",
"alt.binaries.games.xbox360", "alt.binaries.boneless",
"alt.binaries.nl"};
static Regex reg = new Regex("\\.");
static Regex seg = new
Regex("\\([0-9]*/[0-9]*\\)",RegexOptions.IgnoreCase);
class Header
{
private string numb;
private string subject;
private int messageNumber;
private int maxMessages;
private string realSubject;
private string date;
private string from;
private string msg_id;
private string bytes;
public Header(string numb, string subject, string date,
string
from, string msg_id, string bytes)
{
this.numb = numb;
this.subject = subject;
this.date = date;
this.from = from;
this.msg_id = msg_id;
this.bytes = bytes;
ExtractMessageNumber(this.subject, out
this.messageNumber, out this.maxMessages);
this.realSubject = ExtractMainSubject(this.subject);
}

// Now pull apart the message title... move the
private void ExtractMessageNumber(string subject, out int
number, out int max)
{
Match m = seg.Match(subject);
string segsplit = m.ToString().Replace("(",
"").Replace(")", "");
string[] segments = segsplit.Split('/');
number = int.Parse(segments[0]);
max = int.Parse(segments[1]);
}
private string ExtractMainSubject(string subject)
{
return seg.Replace(subject, "");
}

//ExtractMessageNumber(this.subject, out
this.messageNumber, out this.maxMessages);
//this.realSubject = ExtractMainSubject(this.subject);

public string Number
{
get { return this.numb; }
}
public string Subject
{
get { return this.subject; }
}
public int MessageNumber
{
get { return this.messageNumber; }
}

public int MaxMessages
{
get { return this.maxMessages; }
}
public string RealSubject
{
get { return this.realSubject; }
}
public string Date
{
get { return this.date; }
}
public string From
{
get { return this.from; }
}
public string Msg_id
{
get { return this.msg_id; }
}
public string Bytes
{
get { return this.bytes; }
}
}
static void Main(string[] args)
{
while (numbfound > 0)
{
numbfound = 0;
for (int x = 0; x < groups.Length; x++)
{
conn.ConnectionString = myConnectionString;
conn.Open();
cmd.Connection = conn;
connu.ConnectionString = myConnectionString;
connu.Open();
cmdu.Connection = conn;
table = reg.Replace(groups[x], "");
group = groups[x];
getheaders();
//Console.WriteLine("Have this many headers {0}",
master.Count);
//Header one = (Header)master[0];
//Console.WriteLine("first one {0} {1}",
one.Number, one.Subject);
find();
master.Clear();
prepared = false;
conn.Close();
connu.Close();
}
Console.WriteLine(numbfound);
}

}
static void getheaders()
{
MySql.Data.MySqlClient.MySqlConnection conn = new
MySql.Data.MySqlClient.MySqlConnection();
MySql.Data.MySqlClient.MySqlCommand cmd = new
MySql.Data.MySqlClient.MySqlCommand();
string myConnectionString =
"server=127.0.0.1;uid=root;pwd=password;database=t est;";
conn.ConnectionString = myConnectionString;
conn.Open();
cmd.Connection = conn;
cmd.CommandText = "select * from " + table + " where
subject like '%(%/%)%' and subject like '%\"%\"%' limit 400000";
MySql.Data.MySqlClient.MySqlDataReader reader;
reader = cmd.ExecuteReader();

master = new ArrayList();
while (reader.Read())
{
Header h = new Header(reader.GetValue(0).ToString(),
reader.GetValue(1).ToString(), reader.GetValue(3).ToString(),
reader.GetValue(2).ToString(), reader.GetValue(4).ToString(),
reader.GetValue(5).ToString());
master.Add(h);
}
reader.Close();
conn.Close();

}
static void find()
{
while (master.Count > 0)
{
Header start = (Header)master[master.Count-1];
master.RemoveAt(master.Count-1);
int max = start.MaxMessages;
max += 1;
int counter = 1;
Header[] found = new Header[max];
string testsubject = start.RealSubject;
int index = start.MessageNumber;
//int temp = master.Count;
if (index < max)
{
found[index] = start;
for (int x = master.Count - 1; x >= 0; x--)
{
Header test = (Header)master[x];
if (test.RealSubject == testsubject)
{
master.RemoveAt(x);
index = test.MessageNumber;
if (index < max)
{
found[index] = test;
counter++;
}
}

}
int temp = max - 1;
//Console.WriteLine("temp {0} and counter {1}",
temp, counter);
if (counter == temp)
{
int count = 0;
foreach (object o in found)
{
if (o == null)
++count;
}
if (count == 1)
{
numbfound++;
update(found);
insert(found);
}
}
}
}
}
static void insert(Header[] found)
{
string msg_id = null;
string bytes = null;
int totalbytes = 0;
for(int x=1; x<found.Length;x++)
{
msg_id += found[x].Msg_id + "|";
bytes += found[x].Bytes +"|";
totalbytes += int.Parse(found[x].Bytes);
}
if (!prepared)
{
cmd.CommandText = "insert into `files`
(`subject`,`from`,`date`,`msg_ids`,`bytes`,`totalb ytes`,`groups`)
values (?subject, ?from, ?date, ?msg_ids, ?bytes, ?totalbytes,
?groups)";
cmd.Prepare();
prepared = true;
}
cmd.Parameters.Add("?subject", found[1].RealSubject);
cmd.Parameters.Add("?from", found[1].From);
cmd.Parameters.Add("?date", found[1].Date);
cmd.Parameters.Add("?msg_ids", msg_id);
cmd.Parameters.Add("?bytes", bytes);
cmd.Parameters.Add("?totalbytes", totalbytes);
cmd.Parameters.Add("?groups", group + "|");
//cmd.Parameters.Add("?nzb", '0');
cmd.ExecuteNonQuery();
}
static void update(Header[] found)
{
for (int x = 1; x < found.Length; x++)
{
cmdu.CommandText = "delete from `" + table + "` where
`numb` = '" + found[x].Number + "'";
cmdu.ExecuteNonQuery();
}
}
}
}

Jun 14 '06 #21

P: n/a

"Extremest" wrote...

[snipped things I'm jealous of...]
I know that the big slow down for it is the loop.
so if the group thing can become a reality
I know it will speed it up big time.


I have only glanced on this thread before, so I don't think I have followed
all intricate details on what you want to do, but when you say that "the big
slow down for it is the loop", I looked at it, and saw some curious things.

Why are you opening and closing the connection so many times?

Network access is costly, even if it's only on the local machine.

And from what I understand, you use the same connection string for both
"conn" and "connu", so I don't see the point in having both.

Just to illustrate, I have hastily made some alterations in your "Main" and
"getheaders", which "should" work more efficiently.

static void Main(string[] args)
{
while (numbfound > 0)
{
numbfound = 0;

// Open the connection only one time for the loop

conn.ConnectionString = myConnectionString;
conn.Open();
cmd.Connection = conn;
cmdu.Connection = conn;

for (int x = 0; x < groups.Length; x++)
{
table = reg.Replace(groups[x], "");
group = groups[x];
getheaders();
find();
master.Clear();
prepared = false;
}

// And then we close it...

conn.Close();
Console.WriteLine(numbfound);
}
}

static void getheaders()
{
// No need to open a new connection, as conn and cmd are static

cmd.CommandText =
"select * from " + table +
" where subject like '%(%/%)%'" +
" and subject like '%\"%\"%' limit 400000";

MySql.Data.MySqlClient.MySqlDataReader reader;
reader = cmd.ExecuteReader();
master = new ArrayList();
while (reader.Read())
{
Header h = new Header(reader.GetValue(0).ToString(),
reader.GetValue(1).ToString(), reader.GetValue(3).ToString(),
reader.GetValue(2).ToString(), reader.GetValue(4).ToString(),
reader.GetValue(5).ToString());
master.Add(h);
}
reader.Close();
}

----------------------------------

There are probably many other things to make it faster too,
but these were some obvious ones...

/// Bjorn A
Jun 14 '06 #22

P: n/a
ok tried to do that and for some reason it can't open the reader now.
Bjorn Abelli wrote:
"Extremest" wrote...

[snipped things I'm jealous of...]
I know that the big slow down for it is the loop.
so if the group thing can become a reality
I know it will speed it up big time.


I have only glanced on this thread before, so I don't think I have followed
all intricate details on what you want to do, but when you say that "the big
slow down for it is the loop", I looked at it, and saw some curious things.

Why are you opening and closing the connection so many times?

Network access is costly, even if it's only on the local machine.

And from what I understand, you use the same connection string for both
"conn" and "connu", so I don't see the point in having both.

Just to illustrate, I have hastily made some alterations in your "Main" and
"getheaders", which "should" work more efficiently.

static void Main(string[] args)
{
while (numbfound > 0)
{
numbfound = 0;

// Open the connection only one time for the loop

conn.ConnectionString = myConnectionString;
conn.Open();
cmd.Connection = conn;
cmdu.Connection = conn;

for (int x = 0; x < groups.Length; x++)
{
table = reg.Replace(groups[x], "");
group = groups[x];
getheaders();
find();
master.Clear();
prepared = false;
}

// And then we close it...

conn.Close();
Console.WriteLine(numbfound);
}
}

static void getheaders()
{
// No need to open a new connection, as conn and cmd are static

cmd.CommandText =
"select * from " + table +
" where subject like '%(%/%)%'" +
" and subject like '%\"%\"%' limit 400000";

MySql.Data.MySqlClient.MySqlDataReader reader;
reader = cmd.ExecuteReader();
master = new ArrayList();
while (reader.Read())
{
Header h = new Header(reader.GetValue(0).ToString(),
reader.GetValue(1).ToString(), reader.GetValue(3).ToString(),
reader.GetValue(2).ToString(), reader.GetValue(4).ToString(),
reader.GetValue(5).ToString());
master.Add(h);
}
reader.Close();
}

----------------------------------

There are probably many other things to make it faster too,
but these were some obvious ones...

/// Bjorn A


Jun 14 '06 #23

P: n/a
ok did some other messing around with it and got it to work like that.
That should speed things up slightly. Main one I think is going to be
the grouping. If I can figure that out it should speed up
tremendously.

Jun 14 '06 #24

P: n/a
ok I started over and have your group class put in and it seems to
work...not sure. I only have it grabbing the headers now and putting
them in then cleaning out and starting over. I have to figure out how
to redo the find function. I don't exactly know how to go through it
just yet and I also am going to add a little bit more functionality to
the group class so that it can do some stuff at the beginning so that I
don't have to do it in a loop.

Jun 15 '06 #25

P: n/a
I doubt that there will be any performance difference at all between
ArrayList and Array. I use a normal array only when I know the number
of entries (for sure) up front. In this case you don't. Any performance
gains are not to be had at that level.

Extremest wrote:
also forgot was going to have the group class set so that the instead
of an arraylist use an array. Figured that would be faster and then on
creation of it have it check the header going in for the size that the
array needs to be.


Jun 15 '06 #26

P: n/a
Could you post the revised code?

Extremest wrote:
ok I started over and have your group class put in and it seems to
work...not sure. I only have it grabbing the headers now and putting
them in then cleaning out and starting over. I have to figure out how
to redo the find function. I don't exactly know how to go through it
just yet and I also am going to add a little bit more functionality to
the group class so that it can do some stuff at the beginning so that I
don't have to do it in a loop.


Jun 15 '06 #27

P: n/a
this is all I have so far. Haven't added in anything else yet. Don't
know how to go through a hashtable yet. Trying to learn how now.

using System;
using System.Collections;
using System.Text;
using MySql.Data;
using System.Text.RegularExpressions;

namespace groupheaders
{
class Program
{
static Hashtable master;
static MySql.Data.MySqlClient.MySqlConnection conn = new
MySql.Data.MySqlClient.MySqlConnection();
static MySql.Data.MySqlClient.MySqlCommand cmd = new
MySql.Data.MySqlClient.MySqlCommand();
static MySql.Data.MySqlClient.MySqlCommand cmdu = new
MySql.Data.MySqlClient.MySqlCommand();
static string myConnectionString =
"server=127.0.0.1;uid=root;pwd=password;database=t est;";
static string[] groups = { "alt.binaries.games.xbox",
"alt.binaries.games.xbox360", "alt.binaries.boneless",
"alt.binaries.nl" };
static Regex seg = new Regex("\\([0-9]*/[0-9]*\\)",
RegexOptions.IgnoreCase);
static string group;
static string table;
static Regex reg = new Regex("\\.");
class RelatedHeaders : IEnumerable
{
private string _realSubject = null;
private ArrayList _list = new ArrayList();
public RelatedHeaders(Header firstHeader)
{
this._realSubject = firstHeader.RealSubject;
this._list.Add(firstHeader);
}
public void Add(Header newHeader)
{
if (newHeader.RealSubject != this._realSubject)
{
throw new ArgumentException(String.Format("New
header has subject '{0}', but should be '{1}'.", newHeader.RealSubject,
this._realSubject), "newHeader");
}
this._list.Add(newHeader);
}
public IEnumerator GetEnumerator()
{
return this._list.GetEnumerator();
}
}
class Header
{
private string numb;
private string subject;
private int messageNumber;
private int maxMessages;
private string realSubject;
private string date;
private string from;
private string msg_id;
private string bytes;
static Regex seg = new Regex("\\([0-9]*/[0-9]*\\)",
RegexOptions.IgnoreCase);

public Header(string numb, string subject, string date,
string
from, string msg_id, string bytes)
{
this.numb = numb;
this.subject = subject;
this.date = date;
this.from = from;
this.msg_id = msg_id;
this.bytes = bytes;
ExtractMessageNumber(this.subject, out
this.messageNumber, out this.maxMessages);
this.realSubject = ExtractMainSubject(this.subject);
}

// Now pull apart the message title... move the
private void ExtractMessageNumber(string subject, out int
number, out int max)
{
Match m = seg.Match(subject);
string segsplit = m.ToString().Replace("(",
"").Replace(")", "");
string[] segments = segsplit.Split('/');
number = int.Parse(segments[0]);
max = int.Parse(segments[1]);
}
private string ExtractMainSubject(string subject)
{
return seg.Replace(subject, "");
}

//ExtractMessageNumber(this.subject, out
this.messageNumber, out this.maxMessages);
//this.realSubject = ExtractMainSubject(this.subject);

public string Number
{
get { return this.numb; }
}
public string Subject
{
get { return this.subject; }
}
public int MessageNumber
{
get { return this.messageNumber; }
}

public int MaxMessages
{
get { return this.maxMessages; }
}
public string RealSubject
{
get { return this.realSubject; }
}
public string Date
{
get { return this.date; }
}
public string From
{
get { return this.from; }
}
public string Msg_id
{
get { return this.msg_id; }
}
public string Bytes
{
get { return this.bytes; }
}
}
static void Main(string[] args)
{
conn.ConnectionString = myConnectionString;
conn.Open();
cmd.Connection = conn;
cmdu.Connection = conn;
for (int x = 0; x < groups.Length; x++)
{
table = reg.Replace(groups[x], "");
group = groups[x];
getheaders();
//Console.WriteLine("Have this many headers {0}",
master.Count);
//Header one = (Header)master[0];
//Console.WriteLine("first one {0} {1}", one.Number,
one.Subject);
//find();
master.Clear();
//prepared = false;
}

}
static void getheaders()
{
cmd.CommandText = "select * from " + table + " where
subject like '%(%/%)%' and subject like '%\"%\"%' limit 200000";
MySql.Data.MySqlClient.MySqlDataReader reader;
reader = cmd.ExecuteReader();
master = new Hashtable();
while (reader.Read())
{
Header h = new Header(reader.GetValue(0).ToString(),
reader.GetValue(1).ToString(), reader.GetValue(3).ToString(),
reader.GetValue(2).ToString(), reader.GetValue(4).ToString(),
reader.GetValue(5).ToString());
RelatedHeaders group =
(RelatedHeaders)master[h.RealSubject];
if (group == null)
{
group = new RelatedHeaders(h);
master.Add(h.RealSubject, group);
}
else
{
group.Add(h);
}
}
reader.Close();
}
}
}

Jun 15 '06 #28

P: n/a
ok this is prolly kinda rough but I am just learning how to use these.
This is the code that I have figured out to at least see each header in
the group list so far. Can prolly work out how to do the rest in a
bit. I have to figure a way to get the array for each one though so
taht the number that it has for the segment is the index of it and also
start doing the rest. But a good start for not knowing anything about
it.

RelatedHeaders[] t = new RelatedHeaders[master.Count];
master.Values.CopyTo(t,0);
for (int s = 0; s < t.Length; s++)
{
RelatedHeaders temp = t[s];
ArrayList p = temp.GetList();
for (int y = p.Count - 1; y >= 0; y--)
{
Header test = (Header)p[y];
Console.WriteLine(test.Subject);
}
}

Jun 15 '06 #29

P: n/a
ok this is prolly kinda rough but I am just learning how to use these.
This is the code that I have figured out to at least see each header in
the group list so far. Can prolly work out how to do the rest in a
bit. I have to figure a way to get the array for each one though so
taht the number that it has for the segment is the index of it and also
start doing the rest. But a good start for not knowing anything about
it.

RelatedHeaders[] t = new RelatedHeaders[master.Count];
master.Values.CopyTo(t,0);
for (int s = 0; s < t.Length; s++)
{
RelatedHeaders temp = t[s];
ArrayList p = temp.GetList();
for (int y = p.Count - 1; y >= 0; y--)
{
Header test = (Header)p[y];
Console.WriteLine(test.Subject);
}
}

Jun 15 '06 #30

P: n/a
ok this is what I have so far. Does anyone see anything wrong with it
or any ways to make it better.

using System;
using System.Collections;
using System.Text;
using MySql.Data;
using System.Text.RegularExpressions;

namespace groupheaders
{
class Program
{
static Hashtable master;
static MySql.Data.MySqlClient.MySqlConnection conn = new
MySql.Data.MySqlClient.MySqlConnection();
static MySql.Data.MySqlClient.MySqlCommand cmd = new
MySql.Data.MySqlClient.MySqlCommand();
static MySql.Data.MySqlClient.MySqlCommand cmdu = new
MySql.Data.MySqlClient.MySqlCommand();
static string myConnectionString =
"server=127.0.0.1;uid=root;pwd=password;database=t est;";
static string[] groups = { "alt.binaries.games.xbox",
"alt.binaries.games.xbox360", "alt.binaries.boneless",
"alt.binaries.nl" };
static Regex seg = new Regex("\\([0-9]*/[0-9]*\\)",
RegexOptions.IgnoreCase);
static string group;
static string table;
static Regex reg = new Regex("\\.");
static bool prepared = false;
static int numbfound = 1;
class RelatedHeaders : IEnumerable
{
private string _realSubject = null;
private ArrayList _list = new ArrayList();
public RelatedHeaders(Header firstHeader)
{
this._realSubject = firstHeader.RealSubject;
this._list.Add(firstHeader);
}
public void Add(Header newHeader)
{
if (newHeader.RealSubject != this._realSubject)
{
throw new ArgumentException(String.Format("New
header has subject '{0}', but should be '{1}'.", newHeader.RealSubject,
this._realSubject), "newHeader");
}
this._list.Add(newHeader);
}

public ArrayList GetList()
{
return this._list;
}

public IEnumerator GetEnumerator()
{
return this._list.GetEnumerator();
}
}
class Header
{
private string numb;
private string subject;
private int messageNumber;
private int maxMessages;
private string realSubject;
private string date;
private string from;
private string msg_id;
private string bytes;
static Regex seg = new Regex("\\([0-9]*/[0-9]*\\)",
RegexOptions.IgnoreCase);

public Header(string numb, string subject, string date,
string
from, string msg_id, string bytes)
{
this.numb = numb;
this.subject = subject;
this.date = date;
this.from = from;
this.msg_id = msg_id;
this.bytes = bytes;
ExtractMessageNumber(this.subject, out
this.messageNumber, out this.maxMessages);
this.realSubject = ExtractMainSubject(this.subject);
}

// Now pull apart the message title... move the
private void ExtractMessageNumber(string subject, out int
number, out int max)
{
Match m = seg.Match(subject);
string segsplit = m.ToString().Replace("(",
"").Replace(")", "");
string[] segments = segsplit.Split('/');
number = int.Parse(segments[0]);
max = int.Parse(segments[1]);
}
private string ExtractMainSubject(string subject)
{
return seg.Replace(subject, "");
}

//ExtractMessageNumber(this.subject, out
this.messageNumber, out this.maxMessages);
//this.realSubject = ExtractMainSubject(this.subject);

public string Number
{
get { return this.numb; }
}
public string Subject
{
get { return this.subject; }
}
public int MessageNumber
{
get { return this.messageNumber; }
}

public int MaxMessages
{
get { return this.maxMessages; }
}
public string RealSubject
{
get { return this.realSubject; }
}
public string Date
{
get { return this.date; }
}
public string From
{
get { return this.from; }
}
public string Msg_id
{
get { return this.msg_id; }
}
public string Bytes
{
get { return this.bytes; }
}
}
static void Main(string[] args)
{
while (numbfound > 0)
{
numbfound = 0;
conn.ConnectionString = myConnectionString;
conn.Open();
cmd.Connection = conn;
cmdu.Connection = conn;
for (int x = 0; x < groups.Length; x++)
{
table = reg.Replace(groups[x], "");
group = groups[x];
getheaders();
//Console.WriteLine("Have this many in {0} - {1}",
group, master.Count);
RelatedHeaders[] t = new
RelatedHeaders[master.Count];
master.Values.CopyTo(t, 0);
//master.Clear();
for (int s = 0; s < t.Length; s++)
{
RelatedHeaders temp = t[s];
ArrayList p = temp.GetList();
for (int y = p.Count - 1; y >= 0; y--)
{
Header test = (Header)p[y];
Header[] narray = new
Header[test.MaxMessages + 1];
narray[test.MessageNumber] = test;
find(narray);
}

}
prepared = false;
}
conn.Close();
Console.WriteLine(numbfound);
}
}
static void find(Header[] list)
{
int count = 0;
foreach (object o in list)
{
if (o == null)
++count;
}
if (count == 1)
{
if (list[0] == null)
{
insert(list);
update(list);
numbfound++;
}
}
}
static void insert(Header[] found)
{
string msg_id = null;
string bytes = null;
int totalbytes = 0;
for (int x = 1; x < found.Length; x++)
{
msg_id += found[x].Msg_id + "|";
bytes += found[x].Bytes + "|";
totalbytes += int.Parse(found[x].Bytes);
}
if (!prepared)
{
cmd.CommandText = "insert into `files`
(`subject`,`from`,`date`,`msg_ids`,`bytes`,`totalb ytes`,`groups`)
values (?subject, ?from, ?date, ?msg_ids, ?bytes, ?totalbytes,
?groups)";
cmd.Prepare();
prepared = true;
}
cmd.Parameters.Add("?subject", found[1].RealSubject);
cmd.Parameters.Add("?from", found[1].From);
cmd.Parameters.Add("?date", found[1].Date);
cmd.Parameters.Add("?msg_ids", msg_id);
cmd.Parameters.Add("?bytes", bytes);
cmd.Parameters.Add("?totalbytes", totalbytes);
cmd.Parameters.Add("?groups", group + "|");
//cmd.Parameters.Add("?nzb", '0');
cmd.ExecuteNonQuery();
}
static void update(Header[] found)
{
for (int x = 1; x < found.Length; x++)
{
cmdu.CommandText = "delete from `" + table + "` where
`numb` = '" + found[x].Number + "'";
cmdu.ExecuteNonQuery();
}
}
static void getheaders()
{
cmd.CommandText = "select * from " + table + " where
subject like '%(%/%)%' and subject like '%\"%\"%' order by `numb` desc
limit 500000";
MySql.Data.MySqlClient.MySqlDataReader reader;
reader = cmd.ExecuteReader();
master = new Hashtable();
while (reader.Read())
{
Header h = new Header(reader.GetValue(0).ToString(),
reader.GetValue(1).ToString(), reader.GetValue(3).ToString(),
reader.GetValue(2).ToString(), reader.GetValue(4).ToString(),
reader.GetValue(5).ToString());
RelatedHeaders group =
(RelatedHeaders)master[h.RealSubject];
if (group == null)
{
group = new RelatedHeaders(h);
master.Add(h.RealSubject, group);
}
else
{
group.Add(h);
}
}
reader.Close();
}
}
}

Jun 15 '06 #31

P: n/a
ok not sure what is up but it is only finding ones that are 1 part.
Anything else it leaves alone. Have to figure that out. Also would
like to add some of the functions into the related class to do some of
the work so that when it comes time it would not be so much with all
these loops.

Jun 16 '06 #32

This discussion thread is closed

Replies have been disabled for this discussion.