I have a class "Data" and I store Data pointers in an STL set. But I have
millions of inserts and many more lookups, and my profiler found that they
cost a lot of runtime.
Therefore, I want to substitute the set<Data*> with a hash_set<Data*>:
typedef hash_set<const Data*, hash<const Data*>, eqData> DataPointerHashSet;
// typedef set<Data*> DataPointerHashSet; // (see complete code below)
But it doesn't work! Everything is fine when I use set<Data*>, but when I
plug in hash_set<...> the program gives a wrong answer and then crashes.
I tried to make a mini example, see code below. But the problem is, the
mini example seems to work fine. Just my original example crashes.
Can anyone see problems with my hash_set usage in the mini example below?
Problems that might occur only in certain situations (collisions)? I don't
really know how to write the hash function, so I adapted it from an example
that I found somewhere.
#include <iostream>
#if __GNUC__ < 3 && __GNUC__ >= 2 && __GNUC_MINOR__ >= 95
# include <hash_set>
# include <functional>
# define gnu_namespace std
#elif __GNUC__ >= 3
# include <ext/hash_set>
# if __GNUC_MINOR__ == 0
# include <functional>
# define gnu_namespace std
# else
# include <ext/functional>
# define gnu_namespace __gnu_cxx
# endif
#else
# include <hash_set.h>
# include <functional.h>
# define gnu_namespace std
#endif
using namespace gnu_namespace;
#include <set>
using namespace std;
class Data {
private:
string someData;
unsigned index;
public:
const unsigned getIndex() const { return index; }
Data(const string& s) : someData(s) {
static unsigned index; // a unique index for each Data obj
this->index = index++;
}
};
bool operator == (const Data& n1, const Data& n2 ){
return n1.getIndex() == n2.getIndex();
}
namespace gnu_namespace {
template<>
struct hash<const Data*> {
hash<unsigned> hasher;
size_t operator()(const Data* n) const {
return hasher(n->getIndex()); // wrong hash function??
}
};
}
struct eqData{
bool operator()(const Data* s1, const Data* s2) const{
return *s1 == *s2;
}
};
typedef hash_set<const Data*, hash<const Data*>, eqData>
DataPointerHashSet;
// typedef set<Data*> DataPointerHashSet;
int main(){
DataPointerHashSet s;
const unsigned size = 10000;
Data* pointers[size];
for(int i = 0; i < size; ++i){
pointers[i] = new Data("bla");
s.insert(pointers[i]);
}
for(int i = 0; i < size; ++i){
if(s.find(pointers[i]) == s.end()){
cerr << "ERROR" << endl;
exit(EXIT_FAILURE);
}
}
for(int i = 0; i < size; ++i){
delete pointers[i];
}
}