Jojo wrote:
Is there any way to get to the left-hand side of an operator? Consider
the following (this is not meant to be perfect code, just an example of
the problem):
// Fixed-size block of 1024 ints with element-wise addition, offered both as
// an out-parameter add() and as operator+ so the two call styles can be
// compared.
class Matrix
{
public:
    int data[1024];

    // Default construction leaves every element of data uninitialized.
    Matrix() {}

    // Fills every element of data with value.
    Matrix(int value)
    {
        for (unsigned i = 0; i < sizeof(data) / sizeof(data[0]); i++)
            data[i] = value;
    }

    // Writes the element-wise sum of *this and obj into *output.
    // Neither operand is modified, so the function is const and can be
    // called on a const Matrix. output must point to a valid Matrix; it may
    // alias either operand because each element is read before it is
    // written.
    void add(const Matrix& obj, Matrix* output) const
    {
        for (unsigned i = 0; i < sizeof(data) / sizeof(data[0]); i++)
            output->data[i] = data[i] + obj.data[i];
    }

    // Returns the element-wise sum of *this and obj as a new Matrix.
    // const for the same reason as add(): the left-hand operand is only read.
    Matrix operator +(const Matrix& obj) const
    {
        Matrix temp; // "unnecessary" creation of temp variable
        for (unsigned i = 0; i < sizeof(data) / sizeof(data[0]); i++)
            temp.data[i] = data[i] + obj.data[i];
        return temp; // "unnecessary" extra copy of output
    }
};
For nice looking syntax you _really_ want to use the operator+ like:
matrix3 = matrix1 + matrix2;
However, that is some 50% slower than the _much_ uglier:
matrix1.add(matrix2, &matrix3);
If only there were a way to get to the left-hand argument of the
operator+ then it could be fast and easy to use. Consider the following
code which is not valid C++ and will not compile for this example:
// Hypothetical syntax, NOT valid C++: "as M" is meant to give the operator's
// result object a name so the loop below can write into it directly.
Matrix as M
operator+(const Matrix& obj)
{
for (unsigned i = 0; i < sizeof(data)/sizeof(int); i++)
M.data[i] = data[i] + obj.data[i];
}
That would be fast and clean to use. Is there any way to accomplish
this? Otherwise the situation is just ugly and there is no point in
using operator overloading for these types of situations (which really
defeats the purpose of operator overloading in the first place).
I guess somebody else already mentioned expression templates. Here is some
code for experimentation:
// Element count used as the dimension of every benchmark vector.
const unsigned long rank = 1024;
// Number of loop iterations each benchmark function performs.
const unsigned long runs = 12345;
// Expression-template node representing the element-wise sum of two vector
// expressions. Nothing is computed at construction; element i of the sum is
// evaluated on demand by operator[].
//
// Both operands are held by reference, so a VectorSum must not be used after
// either operand has been destroyed.
template < typename VectorExprA,
           typename VectorExprB >
struct VectorSum {

  // Element type of the sum, taken from the left-hand operand.
  typedef typename VectorExprA::value_type value_type;

  VectorExprA const & a;
  VectorExprB const & b;

  // Binds the node to its two operands without copying them.
  VectorSum( VectorExprA const & a_,
             VectorExprB const & b_ )
    : a ( a_ )
    , b ( b_ )
  {}

  // Returns a[i] + b[i], converted to value_type.
  value_type operator[] ( unsigned long i ) const {
    return( a[i] + b[i] );
  }

}; // struct VectorSum<>;
// Fixed-size vector of dim elements of arithmetic_type.
//
// Besides ordinary copy assignment it can be assigned from any "vector
// expression", i.e. any object whose operator[] yields something assignable
// to value_type for every index in [0, dim).
template < typename arithmetic_type ,
           unsigned long dim >
class Vector {
public:

  typedef arithmetic_type value_type;

private:

  value_type entry [dim];

public:

  // Sets every element to val (a value-initialized value_type by default).
  Vector ( value_type val = value_type() ) {
    for ( unsigned long i = 0; i < dim; ++i ) {
      entry[i] = val;
    }
  }

  // Element-wise copy from another Vector of the same type and size.
  // The index is unsigned long, matching the type of dim, so the counter
  // cannot wrap around before reaching dim.
  Vector & operator= ( Vector const & other ) {
    for ( unsigned long i = 0; i < dim; ++i ) {
      this->entry[i] = other.entry[i];
    }
    return( *this );
  }

  // Evaluates expr element by element into this vector.
  // Element i is read from expr before entry[i] is overwritten, so expr
  // may refer to this vector.
  template < typename VectorExpr >
  Vector & operator= ( VectorExpr const & expr ) {
    for ( unsigned long i = 0; i < dim; ++i ) {
      this->entry[i] = expr[i];
    }
    return( *this );
  }

  // Read-only element access; i is not range-checked.
  value_type const & operator[] ( unsigned long i ) const {
    return( entry[i] );
  }

  // Mutable element access; i is not range-checked.
  value_type & operator[] ( unsigned long i ) {
    return( entry[i] );
  }

  // Stores the element-wise sum of *this and other in result.
  // result may be the same object as *this or other.
  void add ( Vector const & other, Vector & result ) const {
    for ( unsigned long i = 0; i < dim; ++i ) {
      result.entry[i] = this->entry[i] + other.entry[i];
    }
  }

}; // class Vector<>;
// Builds a lazy VectorSum node for a + b; no element is added here.
// The returned node refers to a and b, it does not copy them.
// NOTE(review): the template places no constraint on its operand types, so
// it is a candidate for any a + b where overload resolution reaches it.
template < typename LhsExpr,
           typename RhsExpr >
inline
VectorSum< LhsExpr, RhsExpr > operator+ ( LhsExpr const & a,
                                          RhsExpr const & b ) {
  typedef VectorSum< LhsExpr, RhsExpr > sum_type;
  return sum_type( a, b );
}
#include <iostream>
#include <ctime>
#include <cstdlib>
void test_operator1 ( void ) {
Vector< double, rank > a ( 0 );
Vector< double, rank > b ( 1 );
std::clock_t ticks = std::clock();
{
for ( unsigned int i = 0; i < runs; ++i ) {
a = a + b;
}
}
ticks = std::clock() - ticks;
std::cout << "opp1: a[0] = " << a[0] << " time: " << ticks << '\n';
}
void test_operator2 ( void ) {
Vector< double, rank > a ( 0 );
Vector< double, rank > b ( 1 );
std::clock_t ticks = std::clock();
{
for ( unsigned int i = 0; i < runs; ++i ) {
a = a + b + b;
}
}
ticks = std::clock() - ticks;
std::cout << "opp2: a[0] = " << a[0] << " time: " << ticks << '\n';
}
void test_add1 ( void ) {
Vector< double, rank > a ( 0 );
Vector< double, rank > b ( 1 );
std::clock_t ticks = std::clock();
{
for ( unsigned int i = 0; i < runs; ++i ) {
a.add( b, a );
}
}
ticks = std::clock() - ticks;
std::cout << "add1: a[0] = " << a[0] << " time: " << ticks << '\n';
}
void test_add2 ( void ) {
Vector< double, rank > a ( 0 );
Vector< double, rank > b ( 1 );
std::clock_t ticks = std::clock();
{
for ( unsigned int i = 0; i < runs; ++i ) {
a.add( b, a );
a.add( b, a );
}
}
ticks = std::clock() - ticks;
std::cout << "add2: a[0] = " << a[0] << " time: " << ticks << '\n';
}
// Seeds std::rand with a fixed value, then loops forever, each time running
// one of the four benchmarks selected by std::rand() % 4. The loop has no
// exit, so this function never returns.
int main ( void ) {
  typedef void ( *benchmark_fn )( void );
  // Table order matches the selector values 0..3.
  benchmark_fn const benchmarks [4] = {
    test_operator1,
    test_operator2,
    test_add1,
    test_add2
  };
  std::srand( 24163 );
  for ( ;; ) {
    benchmarks[ std::rand() % 4 ]();
  }
}
On my machine (OS: Linux; CPU: Intel; CC: g++-4.0.1, all optimizations turned
on), opp1 and add1 are essentially equivalent, and opp2 beats add2.
Best
Kai-Uwe Bux