/* There is a working version of Crout in here. Maybe it is helpful.
** Aside: Dieter's routine is a work of art, simple - elegant - fast.
** IMO-YMMV.
*/
/*
* --------------------------------------------------------------
* TEST_FPU A number-crunching benchmark using matrix inversion.
* Implemented by: David Frank
Da********@aol.com
* Gauss routine by: Tim Prince
N8**@aol.com
* Crout routine by: James Van Buskirk
to****@ix.netcom.com
* F90->C source by: Sergey N. Voronkov
se**@ggd.nsu.ru
* Pointer exchange version by: Dieter Buerssner
bu***@gmx.de
* --------------------------------------------------------------
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
/*
* Compiling with NI = 1001 (default) generates pool(51,51,1000) =
20mb.
* If test system pages due to insufficient memory (disk i/o activity
* occurs), abort run and compile with NI = 200, benchmark will
adjust
* time x 5.
*/
#define NI 1001
#define NN 51
#define RK8 double
/* below are additional C routines supplied by translator */
void memflt()
{
fputs("Memory allocation error\n", stderr);
exit(EXIT_FAILURE);
}
void alloc_arrays(RK8 ** p[NI], RK8 *** a, RK8 *** b)
{
int i,
j;
for (i = 0; i < NI; i++) {
if ((p[i] = (RK8 **) malloc(NN * sizeof(RK8 *))) == NULL)
memflt();
for (j = 0; j < NN; j++)
if ((p[i][j] = (RK8 *) malloc(NN * sizeof(RK8))) == NULL)
memflt();
}
if ((*a = (RK8 **) malloc(NN * sizeof(RK8 *))) == NULL ||
(*b = (RK8 **) malloc(NN * sizeof(RK8 *))) == NULL)
memflt();
for (i = 0; i < NN; i++)
if (((*a)[i] = (RK8 *) malloc(NN * sizeof(RK8))) == NULL ||
((*b)[i] = (RK8 *) malloc(NN * sizeof(RK8))) == NULL)
memflt();
}
void random_number(RK8 ** pool[NI])
{
int i,
j,
k;
for (i = 0; i < NI; i++)
for (j = 0; j < NN; j++)
for (k = 0; k < NN; k++)
pool[i][j][k] = (RK8) (rand()) / RAND_MAX;
}
RK8
timesec()
{
return (RK8) (clock()) / CLOCKS_PER_SEC;
}
/* prototype the invert functions that follow exec source */
void Gauss(RK8 ** a, RK8 ** b, int n);
void Crout(RK8 ** a, RK8 ** b, int n);
int rgaussi(RK8 ** a, RK8 ** b, int n);
int main()
{
RK8 **pool[NI]; /* pool of matrices to invert */
RK8 **a,
**ai; /* working matrices use < 256k */
RK8 avg_err,
total_time,
time1;
int i,
j,
n;
char *revision = "01/10/98"; /* Gauss speedup mod */
char invert_id[3][8] =
{
"Gauss", "Crout", "Dieter"};
struct tm *ptm;
time_t crtime;
FILE *fp;
/* Begin by allocating matrix arrays */
alloc_arrays(pool, &a, &ai);
puts("Benchmark running, hopefully as only ACTIVE task");
if ((fp = fopen("test_fpc.dat", "w")) == NULL) {
fprintf(stderr, "Can't open output file!\n");
return EXIT_FAILURE;
}
crtime = time(NULL);
ptm = gmtime(&crtime);
fprintf(fp, "Date run = %2d/%2d/%2d\n",
ptm->tm_mon + 1, ptm->tm_mday, ptm->tm_year);
fputs("Pls supply info below, send to
Da********@aol.com\n"
"Tester name/ net address = \n"
"CPU mfg/id/Mhz = \n"
"MEM/CACHE = \n"
"O.S. / COMPILER = \n"
"Additional comments = \n\n\n\n\n", fp);
fprintf(fp, "Results for %s revision using TEST_FPU.C \n",
revision);
srand(time(NULL)); /* set seed to random number based on time */
random_number(pool); /* fill pool with random data ( 0. -1. ) */
for (n = 0; n < 3; n++) { /* for Gauss,Crout algorithms */
time1 = timesec(); /* start benchmark n time */
for (i = 0; i < NI; i++) {
/* get next matrix to invert */
for (j = 0; j < NN; j++)
memcpy(a[j], pool[i][j], sizeof(RK8) * NN);
switch (n) {
case 0:
Gauss(a, ai, NN); /* invert a -ai ; destructs a */
Gauss(ai, a, NN); /* invert ai -a */
break;
case 1:
Crout(a, ai, NN); /* invert a -ai ; nondestructs a
*/
Crout(ai, a, NN); /* invert ai -a */
break;
case 2:
rgaussi(a, ai, NN); /* invert a -ai ; nondestructs a
*/
rgaussi(ai, a, NN); /* invert ai -a */
break;
}
}
total_time = timesec() - time1; /* = elapsed time sec. */
/* check accuracy last matrix invert. */
avg_err = 0;
for (i = 0; i < NN; i++)
for (j = 0; j < NN; j++)
avg_err += fabs(a[i][j] - pool[NI - 1][i][j]);
if (NI == 200)
fprintf(fp, "\n%s 5 x 200 x 2 inverts = %6.1f sec.\n",
invert_id[n], 5 * total_time);
else
fprintf(fp, "\n%s 1000 x 2 inverts = %6.1f sec.\n",
invert_id[n], total_time);
fputs("Accuracy of 2 computed numbers\n", fp);
fprintf(fp, "Original = %18.15f %18.15f\n",
pool[NI - 1][0][0], pool[NI - 1][NN - 1][NN - 1]);
fprintf(fp, "Computed = %18.15f %18.15f\n",
a[0][0], a[NN - 1][NN - 1]);
fprintf(fp, "Avg Err. = %18.15f\n", avg_err / (NN * NN));
} /* for Gauss,Crout algorithms */
puts("Results written to: TEST_FPC.DAT");
return EXIT_SUCCESS;
}
/*
* --------------------------------------
* Invert matrix a -b by Gauss method
* --------------------------------------
*/
void Gauss(RK8 ** a, RK8 ** b, int n)
{
RK8 d,
temp = 0,
c;
int i,
j,
k,
m,
nn,
*ipvt;
if ((ipvt = (int *) malloc(n * sizeof(int))) == NULL)
memflt();
nn = n;
for (i = 0; i < nn; i++)
ipvt[i] = i;
for (k = 0; k < nn; k++) {
temp = 0.;
m = k;
for (i = k; i < nn; i++) {
d = a[k][i];
if (fabs(d) temp) {
temp = fabs(d);
m = i;
}
}
if (m != k) {
j = ipvt[k];
ipvt[k] = ipvt[m];
ipvt[m] = j;
for (j = 0; j < nn; j++) {
temp = a[j][k];
a[j][k] = a[j][m];
a[j][m] = temp;
}
}
d = 1 / a[k][k];
for (j = 0; j < k; j++) {
c = a[j][k] * d;
for (i = 0; i < nn; i++)
a[j][i] -= a[k][i] * c;
a[j][k] = c;
}
for (j = k + 1; j < nn; j++) {
c = a[j][k] * d;
for (i = 0; i < nn; i++)
a[j][i] -= a[k][i] * c;
a[j][k] = c;
}
for (i = 0; i < nn; i++)
a[k][i] = -a[k][i] * d;
a[k][k] = d;
}
for (i = 0; i < nn; i++)
memcpy(b[ipvt[i]], a[i], sizeof(RK8) * nn);
free(ipvt);
}
/*
* --------------------------------------
* Invert matrix a -b by Crout method
* --------------------------------------
*/
void Crout(RK8 ** a, RK8 ** b, int n)
{
int i,
j; /* Current row & column */
int maxlok; /* Location of maximum pivot */
int *index; /* Partial pivot record */
RK8 *temp = 0,
the_max;
RK8 tmp,
*ptr;
RK8 *matr = 0;
int k,
ind,
ind2;
if ((index = (int *) malloc(n * sizeof(int))) == NULL ||
(temp = (RK8 *) malloc(n * sizeof(RK8))) == NULL ||
(matr = (RK8 *) malloc(n * n * sizeof(RK8))) == NULL)
memflt();
/* Initialize everything */
for (i = 0; i < n; i++)
index[i] = i;
/* Shuffle matrix */
for (j = 0; j < n; j++) {
for (i = 0; i < j; i++)
b[j][i] = a[j][i];
for (i = j; i < n; i++)
b[j][i] = a[i - j][n - j - 1];
}
/* LU decomposition; reciprocals of diagonal elements in L matrix */
for (j = 0; j < n; j++) {
/* Get current column of L matrix */
for (i = j; i < n; i++) {
tmp = 0;
ind = n - i - 1;
for (k = 0; k < j; k++)
tmp += b[ind][ind + k] * b[j][k];
b[ind][ind + j] -= tmp;
}
maxlok = 0;
the_max = fabs(b[0][j]);
for (i = 1; i < n - j; i++)
if (fabs(b[i][j + i]) >= the_max) {
the_max = fabs(b[i][j + i]);
maxlok = i;
}
/* det = det*b(j+maxlok-1,maxlok) */
b[maxlok][j + maxlok] = 1 / b[maxlok][j + maxlok];
/* Swap biggest element to current pivot position */
if (maxlok + 1 != n - j) {
ind = n - maxlok - 1;
ind2 = index[j];
index[j] = index[ind];
index[ind] = ind2;
for (k = n - maxlok; k < n; k++) {
tmp = b[k][j];
b[k][j] = b[k][ind];
b[k][ind] = tmp;
}
memcpy(temp, &(b[maxlok][maxlok]), sizeof(RK8) * (n -
maxlok));
ptr = &(b[n - j - 1][n - j - 1]);
memmove(&(b[maxlok][maxlok]), ptr, sizeof(RK8) * (j + 1));
for (k = j + 1; k < n - maxlok; k++)
b[maxlok][maxlok + k] = b[k][j];
memcpy(ptr, temp, (j + 1) * sizeof(RK8));
for (k = j + 1; k < n - maxlok; k++)
b[k][j] = temp[k];
}
/* Get current row of U matrix */
ind = n - j - 1;
for (i = j + 1; i < n; i++) {
tmp = 0.;
for (k = 0; k < j; k++)
tmp += b[i][k] * b[ind][ind + k];
b[i][j] = b[ind][n - 1] * (b[i][j] - tmp);
}
} /* END DO LU_outer */
/* Invert L matrix */
for (j = 0; j < n - 1; j++) {
temp[0] = b[n - j - 1][n - 1];
for (i = j + 1; i < n; i++) {
ind = n - i - 1;
tmp = 0.;
for (k = 0; k < i - j; k++)
tmp += temp[k] * b[ind][ind + j + k];
b[ind][ind + j] = -tmp * b[ind][n - 1];
temp[i - j] = b[ind][ind + j];
}
}
/* Reshuffle matrix */
for (i = 0; i < (n + 1) / 3; i++) {
memcpy(temp, &(b[i][2 * (i + 1) - 1]), sizeof(RK8) * (n + 2 -
3 * (i +1)));
for (j = 2 * (i + 1) - 1; j < n - i; j++)
b[i][j] = b[n - j - 1][n - j + i - 1];
ind = n - i - 1;
for (j = i; j < n + 1 - 2 * (i + 1); j++)
b[j][i + j] = b[n - i - j - 1][ind];
for (k = 0; k < n + 2 - 3 * (i + 1); k++)
b[i + 1 + k][ind] = temp[k];
}
/* Invert U matrix */
for (i = 0; i < n - 1; i++) {
for (j = i + 1; j < n; j++) {
tmp = 0.;
for (k = 0; k < j - i - 1; k++)
tmp += temp[k] * b[j][i + 1 + k];
b[j][i] = -b[j][i] - tmp;
temp[j - i - 1] = b[j][i];
}
}
/* Multiply inverses in reverse order */
for (i = 0; i < n - 1; i++) {
for (k = 0; k < n - i - 1; k++)
temp[k] = b[i + 1 + k][i];
for (j = 0; j <= i; j++) {
tmp = 0.;
for (k = 0; k < n - i - 1; k++)
tmp += temp[k] * b[j][i + 1 + k];
b[j][i] += tmp;
}
for (j = i + 1; j < n; j++) {
tmp = 0.;
for (k = j; k < n; k++)
tmp += temp[k - i - 1] * b[j][k];
b[j][i] = tmp;
}
}
/* Straighten out the columns of the result */
for (i = 0; i < n; i++)
memcpy(matr + n * i, b[i], sizeof(RK8) * n);
for (i = 0; i < n; i++)
memcpy(b[index[i]], matr + n * i, sizeof(RK8) * n);
free(index);
free(temp);
free(matr);
}
/*
** This routine is due to
bu***@gmx.de (Dieter Buerssner)
** Destroys a, return 0: success, 1: zero pivot, 2: out of mem.
*/
int rgaussi(RK8 ** a, RK8 ** b, int n)
{
int i,
j,
k,
maxj,
t;
RK8 maxel,
pivinv,
tmaxel,
aji;
RK8 *tp,
*ai,
*aj;
/* C99: int ipiv[n]; */
int *ipiv = malloc(n * sizeof *ipiv);
if (ipiv == NULL)
return 2;
for (i = 0; i < n; i++)
ipiv[i] = i;
for (i = 0; i < n; i++) {
maxj = -1;
maxel = 0.0;
/* find pivot element */
for (j = i; j < n; j++)
if ((tmaxel = fabs(a[j][i])) maxel) {
maxj = j;
maxel = tmaxel;
}
if (maxj < 0) {
free(ipiv);
return 1;
}
/* exchange rows */
if (maxj != i) {
/* just exchange pointers for a */
tp = a[maxj];
a[maxj] = a[i];
a[i] = tp;
t = ipiv[maxj];
ipiv[maxj] = ipiv[i];
ipiv[i] = t;
}
ai = a[i];
pivinv = 1.0 / ai[i];
ai[i] = 1.0;
for (k = 0; k < n; k++)
ai[k] *= pivinv;
for (j = 0; j < n; j++) {
if (j != i) {
aj = a[j];
aji = aj[i];
aj[i] = 0.0;
for (k = 0; k < n; k++)
aj[k] -= aji * ai[k];
}
}
}
for (i = 0; i < n; i++)
for (j = 0; j < n; j++)
b[i][ipiv[j]] = a[i][j];
free(ipiv);
return 0;
}