#include "dfmaxmin.h"

/*  Purpose
    =======
    DGEMM  performs one of the matrix-matrix operations

       C := alpha*op( A )*op( B ) + beta*C,

    where  op( X ) is one of

       op( X ) = X   or   op( X ) = X',

    alpha and beta are scalars, and A, B and C are matrices, with op( A )
    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.

    Storage
    =======
    All matrices are stored column by column: element (i,j) of a matrix X
    with leading dimension LDX lives at x[j*ldx + i] (0-based i and j).

    Arguments
    ==========
    TRANSA - char *
             On entry, TRANSA specifies the form of op( A ) to be used in
             the matrix multiplication as follows:
                TRANSA = 'N' or 'n',  op( A ) = A.
                TRANSA = 'T' or 't',  op( A ) = A'.
                TRANSA = 'C' or 'c',  op( A ) = A'.
             The character comparison itself is delegated to lsame().
             Unchanged on exit.
    TRANSB - char *
             On entry, TRANSB specifies the form of op( B ) to be used in
             the matrix multiplication as follows:
                TRANSB = 'N' or 'n',  op( B ) = B.
                TRANSB = 'T' or 't',  op( B ) = B'.
                TRANSB = 'C' or 'c',  op( B ) = B'.
             Unchanged on exit.
    M      - int.
             On entry,  M  specifies  the number  of rows  of the  matrix
             op( A )  and of the  matrix  C.  M  must  be at least  zero.
             Unchanged on exit.
    N      - int.
             On entry,  N  specifies the number  of columns of the matrix
             op( B ) and the number of columns of the matrix C. N must be
             at least zero.
             Unchanged on exit.
    K      - int.
             On entry,  K  specifies  the number of columns of the matrix
             op( A ) and the number of rows of the matrix op( B ). K must
             be at least  zero.
             Unchanged on exit.
    ALPHA  - double.
             On entry, ALPHA specifies the scalar alpha.
             Unchanged on exit.
    A      - double array of DIMENSION ( LDA, ka ), where ka is
             k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
             Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
             part of the array  A  must contain the matrix  A,  otherwise
             the leading  k by m  part of the array  A  must contain  the
             matrix A.
             Unchanged on exit.
    LDA    - int.
             On entry, LDA specifies the first dimension of A as declared
             in the calling (sub) program. When  TRANSA = 'N' or 'n' then
             LDA must be at least  max( 1, m ), otherwise  LDA must be at
             least  max( 1, k ).
             Unchanged on exit.
    B      - double array of DIMENSION ( LDB, kb ), where kb is
             n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
             Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
             part of the array  B  must contain the matrix  B,  otherwise
             the leading  n by k  part of the array  B  must contain  the
             matrix B.
             Unchanged on exit.
    LDB    - int.
             On entry, LDB specifies the first dimension of B as declared
             in the calling (sub) program. When  TRANSB = 'N' or 'n' then
             LDB must be at least  max( 1, k ), otherwise  LDB must be at
             least  max( 1, n ).
             Unchanged on exit.
    BETA   - double.
             On entry,  BETA  specifies the scalar  beta.  When  BETA  is
             supplied as zero then C need not be set on input.
             Unchanged on exit.
    C      - double array of DIMENSION ( LDC, n ).
             Before entry, the leading  m by n  part of the array  C must
             contain the matrix  C,  except when  beta  is zero, in which
             case C need not be set on entry.
             On exit, the array  C  is overwritten by the  m by n  matrix
             ( alpha*op( A )*op( B ) + beta*C ).
    LDC    - int.
             On entry, LDC specifies the first dimension of C as declared
             in  the  calling  (sub)  program.   LDC  must  be  at  least
             max( 1, m ).
             Unchanged on exit.

    Return value
    ============
    Always 0.  If an argument is invalid, xerbla( "DGEMM ", info ) is
    called with the 1-based position of the first offending argument
    (1, 2, 3, 4, 5, 8, 10 or 13) and the routine returns without
    touching C.

    Level 3 Blas routine.
    -- Written on 8-February-1989.
       Jack Dongarra, Argonne National Laboratory.
       Iain Duff, AERE Harwell.
       Jeremy Du Croz, Numerical Algorithms Group Ltd.
       Sven Hammarling, Numerical Algorithms Group Ltd. */
int dgemm( char *transa, char *transb, int m, int n, int k, double alpha,
           double *a, int lda, double *b, int ldb, double beta, double *c, 
           int ldc )
{
   /* Local variables */
   int        i, j, l, info;
   int        nota, notb;
   double     temp;
   int        nrowa, nrowb;
   extern int lsame( char *, char * );
   extern int xerbla(char *, int );

// -- Set  NOTA  and  NOTB  as true if  A  and  B  respectively are not
//    transposed, and set  NROWA  and  NROWB  as the number of rows of the
//    arrays  A  and  B  as stored (needed to validate LDA and LDB).
   nota = lsame( transa, "N" );
   notb = lsame( transb, "N" );
   nrowa = nota ? m : k;
   nrowb = notb ? k : n;

// -- Test the input parameters.
   info = 0;
   if ( !nota && !lsame( transa, "C" ) && !lsame( transa, "T" ) ) info = 1;
   else if ( !notb && !lsame( transb, "C" ) && !lsame( transb, "T" ) ) info = 2;
   else if ( m < 0 ) info = 3;
   else if ( n < 0 ) info = 4;
   else if ( k < 0 ) info = 5;
   else if ( lda < max( 1, nrowa ) ) info = 8;
   else if ( ldb < max( 1, nrowb ) ) info = 10;
   else if ( ldc < max( 1, m ) ) info = 13;
   if ( info != 0 ) {
      xerbla( "DGEMM ", info );
      return 0;
   }

// -- Quick return if possible: C is empty, or the update is the identity
//    (no product contribution and beta == 1).
   if ( m == 0 || n == 0 ||
        ( ( alpha == 0.0 || k == 0 ) && beta == 1.0 ) ) return 0;

// -- And if alpha is 0.0 only the scaling  C := beta*C  remains.
//    beta == 0 assigns instead of multiplying so that C need not be
//    initialised on entry.
   if ( alpha == 0.0 ) {
      if ( beta == 0.0 ) {
         for( j = 0; j < n; ++j ) {
            for( i = 0; i < m; ++i ) {
               c[j*ldc+i] = 0.0;
            }
         }
      }
      else {
         for( j = 0; j < n; ++j ) {
            for( i = 0; i < m; ++i ) {
               c[j*ldc+i] *= beta;
            }
         }
      }
      return 0;
   }

// -- Start the operations. 
   if ( notb ) {
      if ( nota ) {

// --    Form  C := alpha*A*B + beta*C.
//       Column j of C is scaled by beta, then accumulates
//       alpha*B(l,j) times column l of A for every l.
         for( j = 0; j < n; ++j ) {
            if ( beta == 0.0 ) {
               for( i = 0; i < m; ++i ) {
                  c[j*ldc+i] = 0.0;
               }
            }
            else if ( beta != 1.0 ) {
               for( i = 0; i < m; ++i ) {
                  c[j*ldc+i] *= beta;
               }
            }
            for( l = 0; l < k; ++l ) {
               // A zero multiplier contributes nothing; skip the column.
               if ( b[j*ldb+l] != 0.0 ) {
                  temp = alpha*b[j*ldb+l];
                  for( i = 0; i < m; ++i ) {
                     c[j*ldc+i] += temp*a[l*lda+i];
                  }
               }
            }
         }
      }
      else {

// --    Form  C := alpha*A'*B + beta*C 
//       C(i,j) is the dot product of column i of A with column j of B.
         for( j = 0; j < n; ++j ) {
            for( i = 0; i < m; ++i ) {
               temp = 0.0;
               for( l = 0; l < k; ++l ) {
                  temp += a[i*lda+l]*b[j*ldb+l];
               }
               if ( beta == 0.0 ) c[j*ldc+i] = alpha*temp;
               else c[j*ldc+i] = alpha*temp + beta*c[j*ldc+i];
            }
         }
      }
   }
   else {
      if ( nota ) {

// --    Form  C := alpha*A*B' + beta*C
//       Same column-update scheme as A*B, with B(j,l) as the multiplier.
         for( j = 0; j < n; ++j ) {
            if ( beta == 0.0 ) {
               for( i = 0; i < m; ++i ) {
                  c[j*ldc+i] = 0.0;
               }
            }
            else if ( beta != 1.0 ) {
               for( i = 0; i < m; ++i ) {
                  c[j*ldc+i] = beta*c[j*ldc+i];
               }
            }
            for( l = 0; l < k; ++l ) {
               // A zero multiplier contributes nothing; skip the column.
               if ( b[l*ldb+j] != 0.0 ) {
                  temp = alpha*b[l*ldb+j];
                  for( i = 0; i < m; ++i ) {
                     c[j*ldc+i] += temp*a[l*lda+i];
                  }
               }
            }
         }
      }
      else {

// --    Form  C := alpha*A'*B' + beta*C
//       C(i,j) is the dot product of column i of A with row j of B.
         for( j = 0; j < n; ++j ) {
            for( i = 0; i < m; ++i ) {
               temp = 0.0;
               for( l = 0; l < k; ++l ) {
                  temp += a[i*lda+l]*b[l*ldb+j];
               }
               if ( beta == 0.0 ) c[j*ldc+i] = alpha*temp;
               else c[j*ldc+i] = alpha*temp + beta*c[j*ldc+i];
            }
         }
      }
   }
   return 0;
} // -- End of dgemm
