#include
#include
#include
int svd( int m, int n, double **A, double *W, int matu,
         double **U, int matv, double **V, double *rv1 );
double sign( double x, double y );
double checkSVD( int m, int n, double **A, double **U, double *W, double **V );

/***************************************************************
      Singular Value Decomposition subroutine from:                   
      Forsythe, Malcolm, & Moler. "Computer Methods for 
	  Mathematical Computations", Prentice-Hall, Inc., 1977. 
	                    
      This subroutine is a translation to C of the Fortran 
	  subroutine which in turn was a translation of the Algol 
	  procedure SVD, Num. Math. 14, 403-420 (1970) by Golub 
	  and Reinsch, Handbook for Auto. Comp., vol II-Linear 
	  Algebra, 134-151 (1971).

      This subroutine determines the singular value
	                        T 
	  decomposition A =U S V of a real m by n rectangular 
	  matrix. Householder bidiagonalization and a variant of 
	  the QR algorithm are used.
	  
	  Input: 
      m  is the number of rows of A (and U). 
	     
      n  is the number of columns of A (and U) and the 
	     order of V.
	  A  contains the rectangular input matrix mxn to be 
	     decomposed: A[i][j] -- the element of A with row i 
		 and column j
      matu should be set to 1 if the U matrix in the
         decomposition is desired, and to 0 otherwise.
      matv should be set to 1 if the V matrix in the
         decomposition is desired, and to 0 otherwise.
		 
!>>>>>NOTE(!!!): m should be >= n --> Dimensions of arrays: 
	    A[m*n], U[m*n], W[n], V[n*n], rv1[n]   
	  
      Output:
      A  is unaltered (unless overwritten by U or V).
      W contains the n (non-negative) singular values of 
	     A (the diagonal elements S[i][i] of S).  They are 
		 unordered. If an error exit is made, the singular 
		 values should be correct for indices ierr+1,
		 ierr+2,...,n.
      U  contains the matrix U (orthogonal column vectors) 
	     of the decomposition if matu has been set to 1:
		 U[i][j] -- the element of U with row i and column j.  
		 Otherwise U is used as a temporary array.  U may 
		 coincide with A. If an error exit is made, the 
		 columns of U corresponding to indices of correct 
		 singular values should be correct.
	  V  contains the matrix V (orthogonal) of the 
	     decomposition if matv has been set to 1: V[i][j] -- 
		 the element of V with row i and column j. Otherwise 
		 V is not referenced. V may also coincide with A if 
		 U is not needed. If an error exit is made, the 
		 columns of V corresponding to indices of correct 
		 singular values should be correct.
      ierr is set to:
	     zero for normal return,
         k if the k-th singular value has not been
		   determined after 30 iterations, and 
!>>>>>> -1 if the number of rows m in the matrix A is smaller 
		   than the number of its columns.  
      rv1 is a temporary storage array.

      Questions and comments related to the fortran version
	  should be directed to Burton S. Garbow, Mathematics and 
	  Computer Science Div, Argonne National Laboratory, USA

      The fortran version was dated August 1983.
	  Straightforward (non-optimised) C representation: 
	                                July 2006 (Auckland) 
***************************************************************/
int svd( int m,      int n,    double **A, double *W,  int matu,
         double **U, int matv, double **V, double *rv1          ) {

    int       i, ierr, ii, its, i1, j, k, k1, l, l1;
	const int iter_thresh30 = 30;
    double    c, f, g, h, s, scale, tst1, tst2, x, y, z;

    if ( m < n ) {
	  printf( 
	   "\nError in SVD(): the number of rows m=%d < the number of columns n=%d!", 
	        m, n );
	   return -1;
	}
	
	ierr = 0;
	for( i = 0; i < m; i++ ) {
       for( j = 0; j < n; j++ ) {
          U[i][j] = A[i][j];
       }
    }
//     .......... Householder reduction to bidiagonal form ..........
    g = 0.0; scale = 0.0; x = 0.0;

    for( i = 0; i < n; i++ ){                      // loop 300                     
       l = i + 1; 
       rv1[i] = scale * g;
       g = 0.0; s = 0.0; scale = 0.0;
       if (i >= m ) goto label210;
	   for( k = i; k < m; k++ ) {                 // loop 120
		  scale += fabs( U[k][i] );
	   }
	   if (scale == 0.0e0) goto label210;
	   for( k = i; k < m; k++ ){                  // loop 130
		  U[k][i] /= scale;
		  s += ( U[k][i] * U[k][i] );
	   }
	   f = U[i][i];  g = sign( sqrt(s), -f );
	   h = f * g - s; U[i][i] = f - g;
	   if ( i == n - 1 ) goto label190;    
	   for( j = l; j < n; j++ ){                  // loop 150       
		  s = 0.0;
		  for( k = i; k < m; k++ ) {              // loop 140
			 s += ( U[k][i] * U[k][j] ); 
		  }
		  f = s / h;
		  for( k = i; k < m; k++ ){               // loop 150 
			 U[k][j] += ( f * U[k][i] ); 
		  }
	   }
	     
       label190:
	   for( k = i; k < m; k++ ){                  // loop 200
		  U[k][i] *= scale;
	   }
       
	   label210:
	   W[i] = scale * g;
	   g = 0.0; s = 0.0; scale = 0.0;
	   
	   if (i > m - 1 || i == n - 1 ) goto label290;
	   
	   for( k = l; k < n; k++ ){                  // loop 220
		  scale += fabs( U[i][k] ); 
	   }
	   if (scale == 0.0) goto label290;
	   for( k = l; k < n; k++ ){                  // loop 230
		  U[i][k] /= scale;
		  s += ( U[i][k] * U[i][k] );
	   }
	   f = U[i][l];    
	   g = sign( sqrt(s), -f );
	   h = f * g - s;  
	   U[i][l] = f - g;
	   
	   for( k = l; k < n; k++ ){                  // loop 240
		  rv1[k] = U[i][k] / h;
	   }
	   if (i == m - 1 ) goto label270; 
	   for( j = l; j < m; j++ ){                  // loop 260
		  s = 0.0;
		  for( k = l; k < n; k++ ){               // loop 250
			  s += U[j][k] * U[i][k]; 
		  }
		  for( k = l; k < n; k++ ){               // loop 260
			 U[j][k] += s * rv1[k];
		  }
	   }
	   
	   label270:
	   for( k = l; k < n; k++ ){                  // loop 280
		  U[i][k] *= scale;
	   }

       label290:
	   if ( fabs( W[i] ) + fabs( rv1[i] )  > x )
		  x = fabs( W[i] ) + fabs( rv1[i] );
    }                                             // - end of loop  300
//     .......... accumulation of right-hand transformations ..........
	if ( matv == 1 ) {   
//     .......... for i=n step -1 until 1 do -- ..........
	   for( i = n - 1; i >= 0; i-- ){              // loop 400
		  if (i == n - 1 ) goto label390;
		  if (g == 0.0 ) goto label360; 
		  for( j = l; j < n; j++ ){                // loop 320
//     .......... double division avoids possible underflow ..........
			 V[j][i] = ( U[i][j] / U[i][l] ) / g; 
		  }
		  for( j = l; j < n; j++ ){                // loop 350 
			 s = 0.0;
			 for( k = l; k < n; k++ ){             // loop 340
			    s += ( U[i][k] * V[k][j] );
			 }
			 for( k = l; k < n; k++ ){             // loop 350
			    V[k][j] += ( s * V[k][i] ); 
			 }
		  }
          label360:        
		  for( j = l; j < n; j++ ){                 // loop 380
		     V[i][j] = V[j][i] = 0.0;
		  }
	    
		  label390:    
		  V[i][i] = 1.0;
		  g = rv1[i];
		  l = i;
	  }                                            // - end of loop 400 
    }      
// .......... accumulation of left-hand transformations ..........
	if ( matu == 1 ) {
//     ..........for i=min(m,n) step -1 until 1 do -- ..........
		for( i = m - 1; i >= 0; i-- ){          // loop 500
		   l = i + 1;
		   g = W[i];
		   if ( i == n - 1 ) goto label430; 
		   for( j = l; j < n; j++ ){             // loop 420
			  U[i][j] = 0.0;
		   }
		   label430:
		   if ( g == 0.0 ) goto label475; 
		   if ( i == m - 1 ) goto label460; 
		   for( j = l; j < n; j++ ){             // loop 450 
			  s = 0.0;
			  for( k = l; k < m; k++ ){          // loop 440
				 s += ( U[k][i] * U[k][j] );
			  }
//     .......... double division avoids possible underflow ..........
			  f = ( s / U[i][i] ) / g;
			  for( k = i; k < m; k++ ){          // loop 450         				  
				 U[k][j] += ( f * U[k][i] ); 
			  }
		   }                                     // end of loop 450
			 
		   label460:
		   for( j = i; j < m; j++ ){             // loop 470
			  U[j][i] /= g; 
		   }               
		   goto label490;
		 
		   label475:
		   for( j = i; j < m; j++ ){             // loop 480
			  U[j][i] = 0.0;
		   }
             
		   label490:
		   U[i][i] += 1.0;
		}                                        // end of loop 500
	}  
//     .......... diagonalization of the bidiagonal form ..........
	tst1 = x;
//     .......... for k=n step -1 until 1 do -- ..........
     // do 700 kk = 1, n
	for( k = n - 1; k >= 0; k-- ){               // loop 700
	   k1 = k - 1;
	   its = 0;
//     .......... test for splitting.
//                for l=k step -1 until 1 do -- ..........
	   label520: 
	   for( l = k; l >= 0; l-- ){                // loop 530
		  l1 = l - 1;
		  tst2 = tst1 + fabs( rv1[l] );
		  if (tst2 == tst1) goto label565;
//     .......... rv1[0] is always zero, so there is no exit
//                through the bottom of the loop ..........
		  tst2 = tst1 + fabs( W[ l1 ] ); 
		  if (tst2 == tst1) break;
	   }
//     .......... cancellation of rv1(l) if l greater than 0 ..........
	   c = 0.0; s = 1.0;
	   for( i = l; i <= k; i++ ){                // loop 560
		  f = s * rv1[i];        
		  rv1[i] *= c;
		  tst2 = tst1 + fabs(f); 
		  if (tst2 == tst1) break;
		  g = W[i];              
		  h = sqrt( f*f + g*g );
		  W[i] = h;
		  c = g / h;             
		  s = -f / h;
		  if ( matu == 0 ) continue;
		  for( j = 0; j < m; j++ ){              // loop 550
			 y = U[j][l1]; 
			 z = U[j][i]; 
			 U[j][l1] = y * c + z * s;
			 U[j][i] = -y * s + z * c;
          }
	   }                                         // end of loop 560
//     .......... test for convergence ..........
	   label565: 
	   z = W[k];
	   if (l == k) goto label650;
//     .......... shift from bottom 2 by 2 minor ..........
	   if (its == iter_thresh30) {
		  ierr = k; 
//     .......... set error -- no convergence to a
//                singular value after 30 iterations ..........
		  return ierr; 
	   }
	   its++;
	   x = W[l];  
	   y = W[k1]; 
	   g = rv1[k1]; 
	   h = rv1[k]; 
	   f = 0.5 * (((g + z) / h) * ((g - z) / y) + y / h - h / y);
	   g = sqrt( f*f + 1.0 );
	   f = x - (z / x) * z + (h / x) * (y / (f + sign(g, f)) - h);
//     .......... next qr transformation ..........
	   c = 1.0;
       s = 1.0;
	   for( i1 = l; i1 <= k1; i1++ ){             // loop 600
		  i = i1 + 1;
		  g = rv1[i]; y = W[i];
		  h = s * g; 
		  g = c * g;     
		  z = sqrt( f*f + h*h);
		  rv1[i1] = z; 
		  c = f / z;        
		  s = h / z;
		  f = x * c + g * s; 
		  g = -x * s + g * c;
		  h = y * s; 
		  y *= c;
		  if ( matv == 1 ) {
		     for( j = 0; j < n; j++ ){              // loop 570
				x = V[j][i1]; z = V[j][i];
				V[j][i1] = x * c + z * s;
				V[j][i] = -x * s + z * c;
             }
          }			 
		  z = sqrt( f*f + h*h);
		  W[i1] = z;
//     .......... rotation can be arbitrary if z is zero ..........
		  if ( z == 0.0 ) goto label580;
		  c = f / z; 
		  s = h / z;
		  label580:
		  f = c * g + s * y;     
		  x = -s * g + c * y;
		  if ( matu == 0 ) continue;
		  for( j = 0; j < m; j++ ){                 // loop 590
			 y = U[j][i1];    
			 z = U[j][i]; 
			 U[j][i1] = y * c + z * s;
			 U[j][i] = -y * s + z * c;
		  }
	   }                                            // end of loop 600
	   rv1[l] = 0.0; 
	   rv1[k] = f; 
	   W[k] = x;
	   goto label520;
//     .......... convergence ..........
	   label650: 
	   if ( z >= 0.0 ) continue;
//     .......... w(k) is made non-negative ..........
	   W[k] = -z;
	   if ( matv == 0 ) continue;
	   for( j = 0; j < n; j++ ){                    // loop 690
		  V[j][k] = - V[j][k];
	   }
	}                                              // end of loop 700	  
    return ierr;
}

/********************************************************
 This subroutine performs the Fortran function sign(x,y):
 
 Input: real numbers x and y
 Output: +|x| if y >= 0.0 and 
         -|x| if y < 0.0
*********************************************************/
double sign( double x, double y ){
   double res;
   res = fabs( x );
   return ( y >= 0.0 )? res : -res;
}


/****************************************************************
 Test program:
 Out = 0
 A:   2.0  3.0  0.0  2.0  2.0  0.0  3.0  0.0  1.0  3.0  2.0  2.0
 U:  -0.5  0.1 -0.6 -0.4  0.4 -0.3 -0.4  0.6  0.7 -0.6 -0.7  0.3
 W:  6.3801470 1.0755882 2.4772635
 V:  -0.8  0.5  0.4 -0.6 -0.3 -0.8 -0.3 -0.8  0.5
 S:   2.0  3.0  0.0  2.0  2.0 -0.0  3.0 -0.0  1.0  3.0  2.0  2.0
 Sq.error =      0.00000
 ****************************************************************/
int main(){
   double **A, *W, **U, **V, *rv1;
   double ini[4][3] = { 2., 3., 0., 2., 2., 0., 3., 0., 1., 3., 2., 2. };
   int    i, j, ierr, m = 4, n = 3, matu = 1, matv = 1, ierrout;
   double sqerr; 
   
   A = calloc( m, sizeof( double* ) );
   for( i = 0; i < m; i++ ) {
     A[i] = calloc( n, sizeof( double ) );
	 for( j = 0; j < n; j++ ) {
	   A[i][j] = ini[i][j];
	 }
   }
   U = calloc( m, sizeof( double* ) );
   for( i = 0; i < m; i++ )
     U[i] = calloc( n, sizeof( double ) );
   V = calloc( n, sizeof( double* ) );
   for( i = 0; i < n; i++ )
     V[i] = calloc( n, sizeof( double ) );
   W = calloc( n, sizeof( double ) );
   rv1 = calloc( n, sizeof( double ) );
   
   /*******SVD******/
   ierrout = svd( m, n, A, W, matu, U, matv, V, rv1 );
  
   printf( "\nOut = %d", ierrout );
  
   if ( ierrout >= 0 ) {
     printf( "\nA: ");
     for( i = 0; i < m; i++ ) {
	   for( j = 0; j < n; j++ ) {
	     printf( " %4.1lf", A[i][j] );
	   }
     }  
      /*******SVD: matrices ******/
     printf( "\nU: ");
     for( i = 0; i < m; i++ ) {
	   for( j = 0; j < n; j++ ) {
	     printf( " %4.1lf", U[i][j] );
	   }
     }  
     printf( "\nW: ");
     for( i = 0; i < n; i++ ) {
	     printf( " %9.7lf", W[i] );
     }
     printf( "\nV: ");
     for( i = 0; i < n; i++ ) {
	   for( j = 0; j < n; j++ ) {
	      printf( " %4.1lf", V[i][j] );
	   }
     }  
        /*******SVD: squared error of decomposition******/   
     sqerr = checkSVD( m, n, A, U, W,  V ); 
   }
}

/******************************************************
                                T
 Squared error between A and USV
********************************************************/
double checkSVD( int m, int n, double **A, 
                 double **U, double *W, double **V ){
   int i, j, k;
   double dsq, p, s;
   
   dsq = 0.0;
   p   = 0.0;
   printf("\nS: ");
   for( i = 0; i < m; i++ ) {
     for( j = 0; j < n; j++ ) {
       s = 0.0;
	   for( k = 0; k < n; k++ ){
	      s += ( U[i][k] * W[k] * V[j][k] );
	   }
	   printf( " %4.1lf", s );
	   dsq = A[i][j] - s;
	   dsq *= dsq;
	   p += dsq;
	 }
   }
   printf( "\nSq.error = %10.5lf", p );
   
   return p;
}