/***************************************************************************
 *   Copyright (C) 2006 by Jeziorski, Weintraub, Benkard and Van Roy       *
 *   przemekj@stanford.edu                                                 *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/
#ifndef MTLSPARSEMATRIX_H
#define MTLSPARSEMATRIX_H

#include<iostream>
#include<matrixMTL.h>
#include<vectorMTL.h>
#include<stdio.h>

namespace MTL {

#ifdef USE_MUMPS

// C interface of the MUMPS solver plus hand-written prototypes for the three
// MPI entry points used by the solver methods below. The real mpi.h include
// is left commented out, so these prototypes are the only MPI declarations
// visible in this header.
extern "C"
{
#include "dmumps_c.h"
      typedef int MPI_Comm;
      int MPI_Init(int *pargc, char ***pargv);
      int MPI_Comm_rank(int  comm, int  *rank);
      int MPI_Finalize(void);
//#include "mpi.h"
}
// Values written to DMUMPS_STRUC_C::job (JOB_INIT, JOB_END) and
// DMUMPS_STRUC_C::comm_fortran (USE_COMM_WORLD) by the solver methods.
#define JOB_INIT -1
#define JOB_END -2
#define USE_COMM_WORLD -987654

#endif //USE_MUMPS

// Forward declarations of the class templates referenced by this header.
template <class T> class MatrixMTL;
template <class T> class SparseMatrixMTL;
template <class T> class VectorMTL;

// Declared ahead of the class so it can be named in the friend declaration there.
template <typename T> std::ostream& operator<<(std::ostream&, const SparseMatrixMTL<T>&);
// template <typename T> inline const SparseMatrixMTL<T> operator+(const SparseMatrixMTL<T>&, const SparseMatrixMTL<T>&);
// template <typename T> inline const SparseMatrixMTL<T> operator+(const SparseMatrixMTL<T>&, const T);
// template <typename T> inline const SparseMatrixMTL<T> operator+(const T, const SparseMatrixMTL<T>&);
// template <typename T> inline const SparseMatrixMTL<T> operator-(const SparseMatrixMTL<T>&, const SparseMatrixMTL<T>&);
// template <typename T> inline const SparseMatrixMTL<T> operator-(const SparseMatrixMTL<T>&, const T);
// template <typename T> inline const SparseMatrixMTL<T> operator-(const T, const SparseMatrixMTL<T>&);
// template <typename T> inline const SparseMatrixMTL<T> operator*(const SparseMatrixMTL<T>&, const SparseMatrixMTL<T>&);
// template <typename T> inline const SparseMatrixMTL<T> operator*(const SparseMatrixMTL<T>&, const T);
// template <typename T> inline const SparseMatrixMTL<T> operator*(const T, const SparseMatrixMTL<T>&);
// template <typename T> inline const SparseMatrixMTL<T> operator/(const SparseMatrixMTL<T>&, const SparseMatrixMTL<T>&);
// template <typename T> inline const SparseMatrixMTL<T> operator/(const SparseMatrixMTL<T>&, const T);
// template <typename T> inline const SparseMatrixMTL<T> operator/(const T, const SparseMatrixMTL<T>&);

// Norm helpers; declared before the class because SparseMatrixMTL names them as friends.
template <typename T> T diffNorm(const SparseMatrixMTL<T> &input1, const SparseMatrixMTL<T> &input2);
template <typename T> T norm(const SparseMatrixMTL<T> &input);

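/**
Sparse matrix stored as coordinate triplets: rows[k], columns[k] and data[k]
describe the k-th stored element. Elements are expected in row-major order
(sorted by row, then by column within a row); the sparse-sparse product
converts them to compressed sparse row (CSR) arrays on the fly.

	@author Jeziorski, Weintraub, Benkard and Van Roy <przemekj@stanford.edu>
*/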
template <class T>
class SparseMatrixMTL {
  friend class MatrixMTL<T>;

private:
  int size;       // number of stored elements (length of data, columns and rows)
public:
  T *data;        // stored values, one per element
  int nrows;      // number of rows of the matrix
  int ncolumns;   // number of columns of the matrix
  int *columns;   // column index of each stored element (the MUMPS code paths add 1 to it)
  int *rows;      // row index of each stored element (the MUMPS code paths add 1 to it)

  /**
	Overloaded IOStream operator to display a Sparse Matrix
  */
  friend std::ostream& operator<< <T>(std::ostream &, const SparseMatrixMTL<T> &);

/**
  Empty constructor
*/
  SparseMatrixMTL() {
    data = NULL;
    columns = NULL;
    rows = NULL;
//    cout << "test"<< endl;
  }

/** Defaulf constructor
    \param nrows_init Numer of rows
    \param ncolumns_init Numer of columns
    \param size_init Number of nonzero elements
*/

  SparseMatrixMTL(const int nrows_init, const int ncolumns_init, const int size_init) {
    nrows = nrows_init;
    ncolumns = ncolumns_init;
    size = size_init;

    rows = new int[size];
    columns = new int[size];
    data = new T[size];
  }

  /** Direct access to the array of stored values (one entry per stored element). */
  inline T* getData() {
    return this->data;
  }

  int getSize() {
    return size;
  }

  int getNRows() {
    return nrows;
  }

  int getNColumns() {
    return ncolumns;
  }


  /**
    Adds the stored values of \p input to the stored values of this matrix.
    Values are paired by storage position, so both operands are expected to
    have the same sparsity structure; this is not checked.
  */
  inline SparseMatrixMTL<T> &operator +=(const SparseMatrixMTL<T> &input) {
    const T *rhs = input.data;
    int k = 0;
    while (k < size) {
      data[k] += rhs[k];
      ++k;
    }
    return *this;
  }

  /**
    Subtracts the stored values of \p input from the stored values of this
    matrix. Values are paired by storage position, so both operands are
    expected to have the same sparsity structure; this is not checked.
  */
  inline SparseMatrixMTL<T> &operator -=(const SparseMatrixMTL<T> &input) {
    const T *rhs = input.data;
    int k = 0;
    while (k < size) {
      data[k] -= rhs[k];
      ++k;
    }
    return *this;
  }

  /** Divides every stored value by the scalar \p input. */
  inline SparseMatrixMTL<T> &operator /=(const T input) {
    int k = 0;
    while (k < size) {
      data[k] /= input;
      ++k;
    }
    return *this;
  }

/**
  Copy constructor
*/
  SparseMatrixMTL(const SparseMatrixMTL<T> &input) {
    nrows = input.nrows;
    ncolumns = input.ncolumns;
    size = input.size;

    rows = new int[size];
    columns = new int[size];
    data = new T[size];

    for(int i=0;i<size;i++) {
      rows[i] = input.rows[i];
      columns[i] = input.columns[i];
      data[i] = input.data[i];
    }
  }

  /**
    Re-dimensions the matrix and allocates fresh storage.
    Any arrays held so far are released; the new arrays are not filled in.
    \param nrows_init Number of rows
    \param ncolumns_init Number of columns
    \param size_init Number of stored (nonzero) elements
  */
  void init(const int nrows_init, const int ncolumns_init, const int size_init) {
    // delete[] on a null pointer does nothing, so no explicit checks are required
    delete[] rows;
    delete[] columns;
    delete[] data;

    size = size_init;
    nrows = nrows_init;
    ncolumns = ncolumns_init;

    rows = new int[size_init];
    columns = new int[size_init];
    data = new T[size_init];
  }

/** Convert to band matrix
    \param n Order of the matrix
    \param band Number of elements in the band; this number must be odd
*/
  SparseMatrixMTL(const int n, const int band) {
    nrows = n;
    ncolumns = n;
    int half_band = (band-1)/2;

    size = (band+half_band)*half_band+(n-2*half_band)*band;

    rows = new int[size];
    columns = new int[size];
    data = new T[size];

    // Create first row
    int element = 0;
    for(int irow=0;irow<half_band;irow++) {
      for(int i=0;i<half_band+1+irow;i++) {
        rows[element]=irow;
        columns[element] = i;
        element++;
      }
    }

    // Middle rows
    int temp = n-half_band;
    for(int irow=half_band, j=0;irow<temp;irow++, j++) {
      for(int i=0;i<band;i++) {
        rows[element]=irow;
        columns[element] = i+j;
        element++;
      }
    }

    // Last row
    for(int irow=0;irow<half_band;irow++) {
      for(int i=n-band+irow+1;i<n;i++) {
        rows[element]=irow+temp;
        columns[element] = i;
        element++;
      }
    }
  }

  /**
    Turns this object into the sparsity pattern of an n x n band matrix.
    Existing storage is released, new storage is allocated, and the row and
    column indices of every in-band position are written in row-major order.
    The values themselves are not set.
    \param n Order of the matrix
    \param band Number of elements in the band; this number must be odd
  */
  void bandMatrix(const int n, const int band) {
    const int half = (band-1)/2;

    nrows = n;
    ncolumns = n;
    size = (band+half)*half+(n-2*half)*band;

    // delete[] on a null pointer does nothing
    delete[] rows;
    delete[] columns;
    delete[] data;

    rows = new int[size];
    columns = new int[size];
    data = new T[size];

    int k = 0;

    // Leading rows: the band is cut off by the left edge of the matrix
    for(int r=0;r<half;r++) {
      const int width = half+1+r;
      for(int c=0;c<width;c++,k++) {
        rows[k]=r;
        columns[k]=c;
      }
    }

    // Interior rows: a full band whose first column moves right by one per row
    const int tail_start = n-half;
    for(int r=half;r<tail_start;r++) {
      const int first_col = r-half;
      for(int c=first_col;c<first_col+band;c++,k++) {
        rows[k]=r;
        columns[k]=c;
      }
    }

    // Trailing rows: the band is cut off by the right edge of the matrix
    for(int r=tail_start;r<n;r++) {
      for(int c=r-band+half+1;c<n;c++,k++) {
        rows[k]=r;
        columns[k]=c;
      }
    }
  }


/** 
  Get pointer to columns array
*/

  int *getColumns() {
    return columns;
  }

/** 
  Get pointer to rows array
*/

  int *getRows() {
    return rows;
  }

  /**
    Reference to the stored value at storage position \p input
    (a position in the value array, not a row or column number). No bounds check.
  */
  inline T &operator[](const int input) {
    return *(data+input);
  }

  /** Multiplies every stored value by the scalar \p input. */
  inline SparseMatrixMTL<T> &operator *=(const T input) {
    int k = 0;
    while (k < size) {
      data[k] *= input;
      ++k;
    }
    return *this;
  }

/**
	Returns one row of the sparse matrix expanded into a dense vector of
	length ncolumns; positions without a stored element are set to 0.
	\param number Row number, compared against the stored row indices
*/
  const VectorMTL<T> getRow(const int number);

/** Solve system of equations Ax=b
  \param b Right hand side
  \param solution Solution vector. It is first overwritten with a copy of b;
         with USE_MUMPS its data buffer is then handed to the solver as the
         right-hand-side buffer.
  \return With USE_MUMPS: the value returned by MPI_Finalize().
          Otherwise: the value returned by MatrixMTL<T>::solve() on a dense copy.
*/
  int solve(VectorMTL<T> &b, VectorMTL<T> &solution) {
    #ifdef USE_MUMPS
    DMUMPS_STRUC_C id;

    // Heap buffers: variable-length arrays are not standard C++ and would
    // exhaust the stack for matrices with many stored elements.
    int *irn = new int[size];
    int *jcn = new int[size];
    double *a = new double[size];
    double *rhs;

    // Index arrays passed to MUMPS are shifted by one relative to rows/columns.
    for(int i=0;i<size;i++) {
      irn[i] = rows[i]+1;
      jcn[i] = columns[i]+1;
      a[i] = data[i];
    }

    int MPI_COMM_WORLD=1;
    int myid, ierr;

    // Writable, NULL-terminated argv for MPI_Init; a string literal must not
    // be bound to a non-const char*.
    int argc=1;
    char name[] = "MTL";
    char *argv_storage[] = { name, NULL };
    char **argv = argv_storage;

    // NOTE(review): MPI is initialised and finalised on every call; confirm
    // that the MPI implementation in use tolerates repeated solves.
    ierr = MPI_Init(&argc, &argv);
    ierr = MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    solution=b;
    rhs = solution.getData();

    id.job=JOB_INIT; id.par=1; id.sym=0;id.comm_fortran=USE_COMM_WORLD;
    dmumps_c(&id);

    if (myid == 0) {
      id.n = nrows; id.nz = size; id.irn=irn; id.jcn=jcn;
      id.a = a; id.rhs = rhs;
    }

    #define ICNTL(I) icntl[(I)-1] 

    id.ICNTL(1)=-1; id.ICNTL(2)=-1; id.ICNTL(3)=-1; id.ICNTL(4)=0;

    id.job=6;
    dmumps_c(&id);
    id.job=JOB_END; dmumps_c(&id); 
    ierr = MPI_Finalize();

    delete[] a;
    delete[] jcn;
    delete[] irn;

    return ierr;
    #else
    solution=b;
    MatrixMTL<T> temp(*this);
    return temp.solve(solution);
    #endif
  }

  /**
  	Norm of the sparse matrix.
  	Declared as a friend so that it can read the private element count.
   */
  friend T norm<T>(const SparseMatrixMTL<T> &);

  /**
  	Norm of the difference of two sparse matrices.
        They have to have the same sparsity structure.
        Declared as a friend so that it can read the private element count.
	\param input1 First matrix
        \param input2 Second matrix
   */
  friend T diffNorm<T>(const SparseMatrixMTL<T> &input1, const SparseMatrixMTL<T> &input2);

  /**
	Invert the matrix
	\param solution Dense matrix receiving the result. With USE_MUMPS it is
	       reset with eye() and each consecutive ncolumns-long slice of its
	       data is passed to the solver as one right-hand side.
	\return With USE_MUMPS: 1 if MPI_Finalize() returned 0, otherwise 0.
	        Otherwise: the value returned by MatrixMTL<T>::inverse() on a dense copy.
  */
  int inverse(MatrixMTL<T> &solution) {
    #ifdef USE_MUMPS
    DMUMPS_STRUC_C id;

    // Heap buffers: variable-length arrays are not standard C++ and would
    // exhaust the stack for matrices with many stored elements.
    int *irn = new int[size];
    int *jcn = new int[size];
    double *a = new double[size];

    // Row and column indices are swapped here, i.e. the transposed pattern is
    // handed to the solver. NOTE(review): presumably this compensates for the
    // storage order of MatrixMTL; confirm against MatrixMTL.
    for(int i=0;i<size;i++) {
      irn[i] = columns[i]+1;
      jcn[i] = rows[i]+1;
      a[i] = data[i];
    }

    int MPI_COMM_WORLD=1;
    int myid, ierr;

    // Writable, NULL-terminated argv for MPI_Init; a string literal must not
    // be bound to a non-const char*.
    int argc=1;
    char name[] = "MTL";
    char *argv_storage[] = { name, NULL };
    char **argv = argv_storage;

    ierr = MPI_Init(&argc, &argv);
    ierr = MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    id.job=JOB_INIT; id.par=1; id.sym=0;id.comm_fortran=USE_COMM_WORLD;
    dmumps_c(&id);

    if (myid == 0) {
      id.n = nrows; id.nz = size; id.irn=irn; id.jcn=jcn;
      id.a = a;
    }

    #define ICNTL(I) icntl[(I)-1] 

    id.ICNTL(1)=-1; id.ICNTL(2)=-1; id.ICNTL(3)=-1; id.ICNTL(4)=0;

    id.job=4;
    dmumps_c(&id);

    solution.eye();

    // One solver call per ncolumns-long slice of the identity matrix.
    for(int i=0;i<solution.size;i+=ncolumns) {
      id.rhs=solution.data+i;
      id.job = 3;
      dmumps_c(&id);
    }

    id.job=JOB_END; dmumps_c(&id); 
    ierr = MPI_Finalize();

    delete[] a;
    delete[] jcn;
    delete[] irn;

    if(ierr==0) {
      return 1;
    } else {
      return 0;
    }

    #else
    MatrixMTL<T> temp(*this);
    return temp.inverse(solution);
    #endif
  }

  /**
    Solve the transposed system A'x=b.
    \param b Right hand side
    \param solution Solution vector. It is first overwritten with a copy of b;
           with USE_MUMPS its data buffer is then handed to the solver as the
           right-hand-side buffer.
    \return With USE_MUMPS: the value returned by MPI_Finalize().
            Otherwise: the value returned by MatrixMTL<T>::solve() on a
            transposed dense copy.
  */
  int primesolve(VectorMTL<T> &b, VectorMTL<T> &solution) {
    #ifdef USE_MUMPS
    DMUMPS_STRUC_C id;

    // new[]/delete[] instead of malloc/free: no dependency on <stdlib.h> and
    // no unchecked null return.
    int *irn = new int[size];
    int *jcn = new int[size];
    double *a = new double[size];
    double *rhs;

    // Swapping rows and columns hands the transposed matrix to the solver.
    for(int i=0;i<size;i++) {
      irn[i] = columns[i]+1;
      jcn[i] = rows[i]+1;
      a[i] = data[i];
    }

    int MPI_COMM_WORLD=1;
    int myid, ierr;

    // argv[0] must be a terminated string and argv itself NULL-terminated;
    // the former one-byte buffer had no terminator.
    int argc=1;
    char name[] = "M";
    char *argv_storage[] = { name, NULL };
    char **argv = argv_storage;

    ierr = MPI_Init(&argc, &argv);
    ierr = MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    solution=b;
    rhs = solution.getData();

    id.job=JOB_INIT; id.par=1; id.sym=0;id.comm_fortran=USE_COMM_WORLD;
    dmumps_c(&id);

    if (myid == 0) {
      id.n = nrows; id.nz = size; id.irn=irn; id.jcn=jcn;
      id.a = a; id.rhs = rhs;
    }

    #define ICNTL(I) icntl[(I)-1] 

    id.ICNTL(1)=-1; id.ICNTL(2)=-1; id.ICNTL(3)=-1; id.ICNTL(4)=0;

    id.job=6;
    dmumps_c(&id);
    id.job=JOB_END; dmumps_c(&id); 
    ierr = MPI_Finalize();

    delete[] a;
    delete[] jcn;
    delete[] irn;

    return ierr;
    #else
    MatrixMTL<T> temp(*this);
    temp.transpose();
    solution=b;
    return temp.solve(solution);
    #endif
  }

  /**
    Computes a distribution vector for this matrix by working on a copy with
    1 subtracted from every diagonal element and passing that copy to
    statDistr2().
    The copy uses the element type T of this matrix (it used to be hard-coded
    to double, which only compiled for T = double).
    \param solution Receives the result
    \return The value returned by statDistr2()
  */
  int statDistr(VectorMTL<T> &solution) {
    SparseMatrixMTL<T> temp(*this);
    temp.addDiag(-1);
    return temp.statDistr2(solution);
  }

  /**
    Second stage of statDistr(): solves a linear system built from this matrix
    and writes the normalised, non-negative result to \p solution.

    With USE_MUMPS the system consists of the transposed matrix without the
    stored elements of its last column, plus one extra equation in row nrows
    whose coefficients are all 1 and whose right-hand side is 1 (all other
    right-hand sides are 0). Negative entries of the result are clamped to 0
    and the vector is divided by the sum of the remaining entries.

    \param solution Receives the result; must already have at least nrows entries
    \return With USE_MUMPS: always 1.
            Otherwise: the value returned by MatrixMTL<T>::statDistr() on a dense copy.
  */
  int statDistr2(VectorMTL<T> &solution) {
    #ifdef USE_MUMPS
    DMUMPS_STRUC_C id;

    // Room for every stored element plus the ncolumns entries of the extra row.
    int *irn = new int[size+ncolumns];
    int *jcn = new int[size+ncolumns];
    double *a = new double[size+ncolumns];
    double *rhs;

    // Transposed copy of the stored elements, skipping the last column.
    int nz=0;
    for(int ii=0;ii<size;ii++) {
      if(columns[ii]<ncolumns-1) {
        irn[nz] = columns[ii]+1;
        jcn[nz] = rows[ii]+1;
        a[nz] = data[ii];
        nz++;
      }
    }

    // Extra equation: a row of ones; the matching right-hand sides are zeroed
    // here and the last one is set to 1 below.
    for(int j=1;j<ncolumns;j++) {
      irn[nz] = nrows;
      jcn[nz] = j;
      a[nz] = 1;
      solution[j-1] = 0;
      nz++;
    }

    irn[nz] = nrows;
    jcn[nz] = ncolumns;
    a[nz] = 1;
    solution[nrows-1] = 1;
    nz++;

    int MPI_COMM_WORLD=1;
    int myid, ierr;

    // argv[0] must be a terminated string and argv itself NULL-terminated;
    // the former one-byte buffer had no terminator.
    int argc=1;
    char name[] = "M";
    char *argv_storage[] = { name, NULL };
    char **argv = argv_storage;

    ierr = MPI_Init(&argc, &argv);
    ierr = MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    rhs = solution.getData();

    id.job=JOB_INIT; id.par=1; id.sym=0;id.comm_fortran=USE_COMM_WORLD;
    dmumps_c(&id);

    if (myid == 0) {
      id.n = nrows; id.nz = nz; id.irn=irn; id.jcn=jcn;
      id.a = a; id.rhs = rhs;
    }

    #define ICNTL(I) icntl[(I)-1] 

    id.ICNTL(1)=-1; id.ICNTL(2)=-1; id.ICNTL(3)=-1; id.ICNTL(4)=0;

    id.job=6;
    dmumps_c(&id);
    id.job=JOB_END; dmumps_c(&id); 
    ierr = MPI_Finalize();

    // Sometimes if probability is close to zero it is negative because of
    // numerical errors. This makes it zero and scales the others accordingly.
    double sum=0;
    for(int k=0;k<solution.size;k++) {
      if(solution[k]<0) {
        solution[k]=0;
      } else {
        sum+=solution[k];
      }
    }

    solution/=sum;

    delete[] a;
    delete[] jcn;
    delete[] irn;
    return 1;
    #else
    MatrixMTL<T> temp(*this);
    return temp.statDistr(solution);
    #endif
  }

/*int solve(VectorMTL<T> &b, double *rhs) {
    #ifdef USE_MUMPS
    DMUMPS_STRUC_C id;
    int irn[size];
    int jcn[size];


    double a[size];

    for(int i=0;i<size;i++) {
      irn[i] = rows[i]+1;
      jcn[i] = columns[i]+1;
      a[i] = data[i];
    }

    int MPI_COMM_WORLD;
    int myid, ierr;

    int argc=1;
    char **argv = 0;

    ierr = MPI_Init(&argc, &argv);
    ierr = MPI_Comm_rank(MPI_COMM_WORLD, &myid);

//    a[0]=1.0;a[1]=2.0;
//    rhs[0]=1.0;rhs[1]=4.0;
    for(int i=0;i<b.getSize();i++) {
      rhs[i]=b[i];
    }

    id.job=JOB_INIT; id.par=1; id.sym=0;id.comm_fortran=USE_COMM_WORLD;
    dmumps_c(&id);

    if (myid == 0) {
      id.n = nrows; id.nz = size; id.irn=irn; id.jcn=jcn;
      id.a = a; id.rhs = rhs;
    }

    #define ICNTL(I) icntl[(I)-1] 

    id.ICNTL(1)=-1; id.ICNTL(2)=-1; id.ICNTL(3)=-1; id.ICNTL(4)=0;

    id.job=6;
    dmumps_c(&id);
    id.job=JOB_END; dmumps_c(&id); 

    return  MPI_Finalize();
    #endif
}*/


/**
  Destructor: releases the value, column and row arrays.
*/
  ~SparseMatrixMTL() {
     // delete[] on a null pointer does nothing, so an empty matrix needs no special case
     delete[] data;
     delete[] columns;
     delete[] rows;
  }

  /**
    Product of this matrix with another sparse matrix, returned as a new
    sparse matrix. Defined outside the class body.
    \param input Right-hand factor
  */
  SparseMatrixMTL<T> dot(const SparseMatrixMTL<T> &input);

  /**
    Matrix-vector product A * input.
    \param input Vector indexed by column number
    \return Vector with one entry per row of the matrix. The result is sized
            by nrows because it is indexed by row; sizing it by ncolumns wrote
            past the end whenever the matrix had more rows than columns.
  */
  VectorMTL<T> dot(const VectorMTL<T> &input) {
    VectorMTL<T> output(nrows);
    output=0;
    for(int i=0;i<size;i++) {
      output.data[rows[i]]+=data[i]*input.data[columns[i]];
    }
    return output;
  }

  /**
    Product of the transposed matrix with a vector (A' * input).
    \param input Vector indexed by row number
    \return Vector with one entry per column of the matrix
  */
  VectorMTL<T> primedot(const VectorMTL<T> &input) {
    VectorMTL<T> result(ncolumns);
    result=0;

    int k=0;
    while(k<size) {
      const int r = rows[k];
      const int c = columns[k];
      result.data[c]+=data[k]*input.data[r];
      k++;
    }

    return result;
  }

  /**
    Copy assignment: makes this matrix a deep copy of \p input.
    Self-assignment is a no-op (the old code released the arrays before
    copying from them), and the function now returns *this as its signature
    promises (the old code had no return statement).
    \param input Matrix to copy
    \return Reference to this matrix
  */
  inline SparseMatrixMTL<T> &operator=(const SparseMatrixMTL<T> &input) {
    if(this==&input) {
      return *this;
    }

    // Build the copies first, release the old arrays afterwards.
    const int new_size = input.size;
    T *new_data = new T[new_size];
    int *new_columns = new int[new_size];
    int *new_rows = new int[new_size];

    for(int i=0;i<new_size;i++) {
      new_data[i]=input.data[i];
      new_columns[i]=input.columns[i];
      new_rows[i]=input.rows[i];
    }

    delete[] data;
    delete[] columns;
    delete[] rows;

    data = new_data;
    columns = new_columns;
    rows = new_rows;

    size=new_size;
    nrows=input.nrows;
    ncolumns=input.ncolumns;

    return *this;
  }

  /**
    Sets every stored value to the scalar \p input; the sparsity structure is
    left unchanged.
    \return Reference to this matrix (the old code had no return statement)
  */
  inline SparseMatrixMTL<T> &operator=(const T input) {
    for(int i=0;i<size;i++) {
      data[i]=input;
    }
    return *this;
  }

   void addDiag(const T add) {
     int diag = 0;
     for(int i=0;i<size;i++) {
       if((rows[i]==diag) && (columns[i]==diag)) {
         data[i]+=add;
//         cout << "found " << diag << endl;
         diag++;
       } else if((rows[i]==diag) && (columns[i]>diag)) {
//         cout << "not found" << diag << endl;
         int new_size=size+1;
         int *rows_temp = new int[new_size];
         int *columns_temp = new int[new_size];
         double *data_temp = new double[new_size];
         for(int j=0;j<i;j++) {
           rows_temp[j]=rows[j];
           columns_temp[j]=columns[j];
           data_temp[j]=data[j];
         }

         rows_temp[i]=diag;
         columns_temp[i]=diag;
         data_temp[i]=add;

         for(int j=i, j2=i+1;j<size;j++,j2++) {
           rows_temp[j2]=rows[j];
           columns_temp[j2]=columns[j];
           data_temp[j2]=data[j];
         }

         size=new_size;

         diag++;

         delete[] rows;
         delete[] data;
         delete[] columns;

         rows = rows_temp;
         data = data_temp;
         columns = columns_temp;
//         cout << *this;
//         getchar();
       } else if(((i<size-1) && (rows[i]==diag) && (rows[i+1]>diag) && (columns[i]<diag))
                 || ((i==size-1) && (rows[i]==diag) && (columns[i]<diag))) {
//         cout << "2 found" << diag << " " << rows[i] << " "<<  columns[i]<< endl;
         int new_size=size+1;

         int *rows_temp = new int[new_size];
         int *columns_temp = new int[new_size];
         double *data_temp = new double[new_size];
         for(int j=0;j<=i;j++) {
           rows_temp[j]=rows[j];
           columns_temp[j]=columns[j];
           data_temp[j]=data[j];
         }


         rows_temp[i+1]=diag;
         columns_temp[i+1]=diag;
         data_temp[i+1]=add;

         for(int j=i+1, j1=i+2;j<size;j++,j1++) {
           rows_temp[j1]=rows[j];
           columns_temp[j1]=columns[j];
           data_temp[j1]=data[j];
         }

         size=new_size;

         diag++;

         delete[] rows;
         delete[] data;
         delete[] columns;

         rows = rows_temp;
         data = data_temp;
         columns = columns_temp;
//         cout << *this;
//         getchar();
       }
     }
   }
};

/**
  Prints the matrix in dense form: one line per row, every entry followed by
  a tab, with "0" for positions that have no stored element.
  Stored elements are consumed in storage order, so they must be sorted by
  row and then by column for every element to be printed.
  std::endl is qualified explicitly so the header does not depend on a
  using-directive from another file.
*/
template <typename T> 
std::ostream& operator<< (std::ostream &output, const SparseMatrixMTL<T> &p) {
  int k=0;
  for (int i=0; i<p.nrows; i++) {
    for (int j=0; j<p.ncolumns; j++) {
      if ((k<p.size) && (p.columns[k]==j) && (p.rows[k]==i)) {
        output << p.data[k] << "\t";
        k++;
      } else {
        output << "0\t";
      }
    }
    output << std::endl;
  }

  return output;
}

/**
  Expands one row of the matrix into a dense vector of length ncolumns.
  The first stored element carrying the requested row number is located;
  from there the stored elements are consumed in order while the columns are
  walked left to right, writing 0 wherever no element is stored.
  If the row has no stored element the whole vector is set to 0.
*/
template <class T>
const VectorMTL<T> SparseMatrixMTL<T>::getRow(const int number) {
  VectorMTL<T> output(ncolumns);

  // Locate the first stored element of the requested row
  int first = 0;
  while(first<size && rows[first]!=number) {
    first++;
  }

  // Did not find anything
  if(first==size) {
    output=0;
    return output;
  }

  int k = first;
  for(int j=0;j<ncolumns;j++) {
    const bool no_entry = (k==size) || (columns[k]>j) || (rows[k]>number);
    if(no_entry) {
      output[j]=0;
    } else {
      output[j]=data[k];
      k++;
    }
  }

  return output;
}

/**
  Builds a 1-based CSR row-pointer array from the row indices of elements
  stored in row-major order: row_ptr[r] is 1 plus the number of elements in
  rows before r, and row_ptr[n_rows] is nnz+1. Rows without elements get the
  same pointer as the following row instead of being left unset.
  \param coo_rows Row index of each stored element
  \param nnz Number of stored elements
  \param n_rows Number of rows of the matrix
  \param row_ptr Output array of n_rows+1 entries
*/
inline void cooRowsToCsrPointers(const int *coo_rows, const int nnz, const int n_rows, int *row_ptr) {
  for(int r=0;r<=n_rows;r++) {
    row_ptr[r]=0;
  }
  // Count the elements of row r in slot r+1 ...
  for(int k=0;k<nnz;k++) {
    row_ptr[coo_rows[k]+1]++;
  }
  // ... and turn the counts into 1-based start positions.
  row_ptr[0]=1;
  for(int r=1;r<=n_rows;r++) {
    row_ptr[r]+=row_ptr[r-1];
  }
}

/**
  Sparse matrix product this * input, computed by amub_.

  Both operands are converted to 1-based CSR arrays, the product is computed
  into buffers large enough for a completely dense result, and the result is
  converted back to coordinate storage.

  Fixes over the previous version:
  - row pointers are defined for rows without stored elements;
  - amub_ receives the column count of \p input (the column dimension of the
    product) and a work array of that length, instead of the column count of
    this matrix and a work array sized by the number of stored elements;
  - all work buffers live on the heap instead of in variable-length arrays;
  - row numbers of the result are recovered correctly when the result has
    rows without elements.

  NOTE(review): the value written to ierr by amub_ is not inspected; with
  nzmax = nrows * input.ncolumns the result buffers cannot be too small.
  NOTE(review): confirm whether amub_ returns the columns of each row in
  ascending order; operator<< and getRow() rely on that order.

  \param input Right-hand factor; its row count is expected to equal the
         column count of this matrix (not checked)
  \return The product as a new sparse matrix
*/
template <class T>
SparseMatrixMTL<T> SparseMatrixMTL<T>::dot(const SparseMatrixMTL<T> &input) {
    // 1-based CSR description of this matrix
    int *columns_csr = new int[size];
    int *rows_csr = new int[nrows+1];
    for(int i=0;i<size;i++) {
      columns_csr[i]=columns[i]+1;
    }
    cooRowsToCsrPointers(rows, size, nrows, rows_csr);

    // 1-based CSR description of the right-hand factor
    int *columns2_csr = new int[input.size];
    int *rows2_csr = new int[input.nrows+1];
    for(int i=0;i<input.size;i++) {
      columns2_csr[i]=input.columns[i]+1;
    }
    cooRowsToCsrPointers(input.rows, input.size, input.nrows, rows2_csr);

    // Local, non-const copy: amub_ is called with plain int pointers.
    int ncol_out = input.ncolumns;

    // Maximum number of nonzero elements in the output matrix
    int nzmax=nrows*ncol_out; 
    T *c = new T[nzmax];
    int *jc = new int[nzmax];
    int *ic = new int[nrows+1];
    int *iw = new int[ncol_out];

    int job = 1;
    int ierr;

    amub_(&nrows, &ncol_out, &job, 
      data, columns_csr, rows_csr, input.data, columns2_csr, rows2_csr, 
      c, jc, ic, &nzmax, iw, &ierr);

    SparseMatrixMTL<T> out;

    out.size=ic[nrows]-1;
    out.nrows=nrows;
    out.ncolumns=input.ncolumns;

    delete[] out.data;
    out.data = new T[out.size];

    delete[] out.columns;
    out.columns = new int[out.size];

    delete[] out.rows;
    out.rows = new int[out.size];

    // Element i (1-based position i+1) belongs to the row r with
    // ic[r] <= i+1 < ic[r+1]; skip over rows that contain no element.
    int row_i = 0;
    for(int i=0;i<out.size;i++) {
      while(ic[row_i+1]<=i+1) {
        row_i++;
      }
      out.columns[i]=jc[i]-1;
      out.rows[i]=row_i;
      out.data[i]=c[i];
    }

    delete[] iw;
    delete[] ic;
    delete[] jc;
    delete[] c;
    delete[] rows2_csr;
    delete[] columns2_csr;
    delete[] rows_csr;
    delete[] columns_csr;

    return out;
}

// template <typename T> 
// inline const SparseMatrixMTL<T> operator *(const SparseMatrixMTL<T> &self, const SparseMatrixMTL<T> &input) {
//     return SparseMatrixMTL<T>(self)*=input;
// }	
// 
// template <typename T> 
// inline const SparseMatrixMTL<T> operator *(const SparseMatrixMTL<T> &self, const T input) {
//     return SparseMatrixMTL<T>(self)*=input;
// }
// 
// template <typename T> 
// inline const SparseMatrixMTL<T> operator *(const T input, const SparseMatrixMTL<T> &self) {
//     return SparseMatrixMTL<T>(self)*=input;
// }	
// 
// template <typename T> 
// inline const SparseMatrixMTL<T> operator +(const SparseMatrixMTL<T> &self, const SparseMatrixMTL<T> &input) {
//     return SparseMatrixMTL<T>(self)+=input;
// }	
// 
// template <typename T> 
// inline const SparseMatrixMTL<T> operator +(const T input, const SparseMatrixMTL<T> &self) {
//     return SparseMatrixMTL<T>(self)+=input;
// }	
// 
// template <typename T> 
// inline const SparseMatrixMTL<T> operator +(const SparseMatrixMTL<T> &self, const T input) {
//     return SparseMatrixMTL<T>(self)+=input;
// }
// 
// template <typename T> 
// inline const SparseMatrixMTL<T> operator /(const SparseMatrixMTL<T> &self, const SparseMatrixMTL<T> &input) {
//     return SparseMatrixMTL<T>(self)/=input;
// }	
// 
// template <typename T> 
// inline const SparseMatrixMTL<T> operator /(const SparseMatrixMTL<T> &self, const T input) {
//     return SparseMatrixMTL<T>(self)/=input;
// }
// 
// template <typename T> 
// inline const SparseSparseMatrixMTL<T> operator /(const T lhs, const SparseSparseMatrixMTL<T> &input) {
//     SparseMatrixMTL<T> output(input.nrows,input.ncolumns,input.size);
// 
//     for (int i=0; i<input.size; i++) {
//       output.data[i]-=lhs/input.data[i];
//     }
// 
//     return output;
// }	
// 
// template <typename T> 
// inline const SparseMatrixMTL<T> operator -(const SparseMatrixMTL<T> &self, const SparseMatrixMTL<T> &input) {
//     return SparseMatrixMTL<T>(self)-=input;
// }	
// 
// template <typename T> 
// inline const SparseMatrixMTL<T> operator -(const SparseMatrixMTL<T> &self, const T input) {
//     return SparseMatrixMTL<T>(self)-=input;
// }
// 
// template <typename T> 
// inline const SparseMatrixMTL<T> operator -(const T input, const SparseMatrixMTL<T> &self) {
//     return -SparseMatrixMTL<T>(self)+=input;
// }

/**
  Largest absolute difference between the stored values of two matrices.
  Values are compared by storage position, so both matrices have to have the
  same sparsity structure; the element count of \p input1 drives the loop.
  \param input1 First matrix
  \param input2 Second matrix
  \return max |input1.data[i] - input2.data[i]|, or 0 when \p input1 has no
          stored element (the first element is no longer read in that case)
*/
template <typename T>
T diffNorm(const SparseMatrixMTL<T> &input1, const SparseMatrixMTL<T> &input2) {
  if(input1.size<=0) {
    return T(0);
  }

  T norm = fabs(input1.data[0]-input2.data[0]);
  T temp;

  for(int i=1;i<input1.size;i++) {
    if((temp=fabs(input1.data[i]-input2.data[i]))>norm) norm = temp;
  }

  return norm;
}

/**
  Largest absolute stored value of the matrix.
  The first element now enters the comparison through fabs() like all the
  others; previously a negative first element was taken with its sign, so a
  matrix whose largest-magnitude value was negative and stored first
  reported a wrong norm.
  \param input Matrix to measure
  \return max |input.data[i]|, or 0 when the matrix has no stored element
*/
template <typename T> 
T norm(const SparseMatrixMTL<T> &input) {
    if(input.size<=0) {
      return T(0);
    }

    T output = fabs(input.data[0]);
    T temp;
    for(int i=1;i<input.size;i++) {
      if((temp=fabs(input.data[i]))>output) output=temp;
    }
    return output;
}

/**
  Difference of two sparse matrices with the same sparsity structure:
  a copy of \p input1 from which \p input2 is subtracted with operator-=.
*/
template <typename T>
inline const SparseMatrixMTL<T> operator -(const SparseMatrixMTL<T> &input1, const SparseMatrixMTL<T> &input2) {
  SparseMatrixMTL<T> result(input1);
  result-=input2;
  return result;
}

/**
  Sparse matrix divided by a scalar: a copy of \p input1 whose stored values
  are divided by \p input2 with operator/=.
*/
template <typename T>
inline const SparseMatrixMTL<T> operator /(const SparseMatrixMTL<T> &input1, const T input2) {
  SparseMatrixMTL<T> result(input1);
  result/=input2;
  return result;
}

}
#endif
