EMAN2
Defines | Typedefs | Functions
lapackblas.h File Reference
#include <cmath>
The include dependency graph for lapackblas.h and the graph of files that directly or indirectly include it are omitted here.

Go to the source code of this file.

Defines

#define TRUE_   (1)
#define FALSE_   (0)
#define abs(x)   ((x) >= 0 ? (x) : -(x))
#define dabs(x)   (doublereal)abs(x)
#define f2cmin(a, b)   ((a) <= (b) ? (a) : (b))
#define f2cmax(a, b)   ((a) >= (b) ? (a) : (b))
#define df2cmin(a, b)   (doublereal)f2cmin(a,b)
#define df2cmax(a, b)   (doublereal)f2cmax(a,b)

Typedefs

typedef int integer
typedef float real
typedef double doublereal
typedef int logical
typedef short flag
typedef short ftnlen
typedef short ftnint

Functions

int s_cat (char *, const char **, integer *, integer *, ftnlen)
integer s_cmp (char *, const char *, ftnlen, ftnlen)
void s_copy (char *a, const char *b, ftnlen la, ftnlen lb)
double pow_ri (real *ap, integer *bp)
double r_sign (real *a, real *b)
integer ieeeck_ (integer *ispec, real *zero, real *one)
integer ilaenv_ (integer *ispec, const char *name__, const char *opts, integer *n1, integer *n2, integer *n3, integer *n4, ftnlen name_len, ftnlen opts_len)
real drand (void)
logical lsame_ (const char *ca, const char *cb)
int saxpy_ (integer *n, real *sa, real *sx, integer *incx, real *sy, integer *incy)
int scopy_ (integer *n, real *sx, integer *incx, real *sy, integer *incy)
doublereal sdot_ (integer *n, real *sx, integer *incx, real *sy, integer *incy)
int sgemm_ (const char *transa, const char *transb, integer *m, integer *n, integer *k, real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, real *c__, integer *ldc)
int sgemv_ (const char *trans, integer *m, integer *n, real *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, real *y, integer *incy)
int sger_ (integer *m, integer *n, real *alpha, real *x, integer *incx, real *y, integer *incy, real *a, integer *lda)
int slae2_ (real *a, real *b, real *c__, real *rt1, real *rt2)
int slaev2_ (real *a, real *b, real *c__, real *rt1, real *rt2, real *cs1, real *sn1)
doublereal slamch_ (const char *cmach)
doublereal slanst_ (const char *norm, integer *n, real *d__, real *e)
doublereal slansy_ (const char *norm, char *uplo, integer *n, real *a, integer *lda, real *work)
doublereal slapy2_ (real *x, real *y)
int slarfb_ (const char *side, const char *trans, const char *direct, const char *storev, integer *m, integer *n, integer *k, real *v, integer *ldv, real *t, integer *ldt, real *c__, integer *ldc, real *work, integer *ldwork)
int slarf_ (const char *side, integer *m, integer *n, real *v, integer *incv, real *tau, real *c__, integer *ldc, real *work)
int slarfg_ (integer *n, real *alpha, real *x, integer *incx, real *tau)
int slarft_ (const char *direct, const char *storev, integer *n, integer *k, real *v, integer *ldv, real *tau, real *t, integer *ldt)
int slartg_ (real *f, real *g, real *cs, real *sn, real *r__)
int slascl_ (const char *type__, integer *kl, integer *ku, real *cfrom, real *cto, integer *m, integer *n, real *a, integer *lda, integer *info)
int slaset_ (const char *uplo, integer *m, integer *n, real *alpha, real *beta, real *a, integer *lda)
int slasr_ (const char *side, const char *pivot, const char *direct, integer *m, integer *n, real *c__, real *s, real *a, integer *lda)
int slasrt_ (const char *id, integer *n, real *d__, integer *info)
int slassq_ (integer *n, real *x, integer *incx, real *scale, real *sumsq)
int slatrd_ (char *uplo, integer *n, integer *nb, real *a, integer *lda, real *e, real *tau, real *w, integer *ldw)
doublereal snrm2_ (integer *n, real *x, integer *incx)
int srot_ (integer *n, real *sx, integer *incx, real *sy, integer *incy, real *c__, real *s)
int sorg2l_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info)
int sorg2r_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info)
int sorgql_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info)
int sorgqr_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info)
int sorgtr_ (char *uplo, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info)
int sscal_ (integer *n, real *sa, real *sx, integer *incx)
int ssteqr_ (const char *compz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *info)
int ssterf_ (integer *n, real *d__, real *e, integer *info)
int sswap_ (integer *n, real *sx, integer *incx, real *sy, integer *incy)
int ssyev_ (char *jobz, char *uplo, integer *n, real *a, integer *lda, real *w, real *work, integer *lwork, integer *info)
int ssymv_ (const char *uplo, integer *n, real *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, real *y, integer *incy)
int ssyr2_ (char *uplo, integer *n, real *alpha, real *x, integer *incx, real *y, integer *incy, real *a, integer *lda)
int ssyr2k_ (char *uplo, const char *trans, integer *n, integer *k, real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, real *c__, integer *ldc)
int ssytd2_ (char *uplo, integer *n, real *a, integer *lda, real *d__, real *e, real *tau, integer *info)
int ssytrd_ (char *uplo, integer *n, real *a, integer *lda, real *d__, real *e, real *tau, real *work, integer *lwork, integer *info)
int strmm_ (const char *side, const char *uplo, const char *transa, const char *diag, integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, integer *ldb)
int strmv_ (const char *uplo, const char *trans, const char *diag, integer *n, real *a, integer *lda, real *x, integer *incx)
int xerbla_ (const char *srname, integer *info)
int sstedc_ (const char *compz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info)
int sstevd_ (char *jobz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info)
int slaeda_ (integer *n, integer *tlvls, integer *curlvl, integer *curpbm, integer *prmptr, integer *perm, integer *givptr, integer *givcol, real *givnum, real *q, integer *qptr, real *z__, real *ztemp, integer *info)
int slaed0_ (integer *icompq, integer *qsiz, integer *n, real *d__, real *e, real *q, integer *ldq, real *qstore, integer *ldqs, real *work, integer *iwork, integer *info)
int slaed1_ (integer *n, real *d__, real *q, integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *work, integer *iwork, integer *info)
int slaed2_ (integer *k, integer *n, integer *n1, real *d__, real *q, integer *ldq, integer *indxq, real *rho, real *z__, real *dlamda, real *w, real *q2, integer *indx, integer *indxc, integer *indxp, integer *coltyp, integer *info)
int slaed3_ (integer *k, integer *n, integer *n1, real *d__, real *q, integer *ldq, real *rho, real *dlamda, real *q2, integer *indx, integer *ctot, real *w, real *s, integer *info)
int slaed4_ (integer *n, integer *i__, real *d__, real *z__, real *delta, real *rho, real *dlam, integer *info)
int slaed5_ (integer *i__, real *d__, real *z__, real *delta, real *rho, real *dlam)
int slaed6_ (integer *kniter, logical *orgati, real *rho, real *d__, real *z__, real *finit, real *tau, integer *info)
int slaed7_ (integer *icompq, integer *n, integer *qsiz, integer *tlvls, integer *curlvl, integer *curpbm, real *d__, real *q, integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *qstore, integer *qptr, integer *prmptr, integer *perm, integer *givptr, integer *givcol, real *givnum, real *work, integer *iwork, integer *info)
int slaed8_ (integer *icompq, integer *k, integer *n, integer *qsiz, real *d__, real *q, integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *z__, real *dlamda, real *q2, integer *ldq2, real *w, integer *perm, integer *givptr, integer *givcol, real *givnum, integer *indxp, integer *indx, integer *info)
int slaed9_ (integer *k, integer *kstart, integer *kstop, integer *n, real *d__, real *q, integer *ldq, real *rho, real *dlamda, real *w, real *s, integer *lds, integer *info)
int slacpy_ (const char *uplo, integer *m, integer *n, real *a, integer *lda, real *b, integer *ldb)
int slamrg_ (integer *n1, integer *n2, real *a, integer *strd1, integer *strd2, integer *index)
int sorgl2_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info)
int sgesvd_ (char *jobu, char *jobvt, integer *m, integer *n, real *a, integer *lda, real *s, real *u, integer *ldu, real *vt, integer *ldvt, real *work, integer *lwork, integer *info)
int sorglq_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info)
doublereal slange_ (const char *norm, integer *m, integer *n, real *a, integer *lda, real *work)
int sgebrd_ (integer *m, integer *n, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *work, integer *lwork, integer *info)
int sgebd2_ (integer *m, integer *n, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *work, integer *info)
int sormbr_ (const char *vect, const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info)
int sgelqf_ (integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info)
int sormlq_ (const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info)
int sormqr_ (const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info)
int sgelq2_ (integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *info)
int sbdsqr_ (const char *uplo, integer *n, integer *ncvt, integer *nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real *u, integer *ldu, real *c__, integer *ldc, real *work, integer *info)
int sgeqrf_ (integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info)
int sorml2_ (const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info)
int slabrd_ (integer *m, integer *n, integer *nb, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *x, integer *ldx, real *y, integer *ldy)
int sgeqr2_ (integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *info)
int sorm2r_ (const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info)
int sorgbr_ (const char *vect, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info)
int slasq1_ (integer *n, real *d__, real *e, real *work, integer *info)
int slasq2_ (integer *n, real *z__, integer *info)
int slasq3_ (integer *i0, integer *n0, real *z__, integer *pp, real *dmin__, real *sigma, real *desig, real *qmax, integer *nfail, integer *iter, integer *ndiv, logical *ieee)
int slasq4_ (integer *i0, integer *n0, real *z__, integer *pp, integer *n0in, real *dmin__, real *dmin1, real *dmin2, real *dn, real *dn1, real *dn2, real *tau, integer *ttype)
int slasq5_ (integer *i0, integer *n0, real *z__, integer *pp, real *tau, real *dmin__, real *dmin1, real *dmin2, real *dn, real *dnm1, real *dnm2, logical *ieee)
int slasq6_ (integer *i0, integer *n0, real *z__, integer *pp, real *dmin__, real *dmin1, real *dmin2, real *dn, real *dnm1, real *dnm2)
int slasv2_ (real *f, real *g, real *h__, real *ssmin, real *ssmax, real *snr, real *csr, real *snl, real *csl)
int slas2_ (real *f, real *g, real *h__, real *ssmin, real *ssmax)

Define Documentation

#define abs(x)   ((x) >= 0 ? (x) : -(x))

Definition at line 46 of file lapackblas.h.

Referenced by EMAN::EMData::absi(), EMAN::EMData::add_complex_at(), EMAN::ShapeAnalyzer::analyze(), aprq2d(), wustl_mm::GraySkeletonCPP::VolumeSkeletonizer::Are26Neighbors(), circumf(), circumf_rect(), EMAN::PointArray::construct_helix(), EMAN::EMData::extract_plane(), EMAN::EMData::extract_plane_rect(), EMAN::EMData::extract_plane_rect_fast(), EMAN::EMData::extractline(), fftc_d(), fftc_q(), fftr_d(), fftr_q(), EMAN::EMData::find_3d_threshold(), EMAN::nnSSNR_ctfReconstructor::finish(), EMAN::nn4_ctf_rectReconstructor::finish(), EMAN::nn4_ctfReconstructor::finish(), EMAN::nnSSNR_Reconstructor::finish(), EMAN::nn4_rectReconstructor::finish(), EMAN::nn4Reconstructor::finish(), EMAN::PointArray::fit_helix(), EMAN::fourierproduct(), EMAN::EMData::get_complex_at(), EMAN::EMData::get_complex_index(), EMAN::TestUtil::get_pixel_value_by_dist1(), EMAN::TestUtil::get_pixel_value_by_dist2(), EMAN::Util::getBaldwinGridWeights(), EMAN::EMData::getconvpt2d_kbi0(), EMAN::Util::hypot_fast(), EMAN::Util::hypot_fast_int(), EMAN::GaussFFTProjector::interp_ft_3d(), main(), EMAN::EMData::make_footprint(), max2d(), max3d(), EMAN::KmeansSegmentProcessor::process(), EMAN::RotateInFSProcessor::process_inplace(), EMAN::AutoMask3D2Processor::process_inplace(), EMAN::ToMassCenterProcessor::process_inplace(), EMAN::AutoMask2DProcessor::process_inplace(), EMAN::LinearPyramidProcessor::process_inplace(), EMAN::EMData::rotavg(), EMAN::EMData::rotavg_i(), EMAN::EMData::set_complex_at(), EMAN::BoxingTools::set_region(), and slamc2_().

#define dabs(x)   (doublereal)abs(x)

#define df2cmax(a, b)   (doublereal)f2cmax(a,b)

#define df2cmin(a, b)   (doublereal)f2cmin(a,b)

Definition at line 50 of file lapackblas.h.

Referenced by sbdsqr_(), slaed4_(), slaed6_(), slapy2_(), slas2_(), slasq2_(), slasq3_(), slasq4_(), slasq5_(), and slasq6_().

#define f2cmax(a, b)   ((a) >= (b) ? (a) : (b))

#define f2cmin(a, b)   ((a) <= (b) ? (a) : (b))
#define FALSE_   (0)

Definition at line 44 of file lapackblas.h.

Referenced by slaed4_(), slaed6_(), slamc1_(), slamc2_(), slamch_(), slartg_(), slascl_(), and slasv2_().

#define TRUE_   (1)

Definition at line 43 of file lapackblas.h.

Referenced by slaed4_(), slaed6_(), slamc1_(), slamc2_(), slamch_(), slartg_(), slascl_(), and slasv2_().


Typedef Documentation

typedef double doublereal

Definition at line 36 of file lapackblas.h.

typedef short flag

Definition at line 39 of file lapackblas.h.

typedef short ftnint

Definition at line 41 of file lapackblas.h.

typedef short ftnlen

Definition at line 40 of file lapackblas.h.

typedef int integer

Definition at line 34 of file lapackblas.h.

typedef int logical

Definition at line 37 of file lapackblas.h.

typedef float real

Definition at line 35 of file lapackblas.h.


Function Documentation

real drand (void)
integer ieeeck_ (integer *ispec, real *zero, real *one)

Definition at line 56 of file lapackblas.cpp.

Referenced by ilaenv_().

{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1998   


    Purpose   
    =======   

    IEEECK is called from ILAENV to verify that Infinity and   
    possibly NaN arithmetic is safe (i.e. will not trap).   

    Arguments   
    =========   

    ISPEC   (input) INTEGER   
            Specifies whether to test just for infinity arithmetic   
            or whether to test for infinity and NaN arithmetic.   
            = 0: Verify infinity arithmetic only.   
            = 1: Verify infinity and NaN arithmetic.   

    ZERO    (input) REAL   
            Must contain the value 0.0   
            This is passed to prevent the compiler from optimizing   
            away this code.   

    ONE     (input) REAL   
            Must contain the value 1.0   
            This is passed to prevent the compiler from optimizing   
            away this code.   

    RETURN VALUE:  INTEGER   
            = 0:  Arithmetic failed to produce the correct answers   
            = 1:  Arithmetic produced the correct answers */
    /* System generated locals */
    integer ret_val;
    /* Local variables */
    static real neginf, posinf, negzro, newzro, nan1, nan2, nan3, nan4, nan5, 
            nan6;


    ret_val = 1;

    posinf = *one / *zero;
    if (posinf <= *one) {
        ret_val = 0;
        return ret_val;
    }

    neginf = -(*one) / *zero;
    if (neginf >= *zero) {
        ret_val = 0;
        return ret_val;
    }

    negzro = *one / (neginf + *one);
    if (negzro != *zero) {
        ret_val = 0;
        return ret_val;
    }

    neginf = *one / negzro;
    if (neginf >= *zero) {
        ret_val = 0;
        return ret_val;
    }

    newzro = negzro + *zero;
    if (newzro != *zero) {
        ret_val = 0;
        return ret_val;
    }

    posinf = *one / newzro;
    if (posinf <= *one) {
        ret_val = 0;
        return ret_val;
    }

    neginf *= posinf;
    if (neginf >= *zero) {
        ret_val = 0;
        return ret_val;
    }

    posinf *= posinf;
    if (posinf <= *one) {
        ret_val = 0;
        return ret_val;
    }




/*     Return if we were only asked to check infinity arithmetic */

    if (*ispec == 0) {
        return ret_val;
    }

    nan1 = posinf + neginf;

    nan2 = posinf / neginf;

    nan3 = posinf / posinf;

    nan4 = posinf * *zero;

    nan5 = neginf * negzro;

    nan6 = nan5 * 0.f;

    if (nan1 == nan1) {
        ret_val = 0;
        return ret_val;
    }

    if (nan2 == nan2) {
        ret_val = 0;
        return ret_val;
    }

    if (nan3 == nan3) {
        ret_val = 0;
        return ret_val;
    }

    if (nan4 == nan4) {
        ret_val = 0;
        return ret_val;
    }

    if (nan5 == nan5) {
        ret_val = 0;
        return ret_val;
    }

    if (nan6 == nan6) {
        ret_val = 0;
        return ret_val;
    }

    return ret_val;
} /* ieeeck_ */
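A minimal C sketch of a call to ieeeck_; the #include path and the main() driver are illustrative assumptions, not taken from EMAN2. ISPEC selects whether NaN arithmetic is also probed, and ZERO/ONE are passed by address so the divisions cannot be constant-folded.

#include <stdio.h>
#include "lapackblas.h"    /* assumed include path */

int main(void)
{
    integer ispec = 1;   /* 0 = infinity only, 1 = infinity and NaN */
    real zero = 0.f;     /* must hold 0.0, per the comment above    */
    real one  = 1.f;     /* must hold 1.0                           */

    if (ieeeck_(&ispec, &zero, &one) == 1)
        printf("IEEE infinity/NaN arithmetic is safe (no traps)\n");
    else
        printf("IEEE arithmetic check failed\n");
    return 0;
}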
integer ilaenv_ (integer *ispec, const char *name__, const char *opts, integer *n1, integer *n2, integer *n3, integer *n4, ftnlen name_len, ftnlen opts_len)

Definition at line 206 of file lapackblas.cpp.

References c__0, c__1, f2cmin, ieeeck_(), nx, s_cmp(), and s_copy().

Referenced by sgebrd_(), sgelqf_(), sgeqrf_(), sgesvd_(), slaed0_(), slasq2_(), sorgbr_(), sorglq_(), sorgql_(), sorgqr_(), sorgtr_(), sormbr_(), sormlq_(), sormqr_(), sstedc_(), ssyev_(), and ssytrd_().

{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    ILAENV is called from the LAPACK routines to choose problem-dependent   
    parameters for the local environment.  See ISPEC for a description of   
    the parameters.   

    This version provides a set of parameters which should give good,   
    but not optimal, performance on many of the currently available   
    computers.  Users are encouraged to modify this subroutine to set   
    the tuning parameters for their particular machine using the option   
    and problem size information in the arguments.   

    This routine will not function correctly if it is converted to all   
    lower case.  Converting it to all upper case is allowed.   

    Arguments   
    =========   

    ISPEC   (input) INTEGER   
            Specifies the parameter to be returned as the value of   
            ILAENV.   
            = 1: the optimal blocksize; if this value is 1, an unblocked   
                 algorithm will give the best performance.   
            = 2: the minimum block size for which the block routine   
                 should be used; if the usable block size is less than   
                 this value, an unblocked routine should be used.   
            = 3: the crossover point (in a block routine, for N less   
                 than this value, an unblocked routine should be used)   
            = 4: the number of shifts, used in the nonsymmetric   
                 eigenvalue routines   
            = 5: the MINIMUM Column dimension for blocking to be used;   
                 rectangular blocks must have dimension at least k by m,   
                 where k is given by ILAENV(2,...) and m by ILAENV(5,...)   
            = 6: the crossover point for the SVD (when reducing an m by n   
                 matrix to bidiagonal form, if f2cmax(m,n)/min(m,n) exceeds   
                 this value, a QR factorization is used first to reduce   
                 the matrix to a triangular form.)   
            = 7: the number of processors   
            = 8: the crossover point for the multishift QR and QZ methods   
                 for nonsymmetric eigenvalue problems.   
            = 9: maximum size of the subproblems at the bottom of the   
                 computation tree in the divide-and-conquer algorithm   
                 (used by xGELSD and xGESDD)   
            =10: ieee NaN arithmetic can be trusted not to trap   
            =11: infinity arithmetic can be trusted not to trap   

    NAME    (input) CHARACTER*(*)   
            The name of the calling subroutine, in either upper case or   
            lower case.   

    OPTS    (input) CHARACTER*(*)   
            The character options to the subroutine NAME, concatenated   
            into a single character string.  For example, UPLO = 'U',   
            TRANS = 'T', and DIAG = 'N' for a triangular routine would   
            be specified as OPTS = 'UTN'.   

    N1      (input) INTEGER   
    N2      (input) INTEGER   
    N3      (input) INTEGER   
    N4      (input) INTEGER   
            Problem dimensions for the subroutine NAME; these may not all   
            be required.   

   (ILAENV) (output) INTEGER   
            >= 0: the value of the parameter specified by ISPEC   
            < 0:  if ILAENV = -k, the k-th argument had an illegal value.   

    Further Details   
    ===============   

    The following conventions have been used when calling ILAENV from the   
    LAPACK routines:   
    1)  OPTS is a concatenation of all of the character options to   
        subroutine NAME, in the same order that they appear in the   
        argument list for NAME, even if they are not used in determining   
        the value of the parameter specified by ISPEC.   
    2)  The problem dimensions N1, N2, N3, N4 are specified in the order   
        that they appear in the argument list for NAME.  N1 is used   
        first, N2 second, and so on, and unused problem dimensions are   
        passed a value of -1.   
    3)  The parameter value returned by ILAENV is checked for validity in   
        the calling subroutine.  For example, ILAENV is used to retrieve   
        the optimal blocksize for STRTRI as follows:   

        NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 )   
        IF( NB.LE.1 ) NB = MAX( 1, N )   

    ===================================================================== */
    /* Table of constant values */
    static integer c__0 = 0;
    static real c_b162 = 0.f;
    static real c_b163 = 1.f;
    static integer c__1 = 1;
    
    /* System generated locals */
    integer ret_val;
    /* Builtin functions   
       Subroutine */ void s_copy(char *, const char *, ftnlen, ftnlen);
    integer s_cmp(char *, const char *, ftnlen, ftnlen);
    /* Local variables */
    static integer i__;
    static logical cname, sname;
    static integer nbmin;
    static char c1[1], c2[2], c3[3], c4[2];
    static integer ic, nb;
    extern integer ieeeck_(integer *, real *, real *);
    static integer iz, nx;
    static char subnam[6];




    switch (*ispec) {
        case 1:  goto L100;
        case 2:  goto L100;
        case 3:  goto L100;
        case 4:  goto L400;
        case 5:  goto L500;
        case 6:  goto L600;
        case 7:  goto L700;
        case 8:  goto L800;
        case 9:  goto L900;
        case 10:  goto L1000;
        case 11:  goto L1100;
    }

/*     Invalid value for ISPEC */

    ret_val = -1;
    return ret_val;

L100:

/*     Convert NAME to upper case if the first character is lower case. */

    ret_val = 1;
    s_copy(subnam, name__, (ftnlen)6, name_len);
    ic = *(unsigned char *)subnam;
    iz = 'Z';
    if (iz == 90 || iz == 122) {

/*        ASCII character set */

        if (ic >= 97 && ic <= 122) {
            *(unsigned char *)subnam = (char) (ic - 32);
            for (i__ = 2; i__ <= 6; ++i__) {
                ic = *(unsigned char *)&subnam[i__ - 1];
                if (ic >= 97 && ic <= 122) {
                    *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32);
                }
/* L10: */
            }
        }

    } else if (iz == 233 || iz == 169) {

/*        EBCDIC character set */

        if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && 
                ic <= 169) {
            *(unsigned char *)subnam = (char) (ic + 64);
            for (i__ = 2; i__ <= 6; ++i__) {
                ic = *(unsigned char *)&subnam[i__ - 1];
                if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 
                        162 && ic <= 169) {
                    *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64);
                }
/* L20: */
            }
        }

    } else if (iz == 218 || iz == 250) {

/*        Prime machines:  ASCII+128 */

        if (ic >= 225 && ic <= 250) {
            *(unsigned char *)subnam = (char) (ic - 32);
            for (i__ = 2; i__ <= 6; ++i__) {
                ic = *(unsigned char *)&subnam[i__ - 1];
                if (ic >= 225 && ic <= 250) {
                    *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32);
                }
/* L30: */
            }
        }
    }

    *(unsigned char *)c1 = *(unsigned char *)subnam;
    sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D';
    cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z';
    if (! (cname || sname)) {
        return ret_val;
    }
    s_copy(c2, subnam + 1, (ftnlen)2, (ftnlen)2);
    s_copy(c3, subnam + 3, (ftnlen)3, (ftnlen)3);
    s_copy(c4, c3 + 1, (ftnlen)2, (ftnlen)2);

    switch (*ispec) {
        case 1:  goto L110;
        case 2:  goto L200;
        case 3:  goto L300;
    }

L110:

/*     ISPEC = 1:  block size   

       In these examples, separate code is provided for setting NB for   
       real and complex.  We assume that NB will take the same value in   
       single or double precision. */

    nb = 1;

    if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) {
        if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
            if (sname) {
                nb = 64;
            } else {
                nb = 64;
            }
        } else if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, 
                "RQF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)
                3, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) 
                == 0) {
            if (sname) {
                nb = 32;
            } else {
                nb = 32;
            }
        } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) {
            if (sname) {
                nb = 32;
            } else {
                nb = 32;
            }
        } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) {
            if (sname) {
                nb = 32;
            } else {
                nb = 32;
            }
        } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) {
            if (sname) {
                nb = 64;
            } else {
                nb = 64;
            }
        }
    } else if (s_cmp(c2, "PO", (ftnlen)2, (ftnlen)2) == 0) {
        if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
            if (sname) {
                nb = 64;
            } else {
                nb = 64;
            }
        }
    } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) {
        if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
            if (sname) {
                nb = 64;
            } else {
                nb = 64;
            }
        } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
            nb = 32;
        } else if (sname && s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) {
            nb = 64;
        }
    } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) {
        if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
            nb = 64;
        } else if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
            nb = 32;
        } else if (s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) {
            nb = 64;
        }
    } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) {
        if (*(unsigned char *)c3 == 'G') {
            if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
                    (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
                    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
                     0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
                    c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
                    ftnlen)2, (ftnlen)2) == 0) {
                nb = 32;
            }
        } else if (*(unsigned char *)c3 == 'M') {
            if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
                    (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
                    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
                     0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
                    c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
                    ftnlen)2, (ftnlen)2) == 0) {
                nb = 32;
            }
        }
    } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) {
        if (*(unsigned char *)c3 == 'G') {
            if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
                    (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
                    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
                     0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
                    c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
                    ftnlen)2, (ftnlen)2) == 0) {
                nb = 32;
            }
        } else if (*(unsigned char *)c3 == 'M') {
            if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
                    (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
                    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
                     0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
                    c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
                    ftnlen)2, (ftnlen)2) == 0) {
                nb = 32;
            }
        }
    } else if (s_cmp(c2, "GB", (ftnlen)2, (ftnlen)2) == 0) {
        if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
            if (sname) {
                if (*n4 <= 64) {
                    nb = 1;
                } else {
                    nb = 32;
                }
            } else {
                if (*n4 <= 64) {
                    nb = 1;
                } else {
                    nb = 32;
                }
            }
        }
    } else if (s_cmp(c2, "PB", (ftnlen)2, (ftnlen)2) == 0) {
        if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
            if (sname) {
                if (*n2 <= 64) {
                    nb = 1;
                } else {
                    nb = 32;
                }
            } else {
                if (*n2 <= 64) {
                    nb = 1;
                } else {
                    nb = 32;
                }
            }
        }
    } else if (s_cmp(c2, "TR", (ftnlen)2, (ftnlen)2) == 0) {
        if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) {
            if (sname) {
                nb = 64;
            } else {
                nb = 64;
            }
        }
    } else if (s_cmp(c2, "LA", (ftnlen)2, (ftnlen)2) == 0) {
        if (s_cmp(c3, "UUM", (ftnlen)3, (ftnlen)3) == 0) {
            if (sname) {
                nb = 64;
            } else {
                nb = 64;
            }
        }
    } else if (sname && s_cmp(c2, "ST", (ftnlen)2, (ftnlen)2) == 0) {
        if (s_cmp(c3, "EBZ", (ftnlen)3, (ftnlen)3) == 0) {
            nb = 1;
        }
    }
    ret_val = nb;
    return ret_val;

L200:

/*     ISPEC = 2:  minimum block size */

    nbmin = 2;
    if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) {
        if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", (
                ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, (
                ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0)
                 {
            if (sname) {
                nbmin = 2;
            } else {
                nbmin = 2;
            }
        } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) {
            if (sname) {
                nbmin = 2;
            } else {
                nbmin = 2;
            }
        } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) {
            if (sname) {
                nbmin = 2;
            } else {
                nbmin = 2;
            }
        } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) {
            if (sname) {
                nbmin = 2;
            } else {
                nbmin = 2;
            }
        }
    } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) {
        if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
            if (sname) {
                nbmin = 8;
            } else {
                nbmin = 8;
            }
        } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
            nbmin = 2;
        }
    } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) {
        if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
            nbmin = 2;
        }
    } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) {
        if (*(unsigned char *)c3 == 'G') {
            if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
                    (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
                    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
                     0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
                    c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
                    ftnlen)2, (ftnlen)2) == 0) {
                nbmin = 2;
            }
        } else if (*(unsigned char *)c3 == 'M') {
            if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
                    (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
                    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
                     0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
                    c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
                    ftnlen)2, (ftnlen)2) == 0) {
                nbmin = 2;
            }
        }
    } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) {
        if (*(unsigned char *)c3 == 'G') {
            if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
                    (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
                    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
                     0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
                    c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
                    ftnlen)2, (ftnlen)2) == 0) {
                nbmin = 2;
            }
        } else if (*(unsigned char *)c3 == 'M') {
            if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
                    (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
                    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
                     0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
                    c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
                    ftnlen)2, (ftnlen)2) == 0) {
                nbmin = 2;
            }
        }
    }
    ret_val = nbmin;
    return ret_val;

L300:

/*     ISPEC = 3:  crossover point */

    nx = 0;
    if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) {
        if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", (
                ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, (
                ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0)
                 {
            if (sname) {
                nx = 128;
            } else {
                nx = 128;
            }
        } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) {
            if (sname) {
                nx = 128;
            } else {
                nx = 128;
            }
        } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) {
            if (sname) {
                nx = 128;
            } else {
                nx = 128;
            }
        }
    } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) {
        if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
            nx = 32;
        }
    } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) {
        if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
            nx = 32;
        }
    } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) {
        if (*(unsigned char *)c3 == 'G') {
            if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
                    (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
                    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
                     0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
                    c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
                    ftnlen)2, (ftnlen)2) == 0) {
                nx = 128;
            }
        }
    } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) {
        if (*(unsigned char *)c3 == 'G') {
            if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
                    (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
                    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
                     0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
                    c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
                    ftnlen)2, (ftnlen)2) == 0) {
                nx = 128;
            }
        }
    }
    ret_val = nx;
    return ret_val;

L400:

/*     ISPEC = 4:  number of shifts (used by xHSEQR) */

    ret_val = 6;
    return ret_val;

L500:

/*     ISPEC = 5:  minimum column dimension (not used) */

    ret_val = 2;
    return ret_val;

L600:

/*     ISPEC = 6:  crossover point for SVD (used by xGELSS and xGESVD) */

    ret_val = (integer) ((real) f2cmin(*n1,*n2) * 1.6f);
    return ret_val;

L700:

/*     ISPEC = 7:  number of processors (not used) */

    ret_val = 1;
    return ret_val;

L800:

/*     ISPEC = 8:  crossover point for multishift (used by xHSEQR) */

    ret_val = 50;
    return ret_val;

L900:

/*     ISPEC = 9:  maximum size of the subproblems at the bottom of the   
                   computation tree in the divide-and-conquer algorithm   
                   (used by xGELSD and xGESDD) */

    ret_val = 25;
    return ret_val;

L1000:

/*     ISPEC = 10: ieee NaN arithmetic can be trusted not to trap   

       ILAENV = 0 */
    ret_val = 1;
    if (ret_val == 1) {
        ret_val = ieeeck_(&c__0, &c_b162, &c_b163);
    }
    return ret_val;

L1100:

/*     ISPEC = 11: infinity arithmetic can be trusted not to trap   

       ILAENV = 0 */
    ret_val = 1;
    if (ret_val == 1) {
        ret_val = ieeeck_(&c__1, &c_b162, &c_b163);
    }
    return ret_val;

/*     End of ILAENV */

} /* ilaenv_ */
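The Fortran convention shown above, NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ), translates to C as in the following minimal sketch; the #include path, the chosen routine name "SGEQRF", and the main() driver are illustrative assumptions. The trailing ftnlen arguments carry the lengths of the two string arguments.

#include <stdio.h>
#include "lapackblas.h"    /* assumed include path */

int main(void)
{
    integer ispec = 1;                      /* 1 = optimal block size NB    */
    integer m = 512, n = 512, unused = -1;  /* unused dimensions passed as -1 */
    integer nb = ilaenv_(&ispec, "SGEQRF", " ", &m, &n, &unused, &unused,
                         (ftnlen)6, (ftnlen)1);
    printf("suggested block size NB = %d\n", nb);
    return 0;
}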
logical lsame_ (const char *ca, const char *cb)

Definition at line 814 of file lapackblas.cpp.

Referenced by sbdsqr_(), sgemm_(), sgemv_(), sgesvd_(), slacpy_(), slamch_(), slange_(), slanst_(), slansy_(), slarf_(), slarfb_(), slarft_(), slascl_(), slaset_(), slasr_(), slasrt_(), slatrd_(), sorgbr_(), sorgtr_(), sorm2r_(), sormbr_(), sorml2_(), sormlq_(), sormqr_(), sstedc_(), ssteqr_(), sstevd_(), ssyev_(), ssymv_(), ssyr2_(), ssyr2k_(), ssytd2_(), ssytrd_(), strmm_(), and strmv_().

{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    LSAME returns .TRUE. if CA is the same letter as CB regardless of   
    case.   

    Arguments   
    =========   

    CA      (input) CHARACTER*1   
    CB      (input) CHARACTER*1   
            CA and CB specify the single characters to be compared.   

   ===================================================================== 
  


       Test if the characters are equal */
    /* System generated locals */
    logical ret_val;
    /* Local variables */
    static integer inta, intb, zcode;


    ret_val = *(unsigned char *)ca == *(unsigned char *)cb;
    if (ret_val) {
        return ret_val;
    }

/*     Now test for equivalence if both characters are alphabetic. */

    zcode = 'Z';

/*     Use 'Z' rather than 'A' so that ASCII can be detected on Prime   
       machines, on which ICHAR returns a value with bit 8 set.   
       ICHAR('A') on Prime machines returns 193 which is the same as   
       ICHAR('A') on an EBCDIC machine. */

    inta = *(unsigned char *)ca;
    intb = *(unsigned char *)cb;

    if (zcode == 90 || zcode == 122) {

/*        ASCII is assumed - ZCODE is the ASCII code of either lower or
          upper case 'Z'. */

        if (inta >= 97 && inta <= 122) {
            inta += -32;
        }
        if (intb >= 97 && intb <= 122) {
            intb += -32;
        }

    } else if (zcode == 233 || zcode == 169) {

/*        EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or
          upper case 'Z'. */

        if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta 
                >= 162 && inta <= 169) {
            inta += 64;
        }
        if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb 
                >= 162 && intb <= 169) {
            intb += 64;
        }

    } else if (zcode == 218 || zcode == 250) {

/*        ASCII is assumed, on Prime machines - ZCODE is the ASCII code
          plus 128 of either lower or upper case 'Z'. */

        if (inta >= 225 && inta <= 250) {
            inta += -32;
        }
        if (intb >= 225 && intb <= 250) {
            intb += -32;
        }
    }
    ret_val = inta == intb;

/*     RETURN   

       End of LSAME */

    return ret_val;
} /* lsame_ */
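A minimal sketch of the case-insensitive option test that the wrappers above (sbdsqr_, ssyev_, ...) perform on their CHARACTER*1 arguments; the main() driver is an illustrative assumption.

#include <stdio.h>
#include "lapackblas.h"    /* assumed include path */

int main(void)
{
    const char uplo = 'l';
    if (lsame_(&uplo, "U"))
        printf("upper triangle selected\n");
    else if (lsame_(&uplo, "L"))
        printf("lower triangle selected\n");   /* taken: 'l' matches 'L' */
    return 0;
}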
double pow_ri (real *ap, integer *bp)

Definition at line 918 of file lapackblas.cpp.

References x.

Referenced by slaed6_(), slamc2_(), slamch_(), and slartg_().

{
double pow, x;
integer n;
unsigned long u;

pow = 1;
x = *ap;
n = *bp;

if(n != 0)
        {
        if(n < 0)
                {
                n = -n;
                x = 1/x;
                }
        for(u = n; ; )
                {
                if(u & 01)
                        pow *= x;
                if(u >>= 1)
                        x *= x;
                else
                        break;
                }
        }
return(pow);
}
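pow_ri raises a real base to an integer power by binary exponentiation (squaring the base while halving the exponent), inverting the base first for negative exponents. A minimal sketch, with the main() driver as an illustrative assumption:

#include <stdio.h>
#include "lapackblas.h"    /* assumed include path */

int main(void)
{
    real base = 2.f;
    integer p_pos = 10, p_neg = -3;
    printf("2^10 = %g\n", pow_ri(&base, &p_pos));   /* 1024  */
    printf("2^-3 = %g\n", pow_ri(&base, &p_neg));   /* 0.125 */
    return 0;
}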
double r_sign (real *a, real *b)

Definition at line 984 of file lapackblas.cpp.

References x.

Referenced by sbdsqr_(), slaed3_(), slaed9_(), slarfg_(), slasv2_(), ssteqr_(), and ssterf_().

{
double x;
x = (*a >= 0 ? *a : - *a);
return( *b >= 0 ? x : -x);
}
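r_sign reproduces Fortran's SIGN(A,B): the magnitude of A carrying the sign of B, which slarfg_ and the QR/SVD routines above use when choosing the sign of a Householder reflector. A minimal sketch (main() is an illustrative assumption):

#include <stdio.h>
#include "lapackblas.h"    /* assumed include path */

int main(void)
{
    real a = -3.5f, plus = 2.f, minus = -1.f;
    printf("SIGN(-3.5,  2.0) = %g\n", r_sign(&a, &plus));    /*  3.5 */
    printf("SIGN(-3.5, -1.0) = %g\n", r_sign(&a, &minus));   /* -3.5 */
    return 0;
}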
int s_cat (char *, const char **, integer *, integer *, ftnlen)

Definition at line 37 of file lapackblas.cpp.

Referenced by sgesvd_(), sormbr_(), sormlq_(), and sormqr_().

{
   ftnlen i, n, nc;
   const char *f__rp;

   n = (int)*np;
   for(i = 0 ; i < n ; ++i) {
      nc = ll;
      if(rnp[i] < nc) nc = rnp[i];
      ll -= nc;
      f__rp = rpp[i];
      while(--nc >= 0)  *lp++ = *f__rp++;
   }
   while(--ll >= 0)
   *lp++ = ' ';
   return 0; 
}
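Reading the body above, the unnamed parameters are: the destination buffer, an array of source strings, an array of their lengths, the number of pieces, and the fixed destination length; the result is blank-padded and not NUL-terminated. A minimal sketch of that calling convention (the piece values and the main() driver are illustrative assumptions):

#include <stdio.h>
#include "lapackblas.h"    /* assumed include path */

int main(void)
{
    const char *pieces[2] = { "SORMQR", "LT" };  /* strings to concatenate */
    integer lens[2] = { 6, 2 };                  /* their fixed lengths    */
    integer npieces = 2;
    char dest[16];                               /* Fortran-style buffer   */

    s_cat(dest, pieces, lens, &npieces, (ftnlen)sizeof dest);
    printf("\"%.16s\"\n", dest);                 /* "SORMQRLT" padded with blanks */
    return 0;
}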
integer s_cmp (char *, const char *, ftnlen, ftnlen)

Definition at line 1071 of file lapackblas.cpp.

References b.

Referenced by dcsrch_(), ilaenv_(), lnsrlb_(), mainlb_(), prn3lb_(), and setulb_().

{
register unsigned char *a, *aend, *b, *bend;
a = (unsigned char *)a0;
b = (unsigned char *)b0;
aend = a + la;
bend = b + lb;

if(la <= lb)
        {
        while(a < aend)
                if(*a != *b)
                        return( *a - *b );
                else
                        { ++a; ++b; }

        while(b < bend)
                if(*b != ' ')
                        return( ' ' - *b );
                else    ++b;
        }

else
        {
        while(b < bend)
                if(*a == *b)
                        { ++a; ++b; }
                else
                        return( *a - *b );
        while(a < aend)
                if(*a != ' ')
                        return(*a - ' ');
                else    ++a;
        }
return(0);
}
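s_cmp compares two fixed-length strings with Fortran semantics: the shorter operand is treated as if blank-padded to the longer length, which is exactly how ilaenv_ above matches name fragments such as "GE" and "QRF". A minimal sketch (main() is an illustrative assumption):

#include <stdio.h>
#include "lapackblas.h"    /* assumed include path */

int main(void)
{
    char a[] = "GE  ";   /* trailing blanks do not affect the comparison */
    if (s_cmp(a, "GE", (ftnlen)4, (ftnlen)2) == 0)
        printf("\"GE  \" compares equal to \"GE\"\n");
    return 0;
}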
void s_copy (char *a, const char *b, ftnlen la, ftnlen lb)

Definition at line 1121 of file lapackblas.cpp.

Referenced by dcsrch_(), errclb_(), ilaenv_(), lnsrlb_(), mainlb_(), and prn2lb_().

{
        register char *aend;
        const register char *bend;

        aend = a + la;

        if(la <= lb)
#ifndef NO_OVERWRITE
                if (a <= b || a >= b + la)
#endif
                        while(a < aend)
                                *a++ = *b++;
#ifndef NO_OVERWRITE
                else
                        for(b += la; a < aend; )
                                *--aend = *--b;
#endif

        else {
                bend = b + lb;
#ifndef NO_OVERWRITE
                if (a <= b || a >= bend)
#endif
                        while(b < bend)
                                *a++ = *b++;
#ifndef NO_OVERWRITE
                else {
                        a += lb;
                        while(b < bend)
                                *--a = *--bend;
                        a += lb;
                        }
#endif
                while(a < aend)
                        *a++ = ' ';
                }
        }
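s_copy is Fortran character assignment between fixed-length strings: the destination is truncated or blank-padded to length la and no NUL terminator is written, which is how ilaenv_ above fills its 6-character SUBNAM buffer. A minimal sketch (main() is an illustrative assumption):

#include <stdio.h>
#include "lapackblas.h"    /* assumed include path */

int main(void)
{
    char subnam[6], c2[2];
    s_copy(subnam, "sgeqrf", (ftnlen)6, (ftnlen)6);  /* copy 6 chars, no NUL */
    s_copy(c2, subnam + 1, (ftnlen)2, (ftnlen)2);    /* c2 = "ge"            */
    printf("subnam = \"%.6s\", c2 = \"%.2s\"\n", subnam, c2);
    return 0;
}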
int saxpy_ (integer *n, real *sa, real *sx, integer *incx, real *sy, integer *incy)

Definition at line 994 of file lapackblas.cpp.

Referenced by EMAN::PCA::dopca_ooc(), EMAN::PCA::Lanczos(), EMAN::PCAlarge::Lanczos(), EMAN::PCA::Lanczos_ooc(), slatrd_(), and ssytd2_().

{
    /* System generated locals */
    integer i__1;
    /* Local variables */
    static integer i__, m, ix, iy, mp1;
/*     constant times a vector plus a vector.   
       uses unrolled loop for increments equal to one.   
       jack dongarra, linpack, 3/11/78.   
       modified 12/3/93, array(1) declarations changed to array(*)   
       Parameter adjustments */
    --sy;
    --sx;
    /* Function Body */
    if (*n <= 0) {
        return 0;
    }
    if (*sa == 0.f) {
        return 0;
    }
    if (*incx == 1 && *incy == 1) {
        goto L20;
    }
/*        code for unequal increments or equal increments   
            not equal to 1 */
    ix = 1;
    iy = 1;
    if (*incx < 0) {
        ix = (-(*n) + 1) * *incx + 1;
    }
    if (*incy < 0) {
        iy = (-(*n) + 1) * *incy + 1;
    }
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        sy[iy] += *sa * sx[ix];
        ix += *incx;
        iy += *incy;
/* L10: */
    }
    return 0;
/*        code for both increments equal to 1   
          clean-up loop */
L20:
    m = *n % 4;
    if (m == 0) {
        goto L40;
    }
    i__1 = m;
    for (i__ = 1; i__ <= i__1; ++i__) {
        sy[i__] += *sa * sx[i__];
/* L30: */
    }
    if (*n < 4) {
        return 0;
    }
L40:
    mp1 = m + 1;
    i__1 = *n;
    for (i__ = mp1; i__ <= i__1; i__ += 4) {
        sy[i__] += *sa * sx[i__];
        sy[i__ + 1] += *sa * sx[i__ + 1];
        sy[i__ + 2] += *sa * sx[i__ + 2];
        sy[i__ + 3] += *sa * sx[i__ + 3];
/* L50: */
    }
    return 0;
} /* saxpy_ */
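saxpy_ computes y := sa*x + y over two strided vectors (BLAS level 1); with unit increments it takes the unrolled loop above. A minimal sketch (main() is an illustrative assumption):

#include <stdio.h>
#include "lapackblas.h"    /* assumed include path */

int main(void)
{
    integer n = 4, inc = 1;
    real alpha = 2.f;
    real x[4] = { 1.f, 2.f, 3.f, 4.f };
    real y[4] = { .5f, .5f, .5f, .5f };

    saxpy_(&n, &alpha, x, &inc, y, &inc);   /* y = {2.5, 4.5, 6.5, 8.5} */
    for (int i = 0; i < 4; ++i)
        printf("y[%d] = %g\n", i, y[i]);
    return 0;
}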
int sbdsqr_ (const char *uplo, integer *n, integer *ncvt, integer *nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real *u, integer *ldu, real *c__, integer *ldc, real *work, integer *info)

Definition at line 22954 of file lapackblas.cpp.

References c___ref, dabs, df2cmax, df2cmin, f2cmax, lsame_(), r_sign(), slamch_(), slartg_(), slas2_(), slasq1_(), slasr_(), slasv2_(), sqrt(), srot_(), sscal_(), sswap_(), u_ref, vt_ref, and xerbla_().

Referenced by sgesvd_().

{
    /* System generated locals */
    integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, 
            i__2;
    real r__1, r__2, r__3, r__4;
    doublereal d__1;

    /* Builtin functions */
    //    double pow_dd(doublereal *, doublereal *), sqrt(doublereal), r_sign(real *, real *);

    /* Local variables */
    static real abse;
    static integer idir;
    static real abss;
    static integer oldm;
    static real cosl;
    static integer isub, iter;
    static real unfl, sinl, cosr, smin, smax, sinr;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
            integer *, real *, real *), slas2_(real *, real *, real *, real *,
             real *);
    static real f, g, h__;
    static integer i__, j, m;
    static real r__;
    extern logical lsame_(const char *, const char *);
    static real oldcs;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static integer oldll;
    static real shift, sigmn, oldsn;
    static integer maxit;
    static real sminl;
    extern /* Subroutine */ int slasr_(const char *, const char *, const char *, integer *, 
            integer *, real *, real *, real *, integer *);
    static real sigmx;
    static logical lower;
    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
            integer *), slasq1_(integer *, real *, real *, real *, integer *),
             slasv2_(real *, real *, real *, real *, real *, real *, real *, 
            real *, real *);
    static real cs;
    static integer ll;
    static real sn, mu;
    extern doublereal slamch_(const char *);
    extern /* Subroutine */ int xerbla_(const char *, integer *);
    static real sminoa;
    extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real *
            );
    static real thresh;
    static logical rotate;
    static real sminlo;
    static integer nm1;
    static real tolmul;
    static integer nm12, nm13, lll;
    static real eps, sll, tol;


#define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
#define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1]
#define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1]


/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1999   


    Purpose   
    =======   

    SBDSQR computes the singular value decomposition (SVD) of a real   
    N-by-N (upper or lower) bidiagonal matrix B:  B = Q * S * P' (P'   
    denotes the transpose of P), where S is a diagonal matrix with   
    non-negative diagonal elements (the singular values of B), and Q   
    and P are orthogonal matrices.   

    The routine computes S, and optionally computes U * Q, P' * VT,   
    or Q' * C, for given real input matrices U, VT, and C.   

    See "Computing  Small Singular Values of Bidiagonal Matrices With   
    Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,   
    LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11,   
    no. 5, pp. 873-912, Sept 1990) and   
    "Accurate singular values and differential qd algorithms," by   
    B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics   
    Department, University of California at Berkeley, July 1992   
    for a detailed description of the algorithm.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  B is upper bidiagonal;   
            = 'L':  B is lower bidiagonal.   

    N       (input) INTEGER   
            The order of the matrix B.  N >= 0.   

    NCVT    (input) INTEGER   
            The number of columns of the matrix VT. NCVT >= 0.   

    NRU     (input) INTEGER   
            The number of rows of the matrix U. NRU >= 0.   

    NCC     (input) INTEGER   
            The number of columns of the matrix C. NCC >= 0.   

    D       (input/output) REAL array, dimension (N)   
            On entry, the n diagonal elements of the bidiagonal matrix B.   
            On exit, if INFO=0, the singular values of B in decreasing   
            order.   

    E       (input/output) REAL array, dimension (N)   
            On entry, the elements of E contain the   
            offdiagonal elements of the bidiagonal matrix whose SVD   
            is desired. On normal exit (INFO = 0), E is destroyed.   
            If the algorithm does not converge (INFO > 0), D and E   
            will contain the diagonal and superdiagonal elements of a   
            bidiagonal matrix orthogonally equivalent to the one given   
            as input. E(N) is used for workspace.   

    VT      (input/output) REAL array, dimension (LDVT, NCVT)   
            On entry, an N-by-NCVT matrix VT.   
            On exit, VT is overwritten by P' * VT.   
            VT is not referenced if NCVT = 0.   

    LDVT    (input) INTEGER   
            The leading dimension of the array VT.   
            LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0.   

    U       (input/output) REAL array, dimension (LDU, N)   
            On entry, an NRU-by-N matrix U.   
            On exit, U is overwritten by U * Q.   
            U is not referenced if NRU = 0.   

    LDU     (input) INTEGER   
            The leading dimension of the array U.  LDU >= max(1,NRU).   

    C       (input/output) REAL array, dimension (LDC, NCC)   
            On entry, an N-by-NCC matrix C.   
            On exit, C is overwritten by Q' * C.   
            C is not referenced if NCC = 0.   

    LDC     (input) INTEGER   
            The leading dimension of the array C.   
            LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0.   

    WORK    (workspace) REAL array, dimension (4*N)   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  If INFO = -i, the i-th argument had an illegal value   
            > 0:  the algorithm did not converge; D and E contain the   
                  elements of a bidiagonal matrix which is orthogonally   
                  similar to the input matrix B;  if INFO = i, i   
                  elements of E have not converged to zero.   

    Internal Parameters   
    ===================   

    TOLMUL  REAL, default = max(10,min(100,EPS**(-1/8)))   
            TOLMUL controls the convergence criterion of the QR loop.   
            If it is positive, TOLMUL*EPS is the desired relative   
               precision in the computed singular values.   
            If it is negative, abs(TOLMUL*EPS*sigma_max) is the   
               desired absolute accuracy in the computed singular   
               values (corresponds to relative accuracy   
               abs(TOLMUL*EPS) in the largest singular value.   
            abs(TOLMUL) should be between 1 and 1/EPS, and preferably   
               between 10 (for fast convergence) and .1/EPS   
               (for there to be some accuracy in the results).   
            Default is to lose at either one eighth or 2 of the   
               available decimal digits in each computed singular value   
               (whichever is smaller).   

    MAXITR  INTEGER, default = 6   
            MAXITR controls the maximum number of passes of the   
            algorithm through its inner loop. The algorithms stops   
            (and so fails to converge) if the number of passes   
            through the inner loop exceeds MAXITR*N**2.   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    --d__;
    --e;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1 * 1;
    vt -= vt_offset;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1 * 1;
    u -= u_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    --work;

    /* Function Body */
    *info = 0;
    lower = lsame_(uplo, "L");
    if (! lsame_(uplo, "U") && ! lower) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*ncvt < 0) {
        *info = -3;
    } else if (*nru < 0) {
        *info = -4;
    } else if (*ncc < 0) {
        *info = -5;
    } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < f2cmax(1,*n)) {
        *info = -9;
    } else if (*ldu < f2cmax(1,*nru)) {
        *info = -11;
    } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < f2cmax(1,*n)) {
        *info = -13;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("SBDSQR", &i__1);
        return 0;
    }
    if (*n == 0) {
        return 0;
    }
    if (*n == 1) {
        goto L160;
    }

/*     ROTATE is true if any singular vectors desired, false otherwise */

    rotate = *ncvt > 0 || *nru > 0 || *ncc > 0;

/*     If no singular vectors desired, use qd algorithm */

    if (! rotate) {
        slasq1_(n, &d__[1], &e[1], &work[1], info);
        return 0;
    }

    nm1 = *n - 1;
    nm12 = nm1 + nm1;
    nm13 = nm12 + nm1;
    idir = 0;

/*     Get machine constants */

    eps = slamch_("Epsilon");
    unfl = slamch_("Safe minimum");

/*     If matrix lower bidiagonal, rotate to be upper bidiagonal   
       by applying Givens rotations on the left */

    if (lower) {
        i__1 = *n - 1;
        for (i__ = 1; i__ <= i__1; ++i__) {
            slartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
            d__[i__] = r__;
            e[i__] = sn * d__[i__ + 1];
            d__[i__ + 1] = cs * d__[i__ + 1];
            work[i__] = cs;
            work[nm1 + i__] = sn;
/* L10: */
        }

/*        Update singular vectors if desired */

        if (*nru > 0) {
            slasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], 
                    ldu);
        }
        if (*ncc > 0) {
            slasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset],
                     ldc);
        }
    }

/*     Compute singular values to relative accuracy TOL   
       (By setting TOL to be negative, algorithm will compute   
       singular values to absolute accuracy ABS(TOL)*norm(input matrix))   

   Computing MAX   
   Computing MIN */
    d__1 = (doublereal) eps;
//chao changed pow_dd to pow
    r__3 = 100.f, r__4 = pow(d__1, c_b15);
    r__1 = 10.f, r__2 = df2cmin(r__3,r__4);
    tolmul = df2cmax(r__1,r__2);
    tol = tolmul * eps;

/*     Compute approximate maximum, minimum singular values */

    smax = 0.f;
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
/* Computing MAX */
        r__2 = smax, r__3 = (r__1 = d__[i__], dabs(r__1));
        smax = df2cmax(r__2,r__3);
/* L20: */
    }
    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
/* Computing MAX */
        r__2 = smax, r__3 = (r__1 = e[i__], dabs(r__1));
        smax = df2cmax(r__2,r__3);
/* L30: */
    }
    sminl = 0.f;
    if (tol >= 0.f) {

/*        Relative accuracy desired */

        sminoa = dabs(d__[1]);
        if (sminoa == 0.f) {
            goto L50;
        }
        mu = sminoa;
        i__1 = *n;
        for (i__ = 2; i__ <= i__1; ++i__) {
            mu = (r__2 = d__[i__], dabs(r__2)) * (mu / (mu + (r__1 = e[i__ - 
                    1], dabs(r__1))));
            sminoa = df2cmin(sminoa,mu);
            if (sminoa == 0.f) {
                goto L50;
            }
/* L40: */
        }
L50:
        sminoa /= sqrt((real) (*n));
/* Computing MAX */
        r__1 = tol * sminoa, r__2 = *n * 6 * *n * unfl;
        thresh = df2cmax(r__1,r__2);
    } else {

/*        Absolute accuracy desired   

   Computing MAX */
        r__1 = dabs(tol) * smax, r__2 = *n * 6 * *n * unfl;
        thresh = df2cmax(r__1,r__2);
    }

/*     Prepare for main iteration loop for the singular values   
       (MAXIT is the maximum number of passes through the inner   
       loop permitted before nonconvergence signalled.) */

    maxit = *n * 6 * *n;
    iter = 0;
    oldll = -1;
    oldm = -1;

/*     M points to last element of unconverged part of matrix */

    m = *n;

/*     Begin main iteration loop */

L60:

/*     Check for convergence or exceeding iteration count */

    if (m <= 1) {
        goto L160;
    }
    if (iter > maxit) {
        goto L200;
    }

/*     Find diagonal block of matrix to work on */

    if (tol < 0.f && (r__1 = d__[m], dabs(r__1)) <= thresh) {
        d__[m] = 0.f;
    }
    smax = (r__1 = d__[m], dabs(r__1));
    smin = smax;
    i__1 = m - 1;
    for (lll = 1; lll <= i__1; ++lll) {
        ll = m - lll;
        abss = (r__1 = d__[ll], dabs(r__1));
        abse = (r__1 = e[ll], dabs(r__1));
        if (tol < 0.f && abss <= thresh) {
            d__[ll] = 0.f;
        }
        if (abse <= thresh) {
            goto L80;
        }
        smin = df2cmin(smin,abss);
/* Computing MAX */
        r__1 = f2cmax(smax,abss);
        smax = df2cmax(r__1,abse);
/* L70: */
    }
    ll = 0;
    goto L90;
L80:
    e[ll] = 0.f;

/*     Matrix splits since E(LL) = 0 */

    if (ll == m - 1) {

/*        Convergence of bottom singular value, return to top of loop */

        --m;
        goto L60;
    }
L90:
    ++ll;

/*     E(LL) through E(M-1) are nonzero, E(LL-1) is zero */

    if (ll == m - 1) {

/*        2 by 2 block, handle separately */

        slasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr,
                 &sinl, &cosl);
        d__[m - 1] = sigmx;
        e[m - 1] = 0.f;
        d__[m] = sigmn;

/*        Compute singular vectors, if desired */

        if (*ncvt > 0) {
            srot_(ncvt, &vt_ref(m - 1, 1), ldvt, &vt_ref(m, 1), ldvt, &cosr, &
                    sinr);
        }
        if (*nru > 0) {
            srot_(nru, &u_ref(1, m - 1), &c__1, &u_ref(1, m), &c__1, &cosl, &
                    sinl);
        }
        if (*ncc > 0) {
            srot_(ncc, &c___ref(m - 1, 1), ldc, &c___ref(m, 1), ldc, &cosl, &
                    sinl);
        }
        m += -2;
        goto L60;
    }

/*     If working on new submatrix, choose shift direction   
       (from larger end diagonal element towards smaller) */

    if (ll > oldm || m < oldll) {
        if ((r__1 = d__[ll], dabs(r__1)) >= (r__2 = d__[m], dabs(r__2))) {

/*           Chase bulge from top (big end) to bottom (small end) */

            idir = 1;
        } else {

/*           Chase bulge from bottom (big end) to top (small end) */

            idir = 2;
        }
    }

/*     Apply convergence tests */

    if (idir == 1) {

/*        Run convergence test in forward direction   
          First apply standard test to bottom of matrix */

        if ((r__2 = e[m - 1], dabs(r__2)) <= dabs(tol) * (r__1 = d__[m], dabs(
                r__1)) || tol < 0.f && (r__3 = e[m - 1], dabs(r__3)) <= 
                thresh) {
            e[m - 1] = 0.f;
            goto L60;
        }

        if (tol >= 0.f) {

/*           If relative accuracy desired,   
             apply convergence criterion forward */

            mu = (r__1 = d__[ll], dabs(r__1));
            sminl = mu;
            i__1 = m - 1;
            for (lll = ll; lll <= i__1; ++lll) {
                if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) {
                    e[lll] = 0.f;
                    goto L60;
                }
                sminlo = sminl;
                mu = (r__2 = d__[lll + 1], dabs(r__2)) * (mu / (mu + (r__1 = 
                        e[lll], dabs(r__1))));
                sminl = df2cmin(sminl,mu);
/* L100: */
            }
        }

    } else {

/*        Run convergence test in backward direction   
          First apply standard test to top of matrix */

        if ((r__2 = e[ll], dabs(r__2)) <= dabs(tol) * (r__1 = d__[ll], dabs(
                r__1)) || tol < 0.f && (r__3 = e[ll], dabs(r__3)) <= thresh) {
            e[ll] = 0.f;
            goto L60;
        }

        if (tol >= 0.f) {

/*           If relative accuracy desired,   
             apply convergence criterion backward */

            mu = (r__1 = d__[m], dabs(r__1));
            sminl = mu;
            i__1 = ll;
            for (lll = m - 1; lll >= i__1; --lll) {
                if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) {
                    e[lll] = 0.f;
                    goto L60;
                }
                sminlo = sminl;
                mu = (r__2 = d__[lll], dabs(r__2)) * (mu / (mu + (r__1 = e[
                        lll], dabs(r__1))));
                sminl = df2cmin(sminl,mu);
/* L110: */
            }
        }
    }
    oldll = ll;
    oldm = m;

/*     Compute shift.  First, test if shifting would ruin relative   
       accuracy, and if so set the shift to zero.   

   Computing MAX */
    r__1 = eps, r__2 = tol * .01f;
    if (tol >= 0.f && *n * tol * (sminl / smax) <= df2cmax(r__1,r__2)) {

/*        Use a zero shift to avoid loss of relative accuracy */

        shift = 0.f;
    } else {

/*        Compute the shift from 2-by-2 block at end of matrix */

        if (idir == 1) {
            sll = (r__1 = d__[ll], dabs(r__1));
            slas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__);
        } else {
            sll = (r__1 = d__[m], dabs(r__1));
            slas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__);
        }

/*        Test if shift negligible, and if so set to zero */

        if (sll > 0.f) {
/* Computing 2nd power */
            r__1 = shift / sll;
            if (r__1 * r__1 < eps) {
                shift = 0.f;
            }
        }
    }

/*     Increment iteration count */

    iter = iter + m - ll;

/*     If SHIFT = 0, do simplified QR iteration */

    if (shift == 0.f) {
        if (idir == 1) {

/*           Chase bulge from top to bottom   
             Save cosines and sines for later singular vector updates */

            cs = 1.f;
            oldcs = 1.f;
            i__1 = m - 1;
            for (i__ = ll; i__ <= i__1; ++i__) {
                r__1 = d__[i__] * cs;
                slartg_(&r__1, &e[i__], &cs, &sn, &r__);
                if (i__ > ll) {
                    e[i__ - 1] = oldsn * r__;
                }
                r__1 = oldcs * r__;
                r__2 = d__[i__ + 1] * sn;
                slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]);
                work[i__ - ll + 1] = cs;
                work[i__ - ll + 1 + nm1] = sn;
                work[i__ - ll + 1 + nm12] = oldcs;
                work[i__ - ll + 1 + nm13] = oldsn;
/* L120: */
            }
            h__ = d__[m] * cs;
            d__[m] = h__ * oldcs;
            e[m - 1] = h__ * oldsn;

/*           Update singular vectors */

            if (*ncvt > 0) {
                i__1 = m - ll + 1;
                slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &
                        vt_ref(ll, 1), ldvt);
            }
            if (*nru > 0) {
                i__1 = m - ll + 1;
                slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 
                        + 1], &u_ref(1, ll), ldu);
            }
            if (*ncc > 0) {
                i__1 = m - ll + 1;
                slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 
                        + 1], &c___ref(ll, 1), ldc);
            }

/*           Test convergence */

            if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) {
                e[m - 1] = 0.f;
            }

        } else {

/*           Chase bulge from bottom to top   
             Save cosines and sines for later singular vector updates */

            cs = 1.f;
            oldcs = 1.f;
            i__1 = ll + 1;
            for (i__ = m; i__ >= i__1; --i__) {
                r__1 = d__[i__] * cs;
                slartg_(&r__1, &e[i__ - 1], &cs, &sn, &r__);
                if (i__ < m) {
                    e[i__] = oldsn * r__;
                }
                r__1 = oldcs * r__;
                r__2 = d__[i__ - 1] * sn;
                slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]);
                work[i__ - ll] = cs;
                work[i__ - ll + nm1] = -sn;
                work[i__ - ll + nm12] = oldcs;
                work[i__ - ll + nm13] = -oldsn;
/* L130: */
            }
            h__ = d__[ll] * cs;
            d__[ll] = h__ * oldcs;
            e[ll] = h__ * oldsn;

/*           Update singular vectors */

            if (*ncvt > 0) {
                i__1 = m - ll + 1;
                slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[
                        nm13 + 1], &vt_ref(ll, 1), ldvt);
            }
            if (*nru > 0) {
                i__1 = m - ll + 1;
                slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u_ref(
                        1, ll), ldu);
            }
            if (*ncc > 0) {
                i__1 = m - ll + 1;
                slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &
                        c___ref(ll, 1), ldc);
            }

/*           Test convergence */

            if ((r__1 = e[ll], dabs(r__1)) <= thresh) {
                e[ll] = 0.f;
            }
        }
    } else {

/*        Use nonzero shift */

        if (idir == 1) {

/*           Chase bulge from top to bottom   
             Save cosines and sines for later singular vector updates */

            f = ((r__1 = d__[ll], dabs(r__1)) - shift) * (r_sign(&c_b49, &d__[
                    ll]) + shift / d__[ll]);
            g = e[ll];
            i__1 = m - 1;
            for (i__ = ll; i__ <= i__1; ++i__) {
                slartg_(&f, &g, &cosr, &sinr, &r__);
                if (i__ > ll) {
                    e[i__ - 1] = r__;
                }
                f = cosr * d__[i__] + sinr * e[i__];
                e[i__] = cosr * e[i__] - sinr * d__[i__];
                g = sinr * d__[i__ + 1];
                d__[i__ + 1] = cosr * d__[i__ + 1];
                slartg_(&f, &g, &cosl, &sinl, &r__);
                d__[i__] = r__;
                f = cosl * e[i__] + sinl * d__[i__ + 1];
                d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__];
                if (i__ < m - 1) {
                    g = sinl * e[i__ + 1];
                    e[i__ + 1] = cosl * e[i__ + 1];
                }
                work[i__ - ll + 1] = cosr;
                work[i__ - ll + 1 + nm1] = sinr;
                work[i__ - ll + 1 + nm12] = cosl;
                work[i__ - ll + 1 + nm13] = sinl;
/* L140: */
            }
            e[m - 1] = f;

/*           Update singular vectors */

            if (*ncvt > 0) {
                i__1 = m - ll + 1;
                slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &
                        vt_ref(ll, 1), ldvt);
            }
            if (*nru > 0) {
                i__1 = m - ll + 1;
                slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 
                        + 1], &u_ref(1, ll), ldu);
            }
            if (*ncc > 0) {
                i__1 = m - ll + 1;
                slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 
                        + 1], &c___ref(ll, 1), ldc);
            }

/*           Test convergence */

            if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) {
                e[m - 1] = 0.f;
            }

        } else {

/*           Chase bulge from bottom to top   
             Save cosines and sines for later singular vector updates */

            f = ((r__1 = d__[m], dabs(r__1)) - shift) * (r_sign(&c_b49, &d__[
                    m]) + shift / d__[m]);
            g = e[m - 1];
            i__1 = ll + 1;
            for (i__ = m; i__ >= i__1; --i__) {
                slartg_(&f, &g, &cosr, &sinr, &r__);
                if (i__ < m) {
                    e[i__] = r__;
                }
                f = cosr * d__[i__] + sinr * e[i__ - 1];
                e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__];
                g = sinr * d__[i__ - 1];
                d__[i__ - 1] = cosr * d__[i__ - 1];
                slartg_(&f, &g, &cosl, &sinl, &r__);
                d__[i__] = r__;
                f = cosl * e[i__ - 1] + sinl * d__[i__ - 1];
                d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1];
                if (i__ > ll + 1) {
                    g = sinl * e[i__ - 2];
                    e[i__ - 2] = cosl * e[i__ - 2];
                }
                work[i__ - ll] = cosr;
                work[i__ - ll + nm1] = -sinr;
                work[i__ - ll + nm12] = cosl;
                work[i__ - ll + nm13] = -sinl;
/* L150: */
            }
            e[ll] = f;

/*           Test convergence */

            if ((r__1 = e[ll], dabs(r__1)) <= thresh) {
                e[ll] = 0.f;
            }

/*           Update singular vectors if desired */

            if (*ncvt > 0) {
                i__1 = m - ll + 1;
                slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[
                        nm13 + 1], &vt_ref(ll, 1), ldvt);
            }
            if (*nru > 0) {
                i__1 = m - ll + 1;
                slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u_ref(
                        1, ll), ldu);
            }
            if (*ncc > 0) {
                i__1 = m - ll + 1;
                slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &
                        c___ref(ll, 1), ldc);
            }
        }
    }

/*     QR iteration finished, go back and check convergence */

    goto L60;

/*     All singular values converged, so make them positive */

L160:
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        if (d__[i__] < 0.f) {
            d__[i__] = -d__[i__];

/*           Change sign of singular vectors, if desired */

            if (*ncvt > 0) {
                sscal_(ncvt, &c_b72, &vt_ref(i__, 1), ldvt);
            }
        }
/* L170: */
    }

/*     Sort the singular values into decreasing order (insertion sort on   
       singular values, but only one transposition per singular vector) */

    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {

/*        Scan for smallest D(I) */

        isub = 1;
        smin = d__[1];
        i__2 = *n + 1 - i__;
        for (j = 2; j <= i__2; ++j) {
            if (d__[j] <= smin) {
                isub = j;
                smin = d__[j];
            }
/* L180: */
        }
        if (isub != *n + 1 - i__) {

/*           Swap singular values and vectors */

            d__[isub] = d__[*n + 1 - i__];
            d__[*n + 1 - i__] = smin;
            if (*ncvt > 0) {
                sswap_(ncvt, &vt_ref(isub, 1), ldvt, &vt_ref(*n + 1 - i__, 1),
                         ldvt);
            }
            if (*nru > 0) {
                sswap_(nru, &u_ref(1, isub), &c__1, &u_ref(1, *n + 1 - i__), &
                        c__1);
            }
            if (*ncc > 0) {
                sswap_(ncc, &c___ref(isub, 1), ldc, &c___ref(*n + 1 - i__, 1),
                         ldc);
            }
        }
/* L190: */
    }
    goto L220;

/*     Maximum number of iterations exceeded, failure to converge */

L200:
    *info = 0;
    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
        if (e[i__] != 0.f) {
            ++(*info);
        }
/* L210: */
    }
L220:
    return 0;

/*     End of SBDSQR */

} /* sbdsqr_ */
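Usage sketch (illustrative only, not part of lapackblas.cpp; it assumes sbdsqr_ is declared, e.g. in lapackblas.h, with the argument order used in the definition above): compute the singular values of a 3-by-3 upper-bidiagonal matrix without accumulating singular vectors, so NCVT = NRU = NCC = 0 and the vector arguments are unreferenced dummies.

    #include <stdio.h>
    #include "lapackblas.h"

    static void bdsqr_example(void)
    {
        integer n = 3, zero = 0, one = 1, info;
        real d[3] = { 3.f, 2.f, 1.f };     /* diagonal of B                     */
        real e[3] = { .5f, .25f, 0.f };    /* superdiagonal; E(N) is workspace  */
        real dummy = 0.f;                  /* VT, U, C are not referenced       */
        real work[12];                     /* dimension 4*N                     */

        sbdsqr_("U", &n, &zero, &zero, &zero, d, e, &dummy, &one,
                &dummy, &one, &dummy, &one, work, &info);
        if (info == 0)
            printf("singular values: %g %g %g\n", d[0], d[1], d[2]);
    }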
int scopy_ ( integer * n,
real * sx,
integer * incx,
real * sy,
integer * incy 
)

Definition at line 1163 of file lapackblas.cpp.

Referenced by EMAN::PCA::Lanczos_ooc(), slaed0_(), slaed1_(), slaed2_(), slaed3_(), slaed8_(), slaed9_(), slaeda_(), slarfb_(), and slasq1_().

{
    /* System generated locals */
    integer i__1;
    /* Local variables */
    static integer i__, m, ix, iy, mp1;
/*     copies a vector, x, to a vector, y.   
       uses unrolled loops for increments equal to 1.   
       jack dongarra, linpack, 3/11/78.   
       modified 12/3/93, array(1) declarations changed to array(*)   
       Parameter adjustments */
    --sy;
    --sx;
    /* Function Body */
    if (*n <= 0) {
        return 0;
    }
    if (*incx == 1 && *incy == 1) {
        goto L20;
    }
/*        code for unequal increments or equal increments   
            not equal to 1 */
    ix = 1;
    iy = 1;
    if (*incx < 0) {
        ix = (-(*n) + 1) * *incx + 1;
    }
    if (*incy < 0) {
        iy = (-(*n) + 1) * *incy + 1;
    }
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        sy[iy] = sx[ix];
        ix += *incx;
        iy += *incy;
/* L10: */
    }
    return 0;
/*        code for both increments equal to 1   
          clean-up loop */
L20:
    m = *n % 7;
    if (m == 0) {
        goto L40;
    }
    i__1 = m;
    for (i__ = 1; i__ <= i__1; ++i__) {
        sy[i__] = sx[i__];
/* L30: */
    }
    if (*n < 7) {
        return 0;
    }
L40:
    mp1 = m + 1;
    i__1 = *n;
    for (i__ = mp1; i__ <= i__1; i__ += 7) {
        sy[i__] = sx[i__];
        sy[i__ + 1] = sx[i__ + 1];
        sy[i__ + 2] = sx[i__ + 2];
        sy[i__ + 3] = sx[i__ + 3];
        sy[i__ + 4] = sx[i__ + 4];
        sy[i__ + 5] = sx[i__ + 5];
        sy[i__ + 6] = sx[i__ + 6];
/* L50: */
    }
    return 0;
} /* scopy_ */
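Usage sketch (illustrative, not part of the library source; array sizes and values are assumptions): copy a five-element real vector into another with unit strides, using the pointer-based f2c calling convention.

    #include "lapackblas.h"

    static void copy_example(void)
    {
        real x[5] = { 1.f, 2.f, 3.f, 4.f, 5.f };
        real y[5];
        integer n = 5, inc = 1;

        scopy_(&n, x, &inc, y, &inc);      /* y is now a copy of x */
    }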
doublereal sdot_ ( integer * n,
real * sx,
integer * incx,
real * sy,
integer * incy 
)

Definition at line 1236 of file lapackblas.cpp.

Referenced by EMAN::PCA::Lanczos(), EMAN::PCAlarge::Lanczos(), EMAN::PCA::Lanczos_ooc(), slatrd_(), and ssytd2_().

{
    /* System generated locals */
    integer i__1;
    real ret_val;
    /* Local variables */
    static integer i__, m;
    static real stemp;
    static integer ix, iy, mp1;
/*     forms the dot product of two vectors.   
       uses unrolled loops for increments equal to one.   
       jack dongarra, linpack, 3/11/78.   
       modified 12/3/93, array(1) declarations changed to array(*)   
       Parameter adjustments */
    --sy;
    --sx;
    /* Function Body */
    stemp = 0.f;
    ret_val = 0.f;
    if (*n <= 0) {
        return ret_val;
    }
    if (*incx == 1 && *incy == 1) {
        goto L20;
    }
/*        code for unequal increments or equal increments   
            not equal to 1 */
    ix = 1;
    iy = 1;
    if (*incx < 0) {
        ix = (-(*n) + 1) * *incx + 1;
    }
    if (*incy < 0) {
        iy = (-(*n) + 1) * *incy + 1;
    }
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        stemp += sx[ix] * sy[iy];
        ix += *incx;
        iy += *incy;
/* L10: */
    }
    ret_val = stemp;
    return ret_val;
/*        code for both increments equal to 1   
          clean-up loop */
L20:
    m = *n % 5;
    if (m == 0) {
        goto L40;
    }
    i__1 = m;
    for (i__ = 1; i__ <= i__1; ++i__) {
        stemp += sx[i__] * sy[i__];
/* L30: */
    }
    if (*n < 5) {
        goto L60;
    }
L40:
    mp1 = m + 1;
    i__1 = *n;
    for (i__ = mp1; i__ <= i__1; i__ += 5) {
        stemp = stemp + sx[i__] * sy[i__] + sx[i__ + 1] * sy[i__ + 1] + sx[
                i__ + 2] * sy[i__ + 2] + sx[i__ + 3] * sy[i__ + 3] + sx[i__ + 
                4] * sy[i__ + 4];
/* L50: */
    }
L60:
    ret_val = stemp;
    return ret_val;
} /* sdot_ */
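Usage sketch (illustrative, not part of the library source): form the dot product of two length-4 vectors with unit increments; note that the f2c translation returns doublereal even though the inputs are real.

    #include "lapackblas.h"

    static doublereal dot_example(void)
    {
        real x[4] = { 1.f, 2.f, 3.f, 4.f };
        real y[4] = { 4.f, 3.f, 2.f, 1.f };
        integer n = 4, inc = 1;

        return sdot_(&n, x, &inc, y, &inc);   /* 1*4 + 2*3 + 3*2 + 4*1 = 20 */
    }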
int sgebd2_ ( integer * m,
integer * n,
real * a,
integer * lda,
real * d__,
real * e,
real * tauq,
real * taup,
real * work,
integer * info 
)

Definition at line 21343 of file lapackblas.cpp.

References a_ref, f2cmax, f2cmin, slarf_(), slarfg_(), and xerbla_().

Referenced by sgebrd_().

{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    SGEBD2 reduces a real general m by n matrix A to upper or lower   
    bidiagonal form B by an orthogonal transformation: Q' * A * P = B.   

    If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows in the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns in the matrix A.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the m by n general matrix to be reduced.   
            On exit,   
            if m >= n, the diagonal and the first superdiagonal are   
              overwritten with the upper bidiagonal matrix B; the   
              elements below the diagonal, with the array TAUQ, represent   
              the orthogonal matrix Q as a product of elementary   
              reflectors, and the elements above the first superdiagonal,   
              with the array TAUP, represent the orthogonal matrix P as   
              a product of elementary reflectors;   
            if m < n, the diagonal and the first subdiagonal are   
              overwritten with the lower bidiagonal matrix B; the   
              elements below the first subdiagonal, with the array TAUQ,   
              represent the orthogonal matrix Q as a product of   
              elementary reflectors, and the elements above the diagonal,   
              with the array TAUP, represent the orthogonal matrix P as   
              a product of elementary reflectors.   
            See Further Details.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    D       (output) REAL array, dimension (min(M,N))   
            The diagonal elements of the bidiagonal matrix B:   
            D(i) = A(i,i).   

    E       (output) REAL array, dimension (min(M,N)-1)   
            The off-diagonal elements of the bidiagonal matrix B:   
            if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;   
            if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.   

    TAUQ    (output) REAL array, dimension (min(M,N))   
            The scalar factors of the elementary reflectors which   
            represent the orthogonal matrix Q. See Further Details.   

    TAUP    (output) REAL array, dimension (min(M,N))   
            The scalar factors of the elementary reflectors which   
            represent the orthogonal matrix P. See Further Details.   

    WORK    (workspace) REAL array, dimension (max(M,N))   

    INFO    (output) INTEGER   
            = 0: successful exit.   
            < 0: if INFO = -i, the i-th argument had an illegal value.   

    Further Details   
    ===============   

    The matrices Q and P are represented as products of elementary   
    reflectors:   

    If m >= n,   

       Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)   

    Each H(i) and G(i) has the form:   

       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'   

    where tauq and taup are real scalars, and v and u are real vectors;   
    v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);   
    u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);   
    tauq is stored in TAUQ(i) and taup in TAUP(i).   

    If m < n,   

       Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)   

    Each H(i) and G(i) has the form:   

       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'   

    where tauq and taup are real scalars, and v and u are real vectors;   
    v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);   
    u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);   
    tauq is stored in TAUQ(i) and taup in TAUP(i).   

    The contents of A on exit are illustrated by the following examples:   

    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):   

      (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )   
      (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )   
      (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )   
      (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )   
      (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )   
      (  v1  v2  v3  v4  v5 )   

    where d and e denote diagonal and off-diagonal elements of B, vi   
    denotes an element of the vector defining H(i), and ui an element of   
    the vector defining G(i).   

    =====================================================================   


       Test the input parameters   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    static integer i__;
    extern /* Subroutine */ int slarf_(const char *, integer *, integer *, real *, 
            integer *, real *, real *, integer *, real *), xerbla_(
            const char *, integer *), slarfg_(integer *, real *, real *, 
            integer *, real *);
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --d__;
    --e;
    --tauq;
    --taup;
    --work;

    /* Function Body */
    *info = 0;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < f2cmax(1,*m)) {
        *info = -4;
    }
    if (*info < 0) {
        i__1 = -(*info);
        xerbla_("SGEBD2", &i__1);
        return 0;
    }

    if (*m >= *n) {

/*        Reduce to upper bidiagonal form */

        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {

/*           Generate elementary reflector H(i) to annihilate A(i+1:m,i)   

   Computing MIN */
            i__2 = i__ + 1;
            i__3 = *m - i__ + 1;
            slarfg_(&i__3, &a_ref(i__, i__), &a_ref(f2cmin(i__2,*m), i__), &c__1,
                     &tauq[i__]);
            d__[i__] = a_ref(i__, i__);
            a_ref(i__, i__) = 1.f;

/*           Apply H(i) to A(i:m,i+1:n) from the left */

            i__2 = *m - i__ + 1;
            i__3 = *n - i__;
            slarf_("Left", &i__2, &i__3, &a_ref(i__, i__), &c__1, &tauq[i__], 
                    &a_ref(i__, i__ + 1), lda, &work[1]);
            a_ref(i__, i__) = d__[i__];

            if (i__ < *n) {

/*              Generate elementary reflector G(i) to annihilate   
                A(i,i+2:n)   

   Computing MIN */
                i__2 = i__ + 2;
                i__3 = *n - i__;
                slarfg_(&i__3, &a_ref(i__, i__ + 1), &a_ref(i__, f2cmin(i__2,*n))
                        , lda, &taup[i__]);
                e[i__] = a_ref(i__, i__ + 1);
                a_ref(i__, i__ + 1) = 1.f;

/*              Apply G(i) to A(i+1:m,i+1:n) from the right */

                i__2 = *m - i__;
                i__3 = *n - i__;
                slarf_("Right", &i__2, &i__3, &a_ref(i__, i__ + 1), lda, &
                        taup[i__], &a_ref(i__ + 1, i__ + 1), lda, &work[1]);
                a_ref(i__, i__ + 1) = e[i__];
            } else {
                taup[i__] = 0.f;
            }
/* L10: */
        }
    } else {

/*        Reduce to lower bidiagonal form */

        i__1 = *m;
        for (i__ = 1; i__ <= i__1; ++i__) {

/*           Generate elementary reflector G(i) to annihilate A(i,i+1:n)   

   Computing MIN */
            i__2 = i__ + 1;
            i__3 = *n - i__ + 1;
            slarfg_(&i__3, &a_ref(i__, i__), &a_ref(i__, f2cmin(i__2,*n)), lda, &
                    taup[i__]);
            d__[i__] = a_ref(i__, i__);
            a_ref(i__, i__) = 1.f;

/*           Apply G(i) to A(i+1:m,i:n) from the right   

   Computing MIN */
            i__2 = i__ + 1;
            i__3 = *m - i__;
            i__4 = *n - i__ + 1;
            slarf_("Right", &i__3, &i__4, &a_ref(i__, i__), lda, &taup[i__], &
                    a_ref(f2cmin(i__2,*m), i__), lda, &work[1]);
            a_ref(i__, i__) = d__[i__];

            if (i__ < *m) {

/*              Generate elementary reflector H(i) to annihilate   
                A(i+2:m,i)   

   Computing MIN */
                i__2 = i__ + 2;
                i__3 = *m - i__;
                slarfg_(&i__3, &a_ref(i__ + 1, i__), &a_ref(f2cmin(i__2,*m), i__)
                        , &c__1, &tauq[i__]);
                e[i__] = a_ref(i__ + 1, i__);
                a_ref(i__ + 1, i__) = 1.f;

/*              Apply H(i) to A(i+1:m,i+1:n) from the left */

                i__2 = *m - i__;
                i__3 = *n - i__;
                slarf_("Left", &i__2, &i__3, &a_ref(i__ + 1, i__), &c__1, &
                        tauq[i__], &a_ref(i__ + 1, i__ + 1), lda, &work[1]);
                a_ref(i__ + 1, i__) = e[i__];
            } else {
                tauq[i__] = 0.f;
            }
/* L20: */
        }
    }
    return 0;

/*     End of SGEBD2 */

} /* sgebd2_ */
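Usage sketch (illustrative, not part of the library source; the 4-by-3 test matrix is an assumption): reduce a column-major matrix to upper-bidiagonal form with the unblocked routine. Since m >= n, D receives the diagonal of B and E its superdiagonal.

    #include "lapackblas.h"

    static void gebd2_example(void)
    {
        integer m = 4, n = 3, lda = 4, info;
        real a[12] = { 4.f, 1.f, 0.f, 2.f,     /* column 1 */
                       2.f, 3.f, 1.f, 0.f,     /* column 2 */
                       0.f, 1.f, 5.f, 2.f };   /* column 3 */
        real d[3], e[2], tauq[3], taup[3];
        real work[4];                          /* dimension max(M,N) */

        sgebd2_(&m, &n, a, &lda, d, e, tauq, taup, work, &info);
        /* A now holds B plus the reflectors defining Q and P. */
    }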
int sgebrd_ ( integer * m,
integer * n,
real * a,
integer * lda,
real * d__,
real * e,
real * tauq,
real * taup,
real * work,
integer * lwork,
integer * info 
)

Definition at line 21027 of file lapackblas.cpp.

References a_ref, c__3, f2cmax, f2cmin, ilaenv_(), nx, sgebd2_(), sgemm_(), slabrd_(), and xerbla_().

Referenced by sgesvd_().

{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    SGEBRD reduces a general real M-by-N matrix A to upper or lower   
    bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.   

    If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows in the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns in the matrix A.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the M-by-N general matrix to be reduced.   
            On exit,   
            if m >= n, the diagonal and the first superdiagonal are   
              overwritten with the upper bidiagonal matrix B; the   
              elements below the diagonal, with the array TAUQ, represent   
              the orthogonal matrix Q as a product of elementary   
              reflectors, and the elements above the first superdiagonal,   
              with the array TAUP, represent the orthogonal matrix P as   
              a product of elementary reflectors;   
            if m < n, the diagonal and the first subdiagonal are   
              overwritten with the lower bidiagonal matrix B; the   
              elements below the first subdiagonal, with the array TAUQ,   
              represent the orthogonal matrix Q as a product of   
              elementary reflectors, and the elements above the diagonal,   
              with the array TAUP, represent the orthogonal matrix P as   
              a product of elementary reflectors.   
            See Further Details.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    D       (output) REAL array, dimension (min(M,N))   
            The diagonal elements of the bidiagonal matrix B:   
            D(i) = A(i,i).   

    E       (output) REAL array, dimension (min(M,N)-1)   
            The off-diagonal elements of the bidiagonal matrix B:   
            if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;   
            if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.   

    TAUQ    (output) REAL array dimension (min(M,N))   
            The scalar factors of the elementary reflectors which   
            represent the orthogonal matrix Q. See Further Details.   

    TAUP    (output) REAL array, dimension (min(M,N))   
            The scalar factors of the elementary reflectors which   
            represent the orthogonal matrix P. See Further Details.   

    WORK    (workspace/output) REAL array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The length of the array WORK.  LWORK >= max(1,M,N).   
            For optimum performance LWORK >= (M+N)*NB, where NB   
            is the optimal blocksize.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    Further Details   
    ===============   

    The matrices Q and P are represented as products of elementary   
    reflectors:   

    If m >= n,   

       Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)   

    Each H(i) and G(i) has the form:   

       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'   

    where tauq and taup are real scalars, and v and u are real vectors;   
    v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);   
    u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);   
    tauq is stored in TAUQ(i) and taup in TAUP(i).   

    If m < n,   

       Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)   

    Each H(i) and G(i) has the form:   

       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'   

    where tauq and taup are real scalars, and v and u are real vectors;   
    v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);   
    u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);   
    tauq is stored in TAUQ(i) and taup in TAUP(i).   

    The contents of A on exit are illustrated by the following examples:   

    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):   

      (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )   
      (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )   
      (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )   
      (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )   
      (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )   
      (  v1  v2  v3  v4  v5 )   

    where d and e denote diagonal and off-diagonal elements of B, vi   
    denotes an element of the vector defining H(i), and ui an element of   
    the vector defining G(i).   

    =====================================================================   


       Test the input parameters   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static integer c__3 = 3;
    static integer c__2 = 2;
    static real c_b21 = -1.f;
    static real c_b22 = 1.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    static integer i__, j, nbmin, iinfo;
    extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *, 
            integer *, real *, real *, integer *, real *, integer *, real *, 
            real *, integer *);
    static integer minmn;
    extern /* Subroutine */ int sgebd2_(integer *, integer *, real *, integer 
            *, real *, real *, real *, real *, real *, integer *);
    static integer nb, nx;
    extern /* Subroutine */ int slabrd_(integer *, integer *, integer *, real 
            *, integer *, real *, real *, real *, real *, real *, integer *, 
            real *, integer *);
    static real ws;
    extern /* Subroutine */ int xerbla_(const char *, integer *);
    extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 
            integer *, integer *, ftnlen, ftnlen);
    static integer ldwrkx, ldwrky, lwkopt;
    static logical lquery;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --d__;
    --e;
    --tauq;
    --taup;
    --work;

    /* Function Body */
    *info = 0;
/* Computing MAX */
    i__1 = 1, i__2 = ilaenv_(&c__1, "SGEBRD", " ", m, n, &c_n1, &c_n1, (
            ftnlen)6, (ftnlen)1);
    nb = f2cmax(i__1,i__2);
    lwkopt = (*m + *n) * nb;
    work[1] = (real) lwkopt;
    lquery = *lwork == -1;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < f2cmax(1,*m)) {
        *info = -4;
    } else /* if(complicated condition) */ {
/* Computing MAX */
        i__1 = f2cmax(1,*m);
        if (*lwork < f2cmax(i__1,*n) && ! lquery) {
            *info = -10;
        }
    }
    if (*info < 0) {
        i__1 = -(*info);
        xerbla_("SGEBRD", &i__1);
        return 0;
    } else if (lquery) {
        return 0;
    }

/*     Quick return if possible */

    minmn = f2cmin(*m,*n);
    if (minmn == 0) {
        work[1] = 1.f;
        return 0;
    }

    ws = (real) f2cmax(*m,*n);
    ldwrkx = *m;
    ldwrky = *n;

    if (nb > 1 && nb < minmn) {

/*        Set the crossover point NX.   

   Computing MAX */
        i__1 = nb, i__2 = ilaenv_(&c__3, "SGEBRD", " ", m, n, &c_n1, &c_n1, (
                ftnlen)6, (ftnlen)1);
        nx = f2cmax(i__1,i__2);

/*        Determine when to switch from blocked to unblocked code. */

        if (nx < minmn) {
            ws = (real) ((*m + *n) * nb);
            if ((real) (*lwork) < ws) {

/*              Not enough work space for the optimal NB, consider using   
                a smaller block size. */

                nbmin = ilaenv_(&c__2, "SGEBRD", " ", m, n, &c_n1, &c_n1, (
                        ftnlen)6, (ftnlen)1);
                if (*lwork >= (*m + *n) * nbmin) {
                    nb = *lwork / (*m + *n);
                } else {
                    nb = 1;
                    nx = minmn;
                }
            }
        }
    } else {
        nx = minmn;
    }

    i__1 = minmn - nx;
    i__2 = nb;
    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {

/*        Reduce rows and columns i:i+nb-1 to bidiagonal form and return   
          the matrices X and Y which are needed to update the unreduced   
          part of the matrix */

        i__3 = *m - i__ + 1;
        i__4 = *n - i__ + 1;
        slabrd_(&i__3, &i__4, &nb, &a_ref(i__, i__), lda, &d__[i__], &e[i__], 
                &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx * nb 
                + 1], &ldwrky);

/*        Update the trailing submatrix A(i+nb:m,i+nb:n), using an update   
          of the form  A := A - V*Y' - X*U' */

        i__3 = *m - i__ - nb + 1;
        i__4 = *n - i__ - nb + 1;
        sgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b21, &a_ref(
                i__ + nb, i__), lda, &work[ldwrkx * nb + nb + 1], &ldwrky, &
                c_b22, &a_ref(i__ + nb, i__ + nb), lda)
                ;
        i__3 = *m - i__ - nb + 1;
        i__4 = *n - i__ - nb + 1;
        sgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b21, &
                work[nb + 1], &ldwrkx, &a_ref(i__, i__ + nb), lda, &c_b22, &
                a_ref(i__ + nb, i__ + nb), lda);

/*        Copy diagonal and off-diagonal elements of B back into A */

        if (*m >= *n) {
            i__3 = i__ + nb - 1;
            for (j = i__; j <= i__3; ++j) {
                a_ref(j, j) = d__[j];
                a_ref(j, j + 1) = e[j];
/* L10: */
            }
        } else {
            i__3 = i__ + nb - 1;
            for (j = i__; j <= i__3; ++j) {
                a_ref(j, j) = d__[j];
                a_ref(j + 1, j) = e[j];
/* L20: */
            }
        }
/* L30: */
    }

/*     Use unblocked code to reduce the remainder of the matrix */

    i__2 = *m - i__ + 1;
    i__1 = *n - i__ + 1;
    sgebd2_(&i__2, &i__1, &a_ref(i__, i__), lda, &d__[i__], &e[i__], &tauq[
            i__], &taup[i__], &work[1], &iinfo);
    work[1] = ws;
    return 0;

/*     End of SGEBRD */

} /* sgebrd_ */
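Usage sketch of the LWORK = -1 workspace-query convention described above (illustrative only; the 8-by-5 size, test data, and malloc-based allocation are assumptions): the first call only reports the optimal LWORK in WORK(1), the second call performs the blocked reduction.

    #include <stdlib.h>
    #include "lapackblas.h"

    static void gebrd_example(void)
    {
        integer m = 8, n = 5, lda = 8, lwork, info, i;
        real a[40];                          /* 8x5, column-major */
        real d[5], e[4], tauq[5], taup[5], wkopt;
        real *work;

        for (i = 0; i < 40; ++i)             /* arbitrary test data */
            a[i] = (real) (i % 7) - 3.f;

        lwork = -1;                          /* query: only WORK(1) is written */
        sgebrd_(&m, &n, a, &lda, d, e, tauq, taup, &wkopt, &lwork, &info);

        lwork = (integer) wkopt;
        work = (real *) malloc(lwork * sizeof(real));
        sgebrd_(&m, &n, a, &lda, d, e, tauq, taup, work, &lwork, &info);
        free(work);
    }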
int sgelq2_ ( integer * m,
integer * n,
real * a,
integer * lda,
real * tau,
real * work,
integer * info 
)

Definition at line 22808 of file lapackblas.cpp.

References a_ref, f2cmax, f2cmin, slarf_(), slarfg_(), and xerbla_().

Referenced by sgelqf_().

{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    SGELQ2 computes an LQ factorization of a real m by n matrix A:   
    A = L * Q.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the m by n matrix A.   
            On exit, the elements on and below the diagonal of the array   
            contain the m by min(m,n) lower trapezoidal matrix L (L is   
            lower triangular if m <= n); the elements above the diagonal,   
            with the array TAU, represent the orthogonal matrix Q as a   
            product of elementary reflectors (see Further Details).   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    TAU     (output) REAL array, dimension (min(M,N))   
            The scalar factors of the elementary reflectors (see Further   
            Details).   

    WORK    (workspace) REAL array, dimension (M)   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   

    Further Details   
    ===============   

    The matrix Q is represented as a product of elementary reflectors   

       Q = H(k) . . . H(2) H(1), where k = min(m,n).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a real scalar, and v is a real vector with   
    v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),   
    and tau in TAU(i).   

    =====================================================================   


       Test the input arguments   

       Parameter adjustments */
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    /* Local variables */
    static integer i__, k;
    extern /* Subroutine */ int slarf_(const char *, integer *, integer *, real *, 
            integer *, real *, real *, integer *, real *), xerbla_(
            const char *, integer *), slarfg_(integer *, real *, real *, 
            integer *, real *);
    static real aii;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]

    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < f2cmax(1,*m)) {
        *info = -4;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("SGELQ2", &i__1);
        return 0;
    }

    k = f2cmin(*m,*n);

    i__1 = k;
    for (i__ = 1; i__ <= i__1; ++i__) {

/*        Generate elementary reflector H(i) to annihilate A(i,i+1:n)   

   Computing MIN */
        i__2 = i__ + 1;
        i__3 = *n - i__ + 1;
        slarfg_(&i__3, &a_ref(i__, i__), &a_ref(i__, f2cmin(i__2,*n)), lda, &tau[
                i__]);
        if (i__ < *m) {

/*           Apply H(i) to A(i+1:m,i:n) from the right */

            aii = a_ref(i__, i__);
            a_ref(i__, i__) = 1.f;
            i__2 = *m - i__;
            i__3 = *n - i__ + 1;
            slarf_("Right", &i__2, &i__3, &a_ref(i__, i__), lda, &tau[i__], &
                    a_ref(i__ + 1, i__), lda, &work[1]);
            a_ref(i__, i__) = aii;
        }
/* L10: */
    }
    return 0;

/*     End of SGELQ2 */

} /* sgelq2_ */
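Usage sketch (illustrative, not part of the library source; the 2-by-4 matrix is an assumption): LQ-factor a wide matrix in place with the unblocked routine. On return, L sits on and below the diagonal of A, the reflectors defining Q above it, and the scalar factors in TAU.

    #include "lapackblas.h"

    static void gelq2_example(void)
    {
        integer m = 2, n = 4, lda = 2, info;
        real a[8] = { 1.f, 2.f,     /* column 1 */
                      0.f, 1.f,     /* column 2 */
                      2.f, 0.f,     /* column 3 */
                      1.f, 1.f };   /* column 4 */
        real tau[2];
        real work[2];               /* dimension M */

        sgelq2_(&m, &n, a, &lda, tau, work, &info);
    }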
int sgelqf_ ( integer * m,
integer * n,
real * a,
integer * lda,
real * tau,
real * work,
integer * lwork,
integer * info 
)

Definition at line 21958 of file lapackblas.cpp.

References a_ref, c__3, f2cmax, f2cmin, ilaenv_(), nx, sgelq2_(), slarfb_(), slarft_(), and xerbla_().

Referenced by sgesvd_().

{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    SGELQF computes an LQ factorization of a real M-by-N matrix A:   
    A = L * Q.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the M-by-N matrix A.   
            On exit, the elements on and below the diagonal of the array   
            contain the m-by-min(m,n) lower trapezoidal matrix L (L is   
            lower triangular if m <= n); the elements above the diagonal,   
            with the array TAU, represent the orthogonal matrix Q as a   
            product of elementary reflectors (see Further Details).   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    TAU     (output) REAL array, dimension (min(M,N))   
            The scalar factors of the elementary reflectors (see Further   
            Details).   

    WORK    (workspace/output) REAL array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.  LWORK >= max(1,M).   
            For optimum performance LWORK >= M*NB, where NB is the   
            optimal blocksize.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    Further Details   
    ===============   

    The matrix Q is represented as a product of elementary reflectors   

       Q = H(k) . . . H(2) H(1), where k = min(m,n).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a real scalar, and v is a real vector with   
    v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),   
    and tau in TAU(i).   

    =====================================================================   


       Test the input arguments   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static integer c__3 = 3;
    static integer c__2 = 2;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    static integer i__, k, nbmin, iinfo;
    extern /* Subroutine */ int sgelq2_(integer *, integer *, real *, integer 
            *, real *, real *, integer *);
    static integer ib, nb, nx;
    extern /* Subroutine */ int slarfb_(const char *, const char *, const char *, const char *, 
            integer *, integer *, integer *, real *, integer *, real *, 
            integer *, real *, integer *, real *, integer *), xerbla_(const char *, integer *);
    extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 
            integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int slarft_(const char *, const char *, integer *, integer *, 
            real *, integer *, real *, real *, integer *);
    static integer ldwork, lwkopt;
    static logical lquery;
    static integer iws;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
    nb = ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
            1);
    lwkopt = *m * nb;
    work[1] = (real) lwkopt;
    lquery = *lwork == -1;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < f2cmax(1,*m)) {
        *info = -4;
    } else if (*lwork < f2cmax(1,*m) && ! lquery) {
        *info = -7;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("SGELQF", &i__1);
        return 0;
    } else if (lquery) {
        return 0;
    }

/*     Quick return if possible */

    k = f2cmin(*m,*n);
    if (k == 0) {
        work[1] = 1.f;
        return 0;
    }

    nbmin = 2;
    nx = 0;
    iws = *m;
    if (nb > 1 && nb < k) {

/*        Determine when to cross over from blocked to unblocked code.   

   Computing MAX */
        i__1 = 0, i__2 = ilaenv_(&c__3, "SGELQF", " ", m, n, &c_n1, &c_n1, (
                ftnlen)6, (ftnlen)1);
        nx = f2cmax(i__1,i__2);
        if (nx < k) {

/*           Determine if workspace is large enough for blocked code. */

            ldwork = *m;
            iws = ldwork * nb;
            if (*lwork < iws) {

/*              Not enough workspace to use optimal NB:  reduce NB and   
                determine the minimum value of NB. */

                nb = *lwork / ldwork;
/* Computing MAX */
                i__1 = 2, i__2 = ilaenv_(&c__2, "SGELQF", " ", m, n, &c_n1, &
                        c_n1, (ftnlen)6, (ftnlen)1);
                nbmin = f2cmax(i__1,i__2);
            }
        }
    }

    if (nb >= nbmin && nb < k && nx < k) {

/*        Use blocked code initially */

        i__1 = k - nx;
        i__2 = nb;
        for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
/* Computing MIN */
            i__3 = k - i__ + 1;
            ib = f2cmin(i__3,nb);

/*           Compute the LQ factorization of the current block   
             A(i:i+ib-1,i:n) */

            i__3 = *n - i__ + 1;
            sgelq2_(&ib, &i__3, &a_ref(i__, i__), lda, &tau[i__], &work[1], &
                    iinfo);
            if (i__ + ib <= *m) {

/*              Form the triangular factor of the block reflector   
                H = H(i) H(i+1) . . . H(i+ib-1) */

                i__3 = *n - i__ + 1;
                slarft_("Forward", "Rowwise", &i__3, &ib, &a_ref(i__, i__), 
                        lda, &tau[i__], &work[1], &ldwork);

/*              Apply H to A(i+ib:m,i:n) from the right */

                i__3 = *m - i__ - ib + 1;
                i__4 = *n - i__ + 1;
                slarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, 
                        &i__4, &ib, &a_ref(i__, i__), lda, &work[1], &ldwork, 
                        &a_ref(i__ + ib, i__), lda, &work[ib + 1], &ldwork);
            }
/* L10: */
        }
    } else {
        i__ = 1;
    }

/*     Use unblocked code to factor the last or only block. */

    if (i__ <= k) {
        i__2 = *m - i__ + 1;
        i__1 = *n - i__ + 1;
        sgelq2_(&i__2, &i__1, &a_ref(i__, i__), lda, &tau[i__], &work[1], &
                iinfo);
    }

    work[1] = (real) iws;
    return 0;

/*     End of SGELQF */

} /* sgelqf_ */
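Usage sketch (illustrative, not part of the library source; the 3-by-6 zero matrix is an assumption): calling sgelqf_ with the minimum workspace LWORK = M is legal and simply falls back to the unblocked path, while supplying LWORK >= M*NB (obtainable with the LWORK = -1 query shown for sgebrd_ above) enables the blocked code.

    #include "lapackblas.h"

    static void gelqf_example(void)
    {
        integer m = 3, n = 6, lda = 3, lwork = 3, info;
        real a[18] = { 0.f };       /* in practice, the matrix to factor */
        real tau[3];
        real work[3];               /* LWORK = M satisfies LWORK >= max(1,M) */

        sgelqf_(&m, &n, a, &lda, tau, work, &lwork, &info);
    }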
int sgemm_ ( const char *  transa,
const char *  transb,
integer * m,
integer * n,
integer * k,
real * alpha,
real * a,
integer * lda,
real * b,
integer * ldb,
real * beta,
real * c__,
integer * ldc 
)

Definition at line 1312 of file lapackblas.cpp.

References a_ref, b_ref, c___ref, f2cmax, lsame_(), and xerbla_().

Referenced by sgebrd_(), sgesvd_(), slaed0_(), slaed3_(), slaed7_(), slarfb_(), and sstedc_().

{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
            i__3;
    /* Local variables */
    static integer info;
    static logical nota, notb;
    static real temp;
    static integer i__, j, l, ncola;
    extern logical lsame_(const char *, const char *);
    static integer nrowa, nrowb;
    extern /* Subroutine */ int xerbla_(const char *, integer *);
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
#define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
/*  Purpose   
    =======   
    SGEMM  performs one of the matrix-matrix operations   
       C := alpha*op( A )*op( B ) + beta*C,   
    where  op( X ) is one of   
       op( X ) = X   or   op( X ) = X',   
    alpha and beta are scalars, and A, B and C are matrices, with op( A )   
    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.   
    Parameters   
    ==========   
    TRANSA - CHARACTER*1.   
             On entry, TRANSA specifies the form of op( A ) to be used in   
             the matrix multiplication as follows:   
                TRANSA = 'N' or 'n',  op( A ) = A.   
                TRANSA = 'T' or 't',  op( A ) = A'.   
                TRANSA = 'C' or 'c',  op( A ) = A'.   
             Unchanged on exit.   
    TRANSB - CHARACTER*1.   
             On entry, TRANSB specifies the form of op( B ) to be used in   
             the matrix multiplication as follows:   
                TRANSB = 'N' or 'n',  op( B ) = B.   
                TRANSB = 'T' or 't',  op( B ) = B'.   
                TRANSB = 'C' or 'c',  op( B ) = B'.   
             Unchanged on exit.   
    M      - INTEGER.   
             On entry,  M  specifies  the number  of rows  of the  matrix   
             op( A )  and of the  matrix  C.  M  must  be at least  zero.   
             Unchanged on exit.   
    N      - INTEGER.   
             On entry,  N  specifies the number  of columns of the matrix   
             op( B ) and the number of columns of the matrix C. N must be   
             at least zero.   
             Unchanged on exit.   
    K      - INTEGER.   
             On entry,  K  specifies  the number of columns of the matrix   
             op( A ) and the number of rows of the matrix op( B ). K must   
             be at least  zero.   
             Unchanged on exit.   
    ALPHA  - REAL            .   
             On entry, ALPHA specifies the scalar alpha.   
             Unchanged on exit.   
    A      - REAL             array of DIMENSION ( LDA, ka ), where ka is   
             k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.   
             Before entry with  TRANSA = 'N' or 'n',  the leading  m by k   
             part of the array  A  must contain the matrix  A,  otherwise   
             the leading  k by m  part of the array  A  must contain  the   
             matrix A.   
             Unchanged on exit.   
    LDA    - INTEGER.   
             On entry, LDA specifies the first dimension of A as declared   
             in the calling (sub) program. When  TRANSA = 'N' or 'n' then   
             LDA must be at least  f2cmax( 1, m ), otherwise  LDA must be at   
             least  f2cmax( 1, k ).   
             Unchanged on exit.   
    B      - REAL             array of DIMENSION ( LDB, kb ), where kb is   
             n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.   
             Before entry with  TRANSB = 'N' or 'n',  the leading  k by n   
             part of the array  B  must contain the matrix  B,  otherwise   
             the leading  n by k  part of the array  B  must contain  the   
             matrix B.   
             Unchanged on exit.   
    LDB    - INTEGER.   
             On entry, LDB specifies the first dimension of B as declared   
             in the calling (sub) program. When  TRANSB = 'N' or 'n' then   
             LDB must be at least  f2cmax( 1, k ), otherwise  LDB must be at   
             least  f2cmax( 1, n ).   
             Unchanged on exit.   
    BETA   - REAL            .   
             On entry,  BETA  specifies the scalar  beta.  When  BETA  is   
             supplied as zero then C need not be set on input.   
             Unchanged on exit.   
    C      - REAL             array of DIMENSION ( LDC, n ).   
             Before entry, the leading  m by n  part of the array  C must   
             contain the matrix  C,  except when  beta  is zero, in which   
             case C need not be set on entry.   
             On exit, the array  C  is overwritten by the  m by n  matrix   
             ( alpha*op( A )*op( B ) + beta*C ).   
    LDC    - INTEGER.   
             On entry, LDC specifies the first dimension of C as declared   
             in  the  calling  (sub)  program.   LDC  must  be  at  least   
             f2cmax( 1, m ).   
             Unchanged on exit.   
    Level 3 Blas routine.   
    -- Written on 8-February-1989.   
       Jack Dongarra, Argonne National Laboratory.   
       Iain Duff, AERE Harwell.   
       Jeremy Du Croz, Numerical Algorithms Group Ltd.   
       Sven Hammarling, Numerical Algorithms Group Ltd.   
       Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not   
       transposed and set  NROWA, NCOLA and  NROWB  as the number of rows   
       and  columns of  A  and the  number of  rows  of  B  respectively.   
       Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    /* Function Body */
    nota = lsame_(transa, "N");
    notb = lsame_(transb, "N");
    if (nota) {
        nrowa = *m;
        ncola = *k;
    } else {
        nrowa = *k;
        ncola = *m;
    }
    if (notb) {
        nrowb = *k;
    } else {
        nrowb = *n;
    }
/*     Test the input parameters. */
    info = 0;
    if (! nota && ! lsame_(transa, "C") && ! lsame_(
            transa, "T")) {
        info = 1;
    } else if (! notb && ! lsame_(transb, "C") && ! 
            lsame_(transb, "T")) {
        info = 2;
    } else if (*m < 0) {
        info = 3;
    } else if (*n < 0) {
        info = 4;
    } else if (*k < 0) {
        info = 5;
    } else if (*lda < f2cmax(1,nrowa)) {
        info = 8;
    } else if (*ldb < f2cmax(1,nrowb)) {
        info = 10;
    } else if (*ldc < f2cmax(1,*m)) {
        info = 13;
    }
    if (info != 0) {
        xerbla_("SGEMM ", &info);
        return 0;
    }
/*     Quick return if possible. */
    if (*m == 0 || *n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) {
        return 0;
    }
/*     And if  alpha.eq.zero. */
    if (*alpha == 0.f) {
        if (*beta == 0.f) {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    c___ref(i__, j) = 0.f;
/* L10: */
                }
/* L20: */
            }
        } else {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    c___ref(i__, j) = *beta * c___ref(i__, j);
/* L30: */
                }
/* L40: */
            }
        }
        return 0;
    }
/*     Start the operations. */
    if (notb) {
        if (nota) {
/*           Form  C := alpha*A*B + beta*C. */
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                if (*beta == 0.f) {
                    i__2 = *m;
                    for (i__ = 1; i__ <= i__2; ++i__) {
                        c___ref(i__, j) = 0.f;
/* L50: */
                    }
                } else if (*beta != 1.f) {
                    i__2 = *m;
                    for (i__ = 1; i__ <= i__2; ++i__) {
                        c___ref(i__, j) = *beta * c___ref(i__, j);
/* L60: */
                    }
                }
                i__2 = *k;
                for (l = 1; l <= i__2; ++l) {
                    if (b_ref(l, j) != 0.f) {
                        temp = *alpha * b_ref(l, j);
                        i__3 = *m;
                        for (i__ = 1; i__ <= i__3; ++i__) {
                            c___ref(i__, j) = c___ref(i__, j) + temp * a_ref(
                                    i__, l);
/* L70: */
                        }
                    }
/* L80: */
                }
/* L90: */
            }
        } else {
/*           Form  C := alpha*A'*B + beta*C */
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    temp = 0.f;
                    i__3 = *k;
                    for (l = 1; l <= i__3; ++l) {
                        temp += a_ref(l, i__) * b_ref(l, j);
/* L100: */
                    }
                    if (*beta == 0.f) {
                        c___ref(i__, j) = *alpha * temp;
                    } else {
                        c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__,
                                 j);
                    }
/* L110: */
                }
/* L120: */
            }
        }
    } else {
        if (nota) {
/*           Form  C := alpha*A*B' + beta*C */
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                if (*beta == 0.f) {
                    i__2 = *m;
                    for (i__ = 1; i__ <= i__2; ++i__) {
                        c___ref(i__, j) = 0.f;
/* L130: */
                    }
                } else if (*beta != 1.f) {
                    i__2 = *m;
                    for (i__ = 1; i__ <= i__2; ++i__) {
                        c___ref(i__, j) = *beta * c___ref(i__, j);
/* L140: */
                    }
                }
                i__2 = *k;
                for (l = 1; l <= i__2; ++l) {
                    if (b_ref(j, l) != 0.f) {
                        temp = *alpha * b_ref(j, l);
                        i__3 = *m;
                        for (i__ = 1; i__ <= i__3; ++i__) {
                            c___ref(i__, j) = c___ref(i__, j) + temp * a_ref(
                                    i__, l);
/* L150: */
                        }
                    }
/* L160: */
                }
/* L170: */
            }
        } else {
/*           Form  C := alpha*A'*B' + beta*C */
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    temp = 0.f;
                    i__3 = *k;
                    for (l = 1; l <= i__3; ++l) {
                        temp += a_ref(l, i__) * b_ref(j, l);
/* L180: */
                    }
                    if (*beta == 0.f) {
                        c___ref(i__, j) = *alpha * temp;
                    } else {
                        c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__,
                                 j);
                    }
/* L190: */
                }
/* L200: */
            }
        }
    }
    return 0;
/*     End of SGEMM . */
} /* sgemm_ */
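
A minimal usage sketch, not part of the library: it drives sgemm_ through the f2c-style interface above, in which every argument is passed by pointer and matrices are stored column-major. The 2-by-2 values and the main() wrapper are illustrative assumptions.

#include <stdio.h>
#include "lapackblas.h"

int main(void)
{
    /* C := alpha*A*B + beta*C with A = [1 2; 3 4], B = [5 6; 7 8] */
    real a[4] = { 1.f, 3.f, 2.f, 4.f };     /* column-major A */
    real b[4] = { 5.f, 7.f, 6.f, 8.f };     /* column-major B */
    real c[4] = { 0.f, 0.f, 0.f, 0.f };
    integer m = 2, n = 2, k = 2, lda = 2, ldb = 2, ldc = 2;
    real alpha = 1.f, beta = 0.f;

    sgemm_("N", "N", &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc);

    /* Expected result: C = A*B = [19 22; 43 50], printed column by column. */
    printf("%g %g %g %g\n", c[0], c[1], c[2], c[3]);
    return 0;
}
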
int sgemv_ ( const char *  trans,
integer *  m,
integer *  n,
real *  alpha,
real *  a,
integer *  lda,
real *  x,
integer *  incx,
real *  beta,
real *  y,
integer *  incy
)

Definition at line 1624 of file lapackblas.cpp.

References a_ref, f2cmax, lsame_(), x, xerbla_(), and y.

Referenced by EMAN::PCAlarge::analyze(), EMAN::PCA::dopca_lan(), EMAN::PCA::Lanczos(), EMAN::PCAlarge::Lanczos(), slabrd_(), slaeda_(), slarf_(), slarft_(), and slatrd_().

{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    /* Local variables */
    static integer info;
    static real temp;
    static integer lenx, leny, i__, j;
    extern logical lsame_(const char *, const char *);
    static integer ix, iy, jx, jy, kx, ky;
    extern /* Subroutine */ int xerbla_(const char *, integer *);
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
/*  Purpose   
    =======   
    SGEMV  performs one of the matrix-vector operations   
       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
    where alpha and beta are scalars, x and y are vectors and A is an   
    m by n matrix.   
    Parameters   
    ==========   
    TRANS  - CHARACTER*1.   
             On entry, TRANS specifies the operation to be performed as   
             follows:   
                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
             Unchanged on exit.   
    M      - INTEGER.   
             On entry, M specifies the number of rows of the matrix A.   
             M must be at least zero.   
             Unchanged on exit.   
    N      - INTEGER.   
             On entry, N specifies the number of columns of the matrix A.   
             N must be at least zero.   
             Unchanged on exit.   
    ALPHA  - REAL            .   
             On entry, ALPHA specifies the scalar alpha.   
             Unchanged on exit.   
    A      - REAL             array of DIMENSION ( LDA, n ).   
             Before entry, the leading m by n part of the array A must   
             contain the matrix of coefficients.   
             Unchanged on exit.   
    LDA    - INTEGER.   
             On entry, LDA specifies the first dimension of A as declared   
             in the calling (sub) program. LDA must be at least   
             f2cmax( 1, m ).   
             Unchanged on exit.   
    X      - REAL             array of DIMENSION at least   
             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
             Before entry, the incremented array X must contain the   
             vector x.   
             Unchanged on exit.   
    INCX   - INTEGER.   
             On entry, INCX specifies the increment for the elements of   
             X. INCX must not be zero.   
             Unchanged on exit.   
    BETA   - REAL            .   
             On entry, BETA specifies the scalar beta. When BETA is   
             supplied as zero then Y need not be set on input.   
             Unchanged on exit.   
    Y      - REAL             array of DIMENSION at least   
             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
             Before entry with BETA non-zero, the incremented array Y   
             must contain the vector y. On exit, Y is overwritten by the   
             updated vector y.   
    INCY   - INTEGER.   
             On entry, INCY specifies the increment for the elements of   
             Y. INCY must not be zero.   
             Unchanged on exit.   
    Level 2 Blas routine.   
    -- Written on 22-October-1986.   
       Jack Dongarra, Argonne National Lab.   
       Jeremy Du Croz, Nag Central Office.   
       Sven Hammarling, Nag Central Office.   
       Richard Hanson, Sandia National Labs.   
       Test the input parameters.   
       Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --x;
    --y;
    /* Function Body */
    info = 0;
    if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C")
            ) {
        info = 1;
    } else if (*m < 0) {
        info = 2;
    } else if (*n < 0) {
        info = 3;
    } else if (*lda < f2cmax(1,*m)) {
        info = 6;
    } else if (*incx == 0) {
        info = 8;
    } else if (*incy == 0) {
        info = 11;
    }
    if (info != 0) {
        xerbla_("SGEMV ", &info);
        return 0;
    }
/*     Quick return if possible. */
    if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) {
        return 0;
    }
/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set   
       up the start points in  X  and  Y. */
    if (lsame_(trans, "N")) {
        lenx = *n;
        leny = *m;
    } else {
        lenx = *m;
        leny = *n;
    }
    if (*incx > 0) {
        kx = 1;
    } else {
        kx = 1 - (lenx - 1) * *incx;
    }
    if (*incy > 0) {
        ky = 1;
    } else {
        ky = 1 - (leny - 1) * *incy;
    }
/*     Start the operations. In this version the elements of A are   
       accessed sequentially with one pass through A.   
       First form  y := beta*y. */
    if (*beta != 1.f) {
        if (*incy == 1) {
            if (*beta == 0.f) {
                i__1 = leny;
                for (i__ = 1; i__ <= i__1; ++i__) {
                    y[i__] = 0.f;
/* L10: */
                }
            } else {
                i__1 = leny;
                for (i__ = 1; i__ <= i__1; ++i__) {
                    y[i__] = *beta * y[i__];
/* L20: */
                }
            }
        } else {
            iy = ky;
            if (*beta == 0.f) {
                i__1 = leny;
                for (i__ = 1; i__ <= i__1; ++i__) {
                    y[iy] = 0.f;
                    iy += *incy;
/* L30: */
                }
            } else {
                i__1 = leny;
                for (i__ = 1; i__ <= i__1; ++i__) {
                    y[iy] = *beta * y[iy];
                    iy += *incy;
/* L40: */
                }
            }
        }
    }
    if (*alpha == 0.f) {
        return 0;
    }
    if (lsame_(trans, "N")) {
/*        Form  y := alpha*A*x + y. */
        jx = kx;
        if (*incy == 1) {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                if (x[jx] != 0.f) {
                    temp = *alpha * x[jx];
                    i__2 = *m;
                    for (i__ = 1; i__ <= i__2; ++i__) {
                        y[i__] += temp * a_ref(i__, j);
/* L50: */
                    }
                }
                jx += *incx;
/* L60: */
            }
        } else {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                if (x[jx] != 0.f) {
                    temp = *alpha * x[jx];
                    iy = ky;
                    i__2 = *m;
                    for (i__ = 1; i__ <= i__2; ++i__) {
                        y[iy] += temp * a_ref(i__, j);
                        iy += *incy;
/* L70: */
                    }
                }
                jx += *incx;
/* L80: */
            }
        }
    } else {
/*        Form  y := alpha*A'*x + y. */
        jy = ky;
        if (*incx == 1) {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                temp = 0.f;
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    temp += a_ref(i__, j) * x[i__];
/* L90: */
                }
                y[jy] += *alpha * temp;
                jy += *incy;
/* L100: */
            }
        } else {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                temp = 0.f;
                ix = kx;
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    temp += a_ref(i__, j) * x[ix];
                    ix += *incx;
/* L110: */
                }
                y[jy] += *alpha * temp;
                jy += *incy;
/* L120: */
            }
        }
    }
    return 0;
/*     End of SGEMV . */
} /* sgemv_ */
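
A short sketch in the same spirit: sgemv_ computing y := alpha*A*x + beta*y for a 2-by-3 column-major A. The data and main() wrapper are illustrative assumptions.

#include <stdio.h>
#include "lapackblas.h"

int main(void)
{
    real a[6] = { 1.f, 4.f, 2.f, 5.f, 3.f, 6.f };   /* A = [1 2 3; 4 5 6], column-major */
    real x[3] = { 1.f, 1.f, 1.f };
    real y[2] = { 0.f, 0.f };
    integer m = 2, n = 3, lda = 2, incx = 1, incy = 1;
    real alpha = 1.f, beta = 0.f;

    sgemv_("N", &m, &n, &alpha, a, &lda, x, &incx, &beta, y, &incy);

    /* Expected result: y = A*x = [6; 15]. */
    printf("%g %g\n", y[0], y[1]);
    return 0;
}
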
int sgeqr2_ ( integer *  m,
integer *  n,
real *  a,
integer *  lda,
real *  tau,
real *  work,
integer *  info
)

Definition at line 24684 of file lapackblas.cpp.

References a_ref, f2cmax, f2cmin, slarf_(), slarfg_(), and xerbla_().

Referenced by sgeqrf_().

{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    SGEQR2 computes a QR factorization of a real m by n matrix A:   
    A = Q * R.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the m by n matrix A.   
            On exit, the elements on and above the diagonal of the array   
            contain the min(m,n) by n upper trapezoidal matrix R (R is   
            upper triangular if m >= n); the elements below the diagonal,   
            with the array TAU, represent the orthogonal matrix Q as a   
            product of elementary reflectors (see Further Details).   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    TAU     (output) REAL array, dimension (min(M,N))   
            The scalar factors of the elementary reflectors (see Further   
            Details).   

    WORK    (workspace) REAL array, dimension (N)   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   

    Further Details   
    ===============   

    The matrix Q is represented as a product of elementary reflectors   

       Q = H(1) H(2) . . . H(k), where k = min(m,n).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a real scalar, and v is a real vector with   
    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),   
    and tau in TAU(i).   

    =====================================================================   


       Test the input arguments   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    /* Local variables */
    static integer i__, k;
    extern /* Subroutine */ int slarf_(const char *, integer *, integer *, real *, 
            integer *, real *, real *, integer *, real *), xerbla_(
            const char *, integer *), slarfg_(integer *, real *, real *, 
            integer *, real *);
    static real aii;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < f2cmax(1,*m)) {
        *info = -4;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("SGEQR2", &i__1);
        return 0;
    }

    k = f2cmin(*m,*n);

    i__1 = k;
    for (i__ = 1; i__ <= i__1; ++i__) {

/*        Generate elementary reflector H(i) to annihilate A(i+1:m,i)   

   Computing MIN */
        i__2 = i__ + 1;
        i__3 = *m - i__ + 1;
        slarfg_(&i__3, &a_ref(i__, i__), &a_ref(f2cmin(i__2,*m), i__), &c__1, &
                tau[i__]);
        if (i__ < *n) {

/*           Apply H(i) to A(i:m,i+1:n) from the left */

            aii = a_ref(i__, i__);
            a_ref(i__, i__) = 1.f;
            i__2 = *m - i__ + 1;
            i__3 = *n - i__;
            slarf_("Left", &i__2, &i__3, &a_ref(i__, i__), &c__1, &tau[i__], &
                    a_ref(i__, i__ + 1), lda, &work[1]);
            a_ref(i__, i__) = aii;
        }
/* L10: */
    }
    return 0;

/*     End of SGEQR2 */

} /* sgeqr2_ */
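
A minimal sketch of calling the unblocked routine sgeqr2_ directly; per the argument descriptions above, WORK needs only N entries and R is returned on and above the diagonal of A. The 3-by-2 test matrix and main() wrapper are assumptions for illustration.

#include <stdio.h>
#include "lapackblas.h"

int main(void)
{
    integer m = 3, n = 2, lda = 3, info = 0;
    real a[6] = { 1.f, 2.f, 2.f, 1.f, 0.f, 1.f };   /* 3x2 matrix, column-major */
    real tau[2], work[2];

    sgeqr2_(&m, &n, a, &lda, tau, work, &info);

    /* R sits on and above the diagonal of A; the Householder vectors v(i)
       are stored below the diagonal together with tau[i]. */
    printf("info = %d, R = [%g %g; 0 %g]\n", (int) info, a[0], a[3], a[4]);
    return 0;
}
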
int sgeqrf_ ( integer *  m,
integer *  n,
real *  a,
integer *  lda,
real *  tau,
real *  work,
integer *  lwork,
integer *  info
)

Definition at line 23835 of file lapackblas.cpp.

References a_ref, c__3, f2cmax, f2cmin, ilaenv_(), nx, sgeqr2_(), slarfb_(), slarft_(), and xerbla_().

Referenced by sgesvd_().

{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    SGEQRF computes a QR factorization of a real M-by-N matrix A:   
    A = Q * R.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the M-by-N matrix A.   
            On exit, the elements on and above the diagonal of the array   
            contain the min(M,N)-by-N upper trapezoidal matrix R (R is   
            upper triangular if m >= n); the elements below the diagonal,   
            with the array TAU, represent the orthogonal matrix Q as a   
            product of min(m,n) elementary reflectors (see Further   
            Details).   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    TAU     (output) REAL array, dimension (min(M,N))   
            The scalar factors of the elementary reflectors (see Further   
            Details).   

    WORK    (workspace/output) REAL array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.  LWORK >= max(1,N).   
            For optimum performance LWORK >= N*NB, where NB is   
            the optimal blocksize.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    Further Details   
    ===============   

    The matrix Q is represented as a product of elementary reflectors   

       Q = H(1) H(2) . . . H(k), where k = min(m,n).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a real scalar, and v is a real vector with   
    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),   
    and tau in TAU(i).   

    =====================================================================   


       Test the input arguments   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static integer c__3 = 3;
    static integer c__2 = 2;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    static integer i__, k, nbmin, iinfo;
    extern /* Subroutine */ int sgeqr2_(integer *, integer *, real *, integer 
            *, real *, real *, integer *);
    static integer ib, nb, nx;
    extern /* Subroutine */ int slarfb_(const char *, const char *, const char *, const char *, 
            integer *, integer *, integer *, real *, integer *, real *, 
            integer *, real *, integer *, real *, integer *), xerbla_(const char *, integer *);
    extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 
            integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int slarft_(const char *, const char *, integer *, integer *, 
            real *, integer *, real *, real *, integer *);
    static integer ldwork, lwkopt;
    static logical lquery;
    static integer iws;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
    nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
            1);
    lwkopt = *n * nb;
    work[1] = (real) lwkopt;
    lquery = *lwork == -1;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < f2cmax(1,*m)) {
        *info = -4;
    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
        *info = -7;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("SGEQRF", &i__1);
        return 0;
    } else if (lquery) {
        return 0;
    }

/*     Quick return if possible */

    k = f2cmin(*m,*n);
    if (k == 0) {
        work[1] = 1.f;
        return 0;
    }

    nbmin = 2;
    nx = 0;
    iws = *n;
    if (nb > 1 && nb < k) {

/*        Determine when to cross over from blocked to unblocked code.   

   Computing MAX */
        i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", m, n, &c_n1, &c_n1, (
                ftnlen)6, (ftnlen)1);
        nx = f2cmax(i__1,i__2);
        if (nx < k) {

/*           Determine if workspace is large enough for blocked code. */

            ldwork = *n;
            iws = ldwork * nb;
            if (*lwork < iws) {

/*              Not enough workspace to use optimal NB:  reduce NB and   
                determine the minimum value of NB. */

                nb = *lwork / ldwork;
/* Computing MAX */
                i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", m, n, &c_n1, &
                        c_n1, (ftnlen)6, (ftnlen)1);
                nbmin = f2cmax(i__1,i__2);
            }
        }
    }

    if (nb >= nbmin && nb < k && nx < k) {

/*        Use blocked code initially */

        i__1 = k - nx;
        i__2 = nb;
        for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
/* Computing MIN */
            i__3 = k - i__ + 1;
            ib = f2cmin(i__3,nb);

/*           Compute the QR factorization of the current block   
             A(i:m,i:i+ib-1) */

            i__3 = *m - i__ + 1;
            sgeqr2_(&i__3, &ib, &a_ref(i__, i__), lda, &tau[i__], &work[1], &
                    iinfo);
            if (i__ + ib <= *n) {

/*              Form the triangular factor of the block reflector   
                H = H(i) H(i+1) . . . H(i+ib-1) */

                i__3 = *m - i__ + 1;
                slarft_("Forward", "Columnwise", &i__3, &ib, &a_ref(i__, i__),
                         lda, &tau[i__], &work[1], &ldwork);

/*              Apply H' to A(i:m,i+ib:n) from the left */

                i__3 = *m - i__ + 1;
                i__4 = *n - i__ - ib + 1;
                slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
                        i__4, &ib, &a_ref(i__, i__), lda, &work[1], &ldwork, &
                        a_ref(i__, i__ + ib), lda, &work[ib + 1], &ldwork);
            }
/* L10: */
        }
    } else {
        i__ = 1;
    }

/*     Use unblocked code to factor the last or only block. */

    if (i__ <= k) {
        i__2 = *m - i__ + 1;
        i__1 = *n - i__ + 1;
        sgeqr2_(&i__2, &i__1, &a_ref(i__, i__), lda, &tau[i__], &work[1], &
                iinfo);
    }

    work[1] = (real) iws;
    return 0;

/*     End of SGEQRF */

} /* sgeqrf_ */
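
The LWORK = -1 workspace query described above can be exercised as in the following hedged sketch; the 4-by-3 matrix, main() wrapper, and malloc/free handling are illustrative assumptions.

#include <stdio.h>
#include <stdlib.h>
#include "lapackblas.h"

int main(void)
{
    integer m = 4, n = 3, lda = 4, info = 0, lwork = -1;
    real a[12], tau[3], wkopt, *work;
    int i;

    for (i = 0; i < 12; ++i) a[i] = (real) (i + 1);   /* arbitrary 4x3 matrix */

    /* Query: with LWORK = -1 the optimal workspace size is returned in WORK(1). */
    sgeqrf_(&m, &n, a, &lda, tau, &wkopt, &lwork, &info);
    lwork = (integer) wkopt;
    work = (real *) malloc(lwork * sizeof(real));

    /* Factorize A = Q*R; R is returned in the upper triangle of A. */
    sgeqrf_(&m, &n, a, &lda, tau, work, &lwork, &info);
    printf("info = %d, optimal lwork = %d\n", (int) info, (int) lwork);

    free(work);
    return 0;
}
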
int sger_ ( integer *  m,
integer *  n,
real *  alpha,
real *  x,
integer *  incx,
real *  y,
integer *  incy,
real *  a,
integer *  lda
)

Definition at line 1870 of file lapackblas.cpp.

References a_ref, f2cmax, x, xerbla_(), and y.

Referenced by slarf_().

{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    /* Local variables */
    static integer info;
    static real temp;
    static integer i__, j, ix, jy, kx;
    extern /* Subroutine */ int xerbla_(const char *, integer *);
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
/*  Purpose   
    =======   
    SGER   performs the rank 1 operation   
       A := alpha*x*y' + A,   
    where alpha is a scalar, x is an m element vector, y is an n element   
    vector and A is an m by n matrix.   
    Parameters   
    ==========   
    M      - INTEGER.   
             On entry, M specifies the number of rows of the matrix A.   
             M must be at least zero.   
             Unchanged on exit.   
    N      - INTEGER.   
             On entry, N specifies the number of columns of the matrix A.   
             N must be at least zero.   
             Unchanged on exit.   
    ALPHA  - REAL            .   
             On entry, ALPHA specifies the scalar alpha.   
             Unchanged on exit.   
    X      - REAL             array of dimension at least   
             ( 1 + ( m - 1 )*abs( INCX ) ).   
             Before entry, the incremented array X must contain the m   
             element vector x.   
             Unchanged on exit.   
    INCX   - INTEGER.   
             On entry, INCX specifies the increment for the elements of   
             X. INCX must not be zero.   
             Unchanged on exit.   
    Y      - REAL             array of dimension at least   
             ( 1 + ( n - 1 )*abs( INCY ) ).   
             Before entry, the incremented array Y must contain the n   
             element vector y.   
             Unchanged on exit.   
    INCY   - INTEGER.   
             On entry, INCY specifies the increment for the elements of   
             Y. INCY must not be zero.   
             Unchanged on exit.   
    A      - REAL             array of DIMENSION ( LDA, n ).   
             Before entry, the leading m by n part of the array A must   
             contain the matrix of coefficients. On exit, A is   
             overwritten by the updated matrix.   
    LDA    - INTEGER.   
             On entry, LDA specifies the first dimension of A as declared   
             in the calling (sub) program. LDA must be at least   
             f2cmax( 1, m ).   
             Unchanged on exit.   
    Level 2 Blas routine.   
    -- Written on 22-October-1986.   
       Jack Dongarra, Argonne National Lab.   
       Jeremy Du Croz, Nag Central Office.   
       Sven Hammarling, Nag Central Office.   
       Richard Hanson, Sandia National Labs.   
       Test the input parameters.   
       Parameter adjustments */
    --x;
    --y;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    /* Function Body */
    info = 0;
    if (*m < 0) {
        info = 1;
    } else if (*n < 0) {
        info = 2;
    } else if (*incx == 0) {
        info = 5;
    } else if (*incy == 0) {
        info = 7;
    } else if (*lda < f2cmax(1,*m)) {
        info = 9;
    }
    if (info != 0) {
        xerbla_("SGER  ", &info);
        return 0;
    }
/*     Quick return if possible. */
    if (*m == 0 || *n == 0 || *alpha == 0.f) {
        return 0;
    }
/*     Start the operations. In this version the elements of A are   
       accessed sequentially with one pass through A. */
    if (*incy > 0) {
        jy = 1;
    } else {
        jy = 1 - (*n - 1) * *incy;
    }
    if (*incx == 1) {
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            if (y[jy] != 0.f) {
                temp = *alpha * y[jy];
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    a_ref(i__, j) = a_ref(i__, j) + x[i__] * temp;
/* L10: */
                }
            }
            jy += *incy;
/* L20: */
        }
    } else {
        if (*incx > 0) {
            kx = 1;
        } else {
            kx = 1 - (*m - 1) * *incx;
        }
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            if (y[jy] != 0.f) {
                temp = *alpha * y[jy];
                ix = kx;
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    a_ref(i__, j) = a_ref(i__, j) + x[ix] * temp;
                    ix += *incx;
/* L30: */
                }
            }
            jy += *incy;
/* L40: */
        }
    }
    return 0;
/*     End of SGER  . */
} /* sger_ */
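
A Level 2 sketch in the same style: the rank-1 update A := alpha*x*y' + A via sger_, with the vectors and main() wrapper as illustrative assumptions.

#include <stdio.h>
#include "lapackblas.h"

int main(void)
{
    real a[4] = { 0.f, 0.f, 0.f, 0.f };   /* 2x2 A, column-major */
    real x[2] = { 1.f, 2.f };
    real y[2] = { 3.f, 4.f };
    integer m = 2, n = 2, lda = 2, incx = 1, incy = 1;
    real alpha = 1.f;

    sger_(&m, &n, &alpha, x, &incx, y, &incy, a, &lda);

    /* Expected result: A = x*y' = [3 4; 6 8], printed column by column. */
    printf("%g %g %g %g\n", a[0], a[1], a[2], a[3]);
    return 0;
}
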
int sgesvd_ ( char *  jobu,
char *  jobvt,
integer *  m,
integer *  n,
real *  a,
integer *  lda,
real *  s,
real *  u,
integer *  ldu,
real *  vt,
integer *  ldvt,
real *  work,
integer *  lwork,
integer *  info
)

Definition at line 16378 of file lapackblas.cpp.

References a_ref, f2cmax, f2cmin, ierr, ilaenv_(), lsame_(), s_cat(), sbdsqr_(), sgebrd_(), sgelqf_(), sgemm_(), sgeqrf_(), slacpy_(), slamch_(), slange_(), slascl_(), slaset_(), sorgbr_(), sorglq_(), sorgqr_(), sormbr_(), sqrt(), u_ref, vt_ref, and xerbla_().

Referenced by svd().
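
Before the implementation, a hedged usage sketch: a thin SVD with JOBU = 'S' and JOBVT = 'S', using the LWORK = -1 workspace query described in the argument documentation below. The 4-by-3 data, main() wrapper, and malloc/free handling are illustrative assumptions.

#include <stdio.h>
#include <stdlib.h>
#include "lapackblas.h"

int main(void)
{
    integer m = 4, n = 3, lda = 4, ldu = 4, ldvt = 3, info = 0, lwork = -1;
    real a[12], s[3], u[12], vt[9], wkopt, *work;
    int i;

    for (i = 0; i < 12; ++i) a[i] = (real) (i % 5);   /* arbitrary 4x3 matrix */

    /* Query: the optimal LWORK is returned in WORK(1) (assumption). */
    sgesvd_("S", "S", &m, &n, a, &lda, s, u, &ldu, vt, &ldvt,
            &wkopt, &lwork, &info);
    lwork = (integer) wkopt;
    work = (real *) malloc(lwork * sizeof(real));

    /* Compute A = U * diag(s) * VT; A is destroyed on exit for these JOB settings. */
    sgesvd_("S", "S", &m, &n, a, &lda, s, u, &ldu, vt, &ldvt,
            work, &lwork, &info);
    printf("info = %d, singular values: %g %g %g\n",
           (int) info, s[0], s[1], s[2]);

    free(work);
    return 0;
}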

{
    /* System generated locals */
    typedef const char *address;

    address a__1[2];
    integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2], 
            i__2, i__3, i__4;
    char ch__1[2];

    /* Builtin functions   
       Subroutine */ int s_cat(char *, const char **, integer *, integer *, ftnlen);
    //double sqrt(doublereal);

    /* Local variables */
    static integer iscl;
    static real anrm;
    static integer ierr, itau, ncvt, nrvt, i__;
    extern logical lsame_(const char *, const char *);
    static integer chunk;
    extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *, 
            integer *, real *, real *, integer *, real *, integer *, real *, 
            real *, integer *);
    static integer minmn, wrkbl, itaup, itauq, mnthr, iwork;
    static logical wntua, wntva, wntun, wntuo, wntvn, wntvo, wntus, wntvs;
    static integer ie, ir, bdspac, iu;
    extern /* Subroutine */ int sgebrd_(integer *, integer *, real *, integer 
            *, real *, real *, real *, real *, real *, integer *, integer *);
    extern doublereal slamch_(const char *), slange_(const char *, integer *, 
            integer *, real *, integer *, real *);
    extern /* Subroutine */ int xerbla_(const char *, integer *);
    extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 
            integer *, integer *, ftnlen, ftnlen);
    static real bignum;
    extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer 
            *, real *, real *, integer *, integer *), slascl_(const char *, integer 
            *, integer *, real *, real *, integer *, integer *, real *, 
            integer *, integer *), sgeqrf_(integer *, integer *, real 
            *, integer *, real *, real *, integer *, integer *), slacpy_(const char 
            *, integer *, integer *, real *, integer *, real *, integer *), slaset_(const char *, integer *, integer *, real *, real *, 
            real *, integer *), sbdsqr_(const char *, integer *, integer *, 
            integer *, integer *, real *, real *, real *, integer *, real *, 
            integer *, real *, integer *, real *, integer *), sorgbr_(
            const char *, integer *, integer *, integer *, real *, integer *, real *
            , real *, integer *, integer *), sormbr_(const char *, const char *, 
            const char *, integer *, integer *, integer *, real *, integer *, real *
            , real *, integer *, real *, integer *, integer *);
    static integer ldwrkr, minwrk, ldwrku, maxwrk;
    extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real 
            *, integer *, real *, real *, integer *, integer *);
    static real smlnum;
    extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real 
            *, integer *, real *, real *, integer *, integer *);
    static logical lquery, wntuas, wntvas;
    static integer blk, ncu;
    static real dum[1], eps;
    static integer nru;


#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1]
#define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1]


/*  -- LAPACK driver routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1999   


    Purpose   
    =======   

    SGESVD computes the singular value decomposition (SVD) of a real   
    M-by-N matrix A, optionally computing the left and/or right singular   
    vectors. The SVD is written   

         A = U * SIGMA * transpose(V)   

    where SIGMA is an M-by-N matrix which is zero except for its   
    min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and   
    V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA   
    are the singular values of A; they are real and non-negative, and   
    are returned in descending order.  The first min(m,n) columns of   
    U and V are the left and right singular vectors of A.   

    Note that the routine returns V**T, not V.   

    Arguments   
    =========   

    JOBU    (input) CHARACTER*1   
            Specifies options for computing all or part of the matrix U:   
            = 'A':  all M columns of U are returned in array U:   
            = 'S':  the first min(m,n) columns of U (the left singular   
                    vectors) are returned in the array U;   
            = 'O':  the first min(m,n) columns of U (the left singular   
                    vectors) are overwritten on the array A;   
            = 'N':  no columns of U (no left singular vectors) are   
                    computed.   

    JOBVT   (input) CHARACTER*1   
            Specifies options for computing all or part of the matrix   
            V**T:   
            = 'A':  all N rows of V**T are returned in the array VT;   
            = 'S':  the first min(m,n) rows of V**T (the right singular   
                    vectors) are returned in the array VT;   
            = 'O':  the first min(m,n) rows of V**T (the right singular   
                    vectors) are overwritten on the array A;   
            = 'N':  no rows of V**T (no right singular vectors) are   
                    computed.   

            JOBVT and JOBU cannot both be 'O'.   

    M       (input) INTEGER   
            The number of rows of the input matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the input matrix A.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the M-by-N matrix A.   
            On exit,   
            if JOBU = 'O',  A is overwritten with the first min(m,n)   
                            columns of U (the left singular vectors,   
                            stored columnwise);   
            if JOBVT = 'O', A is overwritten with the first min(m,n)   
                            rows of V**T (the right singular vectors,   
                            stored rowwise);   
            if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A   
                            are destroyed.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    S       (output) REAL array, dimension (min(M,N))   
            The singular values of A, sorted so that S(i) >= S(i+1).   

    U       (output) REAL array, dimension (LDU,UCOL)   
            (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.   
            If JOBU = 'A', U contains the M-by-M orthogonal matrix U;   
            if JOBU = 'S', U contains the first min(m,n) columns of U   
            (the left singular vectors, stored columnwise);   
            if JOBU = 'N' or 'O', U is not referenced.   

    LDU     (input) INTEGER   
            The leading dimension of the array U.  LDU >= 1; if   
            JOBU = 'S' or 'A', LDU >= M.   

    VT      (output) REAL array, dimension (LDVT,N)   
            If JOBVT = 'A', VT contains the N-by-N orthogonal matrix   
            V**T;   
            if JOBVT = 'S', VT contains the first min(m,n) rows of   
            V**T (the right singular vectors, stored rowwise);   
            if JOBVT = 'N' or 'O', VT is not referenced.   

    LDVT    (input) INTEGER   
            The leading dimension of the array VT.  LDVT >= 1; if   
            JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).   

    WORK    (workspace/output) REAL array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK;   
            if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged   
            superdiagonal elements of an upper bidiagonal matrix B   
            whose diagonal is in S (not necessarily sorted). B   
            satisfies A = U * B * VT, so it has the same singular values   
            as A, and singular vectors related by U and VT.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK. LWORK >= 1.   
            LWORK >= MAX(3*MIN(M,N)+MAX(M,N),5*MIN(M,N)).   
            For good performance, LWORK should generally be larger.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    INFO    (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            > 0:  if SBDSQR did not converge, INFO specifies how many   
                  superdiagonals of an intermediate bidiagonal form B   
                  did not converge to zero. See the description of WORK   
                  above for details.   

    =====================================================================   


       Test the input arguments   

       Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --s;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1 * 1;
    u -= u_offset;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1 * 1;
    vt -= vt_offset;
    --work;

    /* Function Body */
    *info = 0;
    minmn = f2cmin(*m,*n);
/* Writing concatenation */
    i__1[0] = 1, a__1[0] = jobu;
    i__1[1] = 1, a__1[1] = jobvt;
    s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
    mnthr = ilaenv_(&c__6, "SGESVD", ch__1, m, n, &c__0, &c__0, (ftnlen)6, (
            ftnlen)2);
    wntua = lsame_(jobu, "A");
    wntus = lsame_(jobu, "S");
    wntuas = wntua || wntus;
    wntuo = lsame_(jobu, "O");
    wntun = lsame_(jobu, "N");
    wntva = lsame_(jobvt, "A");
    wntvs = lsame_(jobvt, "S");
    wntvas = wntva || wntvs;
    wntvo = lsame_(jobvt, "O");
    wntvn = lsame_(jobvt, "N");
    minwrk = 1;
    lquery = *lwork == -1;

    if (! (wntua || wntus || wntuo || wntun)) {
        *info = -1;
    } else if (! (wntva || wntvs || wntvo || wntvn) || wntvo && wntuo) {
        *info = -2;
    } else if (*m < 0) {
        *info = -3;
    } else if (*n < 0) {
        *info = -4;
    } else if (*lda < f2cmax(1,*m)) {
        *info = -6;
    } else if (*ldu < 1 || wntuas && *ldu < *m) {
        *info = -9;
    } else if (*ldvt < 1 || wntva && *ldvt < *n || wntvs && *ldvt < minmn) {
        *info = -11;
    }

/*     Compute workspace   
        (Note: Comments in the code beginning "Workspace:" describe the   
         minimal amount of workspace needed at that point in the code,   
         as well as the preferred amount for good performance.   
         NB refers to the optimal block size for the immediately   
         following subroutine, as returned by ILAENV.) */

    if (*info == 0 && (*lwork >= 1 || lquery) && *m > 0 && *n > 0) {
        if (*m >= *n) {

/*           Compute space needed for SBDSQR */

            bdspac = *n * 5;
            if (*m >= mnthr) {
                if (wntun) {

/*                 Path 1 (M much larger than N, JOBU='N') */

                    maxwrk = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = maxwrk, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    maxwrk = f2cmax(i__2,i__3);
                    if (wntvo || wntvas) {
/* Computing MAX */
                        i__2 = maxwrk, i__3 = *n * 3 + (*n - 1) * ilaenv_(&
                                c__1, "SORGBR", "P", n, n, n, &c_n1, (ftnlen)
                                6, (ftnlen)1);
                        maxwrk = f2cmax(i__2,i__3);
                    }
                    maxwrk = f2cmax(maxwrk,bdspac);
/* Computing MAX */
                    i__2 = *n << 2;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                } else if (wntuo && wntvn) {

/*                 Path 2 (M much larger than N, JOBU='O', JOBVT='N') */

                    wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n + *n * ilaenv_(&c__1, "SORGQR", 
                            " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR"
                            , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
                    wrkbl = f2cmax(wrkbl,bdspac);
/* Computing MAX */
                    i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n;
                    maxwrk = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = *n * 3 + *m;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                } else if (wntuo && wntvas) {

/*                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or   
                   'A') */

                    wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n + *n * ilaenv_(&c__1, "SORGQR", 
                            " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR"
                            , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1, 
                            "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
                            1);
                    wrkbl = f2cmax(i__2,i__3);
                    wrkbl = f2cmax(wrkbl,bdspac);
/* Computing MAX */
                    i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n;
                    maxwrk = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = *n * 3 + *m;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                } else if (wntus && wntvn) {

/*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N') */

                    wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n + *n * ilaenv_(&c__1, "SORGQR", 
                            " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR"
                            , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
                    wrkbl = f2cmax(wrkbl,bdspac);
                    maxwrk = *n * *n + wrkbl;
/* Computing MAX */
                    i__2 = *n * 3 + *m;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                } else if (wntus && wntvo) {

/*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O') */

                    wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n + *n * ilaenv_(&c__1, "SORGQR", 
                            " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR"
                            , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1, 
                            "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
                            1);
                    wrkbl = f2cmax(i__2,i__3);
                    wrkbl = f2cmax(wrkbl,bdspac);
                    maxwrk = (*n << 1) * *n + wrkbl;
/* Computing MAX */
                    i__2 = *n * 3 + *m;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                } else if (wntus && wntvas) {

/*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or   
                   'A') */

                    wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n + *n * ilaenv_(&c__1, "SORGQR", 
                            " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR"
                            , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1, 
                            "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
                            1);
                    wrkbl = f2cmax(i__2,i__3);
                    wrkbl = f2cmax(wrkbl,bdspac);
                    maxwrk = *n * *n + wrkbl;
/* Computing MAX */
                    i__2 = *n * 3 + *m;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                } else if (wntua && wntvn) {

/*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N') */

                    wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n + *m * ilaenv_(&c__1, "SORGQR", 
                            " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR"
                            , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
                    wrkbl = f2cmax(wrkbl,bdspac);
                    maxwrk = *n * *n + wrkbl;
/* Computing MAX */
                    i__2 = *n * 3 + *m;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                } else if (wntua && wntvo) {

/*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O') */

                    wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n + *m * ilaenv_(&c__1, "SORGQR", 
                            " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR"
                            , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1, 
                            "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
                            1);
                    wrkbl = f2cmax(i__2,i__3);
                    wrkbl = f2cmax(wrkbl,bdspac);
                    maxwrk = (*n << 1) * *n + wrkbl;
/* Computing MAX */
                    i__2 = *n * 3 + *m;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                } else if (wntua && wntvas) {

/*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or   
                   'A') */

                    wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n + *m * ilaenv_(&c__1, "SORGQR", 
                            " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR"
                            , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1, 
                            "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
                            1);
                    wrkbl = f2cmax(i__2,i__3);
                    wrkbl = f2cmax(wrkbl,bdspac);
                    maxwrk = *n * *n + wrkbl;
/* Computing MAX */
                    i__2 = *n * 3 + *m;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                }
            } else {

/*              Path 10 (M at least N, but not much larger) */

                maxwrk = *n * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m,
                         n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
                if (wntus || wntuo) {
/* Computing MAX */
                    i__2 = maxwrk, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORG"
                            "BR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    maxwrk = f2cmax(i__2,i__3);
                }
                if (wntua) {
/* Computing MAX */
                    i__2 = maxwrk, i__3 = *n * 3 + *m * ilaenv_(&c__1, "SORG"
                            "BR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1);
                    maxwrk = f2cmax(i__2,i__3);
                }
                if (! wntvn) {
/* Computing MAX */
                    i__2 = maxwrk, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1, 
                            "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
                            1);
                    maxwrk = f2cmax(i__2,i__3);
                }
                maxwrk = f2cmax(maxwrk,bdspac);
/* Computing MAX */
                i__2 = *n * 3 + *m;
                minwrk = f2cmax(i__2,bdspac);
                maxwrk = f2cmax(maxwrk,minwrk);
            }
        } else {

/*           Compute space needed for SBDSQR */

            bdspac = *m * 5;
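/*           SBDSQR works on an M-by-M bidiagonal matrix in this N > M
             branch, so BDSPAC is 5*M here (the M >= N branch above uses
             5*N). */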
            if (*n >= mnthr) {
                if (wntvn) {

/*                 Path 1t (N much larger than M, JOBVT='N') */

                    maxwrk = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = maxwrk, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    maxwrk = f2cmax(i__2,i__3);
                    if (wntuo || wntuas) {
/* Computing MAX */
                        i__2 = maxwrk, i__3 = *m * 3 + *m * ilaenv_(&c__1, 
                                "SORGBR", "Q", m, m, m, &c_n1, (ftnlen)6, (
                                ftnlen)1);
                        maxwrk = f2cmax(i__2,i__3);
                    }
                    maxwrk = f2cmax(maxwrk,bdspac);
/* Computing MAX */
                    i__2 = *m << 2;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                } else if (wntvo && wntun) {

/*                 Path 2t (N much larger than M, JOBU='N', JOBVT='O') */

                    wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m + *m * ilaenv_(&c__1, "SORGLQ", 
                            " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 
                            "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
                            1);
                    wrkbl = f2cmax(i__2,i__3);
                    wrkbl = f2cmax(wrkbl,bdspac);
/* Computing MAX */
                    i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m;
                    maxwrk = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = *m * 3 + *n;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                } else if (wntvo && wntuas) {

/*                 Path 3t (N much larger than M, JOBU='S' or 'A',
                   JOBVT='O') */

                    wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m + *m * ilaenv_(&c__1, "SORGLQ", 
                            " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 
                            "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
                            1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORGBR"
                            , "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
                    wrkbl = f2cmax(wrkbl,bdspac);
/* Computing MAX */
                    i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m;
                    maxwrk = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = *m * 3 + *n;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                } else if (wntvs && wntun) {

/*                 Path 4t (N much larger than M, JOBU='N', JOBVT='S') */

                    wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m + *m * ilaenv_(&c__1, "SORGLQ", 
                            " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 
                            "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
                            1);
                    wrkbl = f2cmax(i__2,i__3);
                    wrkbl = f2cmax(wrkbl,bdspac);
                    maxwrk = *m * *m + wrkbl;
/* Computing MAX */
                    i__2 = *m * 3 + *n;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                } else if (wntvs && wntuo) {

/*                 Path 5t (N much larger than M, JOBU='O', JOBVT='S') */

                    wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m + *m * ilaenv_(&c__1, "SORGLQ", 
                            " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 
                            "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
                            1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORGBR"
                            , "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
                    wrkbl = f2cmax(wrkbl,bdspac);
                    maxwrk = (*m << 1) * *m + wrkbl;
/* Computing MAX */
                    i__2 = *m * 3 + *n;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                } else if (wntvs && wntuas) {

/*                 Path 6t (N much larger than M, JOBU='S' or 'A',
                   JOBVT='S') */

                    wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m + *m * ilaenv_(&c__1, "SORGLQ", 
                            " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 
                            "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
                            1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORGBR"
                            , "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
                    wrkbl = f2cmax(wrkbl,bdspac);
                    maxwrk = *m * *m + wrkbl;
/* Computing MAX */
                    i__2 = *m * 3 + *n;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                } else if (wntva && wntun) {

/*                 Path 7t (N much larger than M, JOBU='N', JOBVT='A') */

                    wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m + *n * ilaenv_(&c__1, "SORGLQ", 
                            " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 
                            "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
                            1);
                    wrkbl = f2cmax(i__2,i__3);
                    wrkbl = f2cmax(wrkbl,bdspac);
                    maxwrk = *m * *m + wrkbl;
/* Computing MAX */
                    i__2 = *m * 3 + *n;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                } else if (wntva && wntuo) {

/*                 Path 8t (N much larger than M, JOBU='O', JOBVT='A') */

                    wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m + *n * ilaenv_(&c__1, "SORGLQ", 
                            " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 
                            "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
                            1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORGBR"
                            , "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
                    wrkbl = f2cmax(wrkbl,bdspac);
                    maxwrk = (*m << 1) * *m + wrkbl;
/* Computing MAX */
                    i__2 = *m * 3 + *n;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                } else if (wntva && wntuas) {

/*                 Path 9t (N much larger than M, JOBU='S' or 'A',
                   JOBVT='A') */

                    wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
                            c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m + *n * ilaenv_(&c__1, "SORGLQ", 
                            " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 
                            "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
                            ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 
                            "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
                            1);
                    wrkbl = f2cmax(i__2,i__3);
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORGBR"
                            , "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen)1);
                    wrkbl = f2cmax(i__2,i__3);
                    wrkbl = f2cmax(wrkbl,bdspac);
                    maxwrk = *m * *m + wrkbl;
/* Computing MAX */
                    i__2 = *m * 3 + *n;
                    minwrk = f2cmax(i__2,bdspac);
                    maxwrk = f2cmax(maxwrk,minwrk);
                }
            } else {

/*              Path 10t (N greater than M, but not much larger) */

                maxwrk = *m * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m,
                         n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
                if (wntvs || wntvo) {
/* Computing MAX */
                    i__2 = maxwrk, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORG"
                            "BR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
                    maxwrk = f2cmax(i__2,i__3);
                }
                if (wntva) {
/* Computing MAX */
                    i__2 = maxwrk, i__3 = *m * 3 + *n * ilaenv_(&c__1, "SORG"
                            "BR", "P", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
                    maxwrk = f2cmax(i__2,i__3);
                }
                if (! wntun) {
/* Computing MAX */
                    i__2 = maxwrk, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 
                            "SORGBR", "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
                            1);
                    maxwrk = f2cmax(i__2,i__3);
                }
                maxwrk = f2cmax(maxwrk,bdspac);
/* Computing MAX */
                i__2 = *m * 3 + *n;
                minwrk = f2cmax(i__2,bdspac);
                maxwrk = f2cmax(maxwrk,minwrk);
            }
        }
        work[1] = (real) maxwrk;
    }
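/*        MINWRK is the smallest LWORK accepted by the check below; MAXWRK
          is the optimal LWORK for the blocked code paths and is returned
          to the caller in WORK(1), which is what a workspace query reads. */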

    if (*lwork < minwrk && ! lquery) {
        *info = -13;
    }
    if (*info != 0) {
        i__2 = -(*info);
        xerbla_("SGESVD", &i__2);
        return 0;
    } else if (lquery) {
        return 0;
    }
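/*     Illustrative caller-side workspace query (a sketch only; the variable
       names are hypothetical and the exact prototype of sgesvd_ is the one
       declared in this header):

           integer lwork = -1, info;
           real wkopt;
           sgesvd_("A", "A", &m, &n, a, &lda, s, u, &ldu, vt, &ldvt,
                   &wkopt, &lwork, &info);        query: WORK(1) = MAXWRK
           lwork = (integer) wkopt;
           work = (real *) malloc(lwork * sizeof(real));
           sgesvd_("A", "A", &m, &n, a, &lda, s, u, &ldu, vt, &ldvt,
                   work, &lwork, &info);          actual decomposition     */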

/*     Quick return if possible */

    if (*m == 0 || *n == 0) {
        if (*lwork >= 1) {
            work[1] = 1.f;
        }
        return 0;
    }

/*     Get machine constants */

    eps = slamch_("P");
    smlnum = sqrt(slamch_("S")) / eps;
    bignum = 1.f / smlnum;
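/*     SMLNUM = sqrt(safe minimum)/eps and BIGNUM = 1/SMLNUM bracket the
       range of matrix norms for which the iteration is numerically safe. */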

/*     Scale A if max element outside range [SMLNUM,BIGNUM] */

    anrm = slange_("M", m, n, &a[a_offset], lda, dum);
    iscl = 0;
    if (anrm > 0.f && anrm < smlnum) {
        iscl = 1;
        slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, &
                ierr);
    } else if (anrm > bignum) {
        iscl = 1;
        slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, &
                ierr);
    }
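/*     ISCL records whether A was scaled into [SMLNUM,BIGNUM]; if so, the
       computed singular values are rescaled back to the original range
       after the bidiagonal QR iteration. */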

    if (*m >= *n) {

/*        A has at least as many rows as columns. If A has sufficiently   
          more rows than columns, first reduce using the QR   
          decomposition (if sufficient workspace available) */

        if (*m >= mnthr) {
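/*           MNTHR is the SGESVD crossover point obtained from ILAENV
             (ISPEC = 6), typically about 1.6*N; above it an initial QR
             factorization of A is cheaper than bidiagonalizing A directly. */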

            if (wntun) {

/*              Path 1 (M much larger than N, JOBU='N')   
                No left singular vectors to be computed */

                itau = 1;
                iwork = itau + *n;

/*              Compute A=Q*R   
                (Workspace: need 2*N, prefer N+N*NB) */

                i__2 = *lwork - iwork + 1;
                sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &
                        i__2, &ierr);

/*              Zero out below R */

                i__2 = *n - 1;
                i__3 = *n - 1;
                slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &a_ref(2, 1), 
                        lda);
                ie = 1;
                itauq = ie + *n;
                itaup = itauq + *n;
                iwork = itaup + *n;
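/*              WORK is partitioned here: WORK(IE) receives the superdiagonal
                of the bidiagonal form, WORK(ITAUQ) and WORK(ITAUP) the
                Householder scalars for Q and P from SGEBRD, and WORK(IWORK)
                onward is passed to the blocked routines. */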

/*              Bidiagonalize R in A   
                (Workspace: need 4*N, prefer 3*N+2*N*NB) */

                i__2 = *lwork - iwork + 1;
                sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[
                        itauq], &work[itaup], &work[iwork], &i__2, &ierr);
                ncvt = 0;
                if (wntvo || wntvas) {

/*                 If right singular vectors desired, generate P'.   
                   (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */

                    i__2 = *lwork - iwork + 1;
                    sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &
                            work[iwork], &i__2, &ierr);
                    ncvt = *n;
                }
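/*              NCVT = N makes SBDSQR below also rotate the right singular
                vectors held in A; NCVT = 0 leaves them out entirely. */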
                iwork = ie + *n;

/*              Perform bidiagonal QR iteration, computing right   
                singular vectors of A in A if desired   
                (Workspace: need BDSPAC) */

                sbdsqr_("U", n, &ncvt, &c__0, &c__0, &s[1], &work[ie], &a[
                        a_offset], lda, dum, &c__1, dum, &c__1, &work[iwork], 
                        info);

/*              If right singular vectors desired in VT, copy them there */

                if (wntvas) {
                    slacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], 
                            ldvt);
                }

            } else if (wntuo && wntvn) {

/*              Path 2 (M much larger than N, JOBU='O', JOBVT='N')   
                N left singular vectors to be overwritten on A and   
                no right singular vectors to be computed   

   Computing MAX */
                i__2 = *n << 2;
                if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {

/*                 Sufficient workspace for a fast algorithm */

                    ir = 1;
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *lda * *n + *n;
                    if (*lwork >= f2cmax(i__2,i__3) + *lda * *n) {

/*                    WORK(IU) is LDA by N, WORK(IR) is LDA by N */

                        ldwrku = *lda;
                        ldwrkr = *lda;
                    } else /* if(complicated condition) */ {
/* Computing MAX */
                        i__2 = wrkbl, i__3 = *lda * *n + *n;
                        if (*lwork >= f2cmax(i__2,i__3) + *n * *n) {

/*                    WORK(IU) is LDA by N, WORK(IR) is N by N */

                            ldwrku = *lda;
                            ldwrkr = *n;
                        } else {

/*                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N */

                            ldwrku = (*lwork - *n * *n - *n) / *n;
                            ldwrkr = *n;
                        }
                    }
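/*                 LDWRKU and LDWRKR are chosen as large as LWORK allows so
                   that the row-block buffer WORK(IU) used in the final
                   multiply and the N-by-N copy of R in WORK(IR) both fit. */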
                    itau = ir + ldwrkr * *n;
                    iwork = itau + *n;

/*                 Compute A=Q*R   
                   (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */

                    i__2 = *lwork - iwork + 1;
                    sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
                            , &i__2, &ierr);

/*                 Copy R to WORK(IR) and zero out below it */

                    slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
                    i__2 = *n - 1;
                    i__3 = *n - 1;
                    slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[ir + 1]
                            , &ldwrkr);

/*                 Generate Q in A   
                   (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */

                    i__2 = *lwork - iwork + 1;
                    sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
                            iwork], &i__2, &ierr);
                    ie = itau;
                    itauq = ie + *n;
                    itaup = itauq + *n;
                    iwork = itaup + *n;

/*                 Bidiagonalize R in WORK(IR)   
                   (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */

                    i__2 = *lwork - iwork + 1;
                    sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
                            itauq], &work[itaup], &work[iwork], &i__2, &ierr);

/*                 Generate left vectors bidiagonalizing R   
                   (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */

                    i__2 = *lwork - iwork + 1;
                    sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], &
                            work[iwork], &i__2, &ierr);
                    iwork = ie + *n;

/*                 Perform bidiagonal QR iteration, computing left   
                   singular vectors of R in WORK(IR)   
                   (Workspace: need N*N+BDSPAC) */

                    sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], dum, &
                            c__1, &work[ir], &ldwrkr, dum, &c__1, &work[iwork]
                            , info);
                    iu = ie + *n;

/*                 Multiply Q in A by left singular vectors of R in   
                   WORK(IR), storing result in WORK(IU) and copying to A   
                   (Workspace: need N*N+2*N, prefer N*N+M*N+N) */

                    i__2 = *m;
                    i__3 = ldwrku;
                    for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
                             i__3) {
/* Computing MIN */
                        i__4 = *m - i__ + 1;
                        chunk = f2cmin(i__4,ldwrku);
                        sgemm_("N", "N", &chunk, n, n, &c_b438, &a_ref(i__, 1)
                                , lda, &work[ir], &ldwrkr, &c_b416, &work[iu],
                                 &ldwrku);
                        slacpy_("F", &chunk, n, &work[iu], &ldwrku, &a_ref(
                                i__, 1), lda);
/* L10: */
                    }
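/*                 The multiply is blocked over rows: each pass forms at most
                   LDWRKU rows of Q times the left singular vectors of R in
                   WORK(IU) and copies them back into A, so only an
                   LDWRKU-by-N buffer is required. */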

                } else {

/*                 Insufficient workspace for a fast algorithm */

                    ie = 1;
                    itauq = ie + *n;
                    itaup = itauq + *n;
                    iwork = itaup + *n;

/*                 Bidiagonalize A   
                   (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */

                    i__3 = *lwork - iwork + 1;
                    sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[
                            itauq], &work[itaup], &work[iwork], &i__3, &ierr);

/*                 Generate left vectors bidiagonalizing A   
                   (Workspace: need 4*N, prefer 3*N+N*NB) */

                    i__3 = *lwork - iwork + 1;
                    sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &
                            work[iwork], &i__3, &ierr);
                    iwork = ie + *n;

/*                 Perform bidiagonal QR iteration, computing left   
                   singular vectors of A in A   
                   (Workspace: need BDSPAC) */

                    sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], dum, &
                            c__1, &a[a_offset], lda, dum, &c__1, &work[iwork],
                             info);

                }

            } else if (wntuo && wntvas) {

/*              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')   
                N left singular vectors to be overwritten on A and   
                N right singular vectors to be computed in VT   

   Computing MAX */
                i__3 = *n << 2;
                if (*lwork >= *n * *n + f2cmax(i__3,bdspac)) {

/*                 Sufficient workspace for a fast algorithm */

                    ir = 1;
/* Computing MAX */
                    i__3 = wrkbl, i__2 = *lda * *n + *n;
                    if (*lwork >= f2cmax(i__3,i__2) + *lda * *n) {

/*                    WORK(IU) is LDA by N and WORK(IR) is LDA by N */

                        ldwrku = *lda;
                        ldwrkr = *lda;
                    } else /* if(complicated condition) */ {
/* Computing MAX */
                        i__3 = wrkbl, i__2 = *lda * *n + *n;
                        if (*lwork >= f2cmax(i__3,i__2) + *n * *n) {

/*                    WORK(IU) is LDA by N and WORK(IR) is N by N */

                            ldwrku = *lda;
                            ldwrkr = *n;
                        } else {

/*                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N */

                            ldwrku = (*lwork - *n * *n - *n) / *n;
                            ldwrkr = *n;
                        }
                    }
                    itau = ir + ldwrkr * *n;
                    iwork = itau + *n;

/*                 Compute A=Q*R   
                   (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */

                    i__3 = *lwork - iwork + 1;
                    sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
                            , &i__3, &ierr);

/*                 Copy R to VT, zeroing out below it */

                    slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
                            ldvt);
                    i__3 = *n - 1;
                    i__2 = *n - 1;
                    slaset_("L", &i__3, &i__2, &c_b416, &c_b416, &vt_ref(2, 1)
                            , ldvt);

/*                 Generate Q in A   
                   (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */

                    i__3 = *lwork - iwork + 1;
                    sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
                            iwork], &i__3, &ierr);
                    ie = itau;
                    itauq = ie + *n;
                    itaup = itauq + *n;
                    iwork = itaup + *n;

/*                 Bidiagonalize R in VT, copying result to WORK(IR)   
                   (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */

                    i__3 = *lwork - iwork + 1;
                    sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], &
                            work[itauq], &work[itaup], &work[iwork], &i__3, &
                            ierr);
                    slacpy_("L", n, n, &vt[vt_offset], ldvt, &work[ir], &
                            ldwrkr);

/*                 Generate left vectors bidiagonalizing R in WORK(IR)   
                   (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */

                    i__3 = *lwork - iwork + 1;
                    sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], &
                            work[iwork], &i__3, &ierr);

/*                 Generate right vectors bidiagonalizing R in VT   
                   (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB) */

                    i__3 = *lwork - iwork + 1;
                    sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], 
                            &work[iwork], &i__3, &ierr);
                    iwork = ie + *n;

/*                 Perform bidiagonal QR iteration, computing left   
                   singular vectors of R in WORK(IR) and computing right   
                   singular vectors of R in VT   
                   (Workspace: need N*N+BDSPAC) */

                    sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
                            vt_offset], ldvt, &work[ir], &ldwrkr, dum, &c__1, 
                            &work[iwork], info);
                    iu = ie + *n;

/*                 Multiply Q in A by left singular vectors of R in   
                   WORK(IR), storing result in WORK(IU) and copying to A   
                   (Workspace: need N*N+2*N, prefer N*N+M*N+N) */

                    i__3 = *m;
                    i__2 = ldwrku;
                    for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ +=
                             i__2) {
/* Computing MIN */
                        i__4 = *m - i__ + 1;
                        chunk = f2cmin(i__4,ldwrku);
                        sgemm_("N", "N", &chunk, n, n, &c_b438, &a_ref(i__, 1)
                                , lda, &work[ir], &ldwrkr, &c_b416, &work[iu],
                                 &ldwrku);
                        slacpy_("F", &chunk, n, &work[iu], &ldwrku, &a_ref(
                                i__, 1), lda);
/* L20: */
                    }

                } else {

/*                 Insufficient workspace for a fast algorithm */

                    itau = 1;
                    iwork = itau + *n;

/*                 Compute A=Q*R   
                   (Workspace: need 2*N, prefer N+N*NB) */

                    i__2 = *lwork - iwork + 1;
                    sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
                            , &i__2, &ierr);

/*                 Copy R to VT, zeroing out below it */

                    slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
                            ldvt);
                    i__2 = *n - 1;
                    i__3 = *n - 1;
                    slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &vt_ref(2, 1)
                            , ldvt);

/*                 Generate Q in A   
                   (Workspace: need 2*N, prefer N+N*NB) */

                    i__2 = *lwork - iwork + 1;
                    sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
                            iwork], &i__2, &ierr);
                    ie = itau;
                    itauq = ie + *n;
                    itaup = itauq + *n;
                    iwork = itaup + *n;

/*                 Bidiagonalize R in VT   
                   (Workspace: need 4*N, prefer 3*N+2*N*NB) */

                    i__2 = *lwork - iwork + 1;
                    sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], &
                            work[itauq], &work[itaup], &work[iwork], &i__2, &
                            ierr);

/*                 Multiply Q in A by left vectors bidiagonalizing R   
                   (Workspace: need 3*N+M, prefer 3*N+M*NB) */

                    i__2 = *lwork - iwork + 1;
                    sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, &
                            work[itauq], &a[a_offset], lda, &work[iwork], &
                            i__2, &ierr);

/*                 Generate right vectors bidiagonalizing R in VT   
                   (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */

                    i__2 = *lwork - iwork + 1;
                    sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], 
                            &work[iwork], &i__2, &ierr);
                    iwork = ie + *n;

/*                 Perform bidiagonal QR iteration, computing left   
                   singular vectors of A in A and computing right   
                   singular vectors of A in VT   
                   (Workspace: need BDSPAC) */

                    sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
                            vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
                            work[iwork], info);

                }

            } else if (wntus) {

                if (wntvn) {

/*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')   
                   N left singular vectors to be computed in U and   
                   no right singular vectors to be computed   

   Computing MAX */
                    i__2 = *n << 2;
                    if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {

/*                    Sufficient workspace for a fast algorithm */

                        ir = 1;
                        if (*lwork >= wrkbl + *lda * *n) {

/*                       WORK(IR) is LDA by N */

                            ldwrkr = *lda;
                        } else {

/*                       WORK(IR) is N by N */

                            ldwrkr = *n;
                        }
                        itau = ir + ldwrkr * *n;
                        iwork = itau + *n;

/*                    Compute A=Q*R   
                      (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);

/*                    Copy R to WORK(IR), zeroing out below it */

                        slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &
                                ldwrkr);
                        i__2 = *n - 1;
                        i__3 = *n - 1;
                        slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[ir 
                                + 1], &ldwrkr);

/*                    Generate Q in A   
                      (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
                                work[iwork], &i__2, &ierr);
                        ie = itau;
                        itauq = ie + *n;
                        itaup = itauq + *n;
                        iwork = itaup + *n;

/*                    Bidiagonalize R in WORK(IR)   
                      (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);

/*                    Generate left vectors bidiagonalizing R in WORK(IR)   
                      (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq]
                                , &work[iwork], &i__2, &ierr);
                        iwork = ie + *n;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of R in WORK(IR)   
                      (Workspace: need N*N+BDSPAC) */

                        sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], 
                                dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, &
                                work[iwork], info);

/*                    Multiply Q in A by left singular vectors of R in   
                      WORK(IR), storing result in U   
                      (Workspace: need N*N) */

                        sgemm_("N", "N", m, n, n, &c_b438, &a[a_offset], lda, 
                                &work[ir], &ldwrkr, &c_b416, &u[u_offset], 
                                ldu);

                    } else {

/*                    Insufficient workspace for a fast algorithm */

                        itau = 1;
                        iwork = itau + *n;

/*                    Compute A=Q*R, copying result to U   
                      (Workspace: need 2*N, prefer N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);
                        slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
                                ldu);

/*                    Generate Q in U   
                      (Workspace: need 2*N, prefer N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
                                work[iwork], &i__2, &ierr);
                        ie = itau;
                        itauq = ie + *n;
                        itaup = itauq + *n;
                        iwork = itaup + *n;

/*                    Zero out below R in A */

                        i__2 = *n - 1;
                        i__3 = *n - 1;
                        slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &a_ref(2,
                                 1), lda);

/*                    Bidiagonalize R in A   
                      (Workspace: need 4*N, prefer 3*N+2*N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);

/*                    Multiply Q in U by left vectors bidiagonalizing R   
                      (Workspace: need 3*N+M, prefer 3*N+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
                                work[itauq], &u[u_offset], ldu, &work[iwork], 
                                &i__2, &ierr);
                        iwork = ie + *n;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of A in U   
                      (Workspace: need BDSPAC) */

                        sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], 
                                dum, &c__1, &u[u_offset], ldu, dum, &c__1, &
                                work[iwork], info);

                    }

                } else if (wntvo) {

/*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')   
                   N left singular vectors to be computed in U and   
                   N right singular vectors to be overwritten on A   

   Computing MAX */
                    i__2 = *n << 2;
                    if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) {

/*                    Sufficient workspace for a fast algorithm */

                        iu = 1;
                        if (*lwork >= wrkbl + (*lda << 1) * *n) {

/*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N */

                            ldwrku = *lda;
                            ir = iu + ldwrku * *n;
                            ldwrkr = *lda;
                        } else if (*lwork >= wrkbl + (*lda + *n) * *n) {

/*                       WORK(IU) is LDA by N and WORK(IR) is N by N */

                            ldwrku = *lda;
                            ir = iu + ldwrku * *n;
                            ldwrkr = *n;
                        } else {

/*                       WORK(IU) is N by N and WORK(IR) is N by N */

                            ldwrku = *n;
                            ir = iu + ldwrku * *n;
                            ldwrkr = *n;
                        }
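/*                    WORK is laid out as WORK(IU), then WORK(IR), then the N
                      Householder scalars at WORK(ITAU), with the remainder
                      left for the blocked factorization routines. */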
                        itau = ir + ldwrkr * *n;
                        iwork = itau + *n;

/*                    Compute A=Q*R   
                      (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);

/*                    Copy R to WORK(IU), zeroing out below it */

                        slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
                                ldwrku);
                        i__2 = *n - 1;
                        i__3 = *n - 1;
                        slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[iu 
                                + 1], &ldwrku);

/*                    Generate Q in A   
                      (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
                                work[iwork], &i__2, &ierr);
                        ie = itau;
                        itauq = ie + *n;
                        itaup = itauq + *n;
                        iwork = itaup + *n;

/*                    Bidiagonalize R in WORK(IU), copying result to   
                      WORK(IR)   
                      (Workspace: need 2*N*N+4*N,   
                                  prefer 2*N*N+3*N+2*N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);
                        slacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], &
                                ldwrkr);

/*                    Generate left bidiagonalizing vectors in WORK(IU)   
                      (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
                                , &work[iwork], &i__2, &ierr);

/*                    Generate right bidiagonalizing vectors in WORK(IR)   
                      (Workspace: need 2*N*N+4*N-1,   
                                  prefer 2*N*N+3*N+(N-1)*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup]
                                , &work[iwork], &i__2, &ierr);
                        iwork = ie + *n;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of R in WORK(IU) and computing   
                      right singular vectors of R in WORK(IR)   
                      (Workspace: need 2*N*N+BDSPAC) */

                        sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[
                                ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, 
                                &work[iwork], info);

/*                    Multiply Q in A by left singular vectors of R in   
                      WORK(IU), storing result in U   
                      (Workspace: need N*N) */

                        sgemm_("N", "N", m, n, n, &c_b438, &a[a_offset], lda, 
                                &work[iu], &ldwrku, &c_b416, &u[u_offset], 
                                ldu);

/*                    Copy right singular vectors of R to A   
                      (Workspace: need N*N) */

                        slacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], 
                                lda);

                    } else {

/*                    Insufficient workspace for a fast algorithm */

                        itau = 1;
                        iwork = itau + *n;

/*                    Compute A=Q*R, copying result to U   
                      (Workspace: need 2*N, prefer N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);
                        slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
                                ldu);

/*                    Generate Q in U   
                      (Workspace: need 2*N, prefer N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
                                work[iwork], &i__2, &ierr);
                        ie = itau;
                        itauq = ie + *n;
                        itaup = itauq + *n;
                        iwork = itaup + *n;

/*                    Zero out below R in A */

                        i__2 = *n - 1;
                        i__3 = *n - 1;
                        slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &a_ref(2,
                                 1), lda);

/*                    Bidiagonalize R in A   
                      (Workspace: need 4*N, prefer 3*N+2*N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);

/*                    Multiply Q in U by left vectors bidiagonalizing R   
                      (Workspace: need 3*N+M, prefer 3*N+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
                                work[itauq], &u[u_offset], ldu, &work[iwork], 
                                &i__2, &ierr);

/*                    Generate right vectors bidiagonalizing R in A   
                      (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup],
                                 &work[iwork], &i__2, &ierr);
                        iwork = ie + *n;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of A in U and computing right   
                      singular vectors of A in A   
                      (Workspace: need BDSPAC) */

                        sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[
                                a_offset], lda, &u[u_offset], ldu, dum, &c__1,
                                 &work[iwork], info);

                    }

                } else if (wntvas) {

/*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'   
                           or 'A')   
                   N left singular vectors to be computed in U and   
                   N right singular vectors to be computed in VT   

   Computing MAX */
                    i__2 = *n << 2;
                    if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {

/*                    Sufficient workspace for a fast algorithm */

                        iu = 1;
                        if (*lwork >= wrkbl + *lda * *n) {

/*                       WORK(IU) is LDA by N */

                            ldwrku = *lda;
                        } else {

/*                       WORK(IU) is N by N */

                            ldwrku = *n;
                        }
                        itau = iu + ldwrku * *n;
                        iwork = itau + *n;

/*                    Compute A=Q*R   
                      (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);

/*                    Copy R to WORK(IU), zeroing out below it */

                        slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
                                ldwrku);
                        i__2 = *n - 1;
                        i__3 = *n - 1;
                        slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[iu 
                                + 1], &ldwrku);

/*                    Generate Q in A   
                      (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
                                work[iwork], &i__2, &ierr);
                        ie = itau;
                        itauq = ie + *n;
                        itaup = itauq + *n;
                        iwork = itaup + *n;

/*                    Bidiagonalize R in WORK(IU), copying result to VT   
                      (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);
                        slacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset],
                                 ldvt);

/*                    Generate left bidiagonalizing vectors in WORK(IU)   
                      (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
                                , &work[iwork], &i__2, &ierr);

/*                    Generate right bidiagonalizing vectors in VT   
                      (Workspace: need N*N+4*N-1,   
                                  prefer N*N+3*N+(N-1)*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
                                itaup], &work[iwork], &i__2, &ierr);
                        iwork = ie + *n;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of R in WORK(IU) and computing   
                      right singular vectors of R in VT   
                      (Workspace: need N*N+BDSPAC) */

                        sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
                                vt_offset], ldvt, &work[iu], &ldwrku, dum, &
                                c__1, &work[iwork], info);

/*                    Multiply Q in A by left singular vectors of R in   
                      WORK(IU), storing result in U   
                      (Workspace: need N*N) */

                        sgemm_("N", "N", m, n, n, &c_b438, &a[a_offset], lda, 
                                &work[iu], &ldwrku, &c_b416, &u[u_offset], 
                                ldu);

                    } else {

/*                    Insufficient workspace for a fast algorithm */

                        itau = 1;
                        iwork = itau + *n;

/*                    Compute A=Q*R, copying result to U   
                      (Workspace: need 2*N, prefer N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);
                        slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
                                ldu);

/*                    Generate Q in U   
                      (Workspace: need 2*N, prefer N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
                                work[iwork], &i__2, &ierr);

/*                    Copy R to VT, zeroing out below it */

                        slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
                                ldvt);
                        i__2 = *n - 1;
                        i__3 = *n - 1;
                        slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &vt_ref(
                                2, 1), ldvt);
                        ie = itau;
                        itauq = ie + *n;
                        itaup = itauq + *n;
                        iwork = itaup + *n;

/*                    Bidiagonalize R in VT   
                      (Workspace: need 4*N, prefer 3*N+2*N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], 
                                &work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);

/*                    Multiply Q in U by left bidiagonalizing vectors   
                      in VT   
                      (Workspace: need 3*N+M, prefer 3*N+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, 
                                &work[itauq], &u[u_offset], ldu, &work[iwork],
                                 &i__2, &ierr);

/*                    Generate right bidiagonalizing vectors in VT   
                      (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
                                itaup], &work[iwork], &i__2, &ierr);
                        iwork = ie + *n;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of A in U and computing right   
                      singular vectors of A in VT   
                      (Workspace: need BDSPAC) */

                        sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
                                vt_offset], ldvt, &u[u_offset], ldu, dum, &
                                c__1, &work[iwork], info);

                    }

                }

            } else if (wntua) {

                if (wntvn) {

/*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')   
                   M left singular vectors to be computed in U and   
                   no right singular vectors to be computed   

   Computing MAX */
                    i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
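/*                    The comma expression above sets I__2 = max(N+M, 4*N);
                      the N+M term accounts for generating the full M-by-M Q
                      in U on this path. */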
                    if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {

/*                    Sufficient workspace for a fast algorithm */

                        ir = 1;
                        if (*lwork >= wrkbl + *lda * *n) {

/*                       WORK(IR) is LDA by N */

                            ldwrkr = *lda;
                        } else {

/*                       WORK(IR) is N by N */

                            ldwrkr = *n;
                        }
                        itau = ir + ldwrkr * *n;
                        iwork = itau + *n;

/*                    Compute A=Q*R, copying result to U   
                      (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);
                        slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
                                ldu);

/*                    Copy R to WORK(IR), zeroing out below it */

                        slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &
                                ldwrkr);
                        i__2 = *n - 1;
                        i__3 = *n - 1;
                        slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[ir 
                                + 1], &ldwrkr);

/*                    Generate Q in U   
                      (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
                                work[iwork], &i__2, &ierr);
                        ie = itau;
                        itauq = ie + *n;
                        itaup = itauq + *n;
                        iwork = itaup + *n;

/*                    Bidiagonalize R in WORK(IR)   
                      (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);

/*                    Generate left bidiagonalizing vectors in WORK(IR)   
                      (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq]
                                , &work[iwork], &i__2, &ierr);
                        iwork = ie + *n;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of R in WORK(IR)   
                      (Workspace: need N*N+BDSPAC) */

                        sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], 
                                dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, &
                                work[iwork], info);

/*                    Multiply Q in U by left singular vectors of R in   
                      WORK(IR), storing result in A   
                      (Workspace: need N*N) */

                        sgemm_("N", "N", m, n, n, &c_b438, &u[u_offset], ldu, 
                                &work[ir], &ldwrkr, &c_b416, &a[a_offset], 
                                lda);

/*                    Copy left singular vectors of A from A to U */

                        slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 
                                ldu);

                    } else {

/*                    Insufficient workspace for a fast algorithm */

                        itau = 1;
                        iwork = itau + *n;

/*                    Compute A=Q*R, copying result to U   
                      (Workspace: need 2*N, prefer N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);
                        slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
                                ldu);

/*                    Generate Q in U   
                      (Workspace: need N+M, prefer N+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
                                work[iwork], &i__2, &ierr);
                        ie = itau;
                        itauq = ie + *n;
                        itaup = itauq + *n;
                        iwork = itaup + *n;

/*                    Zero out below R in A */

                        i__2 = *n - 1;
                        i__3 = *n - 1;
                        slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &a_ref(2,
                                 1), lda);

/*                    Bidiagonalize R in A   
                      (Workspace: need 4*N, prefer 3*N+2*N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);

/*                    Multiply Q in U by left bidiagonalizing vectors   
                      in A   
                      (Workspace: need 3*N+M, prefer 3*N+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
                                work[itauq], &u[u_offset], ldu, &work[iwork], 
                                &i__2, &ierr);
                        iwork = ie + *n;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of A in U   
                      (Workspace: need BDSPAC) */

                        sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], 
                                dum, &c__1, &u[u_offset], ldu, dum, &c__1, &
                                work[iwork], info);

                    }

                } else if (wntvo) {

/*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')   
                   M left singular vectors to be computed in U and   
                   N right singular vectors to be overwritten on A   

   Computing MAX */
                    i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
                    if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) {
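
/*                    (Here the test is LWORK >= 2*N*N + max(N+M, 4*N, BDSPAC):
                      two N-by-N scratch blocks are needed, WORK(IU) for the left
                      vectors of R and WORK(IR) for the right vectors.) */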

/*                    Sufficient workspace for a fast algorithm */

                        iu = 1;
                        if (*lwork >= wrkbl + (*lda << 1) * *n) {

/*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N */

                            ldwrku = *lda;
                            ir = iu + ldwrku * *n;
                            ldwrkr = *lda;
                        } else if (*lwork >= wrkbl + (*lda + *n) * *n) {

/*                       WORK(IU) is LDA by N and WORK(IR) is N by N */

                            ldwrku = *lda;
                            ir = iu + ldwrku * *n;
                            ldwrkr = *n;
                        } else {

/*                       WORK(IU) is N by N and WORK(IR) is N by N */

                            ldwrku = *n;
                            ir = iu + ldwrku * *n;
                            ldwrkr = *n;
                        }
                        itau = ir + ldwrkr * *n;
                        iwork = itau + *n;
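
/*                    WORK is laid out as WORK(IU), then WORK(IR), then ITAU
                      (N Householder scalars), then the blocked workspace starting
                      at IWORK; LDWRKU and LDWRKR are LDA when the extra space
                      allows, otherwise N. */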

/*                    Compute A=Q*R, copying result to U   
                      (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);
                        slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
                                ldu);

/*                    Generate Q in U   
                      (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
                                work[iwork], &i__2, &ierr);

/*                    Copy R to WORK(IU), zeroing out below it */

                        slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
                                ldwrku);
                        i__2 = *n - 1;
                        i__3 = *n - 1;
                        slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[iu 
                                + 1], &ldwrku);
                        ie = itau;
                        itauq = ie + *n;
                        itaup = itauq + *n;
                        iwork = itaup + *n;

/*                    Bidiagonalize R in WORK(IU), copying result to   
                      WORK(IR)   
                      (Workspace: need 2*N*N+4*N,   
                                  prefer 2*N*N+3*N+2*N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);
                        slacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], &
                                ldwrkr);

/*                    Generate left bidiagonalizing vectors in WORK(IU)   
                      (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
                                , &work[iwork], &i__2, &ierr);

/*                    Generate right bidiagonalizing vectors in WORK(IR)   
                      (Workspace: need 2*N*N+4*N-1,   
                                  prefer 2*N*N+3*N+(N-1)*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup]
                                , &work[iwork], &i__2, &ierr);
                        iwork = ie + *n;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of R in WORK(IU) and computing   
                      right singular vectors of R in WORK(IR)   
                      (Workspace: need 2*N*N+BDSPAC) */

                        sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[
                                ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, 
                                &work[iwork], info);

/*                    Multiply Q in U by left singular vectors of R in   
                      WORK(IU), storing result in A   
                      (Workspace: need N*N) */

                        sgemm_("N", "N", m, n, n, &c_b438, &u[u_offset], ldu, 
                                &work[iu], &ldwrku, &c_b416, &a[a_offset], 
                                lda);

/*                    Copy left singular vectors of A from A to U */

                        slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 
                                ldu);

/*                    Copy right singular vectors of R from WORK(IR) to A */

                        slacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], 
                                lda);

                    } else {

/*                    Insufficient workspace for a fast algorithm */

                        itau = 1;
                        iwork = itau + *n;

/*                    Compute A=Q*R, copying result to U   
                      (Workspace: need 2*N, prefer N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);
                        slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
                                ldu);

/*                    Generate Q in U   
                      (Workspace: need N+M, prefer N+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
                                work[iwork], &i__2, &ierr);
                        ie = itau;
                        itauq = ie + *n;
                        itaup = itauq + *n;
                        iwork = itaup + *n;

/*                    Zero out below R in A */

                        i__2 = *n - 1;
                        i__3 = *n - 1;
                        slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &a_ref(2,
                                 1), lda);

/*                    Bidiagonalize R in A   
                      (Workspace: need 4*N, prefer 3*N+2*N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);

/*                    Multiply Q in U by left bidiagonalizing vectors   
                      in A   
                      (Workspace: need 3*N+M, prefer 3*N+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
                                work[itauq], &u[u_offset], ldu, &work[iwork], 
                                &i__2, &ierr);

/*                    Generate right bidiagonalizing vectors in A   
                      (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup],
                                 &work[iwork], &i__2, &ierr);
                        iwork = ie + *n;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of A in U and computing right   
                      singular vectors of A in A   
                      (Workspace: need BDSPAC) */

                        sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[
                                a_offset], lda, &u[u_offset], ldu, dum, &c__1,
                                 &work[iwork], info);

                    }

                } else if (wntvas) {

/*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'   
                           or 'A')   
                   M left singular vectors to be computed in U and   
                   N right singular vectors to be computed in VT   

   Computing MAX */
                    i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
                    if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
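
/*                    (The test is again LWORK >= N*N + max(N+M, 4*N, BDSPAC);
                      only one N-by-N scratch block, WORK(IU), is needed here
                      because the right vectors are formed directly in VT.) */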

/*                    Sufficient workspace for a fast algorithm */

                        iu = 1;
                        if (*lwork >= wrkbl + *lda * *n) {

/*                       WORK(IU) is LDA by N */

                            ldwrku = *lda;
                        } else {

/*                       WORK(IU) is N by N */

                            ldwrku = *n;
                        }
                        itau = iu + ldwrku * *n;
                        iwork = itau + *n;

/*                    Compute A=Q*R, copying result to U   
                      (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);
                        slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
                                ldu);

/*                    Generate Q in U   
                      (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
                                work[iwork], &i__2, &ierr);

/*                    Copy R to WORK(IU), zeroing out below it */

                        slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
                                ldwrku);
                        i__2 = *n - 1;
                        i__3 = *n - 1;
                        slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[iu 
                                + 1], &ldwrku);
                        ie = itau;
                        itauq = ie + *n;
                        itaup = itauq + *n;
                        iwork = itaup + *n;

/*                    Bidiagonalize R in WORK(IU), copying result to VT   
                      (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);
                        slacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset],
                                 ldvt);

/*                    Generate left bidiagonalizing vectors in WORK(IU)   
                      (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
                                , &work[iwork], &i__2, &ierr);

/*                    Generate right bidiagonalizing vectors in VT   
                      (Workspace: need N*N+4*N-1,   
                                  prefer N*N+3*N+(N-1)*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
                                itaup], &work[iwork], &i__2, &ierr);
                        iwork = ie + *n;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of R in WORK(IU) and computing   
                      right singular vectors of R in VT   
                      (Workspace: need N*N+BDSPAC) */

                        sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
                                vt_offset], ldvt, &work[iu], &ldwrku, dum, &
                                c__1, &work[iwork], info);

/*                    Multiply Q in U by left singular vectors of R in   
                      WORK(IU), storing result in A   
                      (Workspace: need N*N) */

                        sgemm_("N", "N", m, n, n, &c_b438, &u[u_offset], ldu, 
                                &work[iu], &ldwrku, &c_b416, &a[a_offset], 
                                lda);

/*                    Copy left singular vectors of A from A to U */

                        slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 
                                ldu);

                    } else {

/*                    Insufficient workspace for a fast algorithm */

                        itau = 1;
                        iwork = itau + *n;

/*                    Compute A=Q*R, copying result to U   
                      (Workspace: need 2*N, prefer N+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);
                        slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
                                ldu);

/*                    Generate Q in U   
                      (Workspace: need N+M, prefer N+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
                                work[iwork], &i__2, &ierr);

/*                    Copy R from A to VT, zeroing out below it */

                        slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
                                ldvt);
                        i__2 = *n - 1;
                        i__3 = *n - 1;
                        slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &vt_ref(
                                2, 1), ldvt);
                        ie = itau;
                        itauq = ie + *n;
                        itaup = itauq + *n;
                        iwork = itaup + *n;

/*                    Bidiagonalize R in VT   
                      (Workspace: need 4*N, prefer 3*N+2*N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], 
                                &work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);

/*                    Multiply Q in U by left bidiagonalizing vectors   
                      in VT   
                      (Workspace: need 3*N+M, prefer 3*N+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, 
                                &work[itauq], &u[u_offset], ldu, &work[iwork],
                                 &i__2, &ierr);

/*                    Generate right bidiagonalizing vectors in VT   
                      (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
                                itaup], &work[iwork], &i__2, &ierr);
                        iwork = ie + *n;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of A in U and computing right   
                      singular vectors of A in VT   
                      (Workspace: need BDSPAC) */

                        sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
                                vt_offset], ldvt, &u[u_offset], ldu, dum, &
                                c__1, &work[iwork], info);

                    }

                }

            }

        } else {

/*           M .LT. MNTHR   

             Path 10 (M at least N, but not much larger)   
             Reduce to bidiagonal form without QR decomposition */

            ie = 1;
            itauq = ie + *n;
            itaup = itauq + *n;
            iwork = itaup + *n;
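
/*           WORK layout for this path: the first N entries are reserved for the
             superdiagonal E of the bidiagonal form, the next N for the scalars
             TAUQ, the next N for TAUP, and IWORK marks the start of the
             workspace passed to the blocked routines. */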

/*           Bidiagonalize A   
             (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */

            i__2 = *lwork - iwork + 1;
            sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
                    work[itaup], &work[iwork], &i__2, &ierr);
            if (wntuas) {

/*              If left singular vectors desired in U, copy result to U   
                and generate left bidiagonalizing vectors in U   
                (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB) */

                slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
                if (wntus) {
                    ncu = *n;
                }
                if (wntua) {
                    ncu = *m;
                }
                i__2 = *lwork - iwork + 1;
                sorgbr_("Q", m, &ncu, n, &u[u_offset], ldu, &work[itauq], &
                        work[iwork], &i__2, &ierr);
            }
            if (wntvas) {

/*              If right singular vectors desired in VT, copy result to   
                VT and generate right bidiagonalizing vectors in VT   
                (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */

                slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
                i__2 = *lwork - iwork + 1;
                sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], &
                        work[iwork], &i__2, &ierr);
            }
            if (wntuo) {

/*              If left singular vectors desired in A, generate left   
                bidiagonalizing vectors in A   
                (Workspace: need 4*N, prefer 3*N+N*NB) */

                i__2 = *lwork - iwork + 1;
                sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[
                        iwork], &i__2, &ierr);
            }
            if (wntvo) {

/*              If right singular vectors desired in A, generate right   
                bidiagonalizing vectors in A   
                (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */

                i__2 = *lwork - iwork + 1;
                sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[
                        iwork], &i__2, &ierr);
            }
            iwork = ie + *n;
            if (wntuas || wntuo) {
                nru = *m;
            }
            if (wntun) {
                nru = 0;
            }
            if (wntvas || wntvo) {
                ncvt = *n;
            }
            if (wntvn) {
                ncvt = 0;
            }
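
/*              NRU and NCVT tell SBDSQR how much to rotate: M rows of left
                singular vectors when they are wanted (in U or overwritten on A),
                N columns of VT holding right singular vectors when wanted, and
                zero otherwise. */
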
            if (! wntuo && ! wntvo) {

/*              Perform bidiagonal QR iteration, if desired, computing   
                left singular vectors in U and computing right singular   
                vectors in VT   
                (Workspace: need BDSPAC) */

                sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
                        vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, &
                        work[iwork], info);
            } else if (! wntuo && wntvo) {

/*              Perform bidiagonal QR iteration, if desired, computing   
                left singular vectors in U and computing right singular   
                vectors in A   
                (Workspace: need BDSPAC) */

                sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[
                        a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[
                        iwork], info);
            } else {

/*              Perform bidiagonal QR iteration, if desired, computing   
                left singular vectors in A and computing right singular   
                vectors in VT   
                (Workspace: need BDSPAC) */

                sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
                        vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
                        work[iwork], info);
            }

        }

    } else {

/*        A has more columns than rows. If A has sufficiently more   
          columns than rows, first reduce using the LQ decomposition (if   
          sufficient workspace available) */

        if (*n >= mnthr) {

            if (wntvn) {

/*              Path 1t(N much larger than M, JOBVT='N')   
                No right singular vectors to be computed */

                itau = 1;
                iwork = itau + *m;

/*              Compute A=L*Q   
                (Workspace: need 2*M, prefer M+M*NB) */

                i__2 = *lwork - iwork + 1;
                sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &
                        i__2, &ierr);

/*              Zero out above L */

                i__2 = *m - 1;
                i__3 = *m - 1;
                slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &a_ref(1, 2), 
                        lda);
                ie = 1;
                itauq = ie + *m;
                itaup = itauq + *m;
                iwork = itaup + *m;

/*              Bidiagonalize L in A   
                (Workspace: need 4*M, prefer 3*M+2*M*NB) */

                i__2 = *lwork - iwork + 1;
                sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[
                        itauq], &work[itaup], &work[iwork], &i__2, &ierr);
                if (wntuo || wntuas) {

/*                 If left singular vectors desired, generate Q   
                   (Workspace: need 4*M, prefer 3*M+M*NB) */

                    i__2 = *lwork - iwork + 1;
                    sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], &
                            work[iwork], &i__2, &ierr);
                }
                iwork = ie + *m;
                nru = 0;
                if (wntuo || wntuas) {
                    nru = *m;
                }

/*              Perform bidiagonal QR iteration, computing left singular   
                vectors of A in A if desired   
                (Workspace: need BDSPAC) */

                sbdsqr_("U", m, &c__0, &nru, &c__0, &s[1], &work[ie], dum, &
                        c__1, &a[a_offset], lda, dum, &c__1, &work[iwork], 
                        info);

/*              If left singular vectors desired in U, copy them there */

                if (wntuas) {
                    slacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu);
                }

            } else if (wntvo && wntun) {

/*              Path 2t(N much larger than M, JOBU='N', JOBVT='O')   
                M right singular vectors to be overwritten on A and   
                no left singular vectors to be computed   

   Computing MAX */
                i__2 = *m << 2;
                if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
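
/*                 (The test is LWORK >= M*M + max(4*M, BDSPAC); the M*M block
                   WORK(IR) holds an M-by-M copy of L.) */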

/*                 Sufficient workspace for a fast algorithm */

                    ir = 1;
/* Computing MAX */
                    i__2 = wrkbl, i__3 = *lda * *n + *m;
                    if (*lwork >= f2cmax(i__2,i__3) + *lda * *m) {

/*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M */

                        ldwrku = *lda;
                        chunk = *n;
                        ldwrkr = *lda;
                    } else /* if(complicated condition) */ {
/* Computing MAX */
                        i__2 = wrkbl, i__3 = *lda * *n + *m;
                        if (*lwork >= f2cmax(i__2,i__3) + *m * *m) {

/*                    WORK(IU) is LDA by N and WORK(IR) is M by M */

                            ldwrku = *lda;
                            chunk = *n;
                            ldwrkr = *m;
                        } else {

/*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M */

                            ldwrku = *m;
                            chunk = (*lwork - *m * *m - *m) / *m;
                            ldwrkr = *m;
                        }
                    }
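
/*                 With the layout fixed, the right singular vectors of L (in
                   WORK(IR)) will later be multiplied by Q (held in A) CHUNK
                   columns at a time; CHUNK is N when an LDA-by-N panel fits in
                   WORK(IU), otherwise (LWORK - M*M - M) / M. */
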
                    itau = ir + ldwrkr * *m;
                    iwork = itau + *m;

/*                 Compute A=L*Q   
                   (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */

                    i__2 = *lwork - iwork + 1;
                    sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
                            , &i__2, &ierr);

/*                 Copy L to WORK(IR) and zero out above it */

                    slacpy_("L", m, m, &a[a_offset], lda, &work[ir], &ldwrkr);
                    i__2 = *m - 1;
                    i__3 = *m - 1;
                    slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[ir + 
                            ldwrkr], &ldwrkr);

/*                 Generate Q in A   
                   (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */

                    i__2 = *lwork - iwork + 1;
                    sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
                            iwork], &i__2, &ierr);
                    ie = itau;
                    itauq = ie + *m;
                    itaup = itauq + *m;
                    iwork = itaup + *m;

/*                 Bidiagonalize L in WORK(IR)   
                   (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */

                    i__2 = *lwork - iwork + 1;
                    sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
                            itauq], &work[itaup], &work[iwork], &i__2, &ierr);

/*                 Generate right vectors bidiagonalizing L   
                   (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */

                    i__2 = *lwork - iwork + 1;
                    sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], &
                            work[iwork], &i__2, &ierr);
                    iwork = ie + *m;

/*                 Perform bidiagonal QR iteration, computing right   
                   singular vectors of L in WORK(IR)   
                   (Workspace: need M*M+BDSPAC) */

                    sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &work[
                            ir], &ldwrkr, dum, &c__1, dum, &c__1, &work[iwork]
                            , info);
                    iu = ie + *m;

/*                 Multiply right singular vectors of L in WORK(IR) by Q   
                   in A, storing result in WORK(IU) and copying to A   
                   (Workspace: need M*M+2*M, prefer M*M+M*N+M) */

                    i__2 = *n;
                    i__3 = chunk;
                    for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
                             i__3) {
/* Computing MIN */
                        i__4 = *n - i__ + 1;
                        blk = f2cmin(i__4,chunk);
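/*                       (the last pass may cover fewer than CHUNK columns) */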
                        sgemm_("N", "N", m, &blk, m, &c_b438, &work[ir], &
                                ldwrkr, &a_ref(1, i__), lda, &c_b416, &work[
                                iu], &ldwrku);
                        slacpy_("F", m, &blk, &work[iu], &ldwrku, &a_ref(1, 
                                i__), lda);
/* L30: */
                    }

                } else {

/*                 Insufficient workspace for a fast algorithm */

                    ie = 1;
                    itauq = ie + *m;
                    itaup = itauq + *m;
                    iwork = itaup + *m;

/*                 Bidiagonalize A   
                   (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */

                    i__3 = *lwork - iwork + 1;
                    sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[
                            itauq], &work[itaup], &work[iwork], &i__3, &ierr);

/*                 Generate right vectors bidiagonalizing A   
                   (Workspace: need 4*M, prefer 3*M+M*NB) */

                    i__3 = *lwork - iwork + 1;
                    sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &
                            work[iwork], &i__3, &ierr);
                    iwork = ie + *m;

/*                 Perform bidiagonal QR iteration, computing right   
                   singular vectors of A in A   
                   (Workspace: need BDSPAC) */

                    sbdsqr_("L", m, n, &c__0, &c__0, &s[1], &work[ie], &a[
                            a_offset], lda, dum, &c__1, dum, &c__1, &work[
                            iwork], info);

                }

            } else if (wntvo && wntuas) {

/*              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')   
                M right singular vectors to be overwritten on A and   
                M left singular vectors to be computed in U   

   Computing MAX */
                i__3 = *m << 2;
                if (*lwork >= *m * *m + f2cmax(i__3,bdspac)) {
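
/*                 (Again LWORK >= M*M + max(4*M, BDSPAC) is required; WORK(IR)
                   is an M-by-M scratch block used for the right vectors while
                   the left vectors are built directly in U.) */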

/*                 Sufficient workspace for a fast algorithm */

                    ir = 1;
/* Computing MAX */
                    i__3 = wrkbl, i__2 = *lda * *n + *m;
                    if (*lwork >= f2cmax(i__3,i__2) + *lda * *m) {

/*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M */

                        ldwrku = *lda;
                        chunk = *n;
                        ldwrkr = *lda;
                    } else /* if(complicated condition) */ {
/* Computing MAX */
                        i__3 = wrkbl, i__2 = *lda * *n + *m;
                        if (*lwork >= f2cmax(i__3,i__2) + *m * *m) {

/*                    WORK(IU) is LDA by N and WORK(IR) is M by M */

                            ldwrku = *lda;
                            chunk = *n;
                            ldwrkr = *m;
                        } else {

/*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M */

                            ldwrku = *m;
                            chunk = (*lwork - *m * *m - *m) / *m;
                            ldwrkr = *m;
                        }
                    }
                    itau = ir + ldwrkr * *m;
                    iwork = itau + *m;

/*                 Compute A=L*Q   
                   (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */

                    i__3 = *lwork - iwork + 1;
                    sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
                            , &i__3, &ierr);

/*                 Copy L to U, zeroing out above it */

                    slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
                    i__3 = *m - 1;
                    i__2 = *m - 1;
                    slaset_("U", &i__3, &i__2, &c_b416, &c_b416, &u_ref(1, 2),
                             ldu);

/*                 Generate Q in A   
                   (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */

                    i__3 = *lwork - iwork + 1;
                    sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
                            iwork], &i__3, &ierr);
                    ie = itau;
                    itauq = ie + *m;
                    itaup = itauq + *m;
                    iwork = itaup + *m;

/*                 Bidiagonalize L in U, copying result to WORK(IR)   
                   (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */

                    i__3 = *lwork - iwork + 1;
                    sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[
                            itauq], &work[itaup], &work[iwork], &i__3, &ierr);
                    slacpy_("U", m, m, &u[u_offset], ldu, &work[ir], &ldwrkr);

/*                 Generate right vectors bidiagonalizing L in WORK(IR)   
                   (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */

                    i__3 = *lwork - iwork + 1;
                    sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], &
                            work[iwork], &i__3, &ierr);

/*                 Generate left vectors bidiagonalizing L in U   
                   (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */

                    i__3 = *lwork - iwork + 1;
                    sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &
                            work[iwork], &i__3, &ierr);
                    iwork = ie + *m;

/*                 Perform bidiagonal QR iteration, computing left   
                   singular vectors of L in U, and computing right   
                   singular vectors of L in WORK(IR)   
                   (Workspace: need M*M+BDSPAC) */

                    sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ir], 
                            &ldwrkr, &u[u_offset], ldu, dum, &c__1, &work[
                            iwork], info);
                    iu = ie + *m;

/*                 Multiply right singular vectors of L in WORK(IR) by Q   
                   in A, storing result in WORK(IU) and copying to A   
                   (Workspace: need M*M+2*M, prefer M*M+M*N+M) */

                    i__3 = *n;
                    i__2 = chunk;
                    for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ +=
                             i__2) {
/* Computing MIN */
                        i__4 = *n - i__ + 1;
                        blk = f2cmin(i__4,chunk);
                        sgemm_("N", "N", m, &blk, m, &c_b438, &work[ir], &
                                ldwrkr, &a_ref(1, i__), lda, &c_b416, &work[
                                iu], &ldwrku);
                        slacpy_("F", m, &blk, &work[iu], &ldwrku, &a_ref(1, 
                                i__), lda);
/* L40: */
                    }

                } else {

/*                 Insufficient workspace for a fast algorithm */

                    itau = 1;
                    iwork = itau + *m;

/*                 Compute A=L*Q   
                   (Workspace: need 2*M, prefer M+M*NB) */

                    i__2 = *lwork - iwork + 1;
                    sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
                            , &i__2, &ierr);

/*                 Copy L to U, zeroing out above it */

                    slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
                    i__2 = *m - 1;
                    i__3 = *m - 1;
                    slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &u_ref(1, 2),
                             ldu);

/*                 Generate Q in A   
                   (Workspace: need 2*M, prefer M+M*NB) */

                    i__2 = *lwork - iwork + 1;
                    sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
                            iwork], &i__2, &ierr);
                    ie = itau;
                    itauq = ie + *m;
                    itaup = itauq + *m;
                    iwork = itaup + *m;

/*                 Bidiagonalize L in U   
                   (Workspace: need 4*M, prefer 3*M+2*M*NB) */

                    i__2 = *lwork - iwork + 1;
                    sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[
                            itauq], &work[itaup], &work[iwork], &i__2, &ierr);

/*                 Multiply right vectors bidiagonalizing L by Q in A   
                   (Workspace: need 3*M+N, prefer 3*M+N*NB) */

                    i__2 = *lwork - iwork + 1;
                    sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &work[
                            itaup], &a[a_offset], lda, &work[iwork], &i__2, &
                            ierr);

/*                 Generate left vectors bidiagonalizing L in U   
                   (Workspace: need 4*M, prefer 3*M+M*NB) */

                    i__2 = *lwork - iwork + 1;
                    sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &
                            work[iwork], &i__2, &ierr);
                    iwork = ie + *m;

/*                 Perform bidiagonal QR iteration, computing left   
                   singular vectors of A in U and computing right   
                   singular vectors of A in A   
                   (Workspace: need BDSPAC) */

                    sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &a[
                            a_offset], lda, &u[u_offset], ldu, dum, &c__1, &
                            work[iwork], info);

                }

            } else if (wntvs) {

                if (wntun) {

/*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')   
                   M right singular vectors to be computed in VT and   
                   no left singular vectors to be computed   

   Computing MAX */
                    i__2 = *m << 2;
                    if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
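
/*                    (The test is LWORK >= M*M + max(4*M, BDSPAC); WORK(IR)
                      holds an M-by-M copy of L whose right singular vectors are
                      later multiplied by Q into VT.) */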

/*                    Sufficient workspace for a fast algorithm */

                        ir = 1;
                        if (*lwork >= wrkbl + *lda * *m) {

/*                       WORK(IR) is LDA by M */

                            ldwrkr = *lda;
                        } else {

/*                       WORK(IR) is M by M */

                            ldwrkr = *m;
                        }
                        itau = ir + ldwrkr * *m;
                        iwork = itau + *m;

/*                    Compute A=L*Q   
                      (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);

/*                    Copy L to WORK(IR), zeroing out above it */

                        slacpy_("L", m, m, &a[a_offset], lda, &work[ir], &
                                ldwrkr);
                        i__2 = *m - 1;
                        i__3 = *m - 1;
                        slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[ir 
                                + ldwrkr], &ldwrkr);

/*                    Generate Q in A   
                      (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
                                work[iwork], &i__2, &ierr);
                        ie = itau;
                        itauq = ie + *m;
                        itaup = itauq + *m;
                        iwork = itaup + *m;

/*                    Bidiagonalize L in WORK(IR)   
                      (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);

/*                    Generate right vectors bidiagonalizing L in   
                      WORK(IR)   
                      (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup]
                                , &work[iwork], &i__2, &ierr);
                        iwork = ie + *m;

/*                    Perform bidiagonal QR iteration, computing right   
                      singular vectors of L in WORK(IR)   
                      (Workspace: need M*M+BDSPAC) */

                        sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &
                                work[ir], &ldwrkr, dum, &c__1, dum, &c__1, &
                                work[iwork], info);

/*                    Multiply right singular vectors of L in WORK(IR) by   
                      Q in A, storing result in VT   
                      (Workspace: need M*M) */

                        sgemm_("N", "N", m, n, m, &c_b438, &work[ir], &ldwrkr,
                                 &a[a_offset], lda, &c_b416, &vt[vt_offset], 
                                ldvt);

                    } else {

/*                    Insufficient workspace for a fast algorithm */

                        itau = 1;
                        iwork = itau + *m;

/*                    Compute A=L*Q   
                      (Workspace: need 2*M, prefer M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);

/*                    Copy result to VT */

                        slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
                                ldvt);

/*                    Generate Q in VT   
                      (Workspace: need 2*M, prefer M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
                                work[iwork], &i__2, &ierr);
                        ie = itau;
                        itauq = ie + *m;
                        itaup = itauq + *m;
                        iwork = itaup + *m;

/*                    Zero out above L in A */

                        i__2 = *m - 1;
                        i__3 = *m - 1;
                        slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &a_ref(1,
                                 2), lda);

/*                    Bidiagonalize L in A   
                      (Workspace: need 4*M, prefer 3*M+2*M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);

/*                    Multiply right vectors bidiagonalizing L by Q in VT   
                      (Workspace: need 3*M+N, prefer 3*M+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
                                work[itaup], &vt[vt_offset], ldvt, &work[
                                iwork], &i__2, &ierr);
                        iwork = ie + *m;

/*                    Perform bidiagonal QR iteration, computing right   
                      singular vectors of A in VT   
                      (Workspace: need BDSPAC) */

                        sbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], &
                                vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, &
                                work[iwork], info);

                    }

                } else if (wntuo) {

/*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')   
                   M right singular vectors to be computed in VT and   
                   M left singular vectors to be overwritten on A   

   Computing MAX */
                    i__2 = *m << 2;
                    if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) {
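
/*                    (The test is LWORK >= 2*M*M + max(4*M, BDSPAC): WORK(IU)
                      receives the right singular vectors of L and WORK(IR) the
                      left ones, each M-by-M.) */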

/*                    Sufficient workspace for a fast algorithm */

                        iu = 1;
                        if (*lwork >= wrkbl + (*lda << 1) * *m) {

/*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M */

                            ldwrku = *lda;
                            ir = iu + ldwrku * *m;
                            ldwrkr = *lda;
                        } else if (*lwork >= wrkbl + (*lda + *m) * *m) {

/*                       WORK(IU) is LDA by M and WORK(IR) is M by M */

                            ldwrku = *lda;
                            ir = iu + ldwrku * *m;
                            ldwrkr = *m;
                        } else {

/*                       WORK(IU) is M by M and WORK(IR) is M by M */

                            ldwrku = *m;
                            ir = iu + ldwrku * *m;
                            ldwrkr = *m;
                        }
                        itau = ir + ldwrkr * *m;
                        iwork = itau + *m;

/*                    Compute A=L*Q   
                      (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);

/*                    Copy L to WORK(IU), zeroing out above it */

                        slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
                                ldwrku);
                        i__2 = *m - 1;
                        i__3 = *m - 1;
                        slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[iu 
                                + ldwrku], &ldwrku);

/*                    Generate Q in A   
                      (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
                                work[iwork], &i__2, &ierr);
                        ie = itau;
                        itauq = ie + *m;
                        itaup = itauq + *m;
                        iwork = itaup + *m;

/*                    Bidiagonalize L in WORK(IU), copying result to   
                      WORK(IR)   
                      (Workspace: need 2*M*M+4*M,   
                                  prefer 2*M*M+3*M+2*M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);
                        slacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], &
                                ldwrkr);

/*                    Generate right bidiagonalizing vectors in WORK(IU)   
                      (Workspace: need 2*M*M+4*M-1,   
                                  prefer 2*M*M+3*M+(M-1)*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
                                , &work[iwork], &i__2, &ierr);

/*                    Generate left bidiagonalizing vectors in WORK(IR)   
                      (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq]
                                , &work[iwork], &i__2, &ierr);
                        iwork = ie + *m;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of L in WORK(IR) and computing   
                      right singular vectors of L in WORK(IU)   
                      (Workspace: need 2*M*M+BDSPAC) */

                        sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
                                iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, 
                                &work[iwork], info);

/*                    Multiply right singular vectors of L in WORK(IU) by   
                      Q in A, storing result in VT   
                      (Workspace: need M*M) */

                        sgemm_("N", "N", m, n, m, &c_b438, &work[iu], &ldwrku,
                                 &a[a_offset], lda, &c_b416, &vt[vt_offset], 
                                ldvt);

/*                    Copy left singular vectors of L to A   
                      (Workspace: need M*M) */

                        slacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], 
                                lda);

                    } else {

/*                    Insufficient workspace for a fast algorithm */

                        itau = 1;
                        iwork = itau + *m;

/*                    Compute A=L*Q, copying result to VT   
                      (Workspace: need 2*M, prefer M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);
                        slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
                                ldvt);

/*                    Generate Q in VT   
                      (Workspace: need 2*M, prefer M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
                                work[iwork], &i__2, &ierr);
                        ie = itau;
                        itauq = ie + *m;
                        itaup = itauq + *m;
                        iwork = itaup + *m;

/*                    Zero out above L in A */

                        i__2 = *m - 1;
                        i__3 = *m - 1;
                        slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &a_ref(1,
                                 2), lda);

/*                    Bidiagonalize L in A   
                      (Workspace: need 4*M, prefer 3*M+2*M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);

/*                    Multiply right vectors bidiagonalizing L by Q in VT   
                      (Workspace: need 3*M+N, prefer 3*M+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
                                work[itaup], &vt[vt_offset], ldvt, &work[
                                iwork], &i__2, &ierr);

/*                    Generate left bidiagonalizing vectors of L in A   
                      (Workspace: need 4*M, prefer 3*M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq],
                                 &work[iwork], &i__2, &ierr);
                        iwork = ie + *m;

/*                    Perform bidiagonal QR iteration, compute left   
                      singular vectors of A in A and compute right   
                      singular vectors of A in VT   
                      (Workspace: need BDSPAC) */

                        sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
                                vt_offset], ldvt, &a[a_offset], lda, dum, &
                                c__1, &work[iwork], info);

                    }

                } else if (wntuas) {

/*                 Path 6t(N much larger than M, JOBU='S' or 'A',   
                           JOBVT='S')   
                   M right singular vectors to be computed in VT and   
                   M left singular vectors to be computed in U   

   Computing MAX */
                    i__2 = *m << 2;
                    if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
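
/*                    (The test is LWORK >= M*M + max(4*M, BDSPAC); a single
                      M-by-M block WORK(IU) holds the right singular vectors of
                      L, while the left vectors are formed directly in U.) */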

/*                    Sufficient workspace for a fast algorithm */

                        iu = 1;
                        if (*lwork >= wrkbl + *lda * *m) {

/*                       WORK(IU) is LDA by M */

                            ldwrku = *lda;
                        } else {

/*                       WORK(IU) is M by M */

                            ldwrku = *m;
                        }
                        itau = iu + ldwrku * *m;
                        iwork = itau + *m;

/*                    Compute A=L*Q   
                      (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);

/*                    Copy L to WORK(IU), zeroing out above it */

                        slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
                                ldwrku);
                        i__2 = *m - 1;
                        i__3 = *m - 1;
                        slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[iu 
                                + ldwrku], &ldwrku);

/*                    Generate Q in A   
                      (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
                                work[iwork], &i__2, &ierr);
                        ie = itau;
                        itauq = ie + *m;
                        itaup = itauq + *m;
                        iwork = itaup + *m;

/*                    Bidiagonalize L in WORK(IU), copying result to U   
                      (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);
                        slacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], 
                                ldu);

/*                    Generate right bidiagonalizing vectors in WORK(IU)   
                      (Workspace: need M*M+4*M-1,   
                                  prefer M*M+3*M+(M-1)*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
                                , &work[iwork], &i__2, &ierr);

/*                    Generate left bidiagonalizing vectors in U   
                      (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
                                 &work[iwork], &i__2, &ierr);
                        iwork = ie + *m;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of L in U and computing right   
                      singular vectors of L in WORK(IU)   
                      (Workspace: need M*M+BDSPAC) */

                        sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
                                iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, &
                                work[iwork], info);

/*                    Multiply right singular vectors of L in WORK(IU) by   
                      Q in A, storing result in VT   
                      (Workspace: need M*M) */

                        sgemm_("N", "N", m, n, m, &c_b438, &work[iu], &ldwrku,
                                 &a[a_offset], lda, &c_b416, &vt[vt_offset], 
                                ldvt);

                    } else {

/*                    Insufficient workspace for a fast algorithm */

                        itau = 1;
                        iwork = itau + *m;

/*                    Compute A=L*Q, copying result to VT   
                      (Workspace: need 2*M, prefer M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);
                        slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
                                ldvt);

/*                    Generate Q in VT   
                      (Workspace: need 2*M, prefer M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
                                work[iwork], &i__2, &ierr);

/*                    Copy L to U, zeroing out above it */

                        slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], 
                                ldu);
                        i__2 = *m - 1;
                        i__3 = *m - 1;
                        slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &u_ref(1,
                                 2), ldu);
                        ie = itau;
                        itauq = ie + *m;
                        itaup = itauq + *m;
                        iwork = itaup + *m;

/*                    Bidiagonalize L in U   
                      (Workspace: need 4*M, prefer 3*M+2*M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);

/*                    Multiply right bidiagonalizing vectors in U by Q   
                      in VT   
                      (Workspace: need 3*M+N, prefer 3*M+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &
                                work[itaup], &vt[vt_offset], ldvt, &work[
                                iwork], &i__2, &ierr);

/*                    Generate left bidiagonalizing vectors in U   
                      (Workspace: need 4*M, prefer 3*M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
                                 &work[iwork], &i__2, &ierr);
                        iwork = ie + *m;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of A in U and computing right   
                      singular vectors of A in VT   
                      (Workspace: need BDSPAC) */

                        sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
                                vt_offset], ldvt, &u[u_offset], ldu, dum, &
                                c__1, &work[iwork], info);

                    }

                }

            } else if (wntva) {

                if (wntun) {

/*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')   
                   N right singular vectors to be computed in VT and   
                   no left singular vectors to be computed   

   Computing MAX */
                    i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
                    if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {

/*                    Sufficient workspace for a fast algorithm */

                        ir = 1;
                        if (*lwork >= wrkbl + *lda * *m) {

/*                       WORK(IR) is LDA by M */

                            ldwrkr = *lda;
                        } else {

/*                       WORK(IR) is M by M */

                            ldwrkr = *m;
                        }
                        itau = ir + ldwrkr * *m;
                        iwork = itau + *m;

/*                    Compute A=L*Q, copying result to VT   
                      (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);
                        slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
                                ldvt);

/*                    Copy L to WORK(IR), zeroing out above it */

                        slacpy_("L", m, m, &a[a_offset], lda, &work[ir], &
                                ldwrkr);
                        i__2 = *m - 1;
                        i__3 = *m - 1;
                        slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[ir 
                                + ldwrkr], &ldwrkr);

/*                    Generate Q in VT   
                      (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
                                work[iwork], &i__2, &ierr);
                        ie = itau;
                        itauq = ie + *m;
                        itaup = itauq + *m;
                        iwork = itaup + *m;

/*                    Bidiagonalize L in WORK(IR)   
                      (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);

/*                    Generate right bidiagonalizing vectors in WORK(IR)   
                      (Workspace: need M*M+4*M-1,   
                                  prefer M*M+3*M+(M-1)*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup]
                                , &work[iwork], &i__2, &ierr);
                        iwork = ie + *m;

/*                    Perform bidiagonal QR iteration, computing right   
                      singular vectors of L in WORK(IR)   
                      (Workspace: need M*M+BDSPAC) */

                        sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &
                                work[ir], &ldwrkr, dum, &c__1, dum, &c__1, &
                                work[iwork], info);

/*                    Multiply right singular vectors of L in WORK(IR) by   
                      Q in VT, storing result in A   
                      (Workspace: need M*M) */

                        sgemm_("N", "N", m, n, m, &c_b438, &work[ir], &ldwrkr,
                                 &vt[vt_offset], ldvt, &c_b416, &a[a_offset], 
                                lda);

/*                    Copy right singular vectors of A from A to VT */

                        slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 
                                ldvt);

                    } else {

/*                    Insufficient workspace for a fast algorithm */

                        itau = 1;
                        iwork = itau + *m;

/*                    Compute A=L*Q, copying result to VT   
                      (Workspace: need 2*M, prefer M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);
                        slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
                                ldvt);

/*                    Generate Q in VT   
                      (Workspace: need M+N, prefer M+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
                                work[iwork], &i__2, &ierr);
                        ie = itau;
                        itauq = ie + *m;
                        itaup = itauq + *m;
                        iwork = itaup + *m;

/*                    Zero out above L in A */

                        i__2 = *m - 1;
                        i__3 = *m - 1;
                        slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &a_ref(1,
                                 2), lda);

/*                    Bidiagonalize L in A   
                      (Workspace: need 4*M, prefer 3*M+2*M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);

/*                    Multiply right bidiagonalizing vectors in A by Q   
                      in VT   
                      (Workspace: need 3*M+N, prefer 3*M+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
                                work[itaup], &vt[vt_offset], ldvt, &work[
                                iwork], &i__2, &ierr);
                        iwork = ie + *m;

/*                    Perform bidiagonal QR iteration, computing right   
                      singular vectors of A in VT   
                      (Workspace: need BDSPAC) */

                        sbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], &
                                vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, &
                                work[iwork], info);

                    }

                } else if (wntuo) {

/*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')   
                   N right singular vectors to be computed in VT and   
                   M left singular vectors to be overwritten on A   

   Computing MAX */
                    i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
                    if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) {

/*                    Sufficient workspace for a fast algorithm */

                        iu = 1;
                        if (*lwork >= wrkbl + (*lda << 1) * *m) {

/*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M */

                            ldwrku = *lda;
                            ir = iu + ldwrku * *m;
                            ldwrkr = *lda;
                        } else if (*lwork >= wrkbl + (*lda + *m) * *m) {

/*                       WORK(IU) is LDA by M and WORK(IR) is M by M */

                            ldwrku = *lda;
                            ir = iu + ldwrku * *m;
                            ldwrkr = *m;
                        } else {

/*                       WORK(IU) is M by M and WORK(IR) is M by M */

                            ldwrku = *m;
                            ir = iu + ldwrku * *m;
                            ldwrkr = *m;
                        }
                        itau = ir + ldwrkr * *m;
                        iwork = itau + *m;

/*                    Compute A=L*Q, copying result to VT   
                      (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);
                        slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
                                ldvt);

/*                    Generate Q in VT   
                      (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
                                work[iwork], &i__2, &ierr);

/*                    Copy L to WORK(IU), zeroing out above it */

                        slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
                                ldwrku);
                        i__2 = *m - 1;
                        i__3 = *m - 1;
                        slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[iu 
                                + ldwrku], &ldwrku);
                        ie = itau;
                        itauq = ie + *m;
                        itaup = itauq + *m;
                        iwork = itaup + *m;

/*                    Bidiagonalize L in WORK(IU), copying result to   
                      WORK(IR)   
                      (Workspace: need 2*M*M+4*M,   
                                  prefer 2*M*M+3*M+2*M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);
                        slacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], &
                                ldwrkr);

/*                    Generate right bidiagonalizing vectors in WORK(IU)   
                      (Workspace: need 2*M*M+4*M-1,   
                                  prefer 2*M*M+3*M+(M-1)*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
                                , &work[iwork], &i__2, &ierr);

/*                    Generate left bidiagonalizing vectors in WORK(IR)   
                      (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq]
                                , &work[iwork], &i__2, &ierr);
                        iwork = ie + *m;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of L in WORK(IR) and computing   
                      right singular vectors of L in WORK(IU)   
                      (Workspace: need 2*M*M+BDSPAC) */

                        sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
                                iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, 
                                &work[iwork], info);

/*                    Multiply right singular vectors of L in WORK(IU) by   
                      Q in VT, storing result in A   
                      (Workspace: need M*M) */

                        sgemm_("N", "N", m, n, m, &c_b438, &work[iu], &ldwrku,
                                 &vt[vt_offset], ldvt, &c_b416, &a[a_offset], 
                                lda);

/*                    Copy right singular vectors of A from A to VT */

                        slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 
                                ldvt);

/*                    Copy left singular vectors of A from WORK(IR) to A */

                        slacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], 
                                lda);

                    } else {

/*                    Insufficient workspace for a fast algorithm */

                        itau = 1;
                        iwork = itau + *m;

/*                    Compute A=L*Q, copying result to VT   
                      (Workspace: need 2*M, prefer M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);
                        slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
                                ldvt);

/*                    Generate Q in VT   
                      (Workspace: need M+N, prefer M+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
                                work[iwork], &i__2, &ierr);
                        ie = itau;
                        itauq = ie + *m;
                        itaup = itauq + *m;
                        iwork = itaup + *m;

/*                    Zero out above L in A */

                        i__2 = *m - 1;
                        i__3 = *m - 1;
                        slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &a_ref(1,
                                 2), lda);

/*                    Bidiagonalize L in A   
                      (Workspace: need 4*M, prefer 3*M+2*M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);

/*                    Multiply right bidiagonalizing vectors in A by Q   
                      in VT   
                      (Workspace: need 3*M+N, prefer 3*M+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
                                work[itaup], &vt[vt_offset], ldvt, &work[
                                iwork], &i__2, &ierr);

/*                    Generate left bidiagonalizing vectors in A   
                      (Workspace: need 4*M, prefer 3*M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq],
                                 &work[iwork], &i__2, &ierr);
                        iwork = ie + *m;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of A in A and computing right   
                      singular vectors of A in VT   
                      (Workspace: need BDSPAC) */

                        sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
                                vt_offset], ldvt, &a[a_offset], lda, dum, &
                                c__1, &work[iwork], info);

                    }

                } else if (wntuas) {

/*                 Path 9t(N much larger than M, JOBU='S' or 'A',   
                           JOBVT='A')   
                   N right singular vectors to be computed in VT and   
                   M left singular vectors to be computed in U   

   Computing MAX */
                    i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
                    if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {

/*                    Sufficient workspace for a fast algorithm */

                        iu = 1;
                        if (*lwork >= wrkbl + *lda * *m) {

/*                       WORK(IU) is LDA by M */

                            ldwrku = *lda;
                        } else {

/*                       WORK(IU) is M by M */

                            ldwrku = *m;
                        }
                        itau = iu + ldwrku * *m;
                        iwork = itau + *m;

/*                    Compute A=L*Q, copying result to VT   
                      (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);
                        slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
                                ldvt);

/*                    Generate Q in VT   
                      (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
                                work[iwork], &i__2, &ierr);

/*                    Copy L to WORK(IU), zeroing out above it */

                        slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
                                ldwrku);
                        i__2 = *m - 1;
                        i__3 = *m - 1;
                        slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[iu 
                                + ldwrku], &ldwrku);
                        ie = itau;
                        itauq = ie + *m;
                        itaup = itauq + *m;
                        iwork = itaup + *m;

/*                    Bidiagonalize L in WORK(IU), copying result to U   
                      (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);
                        slacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], 
                                ldu);

/*                    Generate right bidiagonalizing vectors in WORK(IU)   
                      (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
                                , &work[iwork], &i__2, &ierr);

/*                    Generate left bidiagonalizing vectors in U   
                      (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
                                 &work[iwork], &i__2, &ierr);
                        iwork = ie + *m;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of L in U and computing right   
                      singular vectors of L in WORK(IU)   
                      (Workspace: need M*M+BDSPAC) */

                        sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
                                iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, &
                                work[iwork], info);

/*                    Multiply right singular vectors of L in WORK(IU) by   
                      Q in VT, storing result in A   
                      (Workspace: need M*M) */

                        sgemm_("N", "N", m, n, m, &c_b438, &work[iu], &ldwrku,
                                 &vt[vt_offset], ldvt, &c_b416, &a[a_offset], 
                                lda);

/*                    Copy right singular vectors of A from A to VT */

                        slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 
                                ldvt);

                    } else {

/*                    Insufficient workspace for a fast algorithm */

                        itau = 1;
                        iwork = itau + *m;

/*                    Compute A=L*Q, copying result to VT   
                      (Workspace: need 2*M, prefer M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
                                iwork], &i__2, &ierr);
                        slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
                                ldvt);

/*                    Generate Q in VT   
                      (Workspace: need M+N, prefer M+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
                                work[iwork], &i__2, &ierr);

/*                    Copy L to U, zeroing out above it */

                        slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], 
                                ldu);
                        i__2 = *m - 1;
                        i__3 = *m - 1;
                        slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &u_ref(1,
                                 2), ldu);
                        ie = itau;
                        itauq = ie + *m;
                        itaup = itauq + *m;
                        iwork = itaup + *m;

/*                    Bidiagonalize L in U   
                      (Workspace: need 4*M, prefer 3*M+2*M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &
                                work[itauq], &work[itaup], &work[iwork], &
                                i__2, &ierr);

/*                    Multiply right bidiagonalizing vectors in U by Q   
                      in VT   
                      (Workspace: need 3*M+N, prefer 3*M+N*NB) */

                        i__2 = *lwork - iwork + 1;
                        sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &
                                work[itaup], &vt[vt_offset], ldvt, &work[
                                iwork], &i__2, &ierr);

/*                    Generate left bidiagonalizing vectors in U   
                      (Workspace: need 4*M, prefer 3*M+M*NB) */

                        i__2 = *lwork - iwork + 1;
                        sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
                                 &work[iwork], &i__2, &ierr);
                        iwork = ie + *m;

/*                    Perform bidiagonal QR iteration, computing left   
                      singular vectors of A in U and computing right   
                      singular vectors of A in VT   
                      (Workspace: need BDSPAC) */

                        sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
                                vt_offset], ldvt, &u[u_offset], ldu, dum, &
                                c__1, &work[iwork], info);

                    }

                }

            }

        } else {

/*           N .LT. MNTHR   

             Path 10t(N greater than M, but not much larger)   
             Reduce to bidiagonal form without LQ decomposition */

            ie = 1;
            itauq = ie + *m;
            itaup = itauq + *m;
            iwork = itaup + *m;

/*           Bidiagonalize A   
             (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */

            i__2 = *lwork - iwork + 1;
            sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
                    work[itaup], &work[iwork], &i__2, &ierr);
            if (wntuas) {

/*              If left singular vectors desired in U, copy result to U   
                and generate left bidiagonalizing vectors in U   
                (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */

                slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
                i__2 = *lwork - iwork + 1;
                sorgbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[
                        iwork], &i__2, &ierr);
            }
            if (wntvas) {

/*              If right singular vectors desired in VT, copy result to   
                VT and generate right bidiagonalizing vectors in VT   
                (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB) */

                slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
                if (wntva) {
                    nrvt = *n;
                }
                if (wntvs) {
                    nrvt = *m;
                }
                i__2 = *lwork - iwork + 1;
                sorgbr_("P", &nrvt, n, m, &vt[vt_offset], ldvt, &work[itaup], 
                        &work[iwork], &i__2, &ierr);
            }
            if (wntuo) {

/*              If left singular vectors desired in A, generate left   
                bidiagonalizing vectors in A   
                (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */

                i__2 = *lwork - iwork + 1;
                sorgbr_("Q", m, m, n, &a[a_offset], lda, &work[itauq], &work[
                        iwork], &i__2, &ierr);
            }
            if (wntvo) {

/*              If right singular vectors desired in A, generate right   
                bidiagonalizing vectors in A   
                (Workspace: need 4*M, prefer 3*M+M*NB) */

                i__2 = *lwork - iwork + 1;
                sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[
                        iwork], &i__2, &ierr);
            }
            iwork = ie + *m;
            if (wntuas || wntuo) {
                nru = *m;
            }
            if (wntun) {
                nru = 0;
            }
            if (wntvas || wntvo) {
                ncvt = *n;
            }
            if (wntvn) {
                ncvt = 0;
            }
            if (! wntuo && ! wntvo) {

/*              Perform bidiagonal QR iteration, if desired, computing   
                left singular vectors in U and computing right singular   
                vectors in VT   
                (Workspace: need BDSPAC) */

                sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
                        vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, &
                        work[iwork], info);
            } else if (! wntuo && wntvo) {

/*              Perform bidiagonal QR iteration, if desired, computing   
                left singular vectors in U and computing right singular   
                vectors in A   
                (Workspace: need BDSPAC) */

                sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[
                        a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[
                        iwork], info);
            } else {

/*              Perform bidiagonal QR iteration, if desired, computing   
                left singular vectors in A and computing right singular   
                vectors in VT   
                (Workspace: need BDSPAC) */

                sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
                        vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
                        work[iwork], info);
            }

        }

    }

/*     If SBDSQR failed to converge, copy unconverged superdiagonals   
       to WORK( 2:MINMN ) */

    if (*info != 0) {
        if (ie > 2) {
            i__2 = minmn - 1;
            for (i__ = 1; i__ <= i__2; ++i__) {
                work[i__ + 1] = work[i__ + ie - 1];
/* L50: */
            }
        }
        if (ie < 2) {
            for (i__ = minmn - 1; i__ >= 1; --i__) {
                work[i__ + 1] = work[i__ + ie - 1];
/* L60: */
            }
        }
    }

/*     Undo scaling if necessary */

    if (iscl == 1) {
        if (anrm > bignum) {
            slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
                    minmn, &ierr);
        }
        if (*info != 0 && anrm > bignum) {
            i__2 = minmn - 1;
            slascl_("G", &c__0, &c__0, &bignum, &anrm, &i__2, &c__1, &work[2],
                     &minmn, &ierr);
        }
        if (anrm < smlnum) {
            slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
                    minmn, &ierr);
        }
        if (*info != 0 && anrm < smlnum) {
            i__2 = minmn - 1;
            slascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__2, &c__1, &work[2],
                     &minmn, &ierr);
        }
    }

/*     Return optimal workspace in WORK(1) */

    work[1] = (real) maxwrk;

    return 0;

/*     End of SGESVD */

} /* sgesvd_ */
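For reference, a minimal usage sketch of sgesvd_ in the f2c calling convention used throughout this file: every argument is passed by pointer and matrices are stored column-major. The prototype below is an assumption, written to match the standard LAPACK SGESVD interface with no trailing ftnlen arguments, in line with the other routines in this file; the matrix values, workspace size and variable names are purely illustrative.

    /* Hedged sketch: compute the SVD of a 3-by-2 matrix with sgesvd_.
       The extern declaration is an assumption; prefer the declaration
       from lapackblas.h if it is available. */
    #include <stdio.h>

    typedef int integer;
    typedef float real;

    extern int sgesvd_(const char *jobu, const char *jobvt, integer *m,
                       integer *n, real *a, integer *lda, real *s, real *u,
                       integer *ldu, real *vt, integer *ldvt, real *work,
                       integer *lwork, integer *info);

    int main(void)
    {
        real a[6] = { 1.f, 2.f, 3.f,   4.f, 5.f, 6.f };  /* column-major 3x2 */
        real s[2], u[9], vt[4], work[64];
        integer m = 3, n = 2, lda = 3, ldu = 3, ldvt = 2, lwork = 64, info = 0;

        /* JOBU="A", JOBVT="A": all columns of U and all rows of V**T */
        sgesvd_("A", "A", &m, &n, a, &lda, s, u, &ldu, vt, &ldvt,
                work, &lwork, &info);

        if (info == 0)
            printf("singular values: %g %g\n", (double) s[0], (double) s[1]);
        else
            printf("sgesvd_ did not converge, info = %d\n", (int) info);
        return 0;
    }

Per the standard SGESVD documentation, LWORK must be at least MAX(3*MIN(M,N)+MAX(M,N), 5*MIN(M,N)); 64 is generous here, and on exit WORK(1) returns the optimal size (maxwrk above).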
int slabrd_ (integer *m, integer *n, integer *nb, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *x, integer *ldx, real *y, integer *ldy)

Definition at line 24278 of file lapackblas.cpp.

References a_ref, f2cmin, sgemv_(), slarfg_(), sscal_(), x_ref, and y_ref.

Referenced by sgebrd_().

{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    SLABRD reduces the first NB rows and columns of a real general   
    m by n matrix A to upper or lower bidiagonal form by an orthogonal   
    transformation Q' * A * P, and returns the matrices X and Y which   
    are needed to apply the transformation to the unreduced part of A.   

    If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower   
    bidiagonal form.   

    This is an auxiliary routine called by SGEBRD   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows in the matrix A.   

    N       (input) INTEGER   
            The number of columns in the matrix A.   

    NB      (input) INTEGER   
            The number of leading rows and columns of A to be reduced.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the m by n general matrix to be reduced.   
            On exit, the first NB rows and columns of the matrix are   
            overwritten; the rest of the array is unchanged.   
            If m >= n, elements on and below the diagonal in the first NB   
              columns, with the array TAUQ, represent the orthogonal   
              matrix Q as a product of elementary reflectors; and   
              elements above the diagonal in the first NB rows, with the   
              array TAUP, represent the orthogonal matrix P as a product   
              of elementary reflectors.   
            If m < n, elements below the diagonal in the first NB   
              columns, with the array TAUQ, represent the orthogonal   
              matrix Q as a product of elementary reflectors, and   
              elements on and above the diagonal in the first NB rows,   
              with the array TAUP, represent the orthogonal matrix P as   
              a product of elementary reflectors.   
            See Further Details.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    D       (output) REAL array, dimension (NB)   
            The diagonal elements of the first NB rows and columns of   
            the reduced matrix.  D(i) = A(i,i).   

    E       (output) REAL array, dimension (NB)   
            The off-diagonal elements of the first NB rows and columns of   
            the reduced matrix.   

    TAUQ    (output) REAL array dimension (NB)   
            The scalar factors of the elementary reflectors which   
            represent the orthogonal matrix Q. See Further Details.   

    TAUP    (output) REAL array, dimension (NB)   
            The scalar factors of the elementary reflectors which   
            represent the orthogonal matrix P. See Further Details.   

    X       (output) REAL array, dimension (LDX,NB)   
            The m-by-nb matrix X required to update the unreduced part   
            of A.   

    LDX     (input) INTEGER   
            The leading dimension of the array X. LDX >= M.   

    Y       (output) REAL array, dimension (LDY,NB)   
            The n-by-nb matrix Y required to update the unreduced part   
            of A.   

    LDY     (input) INTEGER   
            The leading dimension of the array Y. LDY >= N.   

    Further Details   
    ===============   

    The matrices Q and P are represented as products of elementary   
    reflectors:   

       Q = H(1) H(2) . . . H(nb)  and  P = G(1) G(2) . . . G(nb)   

    Each H(i) and G(i) has the form:   

       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'   

    where tauq and taup are real scalars, and v and u are real vectors.   

    If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in   
    A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in   
    A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).   

    If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in   
    A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in   
    A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).   

    The elements of the vectors v and u together form the m-by-nb matrix   
    V and the nb-by-n matrix U' which are needed, with X and Y, to apply   
    the transformation to the unreduced part of the matrix, using a block   
    update of the form:  A := A - V*Y' - X*U'.   

    The contents of A on exit are illustrated by the following examples   
    with nb = 2:   

    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):   

      (  1   1   u1  u1  u1 )           (  1   u1  u1  u1  u1  u1 )   
      (  v1  1   1   u2  u2 )           (  1   1   u2  u2  u2  u2 )   
      (  v1  v2  a   a   a  )           (  v1  1   a   a   a   a  )   
      (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )   
      (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )   
      (  v1  v2  a   a   a  )   

    where a denotes an element of the original matrix which is unchanged,   
    vi denotes an element of the vector defining H(i), and ui an element   
    of the vector defining G(i).   

    =====================================================================   


       Quick return if possible   

       Parameter adjustments */
    /* Table of constant values */
    static real c_b4 = -1.f;
    static real c_b5 = 1.f;
    static integer c__1 = 1;
    static real c_b16 = 0.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, 
            i__3;
    /* Local variables */
    static integer i__;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
            sgemv_(const char *, integer *, integer *, real *, real *, integer *, 
            real *, integer *, real *, real *, integer *), slarfg_(
            integer *, real *, real *, integer *, real *);
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1]
#define y_ref(a_1,a_2) y[(a_2)*y_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --d__;
    --e;
    --tauq;
    --taup;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    y_dim1 = *ldy;
    y_offset = 1 + y_dim1 * 1;
    y -= y_offset;

    /* Function Body */
    if (*m <= 0 || *n <= 0) {
        return 0;
    }

    if (*m >= *n) {

/*        Reduce to upper bidiagonal form */

        i__1 = *nb;
        for (i__ = 1; i__ <= i__1; ++i__) {

/*           Update A(i:m,i) */

            i__2 = *m - i__ + 1;
            i__3 = i__ - 1;
            sgemv_("No transpose", &i__2, &i__3, &c_b4, &a_ref(i__, 1), lda, &
                    y_ref(i__, 1), ldy, &c_b5, &a_ref(i__, i__), &c__1);
            i__2 = *m - i__ + 1;
            i__3 = i__ - 1;
            sgemv_("No transpose", &i__2, &i__3, &c_b4, &x_ref(i__, 1), ldx, &
                    a_ref(1, i__), &c__1, &c_b5, &a_ref(i__, i__), &c__1);

/*           Generate reflection Q(i) to annihilate A(i+1:m,i)   

   Computing MIN */
            i__2 = i__ + 1;
            i__3 = *m - i__ + 1;
            slarfg_(&i__3, &a_ref(i__, i__), &a_ref(f2cmin(i__2,*m), i__), &c__1,
                     &tauq[i__]);
            d__[i__] = a_ref(i__, i__);
            if (i__ < *n) {
                a_ref(i__, i__) = 1.f;

/*              Compute Y(i+1:n,i) */

                i__2 = *m - i__ + 1;
                i__3 = *n - i__;
                sgemv_("Transpose", &i__2, &i__3, &c_b5, &a_ref(i__, i__ + 1),
                         lda, &a_ref(i__, i__), &c__1, &c_b16, &y_ref(i__ + 1,
                         i__), &c__1);
                i__2 = *m - i__ + 1;
                i__3 = i__ - 1;
                sgemv_("Transpose", &i__2, &i__3, &c_b5, &a_ref(i__, 1), lda, 
                        &a_ref(i__, i__), &c__1, &c_b16, &y_ref(1, i__), &
                        c__1);
                i__2 = *n - i__;
                i__3 = i__ - 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b4, &y_ref(i__ + 1, 1)
                        , ldy, &y_ref(1, i__), &c__1, &c_b5, &y_ref(i__ + 1, 
                        i__), &c__1);
                i__2 = *m - i__ + 1;
                i__3 = i__ - 1;
                sgemv_("Transpose", &i__2, &i__3, &c_b5, &x_ref(i__, 1), ldx, 
                        &a_ref(i__, i__), &c__1, &c_b16, &y_ref(1, i__), &
                        c__1);
                i__2 = i__ - 1;
                i__3 = *n - i__;
                sgemv_("Transpose", &i__2, &i__3, &c_b4, &a_ref(1, i__ + 1), 
                        lda, &y_ref(1, i__), &c__1, &c_b5, &y_ref(i__ + 1, 
                        i__), &c__1);
                i__2 = *n - i__;
                sscal_(&i__2, &tauq[i__], &y_ref(i__ + 1, i__), &c__1);

/*              Update A(i,i+1:n) */

                i__2 = *n - i__;
                sgemv_("No transpose", &i__2, &i__, &c_b4, &y_ref(i__ + 1, 1),
                         ldy, &a_ref(i__, 1), lda, &c_b5, &a_ref(i__, i__ + 1)
                        , lda);
                i__2 = i__ - 1;
                i__3 = *n - i__;
                sgemv_("Transpose", &i__2, &i__3, &c_b4, &a_ref(1, i__ + 1), 
                        lda, &x_ref(i__, 1), ldx, &c_b5, &a_ref(i__, i__ + 1),
                         lda);

/*              Generate reflection P(i) to annihilate A(i,i+2:n)   

   Computing MIN */
                i__2 = i__ + 2;
                i__3 = *n - i__;
                slarfg_(&i__3, &a_ref(i__, i__ + 1), &a_ref(i__, f2cmin(i__2,*n))
                        , lda, &taup[i__]);
                e[i__] = a_ref(i__, i__ + 1);
                a_ref(i__, i__ + 1) = 1.f;

/*              Compute X(i+1:m,i) */

                i__2 = *m - i__;
                i__3 = *n - i__;
                sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1, 
                        i__ + 1), lda, &a_ref(i__, i__ + 1), lda, &c_b16, &
                        x_ref(i__ + 1, i__), &c__1);
                i__2 = *n - i__;
                sgemv_("Transpose", &i__2, &i__, &c_b5, &y_ref(i__ + 1, 1), 
                        ldy, &a_ref(i__, i__ + 1), lda, &c_b16, &x_ref(1, i__)
                        , &c__1);
                i__2 = *m - i__;
                sgemv_("No transpose", &i__2, &i__, &c_b4, &a_ref(i__ + 1, 1),
                         lda, &x_ref(1, i__), &c__1, &c_b5, &x_ref(i__ + 1, 
                        i__), &c__1);
                i__2 = i__ - 1;
                i__3 = *n - i__;
                sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(1, i__ + 1)
                        , lda, &a_ref(i__, i__ + 1), lda, &c_b16, &x_ref(1, 
                        i__), &c__1);
                i__2 = *m - i__;
                i__3 = i__ - 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b4, &x_ref(i__ + 1, 1)
                        , ldx, &x_ref(1, i__), &c__1, &c_b5, &x_ref(i__ + 1, 
                        i__), &c__1);
                i__2 = *m - i__;
                sscal_(&i__2, &taup[i__], &x_ref(i__ + 1, i__), &c__1);
            }
/* L10: */
        }
    } else {

/*        Reduce to lower bidiagonal form */

        i__1 = *nb;
        for (i__ = 1; i__ <= i__1; ++i__) {

/*           Update A(i,i:n) */

            i__2 = *n - i__ + 1;
            i__3 = i__ - 1;
            sgemv_("No transpose", &i__2, &i__3, &c_b4, &y_ref(i__, 1), ldy, &
                    a_ref(i__, 1), lda, &c_b5, &a_ref(i__, i__), lda);
            i__2 = i__ - 1;
            i__3 = *n - i__ + 1;
            sgemv_("Transpose", &i__2, &i__3, &c_b4, &a_ref(1, i__), lda, &
                    x_ref(i__, 1), ldx, &c_b5, &a_ref(i__, i__), lda);

/*           Generate reflection P(i) to annihilate A(i,i+1:n)   

   Computing MIN */
            i__2 = i__ + 1;
            i__3 = *n - i__ + 1;
            slarfg_(&i__3, &a_ref(i__, i__), &a_ref(i__, f2cmin(i__2,*n)), lda, &
                    taup[i__]);
            d__[i__] = a_ref(i__, i__);
            if (i__ < *m) {
                a_ref(i__, i__) = 1.f;

/*              Compute X(i+1:m,i) */

                i__2 = *m - i__;
                i__3 = *n - i__ + 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1, 
                        i__), lda, &a_ref(i__, i__), lda, &c_b16, &x_ref(i__ 
                        + 1, i__), &c__1);
                i__2 = *n - i__ + 1;
                i__3 = i__ - 1;
                sgemv_("Transpose", &i__2, &i__3, &c_b5, &y_ref(i__, 1), ldy, 
                        &a_ref(i__, i__), lda, &c_b16, &x_ref(1, i__), &c__1);
                i__2 = *m - i__;
                i__3 = i__ - 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b4, &a_ref(i__ + 1, 1)
                        , lda, &x_ref(1, i__), &c__1, &c_b5, &x_ref(i__ + 1, 
                        i__), &c__1);
                i__2 = i__ - 1;
                i__3 = *n - i__ + 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(1, i__), 
                        lda, &a_ref(i__, i__), lda, &c_b16, &x_ref(1, i__), &
                        c__1);
                i__2 = *m - i__;
                i__3 = i__ - 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b4, &x_ref(i__ + 1, 1)
                        , ldx, &x_ref(1, i__), &c__1, &c_b5, &x_ref(i__ + 1, 
                        i__), &c__1);
                i__2 = *m - i__;
                sscal_(&i__2, &taup[i__], &x_ref(i__ + 1, i__), &c__1);

/*              Update A(i+1:m,i) */

                i__2 = *m - i__;
                i__3 = i__ - 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b4, &a_ref(i__ + 1, 1)
                        , lda, &y_ref(i__, 1), ldy, &c_b5, &a_ref(i__ + 1, 
                        i__), &c__1);
                i__2 = *m - i__;
                sgemv_("No transpose", &i__2, &i__, &c_b4, &x_ref(i__ + 1, 1),
                         ldx, &a_ref(1, i__), &c__1, &c_b5, &a_ref(i__ + 1, 
                        i__), &c__1);

/*              Generate reflection Q(i) to annihilate A(i+2:m,i)   

   Computing MIN */
                i__2 = i__ + 2;
                i__3 = *m - i__;
                slarfg_(&i__3, &a_ref(i__ + 1, i__), &a_ref(f2cmin(i__2,*m), i__)
                        , &c__1, &tauq[i__]);
                e[i__] = a_ref(i__ + 1, i__);
                a_ref(i__ + 1, i__) = 1.f;

/*              Compute Y(i+1:n,i) */

                i__2 = *m - i__;
                i__3 = *n - i__;
                sgemv_("Transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1, i__ 
                        + 1), lda, &a_ref(i__ + 1, i__), &c__1, &c_b16, &
                        y_ref(i__ + 1, i__), &c__1);
                i__2 = *m - i__;
                i__3 = i__ - 1;
                sgemv_("Transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1, 1), 
                        lda, &a_ref(i__ + 1, i__), &c__1, &c_b16, &y_ref(1, 
                        i__), &c__1);
                i__2 = *n - i__;
                i__3 = i__ - 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b4, &y_ref(i__ + 1, 1)
                        , ldy, &y_ref(1, i__), &c__1, &c_b5, &y_ref(i__ + 1, 
                        i__), &c__1);
                i__2 = *m - i__;
                sgemv_("Transpose", &i__2, &i__, &c_b5, &x_ref(i__ + 1, 1), 
                        ldx, &a_ref(i__ + 1, i__), &c__1, &c_b16, &y_ref(1, 
                        i__), &c__1);
                i__2 = *n - i__;
                sgemv_("Transpose", &i__, &i__2, &c_b4, &a_ref(1, i__ + 1), 
                        lda, &y_ref(1, i__), &c__1, &c_b5, &y_ref(i__ + 1, 
                        i__), &c__1);
                i__2 = *n - i__;
                sscal_(&i__2, &tauq[i__], &y_ref(i__ + 1, i__), &c__1);
            }
/* L20: */
        }
    }
    return 0;

/*     End of SLABRD */

} /* slabrd_ */
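The block update A := A - V*Y' - X*U' described in Further Details is what the caller (SGEBRD) applies to the trailing submatrix after each call to slabrd_. Below is a hedged sketch of that update, expressed with sgemm_ from this file; the partitioning names (mr, nr, v2, y2, x2, u2, a22) are illustrative and do not come from the actual SGEBRD source.

    /* Hedged sketch of the trailing-submatrix update following SLABRD.
       Assumes lapackblas.h is included for the integer/real typedefs and
       the sgemm_ prototype.  After reducing an nb-column panel:
           A22 := A22 - V2*Y2' - X2*U2'
       where V2, X2 are the rows below the panel and Y2, U2' the columns
       to its right. */
    static void trailing_update(integer mr, integer nr, integer nb,
                                real *v2, integer ldv,  /* mr x nb */
                                real *y2, integer ldy,  /* nr x nb */
                                real *x2, integer ldx,  /* mr x nb */
                                real *u2, integer ldu,  /* nb x nr */
                                real *a22, integer lda) /* mr x nr */
    {
        real minus_one = -1.f, one = 1.f;

        /* A22 := A22 - V2 * Y2'  (Y2 stored as nr x nb, hence "Transpose") */
        sgemm_("No transpose", "Transpose", &mr, &nr, &nb,
               &minus_one, v2, &ldv, y2, &ldy, &one, a22, &lda);

        /* A22 := A22 - X2 * U2' */
        sgemm_("No transpose", "No transpose", &mr, &nr, &nb,
               &minus_one, x2, &ldx, u2, &ldu, &one, a22, &lda);
    }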
int slacpy_ (const char *uplo, integer *m, integer *n, real *a, integer *lda, real *b, integer *ldb)

Definition at line 12889 of file lapackblas.cpp.

References a_ref, b_ref, f2cmin, and lsame_().

Referenced by sgesvd_(), slaed0_(), slaed2_(), slaed3_(), slaed8_(), and sstedc_().

{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    SLACPY copies all or part of a two-dimensional matrix A to another   
    matrix B.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies the part of the matrix A to be copied to B.   
            = 'U':      Upper triangular part   
            = 'L':      Lower triangular part   
            Otherwise:  All of the matrix A   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A.  N >= 0.   

    A       (input) REAL array, dimension (LDA,N)   
            The m by n matrix A.  If UPLO = 'U', only the upper triangle   
            or trapezoid is accessed; if UPLO = 'L', only the lower   
            triangle or trapezoid is accessed.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    B       (output) REAL array, dimension (LDB,N)   
            On exit, B = A in the locations specified by UPLO.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,M).   

    =====================================================================   


       Parameter adjustments */
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
    /* Local variables */
    static integer i__, j;
    extern logical lsame_(const char *, const char *);
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]

    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;

    /* Function Body */
    if (lsame_(uplo, "U")) {
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__2 = f2cmin(j,*m);
            for (i__ = 1; i__ <= i__2; ++i__) {
                b_ref(i__, j) = a_ref(i__, j);
/* L10: */
            }
/* L20: */
        }
    } else if (lsame_(uplo, "L")) {
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__2 = *m;
            for (i__ = j; i__ <= i__2; ++i__) {
                b_ref(i__, j) = a_ref(i__, j);
/* L30: */
            }
/* L40: */
        }
    } else {
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__2 = *m;
            for (i__ = 1; i__ <= i__2; ++i__) {
                b_ref(i__, j) = a_ref(i__, j);
/* L50: */
            }
/* L60: */
        }
    }
    return 0;

/*     End of SLACPY */

} /* slacpy_ */
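A minimal usage sketch of slacpy_: copy only the lower triangle of a 3-by-3 column-major matrix into B, leaving the rest of B untouched. The values are illustrative; with UPLO = "L" the strict upper triangle of A is never referenced.

    /* Assumes lapackblas.h is included for the typedefs and the
       slacpy_ prototype shown above. */
    void copy_lower_example(void)
    {
        real a[9] = { 1.f, 2.f, 3.f,   4.f, 5.f, 6.f,   7.f, 8.f, 9.f };
        real b[9] = { 0.f };                    /* pre-zeroed destination */
        integer m = 3, n = 3, lda = 3, ldb = 3;

        slacpy_("L", &m, &n, a, &lda, b, &ldb);

        /* b now holds (column-major):  1 0 0
                                        2 5 0
                                        3 6 9  */
    }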
int slae2_ (real *a, real *b, real *c__, real *rt1, real *rt2)

Definition at line 2012 of file lapackblas.cpp.

References b, dabs, and sqrt().

Referenced by ssteqr_(), and ssterf_().

{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1992   


    Purpose   
    =======   

    SLAE2  computes the eigenvalues of a 2-by-2 symmetric matrix   
       [  A   B  ]   
       [  B   C  ].   
    On return, RT1 is the eigenvalue of larger absolute value, and RT2   
    is the eigenvalue of smaller absolute value.   

    Arguments   
    =========   

    A       (input) REAL   
            The (1,1) element of the 2-by-2 matrix.   

    B       (input) REAL   
            The (1,2) and (2,1) elements of the 2-by-2 matrix.   

    C       (input) REAL   
            The (2,2) element of the 2-by-2 matrix.   

    RT1     (output) REAL   
            The eigenvalue of larger absolute value.   

    RT2     (output) REAL   
            The eigenvalue of smaller absolute value.   

    Further Details   
    ===============   

    RT1 is accurate to a few ulps barring over/underflow.   

    RT2 may be inaccurate if there is massive cancellation in the   
    determinant A*C-B*B; higher precision or correctly rounded or   
    correctly truncated arithmetic would be needed to compute RT2   
    accurately in all cases.   

    Overflow is possible only if RT1 is within a factor of 5 of overflow.   
    Underflow is harmless if the input data is 0 or exceeds   
       underflow_threshold / macheps.   

   =====================================================================   


       Compute the eigenvalues */
    /* System generated locals */
    real r__1;
    /* Builtin functions */
//    double sqrt(doublereal);
    /* Local variables */
    static real acmn, acmx, ab, df, tb, sm, rt, adf;


    sm = *a + *c__;
    df = *a - *c__;
    adf = dabs(df);
    tb = *b + *b;
    ab = dabs(tb);
    if (dabs(*a) > dabs(*c__)) {
        acmx = *a;
        acmn = *c__;
    } else {
        acmx = *c__;
        acmn = *a;
    }
    if (adf > ab) {
/* Computing 2nd power */
        r__1 = ab / adf;
        rt = adf * sqrt(r__1 * r__1 + 1.f);
    } else if (adf < ab) {
/* Computing 2nd power */
        r__1 = adf / ab;
        rt = ab * sqrt(r__1 * r__1 + 1.f);
    } else {

/*        Includes case AB=ADF=0 */

        rt = ab * sqrt(2.f);
    }
    if (sm < 0.f) {
        *rt1 = (sm - rt) * .5f;

/*        Order of execution important.   
          To get fully accurate smaller eigenvalue,   
          next line needs to be executed in higher precision. */

        *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
    } else if (sm > 0.f) {
        *rt1 = (sm + rt) * .5f;

/*        Order of execution important.   
          To get fully accurate smaller eigenvalue,   
          next line needs to be executed in higher precision. */

        *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
    } else {

/*        Includes case RT1 = RT2 = 0 */

        *rt1 = rt * .5f;
        *rt2 = rt * -.5f;
    }
    return 0;

/*     End of SLAE2 */

} /* slae2_ */
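A small numeric check of what slae2_ computes: for the symmetric matrix [[A, B], [B, C]] the eigenvalues are (A+C)/2 +/- sqrt(((A-C)/2)^2 + B^2), and RT1 is the one of larger absolute value. The sketch below compares slae2_ against the direct formula for one illustrative set of values; the scaled square root inside slae2_ avoids the overflow the naive formula can suffer (RT2 can still lose accuracy under massive cancellation, as noted in Further Details).

    /* Assumes lapackblas.h is included for the typedefs and the
       slae2_ prototype. */
    #include <math.h>
    #include <stdio.h>

    void check_slae2(void)
    {
        real a = 3.f, b = 1.f, c = 1.f, rt1, rt2;
        float mid = 0.5f * (a + c);
        float rad = sqrtf(0.25f * (a - c) * (a - c) + b * b);

        slae2_(&a, &b, &c, &rt1, &rt2);

        /* expect rt1 ~ mid + rad = 3.41421 and rt2 ~ mid - rad = 0.58579 */
        printf("slae2: %g %g   direct: %g %g\n",
               (double) rt1, (double) rt2,
               (double) (mid + rad), (double) (mid - rad));
    }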
int slaed0_ (integer *icompq, integer *qsiz, integer *n, real *d__, real *e, real *q, integer *ldq, real *qstore, integer *ldqs, real *work, integer *iwork, integer *info)

Definition at line 11924 of file lapackblas.cpp.

References c__0, c__1, c__2, dabs, f2cmax, ilaenv_(), log(), pow_ii(), q_ref, qstore_ref, scopy_(), sgemm_(), slacpy_(), slaed1_(), slaed7_(), ssteqr_(), and xerbla_().

Referenced by sstedc_().

{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    SLAED0 computes all eigenvalues and corresponding eigenvectors of a   
    symmetric tridiagonal matrix using the divide and conquer method.   

    Arguments   
    =========   

    ICOMPQ  (input) INTEGER   
            = 0:  Compute eigenvalues only.   
            = 1:  Compute eigenvectors of original dense symmetric matrix   
                  also.  On entry, Q contains the orthogonal matrix used   
                  to reduce the original matrix to tridiagonal form.   
            = 2:  Compute eigenvalues and eigenvectors of tridiagonal   
                  matrix.   

    QSIZ   (input) INTEGER   
           The dimension of the orthogonal matrix used to reduce   
           the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.   

    N      (input) INTEGER   
           The dimension of the symmetric tridiagonal matrix.  N >= 0.   

    D      (input/output) REAL array, dimension (N)   
           On entry, the main diagonal of the tridiagonal matrix.   
           On exit, its eigenvalues.   

    E      (input) REAL array, dimension (N-1)   
           The off-diagonal elements of the tridiagonal matrix.   
           On exit, E has been destroyed.   

    Q      (input/output) REAL array, dimension (LDQ, N)   
           On entry, Q must contain an N-by-N orthogonal matrix.   
           If ICOMPQ = 0    Q is not referenced.   
           If ICOMPQ = 1    On entry, Q is a subset of the columns of the   
                            orthogonal matrix used to reduce the full   
                            matrix to tridiagonal form corresponding to   
                            the subset of the full matrix which is being   
                            decomposed at this time.   
           If ICOMPQ = 2    On entry, Q will be the identity matrix.   
                            On exit, Q contains the eigenvectors of the   
                            tridiagonal matrix.   

    LDQ    (input) INTEGER   
           The leading dimension of the array Q.  If eigenvectors are   
           desired, then  LDQ >= max(1,N).  In any case,  LDQ >= 1.   

    QSTORE (workspace) REAL array, dimension (LDQS, N)   
           Referenced only when ICOMPQ = 1.  Used to store parts of   
           the eigenvector matrix when the updating matrix multiplies   
           take place.   

    LDQS   (input) INTEGER   
           The leading dimension of the array QSTORE.  If ICOMPQ = 1,   
           then  LDQS >= max(1,N).  In any case,  LDQS >= 1.   

    WORK   (workspace) REAL array,   
           If ICOMPQ = 0 or 1, the dimension of WORK must be at least   
                       1 + 3*N + 2*N*lg N + 2*N**2   
                       ( lg( N ) = smallest integer k   
                                   such that 2^k >= N )   
           If ICOMPQ = 2, the dimension of WORK must be at least   
                       4*N + N**2.   

    IWORK  (workspace) INTEGER array,   
           If ICOMPQ = 0 or 1, the dimension of IWORK must be at least   
                          6 + 6*N + 5*N*lg N.   
                          ( lg( N ) = smallest integer k   
                                      such that 2^k >= N )   
           If ICOMPQ = 2, the dimension of IWORK must be at least   
                          3 + 5*N.   

    INFO   (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            > 0:  The algorithm failed to compute an eigenvalue while   
                  working on the submatrix lying in rows and columns   
                  INFO/(N+1) through mod(INFO,N+1).   

    Further Details   
    ===============   

    Based on contributions by   
       Jeff Rutter, Computer Science Division, University of California   
       at Berkeley, USA   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__9 = 9;
    static integer c__0 = 0;
    static integer c__2 = 2;
    static real c_b23 = 1.f;
    static real c_b24 = 0.f;
    static integer c__1 = 1;
    
    /* System generated locals */
    integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2;
    real r__1;
    /* Builtin functions */
    // double log(doublereal);
    integer pow_ii(integer *, integer *);
    /* Local variables */
    static real temp;
    static integer curr, i__, j, k;
    extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *, 
            integer *, real *, real *, integer *, real *, integer *, real *, 
            real *, integer *);
    static integer iperm, indxq, iwrem;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
            integer *);
    static integer iqptr, tlvls;
    extern /* Subroutine */ int slaed1_(integer *, real *, real *, integer *, 
            integer *, real *, integer *, real *, integer *, integer *), 
            slaed7_(integer *, integer *, integer *, integer *, integer *, 
            integer *, real *, real *, integer *, integer *, real *, integer *
            , real *, integer *, integer *, integer *, integer *, integer *, 
            real *, real *, integer *, integer *);
    static integer iq, igivcl;
    extern /* Subroutine */ int xerbla_(const char *, integer *);
    extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 
            integer *, integer *, ftnlen, ftnlen);
    static integer igivnm, submat;
    extern /* Subroutine */ int slacpy_(const char *, integer *, integer *, real *, 
            integer *, real *, integer *);
    static integer curprb, subpbs, igivpt, curlvl, matsiz, iprmpt, smlsiz;
    extern /* Subroutine */ int ssteqr_(const char *, integer *, real *, real *, 
            real *, integer *, real *, integer *);
    static integer lgn, msd2, smm1, spm1, spm2;
#define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
#define qstore_ref(a_1,a_2) qstore[(a_2)*qstore_dim1 + a_1]


    --d__;
    --e;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    qstore_dim1 = *ldqs;
    qstore_offset = 1 + qstore_dim1 * 1;
    qstore -= qstore_offset;
    --work;
    --iwork;

    /* Function Body */
    *info = 0;

    if (*icompq < 0 || *icompq > 2) {
        *info = -1;
    } else if (*icompq == 1 && *qsiz < f2cmax(0,*n)) {
        *info = -2;
    } else if (*n < 0) {
        *info = -3;
    } else if (*ldq < f2cmax(1,*n)) {
        *info = -7;
    } else if (*ldqs < f2cmax(1,*n)) {
        *info = -9;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("SLAED0", &i__1);
        return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
        return 0;
    }

    smlsiz = ilaenv_(&c__9, "SLAED0", " ", &c__0, &c__0, &c__0, &c__0, (
            ftnlen)6, (ftnlen)1);

/*     Determine the size and placement of the submatrices, and save in   
       the leading elements of IWORK. */

    iwork[1] = *n;
    subpbs = 1;
    tlvls = 0;
L10:
    if (iwork[subpbs] > smlsiz) {
        for (j = subpbs; j >= 1; --j) {
            iwork[j * 2] = (iwork[j] + 1) / 2;
            iwork[(j << 1) - 1] = iwork[j] / 2;
/* L20: */
        }
        ++tlvls;
        subpbs <<= 1;
        goto L10;
    }
    i__1 = subpbs;
    for (j = 2; j <= i__1; ++j) {
        iwork[j] += iwork[j - 1];
/* L30: */
    }

/*     Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1   
       using rank-1 modifications (cuts). */

    spm1 = subpbs - 1;
    i__1 = spm1;
    for (i__ = 1; i__ <= i__1; ++i__) {
        submat = iwork[i__] + 1;
        smm1 = submat - 1;
        d__[smm1] -= (r__1 = e[smm1], dabs(r__1));
        d__[submat] -= (r__1 = e[smm1], dabs(r__1));
/* L40: */
    }

    indxq = (*n << 2) + 3;
    if (*icompq != 2) {

/*        Set up workspaces for eigenvalues only/accumulate new vectors   
          routine */

        temp = log((real) (*n)) / log(2.f);
        lgn = (integer) temp;
        if (pow_ii(&c__2, &lgn) < *n) {
            ++lgn;
        }
        if (pow_ii(&c__2, &lgn) < *n) {
            ++lgn;
        }
        iprmpt = indxq + *n + 1;
        iperm = iprmpt + *n * lgn;
        iqptr = iperm + *n * lgn;
        igivpt = iqptr + *n + 2;
        igivcl = igivpt + *n * lgn;

        igivnm = 1;
        iq = igivnm + (*n << 1) * lgn;
/* Computing 2nd power */
        i__1 = *n;
        iwrem = iq + i__1 * i__1 + 1;

/*        Initialize pointers */

        i__1 = subpbs;
        for (i__ = 0; i__ <= i__1; ++i__) {
            iwork[iprmpt + i__] = 1;
            iwork[igivpt + i__] = 1;
/* L50: */
        }
        iwork[iqptr] = 1;
    }

/*     Solve each submatrix eigenproblem at the bottom of the divide and   
       conquer tree. */

    curr = 0;
    i__1 = spm1;
    for (i__ = 0; i__ <= i__1; ++i__) {
        if (i__ == 0) {
            submat = 1;
            matsiz = iwork[1];
        } else {
            submat = iwork[i__] + 1;
            matsiz = iwork[i__ + 1] - iwork[i__];
        }
        if (*icompq == 2) {
            ssteqr_("I", &matsiz, &d__[submat], &e[submat], &q_ref(submat, 
                    submat), ldq, &work[1], info);
            if (*info != 0) {
                goto L130;
            }
        } else {
            ssteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + 
                    iwork[iqptr + curr]], &matsiz, &work[1], info);
            if (*info != 0) {
                goto L130;
            }
            if (*icompq == 1) {
                sgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b23, &q_ref(1, 
                        submat), ldq, &work[iq - 1 + iwork[iqptr + curr]], &
                        matsiz, &c_b24, &qstore_ref(1, submat), ldqs);
            }
/* Computing 2nd power */
            i__2 = matsiz;
            iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2;
            ++curr;
        }
        k = 1;
        i__2 = iwork[i__ + 1];
        for (j = submat; j <= i__2; ++j) {
            iwork[indxq + j] = k;
            ++k;
/* L60: */
        }
/* L70: */
    }

/*     Successively merge eigensystems of adjacent submatrices   
       into eigensystem for the corresponding larger matrix.   

       while ( SUBPBS > 1 ) */

    curlvl = 1;
L80:
    if (subpbs > 1) {
        spm2 = subpbs - 2;
        i__1 = spm2;
        for (i__ = 0; i__ <= i__1; i__ += 2) {
            if (i__ == 0) {
                submat = 1;
                matsiz = iwork[2];
                msd2 = iwork[1];
                curprb = 0;
            } else {
                submat = iwork[i__] + 1;
                matsiz = iwork[i__ + 2] - iwork[i__];
                msd2 = matsiz / 2;
                ++curprb;
            }

/*     Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2)   
       into an eigensystem of size MATSIZ.   
       SLAED1 is used only for the full eigensystem of a tridiagonal   
       matrix.   
       SLAED7 handles the cases in which eigenvalues only or eigenvalues   
       and eigenvectors of a full symmetric matrix (which was reduced to   
       tridiagonal form) are desired. */

            if (*icompq == 2) {
                slaed1_(&matsiz, &d__[submat], &q_ref(submat, submat), ldq, &
                        iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, &
                        work[1], &iwork[subpbs + 1], info);
            } else {
                slaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[
                        submat], &qstore_ref(1, submat), ldqs, &iwork[indxq + 
                        submat], &e[submat + msd2 - 1], &msd2, &work[iq], &
                        iwork[iqptr], &iwork[iprmpt], &iwork[iperm], &iwork[
                        igivpt], &iwork[igivcl], &work[igivnm], &work[iwrem], 
                        &iwork[subpbs + 1], info);
            }
            if (*info != 0) {
                goto L130;
            }
            iwork[i__ / 2 + 1] = iwork[i__ + 2];
/* L90: */
        }
        subpbs /= 2;
        ++curlvl;
        goto L80;
    }

/*     end while   

       Re-merge the eigenvalues/vectors which were deflated at the final   
       merge step. */

    if (*icompq == 1) {
        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {
            j = iwork[indxq + i__];
            work[i__] = d__[j];
            scopy_(qsiz, &qstore_ref(1, j), &c__1, &q_ref(1, i__), &c__1);
/* L100: */
        }
        scopy_(n, &work[1], &c__1, &d__[1], &c__1);
    } else if (*icompq == 2) {
        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {
            j = iwork[indxq + i__];
            work[i__] = d__[j];
            scopy_(n, &q_ref(1, j), &c__1, &work[*n * i__ + 1], &c__1);
/* L110: */
        }
        scopy_(n, &work[1], &c__1, &d__[1], &c__1);
        slacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq);
    } else {
        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {
            j = iwork[indxq + i__];
            work[i__] = d__[j];
/* L120: */
        }
        scopy_(n, &work[1], &c__1, &d__[1], &c__1);
    }
    goto L140;

L130:
    *info = submat * (*n + 1) + submat + matsiz - 1;

L140:
    return 0;

/*     End of SLAED0 */

} /* slaed0_ */
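The following is a hedged, stand-alone driver sketch for SLAED0 (it is not part of lapackblas.cpp; the test matrix, array sizes, and the extern declaration are assumptions made for illustration). With ICOMPQ = 2 it computes all eigenvalues and eigenvectors of a 4-by-4 symmetric tridiagonal matrix; note that the argument check above requires LDQS >= max(1,N) even though QSTORE is not referenced in this mode:

#include <stdio.h>
#include "lapackblas.h"                  /* integer and real typedefs */

/* Prototype as defined in lapackblas.cpp; declared here in case the
   header does not export slaed0_ (an assumption of this sketch).     */
extern int slaed0_(integer *icompq, integer *qsiz, integer *n, real *d__,
                   real *e, real *q, integer *ldq, real *qstore,
                   integer *ldqs, real *work, integer *iwork, integer *info);

int main(void)
{
    integer icompq = 2, n = 4, qsiz = 4, ldq = 4, ldqs = 4, info = 0;
    real d[4]  = {2.f, 2.f, 2.f, 2.f};   /* main diagonal                   */
    real e[3]  = {1.f, 1.f, 1.f};        /* off-diagonal                    */
    real q[16] = {0.f};                  /* must hold the identity on entry */
    real qstore[16];                     /* not referenced when ICOMPQ = 2,
                                            but LDQS >= max(1,N) is checked */
    real work[4 * 4 + 4 * 4];            /* 4*N + N**2 reals                */
    integer iwork[3 + 5 * 4];            /* 3 + 5*N integers                */
    int i;

    for (i = 0; i < 4; ++i)
        q[i * 4 + i] = 1.f;              /* Q = I                           */

    slaed0_(&icompq, &qsiz, &n, d, e, q, &ldq, qstore, &ldqs,
            work, iwork, &info);

    if (info == 0)
        for (i = 0; i < 4; ++i)
            printf("lambda[%d] = %f\n", i, d[i]);   /* ascending eigenvalues */
    return (int) info;
}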
int slaed1_ ( integer *n,
real *d__,
real *q,
integer *ldq,
integer *indxq,
real *rho,
integer *cutpnt,
real *work,
integer *iwork,
integer *info 
)

Definition at line 12663 of file lapackblas.cpp.

References c__1, c_n1, f2cmax, f2cmin, q_ref, scopy_(), slaed2_(), slaed3_(), slamrg_(), and xerbla_().

Referenced by slaed0_().

{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    SLAED1 computes the updated eigensystem of a diagonal   
    matrix after modification by a rank-one symmetric matrix.  This   
    routine is used only for the eigenproblem which requires all   
    eigenvalues and eigenvectors of a tridiagonal matrix.  SLAED7 handles   
    the case in which eigenvalues only or eigenvalues and eigenvectors   
    of a full symmetric matrix (which was reduced to tridiagonal form)   
    are desired.   

      T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out)   

       where Z = Q'u, and u is a vector of length N with ones in the   
       CUTPNT-th and (CUTPNT + 1)-th elements and zeros elsewhere.   

       The eigenvectors of the original matrix are stored in Q, and the   
       eigenvalues are in D.  The algorithm consists of three stages:   

          The first stage consists of deflating the size of the problem   
          when there are multiple eigenvalues or if there is a zero in   
          the Z vector.  For each such occurrence the dimension of the   
          secular equation problem is reduced by one.  This stage is   
          performed by the routine SLAED2.   

          The second stage consists of calculating the updated   
          eigenvalues. This is done by finding the roots of the secular   
          equation via the routine SLAED4 (as called by SLAED3).   
          This routine also calculates the eigenvectors of the current   
          problem.   

          The final stage consists of computing the updated eigenvectors   
          directly using the updated eigenvalues.  The eigenvectors for   
          the current problem are multiplied with the eigenvectors from   
          the overall problem.   

    Arguments   
    =========   

    N      (input) INTEGER   
           The dimension of the symmetric tridiagonal matrix.  N >= 0.   

    D      (input/output) REAL array, dimension (N)   
           On entry, the eigenvalues of the rank-1-perturbed matrix.   
           On exit, the eigenvalues of the repaired matrix.   

    Q      (input/output) REAL array, dimension (LDQ,N)   
           On entry, the eigenvectors of the rank-1-perturbed matrix.   
           On exit, the eigenvectors of the repaired tridiagonal matrix.   

    LDQ    (input) INTEGER   
           The leading dimension of the array Q.  LDQ >= max(1,N).   

    INDXQ  (input/output) INTEGER array, dimension (N)   
           On entry, the permutation which separately sorts the two   
           subproblems in D into ascending order.   
           On exit, the permutation which will reintegrate the   
           subproblems back into sorted order,   
           i.e. D( INDXQ( I = 1, N ) ) will be in ascending order.   

    RHO    (input) REAL   
           The subdiagonal entry used to create the rank-1 modification.   

    CUTPNT (input) INTEGER   
           The location of the last eigenvalue in the leading sub-matrix.   
           min(1,N) <= CUTPNT <= N/2.   

    WORK   (workspace) REAL array, dimension (4*N + N**2)   

    IWORK  (workspace) INTEGER array, dimension (4*N)   

    INFO   (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            > 0:  if INFO = 1, an eigenvalue did not converge   

    Further Details   
    ===============   

    Based on contributions by   
       Jeff Rutter, Computer Science Division, University of California   
       at Berkeley, USA   
    Modified by Francoise Tisseur, University of Tennessee.   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    
    /* System generated locals */
    integer q_dim1, q_offset, i__1, i__2;
    /* Local variables */
    static integer indx, i__, k, indxc, indxp;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
            integer *);
    static integer n1, n2;
    extern /* Subroutine */ int slaed2_(integer *, integer *, integer *, real 
            *, real *, integer *, integer *, real *, real *, real *, real *, 
            real *, integer *, integer *, integer *, integer *, integer *), 
            slaed3_(integer *, integer *, integer *, real *, real *, integer *
            , real *, real *, real *, integer *, integer *, real *, real *, 
            integer *);
    static integer idlmda, is, iw, iz;
    extern /* Subroutine */ int xerbla_(const char *, integer *), slamrg_(
            integer *, integer *, real *, integer *, integer *, integer *);
    static integer coltyp, iq2, cpp1;
#define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]


    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    --indxq;
    --work;
    --iwork;

    /* Function Body */
    *info = 0;

    if (*n < 0) {
        *info = -1;
    } else if (*ldq < f2cmax(1,*n)) {
        *info = -4;
    } else /* if(complicated condition) */ {
/* Computing MIN */
        i__1 = 1, i__2 = *n / 2;
        if (f2cmin(i__1,i__2) > *cutpnt || *n / 2 < *cutpnt) {
            *info = -7;
        }
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("SLAED1", &i__1);
        return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
        return 0;
    }

/*     The following values are integer pointers which indicate   
       the portion of the workspace   
       used by a particular array in SLAED2 and SLAED3. */

    iz = 1;
    idlmda = iz + *n;
    iw = idlmda + *n;
    iq2 = iw + *n;

    indx = 1;
    indxc = indx + *n;
    coltyp = indxc + *n;
    indxp = coltyp + *n;


/*     Form the z-vector which consists of the last row of Q_1 and the   
       first row of Q_2. */

    scopy_(cutpnt, &q_ref(*cutpnt, 1), ldq, &work[iz], &c__1);
    cpp1 = *cutpnt + 1;
    i__1 = *n - *cutpnt;
    scopy_(&i__1, &q_ref(cpp1, cpp1), ldq, &work[iz + *cutpnt], &c__1);

/*     Deflate eigenvalues. */

    slaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[
            iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[
            indxc], &iwork[indxp], &iwork[coltyp], info);

    if (*info != 0) {
        goto L20;
    }

/*     Solve Secular Equation. */

    if (k != 0) {
        is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp + 
                1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2;
        slaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda],
                 &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[
                is], info);
        if (*info != 0) {
            goto L20;
        }

/*     Prepare the INDXQ sorting permutation. */

        n1 = k;
        n2 = *n - k;
        slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
    } else {
        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {
            indxq[i__] = i__;
/* L10: */
        }
    }

L20:
    return 0;

/*     End of SLAED1 */

} /* slaed1_ */
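SLAED1 is an internal step of the divide-and-conquer solver rather than a user-level entry point, but its workspace rules are easy to misread. A hedged sizing sketch (the helper names are illustrative, not part of the library) following the WORK and IWORK descriptions above:

#include <stdlib.h>
#include "lapackblas.h"                 /* integer and real typedefs */

/* WORK needs 4*N + N**2 reals; IWORK needs 4*N integers (see above). */
static real *slaed1_work_alloc(integer n)
{
    return (real *) malloc((size_t)(4 * n + n * n) * sizeof(real));
}

static integer *slaed1_iwork_alloc(integer n)
{
    return (integer *) malloc((size_t)(4 * n) * sizeof(integer));
}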
int slaed2_ ( integer *k,
integer *n,
integer *n1,
real *d__,
real *q,
integer *ldq,
integer *indxq,
real *rho,
real *z__,
real *dlamda,
real *w,
real *q2,
integer *indx,
integer *indxc,
integer *indxp,
integer *coltyp,
integer *info 
)

Definition at line 13569 of file lapackblas.cpp.

References dabs, df2cmax, f2cmax, f2cmin, isamax_(), q_ref, scopy_(), slacpy_(), slamch_(), slamrg_(), slapy2_(), sqrt(), srot_(), sscal_(), t, and xerbla_().

Referenced by slaed1_().

{
    /* System generated locals */
    integer q_dim1, q_offset, i__1, i__2;
    real r__1, r__2, r__3, r__4;

    /* Builtin functions */
    // double sqrt(doublereal);

    /* Local variables */
    static integer imax, jmax, ctot[4];
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
            integer *, real *, real *);
    static real c__;
    static integer i__, j;
    static real s, t;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static integer k2;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
            integer *);
    static integer n2;
    extern doublereal slapy2_(real *, real *);
    static integer ct, nj, pj, js;
    extern doublereal slamch_(const char *);
    extern /* Subroutine */ int xerbla_(const char *, integer *);
    extern integer isamax_(integer *, real *, integer *);
    extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer 
            *, integer *, integer *), slacpy_(const char *, integer *, integer *, 
            real *, integer *, real *, integer *);
    static integer iq1, iq2, n1p1;
    static real eps, tau, tol;
    static integer psm[4];


#define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]


/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1999   


    Purpose   
    =======   

    SLAED2 merges the two sets of eigenvalues together into a single   
    sorted set.  Then it tries to deflate the size of the problem.   
    There are two ways in which deflation can occur:  when two or more   
    eigenvalues are close together or if there is a tiny entry in the   
    Z vector.  For each such occurrence the order of the related secular   
    equation problem is reduced by one.   

    Arguments   
    =========   

    K      (output) INTEGER   
           The number of non-deflated eigenvalues, and the order of the   
           related secular equation.  0 <= K <= N.   

    N      (input) INTEGER   
           The dimension of the symmetric tridiagonal matrix.  N >= 0.   

    N1     (input) INTEGER   
           The location of the last eigenvalue in the leading sub-matrix.   
           min(1,N) <= N1 <= N/2.   

    D      (input/output) REAL array, dimension (N)   
           On entry, D contains the eigenvalues of the two submatrices to   
           be combined.   
           On exit, D contains the trailing (N-K) updated eigenvalues   
           (those which were deflated) sorted into increasing order.   

    Q      (input/output) REAL array, dimension (LDQ, N)   
           On entry, Q contains the eigenvectors of two submatrices in   
           the two square blocks with corners at (1,1), (N1,N1)   
           and (N1+1, N1+1), (N,N).   
           On exit, Q contains the trailing (N-K) updated eigenvectors   
           (those which were deflated) in its last N-K columns.   

    LDQ    (input) INTEGER   
           The leading dimension of the array Q.  LDQ >= max(1,N).   

    INDXQ  (input/output) INTEGER array, dimension (N)   
           The permutation which separately sorts the two sub-problems   
           in D into ascending order.  Note that elements in the second   
           half of this permutation must first have N1 added to their   
           values. Destroyed on exit.   

    RHO    (input/output) REAL   
           On entry, the off-diagonal element associated with the rank-1   
           cut which originally split the two submatrices which are now   
           being recombined.   
           On exit, RHO has been modified to the value required by   
           SLAED3.   

    Z      (input) REAL array, dimension (N)   
           On entry, Z contains the updating vector (the last   
           row of the first sub-eigenvector matrix and the first row of   
           the second sub-eigenvector matrix).   
           On exit, the contents of Z have been destroyed by the updating   
           process.   

    DLAMDA (output) REAL array, dimension (N)   
           A copy of the first K eigenvalues which will be used by   
           SLAED3 to form the secular equation.   

    W      (output) REAL array, dimension (N)   
           The first k values of the final deflation-altered z-vector   
           which will be passed to SLAED3.   

    Q2     (output) REAL array, dimension (N1**2+(N-N1)**2)   
           A copy of the first K eigenvectors which will be used by   
           SLAED3 in a matrix multiply (SGEMM) to solve for the new   
           eigenvectors.   

    INDX   (workspace) INTEGER array, dimension (N)   
           The permutation used to sort the contents of DLAMDA into   
           ascending order.   

    INDXC  (output) INTEGER array, dimension (N)   
           The permutation used to arrange the columns of the deflated   
           Q matrix into three groups:  the first group contains non-zero   
           elements only at and above N1, the second contains   
           non-zero elements only below N1, and the third is dense.   

    INDXP  (workspace) INTEGER array, dimension (N)   
           The permutation used to place deflated values of D at the end   
           of the array.  INDXP(1:K) points to the nondeflated D-values   
           and INDXP(K+1:N) points to the deflated eigenvalues.   

    COLTYP (workspace/output) INTEGER array, dimension (N)   
           During execution, a label which will indicate which of the   
           following types a column in the Q2 matrix is:   
           1 : non-zero in the upper half only;   
           2 : dense;   
           3 : non-zero in the lower half only;   
           4 : deflated.   
           On exit, COLTYP(i) is the number of columns of type i,   
           for i=1 to 4 only.   

    INFO   (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    Further Details   
    ===============   

    Based on contributions by   
       Jeff Rutter, Computer Science Division, University of California   
       at Berkeley, USA   
    Modified by Francoise Tisseur, University of Tennessee.   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    --indxq;
    --z__;
    --dlamda;
    --w;
    --q2;
    --indx;
    --indxc;
    --indxp;
    --coltyp;

    /* Function Body */
    *info = 0;

    if (*n < 0) {
        *info = -2;
    } else if (*ldq < f2cmax(1,*n)) {
        *info = -6;
    } else /* if(complicated condition) */ {
/* Computing F2CMIN */
        i__1 = 1, i__2 = *n / 2;
        if (f2cmin(i__1,i__2) > *n1 || *n / 2 < *n1) {
            *info = -3;
        }
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("SLAED2", &i__1);
        return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
        return 0;
    }

    n2 = *n - *n1;
    n1p1 = *n1 + 1;

    if (*rho < 0.f) {
        sscal_(&n2, &c_b3, &z__[n1p1], &c__1);
    }

/*     Normalize z so that norm(z) = 1.  Since z is the concatenation of   
       two normalized vectors, norm2(z) = sqrt(2). */

    t = 1.f / sqrt(2.f);
    sscal_(n, &t, &z__[1], &c__1);

/*     RHO = ABS( norm(z)**2 * RHO ) */

    *rho = (r__1 = *rho * 2.f, dabs(r__1));

/*     Sort the eigenvalues into increasing order */

    i__1 = *n;
    for (i__ = n1p1; i__ <= i__1; ++i__) {
        indxq[i__] += *n1;
/* L10: */
    }

/*     re-integrate the deflated parts from the last pass */

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        dlamda[i__] = d__[indxq[i__]];
/* L20: */
    }
    slamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]);
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        indx[i__] = indxq[indxc[i__]];
/* L30: */
    }

/*     Calculate the allowable deflation tolerance */

    imax = isamax_(n, &z__[1], &c__1);
    jmax = isamax_(n, &d__[1], &c__1);
    eps = slamch_("Epsilon");
/* Computing MAX */
    r__3 = (r__1 = d__[jmax], dabs(r__1)), r__4 = (r__2 = z__[imax], dabs(
            r__2));
    tol = eps * 8.f * df2cmax(r__3,r__4);

/*     If the rank-1 modifier is small enough, no more needs to be done   
       except to reorganize Q so that its columns correspond with the   
       elements in D. */

    if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) {
        *k = 0;
        iq2 = 1;
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__ = indx[j];
            scopy_(n, &q_ref(1, i__), &c__1, &q2[iq2], &c__1);
            dlamda[j] = d__[i__];
            iq2 += *n;
/* L40: */
        }
        slacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq);
        scopy_(n, &dlamda[1], &c__1, &d__[1], &c__1);
        goto L190;
    }

/*     If there are multiple eigenvalues then the problem deflates.  Here   
       the number of equal eigenvalues are found.  As each equal   
       eigenvalue is found, an elementary reflector is computed to rotate   
       the corresponding eigensubspace so that the corresponding   
       components of Z are zero in this new basis. */

    i__1 = *n1;
    for (i__ = 1; i__ <= i__1; ++i__) {
        coltyp[i__] = 1;
/* L50: */
    }
    i__1 = *n;
    for (i__ = n1p1; i__ <= i__1; ++i__) {
        coltyp[i__] = 3;
/* L60: */
    }


    *k = 0;
    k2 = *n + 1;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
        nj = indx[j];
        if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) {

/*           Deflate due to small z component. */

            --k2;
            coltyp[nj] = 4;
            indxp[k2] = nj;
            if (j == *n) {
                goto L100;
            }
        } else {
            pj = nj;
            goto L80;
        }
/* L70: */
    }
L80:
    ++j;
    nj = indx[j];
    if (j > *n) {
        goto L100;
    }
    if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) {

/*        Deflate due to small z component. */

        --k2;
        coltyp[nj] = 4;
        indxp[k2] = nj;
    } else {

/*        Check if eigenvalues are close enough to allow deflation. */

        s = z__[pj];
        c__ = z__[nj];

/*        Find sqrt(a**2+b**2) without overflow or   
          destructive underflow. */

        tau = slapy2_(&c__, &s);
        t = d__[nj] - d__[pj];
        c__ /= tau;
        s = -s / tau;
        if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) {

/*           Deflation is possible. */

            z__[nj] = tau;
            z__[pj] = 0.f;
            if (coltyp[nj] != coltyp[pj]) {
                coltyp[nj] = 2;
            }
            coltyp[pj] = 4;
            srot_(n, &q_ref(1, pj), &c__1, &q_ref(1, nj), &c__1, &c__, &s);
/* Computing 2nd power */
            r__1 = c__;
/* Computing 2nd power */
            r__2 = s;
            t = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2);
/* Computing 2nd power */
            r__1 = s;
/* Computing 2nd power */
            r__2 = c__;
            d__[nj] = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2);
            d__[pj] = t;
            --k2;
            i__ = 1;
L90:
            if (k2 + i__ <= *n) {
                if (d__[pj] < d__[indxp[k2 + i__]]) {
                    indxp[k2 + i__ - 1] = indxp[k2 + i__];
                    indxp[k2 + i__] = pj;
                    ++i__;
                    goto L90;
                } else {
                    indxp[k2 + i__ - 1] = pj;
                }
            } else {
                indxp[k2 + i__ - 1] = pj;
            }
            pj = nj;
        } else {
            ++(*k);
            dlamda[*k] = d__[pj];
            w[*k] = z__[pj];
            indxp[*k] = pj;
            pj = nj;
        }
    }
    goto L80;
L100:

/*     Record the last eigenvalue. */

    ++(*k);
    dlamda[*k] = d__[pj];
    w[*k] = z__[pj];
    indxp[*k] = pj;

/*     Count up the total number of the various types of columns, then   
       form a permutation which positions the four column types into   
       four uniform groups (although one or more of these groups may be   
       empty). */

    for (j = 1; j <= 4; ++j) {
        ctot[j - 1] = 0;
/* L110: */
    }
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
        ct = coltyp[j];
        ++ctot[ct - 1];
/* L120: */
    }

/*     PSM(*) = Position in SubMatrix (of types 1 through 4) */

    psm[0] = 1;
    psm[1] = ctot[0] + 1;
    psm[2] = psm[1] + ctot[1];
    psm[3] = psm[2] + ctot[2];
    *k = *n - ctot[3];

/*     Fill out the INDXC array so that the permutation which it induces   
       will place all type-1 columns first, all type-2 columns next,   
       then all type-3's, and finally all type-4's. */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
        js = indxp[j];
        ct = coltyp[js];
        indx[psm[ct - 1]] = js;
        indxc[psm[ct - 1]] = j;
        ++psm[ct - 1];
/* L130: */
    }

/*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA   
       and Q2 respectively.  The eigenvalues/vectors which were not   
       deflated go into the first K slots of DLAMDA and Q2 respectively,   
       while those which were deflated go into the last N - K slots. */

    i__ = 1;
    iq1 = 1;
    iq2 = (ctot[0] + ctot[1]) * *n1 + 1;
    i__1 = ctot[0];
    for (j = 1; j <= i__1; ++j) {
        js = indx[i__];
        scopy_(n1, &q_ref(1, js), &c__1, &q2[iq1], &c__1);
        z__[i__] = d__[js];
        ++i__;
        iq1 += *n1;
/* L140: */
    }

    i__1 = ctot[1];
    for (j = 1; j <= i__1; ++j) {
        js = indx[i__];
        scopy_(n1, &q_ref(1, js), &c__1, &q2[iq1], &c__1);
        scopy_(&n2, &q_ref(*n1 + 1, js), &c__1, &q2[iq2], &c__1);
        z__[i__] = d__[js];
        ++i__;
        iq1 += *n1;
        iq2 += n2;
/* L150: */
    }

    i__1 = ctot[2];
    for (j = 1; j <= i__1; ++j) {
        js = indx[i__];
        scopy_(&n2, &q_ref(*n1 + 1, js), &c__1, &q2[iq2], &c__1);
        z__[i__] = d__[js];
        ++i__;
        iq2 += n2;
/* L160: */
    }

    iq1 = iq2;
    i__1 = ctot[3];
    for (j = 1; j <= i__1; ++j) {
        js = indx[i__];
        scopy_(n, &q_ref(1, js), &c__1, &q2[iq2], &c__1);
        iq2 += *n;
        z__[i__] = d__[js];
        ++i__;
/* L170: */
    }

/*     The deflated eigenvalues and their corresponding vectors go back   
       into the last N - K slots of D and Q respectively. */

    slacpy_("A", n, &ctot[3], &q2[iq1], n, &q_ref(1, *k + 1), ldq);
    i__1 = *n - *k;
    scopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1);

/*     Copy CTOT into COLTYP for referencing in SLAED3. */

    for (j = 1; j <= 4; ++j) {
        coltyp[j] = ctot[j - 1];
/* L180: */
    }

L190:
    return 0;

/*     End of SLAED2 */

} /* slaed2_ */
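The deflation criterion used above can be restated compactly: an entry Z(j) is deflated when |RHO * Z(j)| <= TOL, with TOL = 8 * eps * max over i of (|D(i)|, |Z(i)|). A hedged stand-alone restatement (0-based indexing; the function name and argument layout are illustrative assumptions, not library API):

#include <math.h>
#include "lapackblas.h"                      /* integer, real, slamch_ */

/* rho is the already-scaled value |2*RHO| that SLAED2 uses in its test. */
static int slaed2_would_deflate(real rho, const real *d, const real *z,
                                integer n, integer j)
{
    real amax = 0.f;
    real eps = (real) slamch_("Epsilon");    /* relative machine precision */
    integer i;

    for (i = 0; i < n; ++i) {                /* max(|d(i)|, |z(i)|) over all i */
        if (fabsf(d[i]) > amax) amax = fabsf(d[i]);
        if (fabsf(z[i]) > amax) amax = fabsf(z[i]);
    }
    return fabsf(rho * z[j]) <= 8.f * eps * amax;   /* the TOL test in SLAED2 */
}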
int slaed3_ ( integer *k,
integer *n,
integer *n1,
real *d__,
real *q,
integer *ldq,
real *rho,
real *dlamda,
real *q2,
integer *indx,
integer *ctot,
real *w,
real *s,
integer *info 
)

Definition at line 15616 of file lapackblas.cpp.

References f2cmax, q_ref, r_sign(), scopy_(), sgemm_(), slacpy_(), slaed4_(), slamc3_(), slaset_(), snrm2_(), sqrt(), and xerbla_().

Referenced by slaed1_().

{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,   
       Courant Institute, NAG Ltd., and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    SLAED3 finds the roots of the secular equation, as defined by the   
    values in D, W, and RHO, between 1 and K.  It makes the   
    appropriate calls to SLAED4 and then updates the eigenvectors by   
    multiplying the matrix of eigenvectors of the pair of eigensystems   
    being combined by the matrix of eigenvectors of the K-by-K system   
    which is solved here.   

    This code makes very mild assumptions about floating point   
    arithmetic. It will work on machines with a guard digit in   
    add/subtract, or on those binary machines without guard digits   
    which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.   
    It could conceivably fail on hexadecimal or decimal machines   
    without guard digits, but we know of none.   

    Arguments   
    =========   

    K       (input) INTEGER   
            The number of terms in the rational function to be solved by   
            SLAED4.  K >= 0.   

    N       (input) INTEGER   
            The number of rows and columns in the Q matrix.   
            N >= K (deflation may result in N>K).   

    N1      (input) INTEGER   
            The location of the last eigenvalue in the leading submatrix.   
            min(1,N) <= N1 <= N/2.   

    D       (output) REAL array, dimension (N)   
            D(I) contains the updated eigenvalues for   
            1 <= I <= K.   

    Q       (output) REAL array, dimension (LDQ,N)   
            Initially the first K columns are used as workspace.   
            On output the columns 1 to K contain   
            the updated eigenvectors.   

    LDQ     (input) INTEGER   
            The leading dimension of the array Q.  LDQ >= max(1,N).   

    RHO     (input) REAL   
            The value of the parameter in the rank one update equation.   
            RHO >= 0 required.   

    DLAMDA  (input/output) REAL array, dimension (K)   
            The first K elements of this array contain the old roots   
            of the deflated updating problem.  These are the poles   
            of the secular equation. May be changed on output by   
            having lowest order bit set to zero on Cray X-MP, Cray Y-MP,   
            Cray-2, or Cray C-90, as described above.   

    Q2      (input) REAL array, dimension (LDQ2, N)   
            The first K columns of this matrix contain the non-deflated   
            eigenvectors for the split problem.   

    INDX    (input) INTEGER array, dimension (N)   
            The permutation used to arrange the columns of the deflated   
            Q matrix into three groups (see SLAED2).   
            The rows of the eigenvectors found by SLAED4 must be likewise   
            permuted before the matrix multiply can take place.   

    CTOT    (input) INTEGER array, dimension (4)   
            A count of the total number of the various types of columns   
            in Q, as described in INDX.  The fourth column type is any   
            column which has been deflated.   

    W       (input/output) REAL array, dimension (K)   
            The first K elements of this array contain the components   
            of the deflation-adjusted updating vector. Destroyed on   
            output.   

    S       (workspace) REAL array, dimension (N1 + 1)*K   
            Will contain the eigenvectors of the repaired matrix which   
            will be multiplied by the previously accumulated eigenvectors   
            to update the system.   

    LDS     (input) INTEGER   
            The leading dimension of S.  LDS >= max(1,K).   

    INFO    (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            > 0:  if INFO = 1, an eigenvalue did not converge   

    Further Details   
    ===============   

    Based on contributions by   
       Jeff Rutter, Computer Science Division, University of California   
       at Berkeley, USA   
    Modified by Francoise Tisseur, University of Tennessee.   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static real c_b22 = 1.f;
    static real c_b23 = 0.f;
    
    /* System generated locals */
    integer q_dim1, q_offset, i__1, i__2;
    real r__1;
    /* Builtin functions */
    // double sqrt(doublereal), r_sign(real *, real *);
    /* Local variables */
    static real temp;
    extern doublereal snrm2_(integer *, real *, integer *);
    static integer i__, j;
    extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *, 
            integer *, real *, real *, integer *, real *, integer *, real *, 
            real *, integer *), scopy_(integer *, real *, 
            integer *, real *, integer *);
    static integer n2;
    extern /* Subroutine */ int slaed4_(integer *, integer *, real *, real *, 
            real *, real *, real *, integer *);
    extern doublereal slamc3_(real *, real *);
    static integer n12, ii, n23;
    extern /* Subroutine */ int xerbla_(const char *, integer *), slacpy_(
            const char *, integer *, integer *, real *, integer *, real *, integer *
            ), slaset_(const char *, integer *, integer *, real *, real *, 
            real *, integer *);