#include <cstdio>
#include <cstdlib>
#include "lapackblas.h"
Defines
The array-indexing macros below are defined locally, and identically, in many routines of this file; each distinct definition is listed once.
#define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
#define | b_ref(a_1, a_2) b[(a_2)*b_dim1 + a_1] |
#define | c___ref(a_1, a_2) c__[(a_2)*c_dim1 + a_1] |
#define | givcol_ref(a_1, a_2) givcol[(a_2)*2 + a_1] |
#define | givnum_ref(a_1, a_2) givnum[(a_2)*2 + a_1] |
#define | q2_ref(a_1, a_2) q2[(a_2)*q2_dim1 + a_1] |
#define | q_ref(a_1, a_2) q[(a_2)*q_dim1 + a_1] |
#define | qstore_ref(a_1, a_2) qstore[(a_2)*qstore_dim1 + a_1] |
#define | s_ref(a_1, a_2) s[(a_2)*s_dim1 + a_1] |
#define | stack_ref(a_1, a_2) stack[(a_2)*2 + a_1 - 3] |
#define | t_ref(a_1, a_2) t[(a_2)*t_dim1 + a_1] |
#define | u_ref(a_1, a_2) u[(a_2)*u_dim1 + a_1] |
#define | v_ref(a_1, a_2) v[(a_2)*v_dim1 + a_1] |
#define | vt_ref(a_1, a_2) vt[(a_2)*vt_dim1 + a_1] |
#define | w_ref(a_1, a_2) w[(a_2)*w_dim1 + a_1] |
#define | work_ref(a_1, a_2) work[(a_2)*work_dim1 + a_1] |
#define | x_ref(a_1, a_2) x[(a_2)*x_dim1 + a_1] |
#define | y_ref(a_1, a_2) y[(a_2)*y_dim1 + a_1] |
#define | z___ref(a_1, a_2) z__[(a_2)*z_dim1 + a_1] |
Functions
int | s_cat (char *lp, const char **rpp, integer *rnp, integer *np, ftnlen ll) |
integer | ieeeck_ (integer *ispec, real *zero, real *one) |
integer | ilaenv_ (integer *ispec, const char *name__, const char *, integer *n1, integer *n2, integer *, integer *n4, ftnlen name_len, ftnlen) |
logical | lsame_ (const char *ca, const char *cb) |
double | pow_ri (real *ap, integer *bp) |
integer | pow_ii (integer *ap, integer *bp) |
double | r_sign (real *a, real *b) |
int | saxpy_ (integer *n, real *sa, real *sx, integer *incx, real *sy, integer *incy) |
integer | s_cmp (char *a0, const char *b0, ftnlen la, ftnlen lb) |
void | s_copy (char *a, const char *b, ftnlen la, ftnlen lb) |
int | scopy_ (integer *n, real *sx, integer *incx, real *sy, integer *incy) |
doublereal | sdot_ (integer *n, real *sx, integer *incx, real *sy, integer *incy) |
int | sgemm_ (const char *transa, const char *transb, integer *m, integer *n, integer *k, real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, real *c__, integer *ldc) |
int | sgemv_ (const char *trans, integer *m, integer *n, real *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, real *y, integer *incy) |
int | sger_ (integer *m, integer *n, real *alpha, real *x, integer *incx, real *y, integer *incy, real *a, integer *lda) |
int | slae2_ (real *a, real *b, real *c__, real *rt1, real *rt2) |
int | slaev2_ (real *a, real *b, real *c__, real *rt1, real *rt2, real *cs1, real *sn1) |
doublereal | slamch_ (const char *cmach) |
int | slamc1_ (integer *beta, integer *t, logical *rnd, logical *ieee1) |
int | slamc2_ (integer *beta, integer *t, logical *rnd, real *eps, integer *emin, real *rmin, integer *emax, real *rmax) |
doublereal | slamc3_ (real *a, real *b) |
int | slamc4_ (integer *emin, real *start, integer *base) |
int | slamc5_ (integer *beta, integer *p, integer *emin, logical *ieee, integer *emax, real *rmax) |
doublereal | slanst_ (const char *norm, integer *n, real *d__, real *e) |
doublereal | slansy_ (const char *norm, char *uplo, integer *n, real *a, integer *lda, real *work) |
doublereal | slapy2_ (real *x, real *y) |
int | slarfb_ (const char *side, const char *trans, const char *direct, const char *storev, integer *m, integer *n, integer *k, real *v, integer *ldv, real *t, integer *ldt, real *c__, integer *ldc, real *work, integer *ldwork) |
int | slarf_ (const char *side, integer *m, integer *n, real *v, integer *incv, real *tau, real *c__, integer *ldc, real *work) |
int | slarfg_ (integer *n, real *alpha, real *x, integer *incx, real *tau) |
int | slarft_ (const char *direct, const char *storev, integer *n, integer *k, real *v, integer *ldv, real *tau, real *t, integer *ldt) |
int | slartg_ (real *f, real *g, real *cs, real *sn, real *r__) |
int | slascl_ (const char *type__, integer *kl, integer *ku, real *cfrom, real *cto, integer *m, integer *n, real *a, integer *lda, integer *info) |
int | slaset_ (const char *uplo, integer *m, integer *n, real *alpha, real *beta, real *a, integer *lda) |
int | slasr_ (const char *side, const char *pivot, const char *direct, integer *m, integer *n, real *c__, real *s, real *a, integer *lda) |
int | slasrt_ (const char *id, integer *n, real *d__, integer *info) |
int | slassq_ (integer *n, real *x, integer *incx, real *scale, real *sumsq) |
int | slatrd_ (char *uplo, integer *n, integer *nb, real *a, integer *lda, real *e, real *tau, real *w, integer *ldw) |
doublereal | snrm2_ (integer *n, real *x, integer *incx) |
int | sorg2l_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info) |
int | sorg2r_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info) |
int | sorgql_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info) |
int | sorgqr_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info) |
int | sorgtr_ (char *uplo, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info) |
int | sscal_ (integer *n, real *sa, real *sx, integer *incx) |
int | ssteqr_ (const char *compz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *info) |
int | ssterf_ (integer *n, real *d__, real *e, integer *info) |
int | sswap_ (integer *n, real *sx, integer *incx, real *sy, integer *incy) |
int | ssyev_ (char *jobz, char *uplo, integer *n, real *a, integer *lda, real *w, real *work, integer *lwork, integer *info) |
int | ssymv_ (const char *uplo, integer *n, real *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, real *y, integer *incy) |
int | ssyr2_ (char *uplo, integer *n, real *alpha, real *x, integer *incx, real *y, integer *incy, real *a, integer *lda) |
int | ssyr2k_ (char *uplo, const char *trans, integer *n, integer *k, real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, real *c__, integer *ldc) |
int | ssytd2_ (char *uplo, integer *n, real *a, integer *lda, real *d__, real *e, real *tau, integer *info) |
int | ssytrd_ (char *uplo, integer *n, real *a, integer *lda, real *d__, real *e, real *tau, real *work, integer *lwork, integer *info) |
int | strmm_ (const char *side, const char *uplo, const char *transa, const char *diag, integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, integer *ldb) |
int | strmv_ (const char *uplo, const char *trans, const char *diag, integer *n, real *a, integer *lda, real *x, integer *incx) |
int | xerbla_ (const char *srname, integer *info) |
int | sstedc_ (const char *compz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info) |
int | sstevd_ (char *jobz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info) |
int | slaed0_ (integer *icompq, integer *qsiz, integer *n, real *d__, real *e, real *q, integer *ldq, real *qstore, integer *ldqs, real *work, integer *iwork, integer *info) |
int | slaed7_ (integer *icompq, integer *n, integer *qsiz, integer *tlvls, integer *curlvl, integer *curpbm, real *d__, real *q, integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *qstore, integer *qptr, integer *prmptr, integer *perm, integer *givptr, integer *givcol, real *givnum, real *work, integer *iwork, integer *info) |
int | slaed1_ (integer *n, real *d__, real *q, integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *work, integer *iwork, integer *info) |
int | slacpy_ (const char *uplo, integer *m, integer *n, real *a, integer *lda, real *b, integer *ldb) |
int | slamrg_ (integer *n1, integer *n2, real *a, integer *strd1, integer *strd2, integer *index) |
int | slaed8_ (integer *icompq, integer *k, integer *n, integer *qsiz, real *d__, real *q, integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *z__, real *dlamda, real *q2, integer *ldq2, real *w, integer *perm, integer *givptr, integer *givcol, real *givnum, integer *indxp, integer *indx, integer *info) |
int | slaed2_ (integer *k, integer *n, integer *n1, real *d__, real *q, integer *ldq, integer *indxq, real *rho, real *z__, real *dlamda, real *w, real *q2, integer *indx, integer *indxc, integer *indxp, integer *coltyp, integer *info) |
int | slaed9_ (integer *k, integer *kstart, integer *kstop, integer *n, real *d__, real *q, integer *ldq, real *rho, real *dlamda, real *w, real *s, integer *lds, integer *info) |
integer | isamax_ (integer *n, real *sx, integer *incx) |
int | srot_ (integer *n, real *sx, integer *incx, real *sy, integer *incy, real *c__, real *s) |
int | slaed4_ (integer *n, integer *i__, real *d__, real *z__, real *delta, real *rho, real *dlam, integer *info) |
int | slaeda_ (integer *n, integer *tlvls, integer *curlvl, integer *curpbm, integer *prmptr, integer *perm, integer *givptr, integer *givcol, real *givnum, real *q, integer *qptr, real *z__, real *ztemp, integer *info) |
int | slaed3_ (integer *k, integer *n, integer *n1, real *d__, real *q, integer *ldq, real *rho, real *dlamda, real *q2, integer *indx, integer *ctot, real *w, real *s, integer *info) |
int | slaed6_ (integer *kniter, logical *orgati, real *rho, real *d__, real *z__, real *finit, real *tau, integer *info) |
int | slaed5_ (integer *i__, real *d__, real *z__, real *delta, real *rho, real *dlam) |
int | sgesvd_ (char *jobu, char *jobvt, integer *m, integer *n, real *a, integer *lda, real *s, real *u, integer *ldu, real *vt, integer *ldvt, real *work, integer *lwork, integer *info) |
int | sorgl2_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info) |
int | sorglq_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info) |
doublereal | slange_ (const char *norm, integer *m, integer *n, real *a, integer *lda, real *work) |
int | sgebrd_ (integer *m, integer *n, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *work, integer *lwork, integer *info) |
int | sgebd2_ (integer *m, integer *n, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *work, integer *info) |
int | sormbr_ (const char *vect, const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info) |
int | sgelqf_ (integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info) |
int | sormlq_ (const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info) |
int | sormqr_ (const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info) |
int | sgelq2_ (integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *info) |
int | sbdsqr_ (const char *uplo, integer *n, integer *ncvt, integer *nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real *u, integer *ldu, real *c__, integer *ldc, real *work, integer *info) |
int | sgeqrf_ (integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info) |
int | sorml2_ (const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info) |
int | slabrd_ (integer *m, integer *n, integer *nb, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *x, integer *ldx, real *y, integer *ldy) |
int | sgeqr2_ (integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *info) |
int | sorm2r_ (const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info) |
int | sorgbr_ (const char *vect, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info) |
int | slasq1_ (integer *n, real *d__, real *e, real *work, integer *info) |
int | slasq2_ (integer *n, real *z__, integer *info) |
int | slasq3_ (integer *i0, integer *n0, real *z__, integer *pp, real *dmin__, real *sigma, real *desig, real *qmax, integer *nfail, integer *iter, integer *ndiv, logical *ieee) |
int | slasq4_ (integer *i0, integer *n0, real *z__, integer *pp, integer *n0in, real *dmin__, real *dmin1, real *dmin2, real *dn, real *dn1, real *dn2, real *tau, integer *ttype) |
int | slasq5_ (integer *i0, integer *n0, real *z__, integer *pp, real *tau, real *dmin__, real *dmin1, real *dmin2, real *dn, real *dnm1, real *dnm2, logical *ieee) |
int | slasq6_ (integer *i0, integer *n0, real *z__, integer *pp, real *dmin__, real *dmin1, real *dmin2, real *dn, real *dnm1, real *dnm2) |
int | slasv2_ (real *f, real *g, real *h__, real *ssmin, real *ssmax, real *snr, real *csr, real *snl, real *csl) |
int | slas2_ (real *f, real *g, real *h__, real *ssmin, real *ssmax) |
Variables
static real | c_b3 = -1.f |
static integer | c__1 = 1 |
static integer | c__6 = 6 |
static integer | c__0 = 0 |
static integer | c__2 = 2 |
static integer | c_n1 = -1 |
static real | c_b416 = 0.f |
static real | c_b438 = 1.f |
static doublereal | c_b15 = -.125 |
static real | c_b49 = 1.f |
static real | c_b72 = -1.f |
static integer | c__10 = 10 |
static integer | c__3 = 3 |
static integer | c__4 = 4 |
static integer | c__11 = 11 |
#define a_ref(a_1, a_2)   a[(a_2)*a_dim1 + a_1]
This macro is redefined, with an identical body, at the top of each routine in this file that indexes a two-dimensional array a.
Referenced by sgebd2_(), sgebrd_(), sgelq2_(), sgelqf_(), sgemm_(), sgemv_(), sgeqr2_(), sgeqrf_(), sger_(), sgesvd_(), slabrd_(), slacpy_(), slange_(), slansy_(), slascl_(), slaset_(), slasr_(), slatrd_(), sorg2l_(), sorg2r_(), sorgbr_(), sorgl2_(), sorglq_(), sorgql_(), sorgqr_(), sorgtr_(), sorm2r_(), sormbr_(), sorml2_(), sormlq_(), sormqr_(), ssyev_(), ssymv_(), ssyr2_(), ssyr2k_(), ssytd2_(), ssytrd_(), strmm_(), and strmv_().
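All of the *_ref macros encode the same f2c convention: matrices are stored column-major with 1-based Fortran indexing, the leading dimension is held in a local variable such as a_dim1, and each routine's prologue has already shifted the base pointer by a_offset = 1 + a_dim1. A minimal standalone sketch of that convention (plain C++; the variable names mirror the generated code, the data is purely illustrative):

#include <cstdio>

#define a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1]

int main() {
    const int a_dim1 = 3;                 /* leading dimension (number of rows) */
    double storage[3 * 2] = { 1, 2, 3,    /* column 1 */
                              4, 5, 6 };  /* column 2 */
    /* Shift the base pointer exactly like the f2c prologue does
       (a -= a_offset with a_offset = 1 + a_dim1), so that a_ref(1,1)
       addresses the first stored element. */
    double *a = storage - (1 + a_dim1);
    std::printf("%g %g\n", a_ref(1, 1), a_ref(2, 2));  /* prints 1 5 */
    return 0;
}

The stack_ref, givcol_ref and givnum_ref variants are the same idea for arrays whose leading dimension is fixed at 2.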
#define b_ref(a_1, a_2)   b[(a_2)*b_dim1 + a_1]

#define c___ref(a_1, a_2)   c__[(a_2)*c_dim1 + a_1]

#define givcol_ref(a_1, a_2)   givcol[(a_2)*2 + a_1]

#define givnum_ref(a_1, a_2)   givnum[(a_2)*2 + a_1]

#define q2_ref(a_1, a_2)   q2[(a_2)*q2_dim1 + a_1]
Referenced by slaed8_().

#define q_ref(a_1, a_2)   q[(a_2)*q_dim1 + a_1]

#define qstore_ref(a_1, a_2)   qstore[(a_2)*qstore_dim1 + a_1]
Referenced by slaed0_().

#define s_ref(a_1, a_2)   s[(a_2)*s_dim1 + a_1]
Referenced by slaed9_().

#define stack_ref(a_1, a_2)   stack[(a_2)*2 + a_1 - 3]
Referenced by slasrt_().

#define t_ref(a_1, a_2)   t[(a_2)*t_dim1 + a_1]
Referenced by slarft_().

#define u_ref(a_1, a_2)   u[(a_2)*u_dim1 + a_1]

#define v_ref(a_1, a_2)   v[(a_2)*v_dim1 + a_1]

#define vt_ref(a_1, a_2)   vt[(a_2)*vt_dim1 + a_1]

#define w_ref(a_1, a_2)   w[(a_2)*w_dim1 + a_1]
Referenced by slatrd_().

#define work_ref(a_1, a_2)   work[(a_2)*work_dim1 + a_1]
Referenced by slarfb_().

#define x_ref(a_1, a_2)   x[(a_2)*x_dim1 + a_1]
Referenced by slabrd_().

#define y_ref(a_1, a_2)   y[(a_2)*y_dim1 + a_1]
Referenced by slabrd_().

#define z___ref(a_1, a_2)   z__[(a_2)*z_dim1 + a_1]
integer ieeeck_ (integer *ispec, real *zero, real *one)
Definition at line 56 of file lapackblas.cpp.
References integer.
Referenced by ilaenv_().
00057 { 00058 /* -- LAPACK auxiliary routine (version 3.0) -- 00059 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00060 Courant Institute, Argonne National Lab, and Rice University 00061 June 30, 1998 00062 00063 00064 Purpose 00065 ======= 00066 00067 IEEECK is called from the ILAENV to verify that Infinity and 00068 possibly NaN arithmetic is safe (i.e. will not trap). 00069 00070 Arguments 00071 ========= 00072 00073 ISPEC (input) INTEGER 00074 Specifies whether to test just for inifinity arithmetic 00075 or whether to test for infinity and NaN arithmetic. 00076 = 0: Verify infinity arithmetic only. 00077 = 1: Verify infinity and NaN arithmetic. 00078 00079 ZERO (input) REAL 00080 Must contain the value 0.0 00081 This is passed to prevent the compiler from optimizing 00082 away this code. 00083 00084 ONE (input) REAL 00085 Must contain the value 1.0 00086 This is passed to prevent the compiler from optimizing 00087 away this code. 00088 00089 RETURN VALUE: INTEGER 00090 = 0: Arithmetic failed to produce the correct answers 00091 = 1: Arithmetic produced the correct answers */ 00092 /* System generated locals */ 00093 integer ret_val; 00094 /* Local variables */ 00095 static real neginf, posinf, negzro, newzro, nan1, nan2, nan3, nan4, nan5, 00096 nan6; 00097 00098 00099 ret_val = 1; 00100 00101 posinf = *one / *zero; 00102 if (posinf <= *one) { 00103 ret_val = 0; 00104 return ret_val; 00105 } 00106 00107 neginf = -(*one) / *zero; 00108 if (neginf >= *zero) { 00109 ret_val = 0; 00110 return ret_val; 00111 } 00112 00113 negzro = *one / (neginf + *one); 00114 if (negzro != *zero) { 00115 ret_val = 0; 00116 return ret_val; 00117 } 00118 00119 neginf = *one / negzro; 00120 if (neginf >= *zero) { 00121 ret_val = 0; 00122 return ret_val; 00123 } 00124 00125 newzro = negzro + *zero; 00126 if (newzro != *zero) { 00127 ret_val = 0; 00128 return ret_val; 00129 } 00130 00131 posinf = *one / newzro; 00132 if (posinf <= *one) { 00133 ret_val = 0; 00134 return ret_val; 00135 } 00136 00137 neginf *= posinf; 00138 if (neginf >= *zero) { 00139 ret_val = 0; 00140 return ret_val; 00141 } 00142 00143 posinf *= posinf; 00144 if (posinf <= *one) { 00145 ret_val = 0; 00146 return ret_val; 00147 } 00148 00149 00150 00151 00152 /* Return if we were only asked to check infinity arithmetic */ 00153 00154 if (*ispec == 0) { 00155 return ret_val; 00156 } 00157 00158 nan1 = posinf + neginf; 00159 00160 nan2 = posinf / neginf; 00161 00162 nan3 = posinf / posinf; 00163 00164 nan4 = posinf * *zero; 00165 00166 nan5 = neginf * negzro; 00167 00168 nan6 = nan5 * 0.f; 00169 00170 if (nan1 == nan1) { 00171 ret_val = 0; 00172 return ret_val; 00173 } 00174 00175 if (nan2 == nan2) { 00176 ret_val = 0; 00177 return ret_val; 00178 } 00179 00180 if (nan3 == nan3) { 00181 ret_val = 0; 00182 return ret_val; 00183 } 00184 00185 if (nan4 == nan4) { 00186 ret_val = 0; 00187 return ret_val; 00188 } 00189 00190 if (nan5 == nan5) { 00191 ret_val = 0; 00192 return ret_val; 00193 } 00194 00195 if (nan6 == nan6) { 00196 ret_val = 0; 00197 return ret_val; 00198 } 00199 00200 return ret_val; 00201 } /* ieeeck_ */
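The routine above only exercises infinity and NaN expressions and reports whether they produced the expected results; the operands are passed by address so the compiler cannot fold the arithmetic away. A hedged calling sketch, assuming lapackblas.h supplies the f2c typedefs (integer, real) and a prototype for ieeeck_:

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer ispec = 1;            /* 1 = check both infinity and NaN arithmetic */
    real zero = 0.f, one = 1.f;   /* passed by pointer, as the comments above require */
    integer ok = ieeeck_(&ispec, &zero, &one);
    std::printf("ieeeck_ returned %ld\n", (long) ok);  /* 1 means Inf/NaN arithmetic is safe */
    return 0;
}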
integer ilaenv_ (integer *ispec, const char *name__, const char *, integer *n1, integer *n2, integer *, integer *n4, ftnlen name_len, ftnlen)
Definition at line 206 of file lapackblas.cpp.
References c__0, c__1, f2cmin, ieeeck_(), integer, nx, s_cmp(), and s_copy().
Referenced by sgebrd_(), sgelqf_(), sgeqrf_(), sgesvd_(), slaed0_(), slasq2_(), sorgbr_(), sorglq_(), sorgql_(), sorgqr_(), sorgtr_(), sormbr_(), sormlq_(), sormqr_(), sstedc_(), ssyev_(), and ssytrd_().
00208 { 00209 /* -- LAPACK auxiliary routine (version 3.0) -- 00210 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00211 Courant Institute, Argonne National Lab, and Rice University 00212 June 30, 1999 00213 00214 00215 Purpose 00216 ======= 00217 00218 ILAENV is called from the LAPACK routines to choose problem-dependent 00219 parameters for the local environment. See ISPEC for a description of 00220 the parameters. 00221 00222 This version provides a set of parameters which should give good, 00223 but not optimal, performance on many of the currently available 00224 computers. Users are encouraged to modify this subroutine to set 00225 the tuning parameters for their particular machine using the option 00226 and problem size information in the arguments. 00227 00228 This routine will not function correctly if it is converted to all 00229 lower case. Converting it to all upper case is allowed. 00230 00231 Arguments 00232 ========= 00233 00234 ISPEC (input) INTEGER 00235 Specifies the parameter to be returned as the value of 00236 ILAENV. 00237 = 1: the optimal blocksize; if this value is 1, an unblocked 00238 algorithm will give the best performance. 00239 = 2: the minimum block size for which the block routine 00240 should be used; if the usable block size is less than 00241 this value, an unblocked routine should be used. 00242 = 3: the crossover point (in a block routine, for N less 00243 than this value, an unblocked routine should be used) 00244 = 4: the number of shifts, used in the nonsymmetric 00245 eigenvalue routines 00246 = 5: the MINIMUM Column dimension for blocking to be used; 00247 rectangular blocks must have dimension at least k by m, 00248 where k is given by ILAENV(2,...) and m by ILAENV(5,...) 00249 = 6: the crossover point for the SVD (when reducing an m by n 00250 matrix to bidiagonal form, if f2cmax(m,n)/min(m,n) exceeds 00251 this value, a QR factorization is used first to reduce 00252 the matrix to a triangular form.) 00253 = 7: the number of processors 00254 = 8: the crossover point for the multishift QR and QZ methods 00255 for nonsymmetric eigenvalue problems. 00256 = 9: maximum size of the subproblems at the bottom of the 00257 computation tree in the divide-and-conquer algorithm 00258 (used by xGELSD and xGESDD) 00259 =10: ieee NaN arithmetic can be trusted not to trap 00260 =11: infinity arithmetic can be trusted not to trap 00261 00262 NAME (input) CHARACTER*(*) 00263 The name of the calling subroutine, in either upper case or 00264 lower case. 00265 00266 OPTS (input) CHARACTER*(*) 00267 The character options to the subroutine NAME, concatenated 00268 into a single character string. For example, UPLO = 'U', 00269 TRANS = 'T', and DIAG = 'N' for a triangular routine would 00270 be specified as OPTS = 'UTN'. 00271 00272 N1 (input) INTEGER 00273 N2 (input) INTEGER 00274 N3 (input) INTEGER 00275 N4 (input) INTEGER 00276 Problem dimensions for the subroutine NAME; these may not all 00277 be required. 00278 00279 (ILAENV) (output) INTEGER 00280 >= 0: the value of the parameter specified by ISPEC 00281 < 0: if ILAENV = -k, the k-th argument had an illegal value. 
00282 00283 Further Details 00284 =============== 00285 00286 The following conventions have been used when calling ILAENV from the 00287 LAPACK routines: 00288 1) OPTS is a concatenation of all of the character options to 00289 subroutine NAME, in the same order that they appear in the 00290 argument list for NAME, even if they are not used in determining 00291 the value of the parameter specified by ISPEC. 00292 2) The problem dimensions N1, N2, N3, N4 are specified in the order 00293 that they appear in the argument list for NAME. N1 is used 00294 first, N2 second, and so on, and unused problem dimensions are 00295 passed a value of -1. 00296 3) The parameter value returned by ILAENV is checked for validity in 00297 the calling subroutine. For example, ILAENV is used to retrieve 00298 the optimal blocksize for STRTRI as follows: 00299 00300 NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) 00301 IF( NB.LE.1 ) NB = MAX( 1, N ) 00302 00303 ===================================================================== */ 00304 /* Table of constant values */ 00305 static integer c__0 = 0; 00306 static real c_b162 = 0.f; 00307 static real c_b163 = 1.f; 00308 static integer c__1 = 1; 00309 00310 /* System generated locals */ 00311 integer ret_val; 00312 /* Builtin functions 00313 Subroutine */ void s_copy(char *, const char *, ftnlen, ftnlen); 00314 integer s_cmp(char *, const char *, ftnlen, ftnlen); 00315 /* Local variables */ 00316 static integer i__; 00317 static logical cname, sname; 00318 static integer nbmin; 00319 static char c1[1], c2[2], c3[3], c4[2]; 00320 static integer ic, nb; 00321 extern integer ieeeck_(integer *, real *, real *); 00322 static integer iz, nx; 00323 static char subnam[6]; 00324 00325 00326 00327 00328 switch (*ispec) { 00329 case 1: goto L100; 00330 case 2: goto L100; 00331 case 3: goto L100; 00332 case 4: goto L400; 00333 case 5: goto L500; 00334 case 6: goto L600; 00335 case 7: goto L700; 00336 case 8: goto L800; 00337 case 9: goto L900; 00338 case 10: goto L1000; 00339 case 11: goto L1100; 00340 } 00341 00342 /* Invalid value for ISPEC */ 00343 00344 ret_val = -1; 00345 return ret_val; 00346 00347 L100: 00348 00349 /* Convert NAME to upper case if the first character is lower case. 
*/ 00350 00351 ret_val = 1; 00352 s_copy(subnam, name__, (ftnlen)6, name_len); 00353 ic = *(unsigned char *)subnam; 00354 iz = 'Z'; 00355 if (iz == 90 || iz == 122) { 00356 00357 /* ASCII character set */ 00358 00359 if (ic >= 97 && ic <= 122) { 00360 *(unsigned char *)subnam = (char) (ic - 32); 00361 for (i__ = 2; i__ <= 6; ++i__) { 00362 ic = *(unsigned char *)&subnam[i__ - 1]; 00363 if (ic >= 97 && ic <= 122) { 00364 *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); 00365 } 00366 /* L10: */ 00367 } 00368 } 00369 00370 } else if (iz == 233 || iz == 169) { 00371 00372 /* EBCDIC character set */ 00373 00374 if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && 00375 ic <= 169) { 00376 *(unsigned char *)subnam = (char) (ic + 64); 00377 for (i__ = 2; i__ <= 6; ++i__) { 00378 ic = *(unsigned char *)&subnam[i__ - 1]; 00379 if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 00380 162 && ic <= 169) { 00381 *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); 00382 } 00383 /* L20: */ 00384 } 00385 } 00386 00387 } else if (iz == 218 || iz == 250) { 00388 00389 /* Prime machines: ASCII+128 */ 00390 00391 if (ic >= 225 && ic <= 250) { 00392 *(unsigned char *)subnam = (char) (ic - 32); 00393 for (i__ = 2; i__ <= 6; ++i__) { 00394 ic = *(unsigned char *)&subnam[i__ - 1]; 00395 if (ic >= 225 && ic <= 250) { 00396 *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); 00397 } 00398 /* L30: */ 00399 } 00400 } 00401 } 00402 00403 *(unsigned char *)c1 = *(unsigned char *)subnam; 00404 sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D'; 00405 cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z'; 00406 if (! (cname || sname)) { 00407 return ret_val; 00408 } 00409 s_copy(c2, subnam + 1, (ftnlen)2, (ftnlen)2); 00410 s_copy(c3, subnam + 3, (ftnlen)3, (ftnlen)3); 00411 s_copy(c4, c3 + 1, (ftnlen)2, (ftnlen)2); 00412 00413 switch (*ispec) { 00414 case 1: goto L110; 00415 case 2: goto L200; 00416 case 3: goto L300; 00417 } 00418 00419 L110: 00420 00421 /* ISPEC = 1: block size 00422 00423 In these examples, separate code is provided for setting NB for 00424 real and complex. We assume that NB will take the same value in 00425 single or double precision. 
*/ 00426 00427 nb = 1; 00428 00429 if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { 00430 if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { 00431 if (sname) { 00432 nb = 64; 00433 } else { 00434 nb = 64; 00435 } 00436 } else if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, 00437 "RQF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen) 00438 3, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) 00439 == 0) { 00440 if (sname) { 00441 nb = 32; 00442 } else { 00443 nb = 32; 00444 } 00445 } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { 00446 if (sname) { 00447 nb = 32; 00448 } else { 00449 nb = 32; 00450 } 00451 } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { 00452 if (sname) { 00453 nb = 32; 00454 } else { 00455 nb = 32; 00456 } 00457 } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { 00458 if (sname) { 00459 nb = 64; 00460 } else { 00461 nb = 64; 00462 } 00463 } 00464 } else if (s_cmp(c2, "PO", (ftnlen)2, (ftnlen)2) == 0) { 00465 if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { 00466 if (sname) { 00467 nb = 64; 00468 } else { 00469 nb = 64; 00470 } 00471 } 00472 } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { 00473 if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { 00474 if (sname) { 00475 nb = 64; 00476 } else { 00477 nb = 64; 00478 } 00479 } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { 00480 nb = 32; 00481 } else if (sname && s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { 00482 nb = 64; 00483 } 00484 } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { 00485 if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { 00486 nb = 64; 00487 } else if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { 00488 nb = 32; 00489 } else if (s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { 00490 nb = 64; 00491 } 00492 } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { 00493 if (*(unsigned char *)c3 == 'G') { 00494 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 00495 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( 00496 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == 00497 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( 00498 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( 00499 ftnlen)2, (ftnlen)2) == 0) { 00500 nb = 32; 00501 } 00502 } else if (*(unsigned char *)c3 == 'M') { 00503 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 00504 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( 00505 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == 00506 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( 00507 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( 00508 ftnlen)2, (ftnlen)2) == 0) { 00509 nb = 32; 00510 } 00511 } 00512 } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { 00513 if (*(unsigned char *)c3 == 'G') { 00514 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 00515 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( 00516 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == 00517 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( 00518 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( 00519 ftnlen)2, (ftnlen)2) == 0) { 00520 nb = 32; 00521 } 00522 } else if (*(unsigned char *)c3 == 'M') { 00523 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 00524 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( 00525 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == 00526 0 || s_cmp(c4, "HR", (ftnlen)2, 
(ftnlen)2) == 0 || s_cmp( 00527 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( 00528 ftnlen)2, (ftnlen)2) == 0) { 00529 nb = 32; 00530 } 00531 } 00532 } else if (s_cmp(c2, "GB", (ftnlen)2, (ftnlen)2) == 0) { 00533 if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { 00534 if (sname) { 00535 if (*n4 <= 64) { 00536 nb = 1; 00537 } else { 00538 nb = 32; 00539 } 00540 } else { 00541 if (*n4 <= 64) { 00542 nb = 1; 00543 } else { 00544 nb = 32; 00545 } 00546 } 00547 } 00548 } else if (s_cmp(c2, "PB", (ftnlen)2, (ftnlen)2) == 0) { 00549 if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { 00550 if (sname) { 00551 if (*n2 <= 64) { 00552 nb = 1; 00553 } else { 00554 nb = 32; 00555 } 00556 } else { 00557 if (*n2 <= 64) { 00558 nb = 1; 00559 } else { 00560 nb = 32; 00561 } 00562 } 00563 } 00564 } else if (s_cmp(c2, "TR", (ftnlen)2, (ftnlen)2) == 0) { 00565 if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { 00566 if (sname) { 00567 nb = 64; 00568 } else { 00569 nb = 64; 00570 } 00571 } 00572 } else if (s_cmp(c2, "LA", (ftnlen)2, (ftnlen)2) == 0) { 00573 if (s_cmp(c3, "UUM", (ftnlen)3, (ftnlen)3) == 0) { 00574 if (sname) { 00575 nb = 64; 00576 } else { 00577 nb = 64; 00578 } 00579 } 00580 } else if (sname && s_cmp(c2, "ST", (ftnlen)2, (ftnlen)2) == 0) { 00581 if (s_cmp(c3, "EBZ", (ftnlen)3, (ftnlen)3) == 0) { 00582 nb = 1; 00583 } 00584 } 00585 ret_val = nb; 00586 return ret_val; 00587 00588 L200: 00589 00590 /* ISPEC = 2: minimum block size */ 00591 00592 nbmin = 2; 00593 if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { 00594 if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( 00595 ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, ( 00596 ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) 00597 { 00598 if (sname) { 00599 nbmin = 2; 00600 } else { 00601 nbmin = 2; 00602 } 00603 } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { 00604 if (sname) { 00605 nbmin = 2; 00606 } else { 00607 nbmin = 2; 00608 } 00609 } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { 00610 if (sname) { 00611 nbmin = 2; 00612 } else { 00613 nbmin = 2; 00614 } 00615 } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { 00616 if (sname) { 00617 nbmin = 2; 00618 } else { 00619 nbmin = 2; 00620 } 00621 } 00622 } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { 00623 if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { 00624 if (sname) { 00625 nbmin = 8; 00626 } else { 00627 nbmin = 8; 00628 } 00629 } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { 00630 nbmin = 2; 00631 } 00632 } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { 00633 if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { 00634 nbmin = 2; 00635 } 00636 } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { 00637 if (*(unsigned char *)c3 == 'G') { 00638 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 00639 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( 00640 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == 00641 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( 00642 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( 00643 ftnlen)2, (ftnlen)2) == 0) { 00644 nbmin = 2; 00645 } 00646 } else if (*(unsigned char *)c3 == 'M') { 00647 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 00648 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( 00649 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == 00650 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( 00651 c4, "TR", 
(ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( 00652 ftnlen)2, (ftnlen)2) == 0) { 00653 nbmin = 2; 00654 } 00655 } 00656 } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { 00657 if (*(unsigned char *)c3 == 'G') { 00658 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 00659 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( 00660 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == 00661 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( 00662 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( 00663 ftnlen)2, (ftnlen)2) == 0) { 00664 nbmin = 2; 00665 } 00666 } else if (*(unsigned char *)c3 == 'M') { 00667 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 00668 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( 00669 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == 00670 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( 00671 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( 00672 ftnlen)2, (ftnlen)2) == 0) { 00673 nbmin = 2; 00674 } 00675 } 00676 } 00677 ret_val = nbmin; 00678 return ret_val; 00679 00680 L300: 00681 00682 /* ISPEC = 3: crossover point */ 00683 00684 nx = 0; 00685 if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { 00686 if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( 00687 ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, ( 00688 ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) 00689 { 00690 if (sname) { 00691 nx = 128; 00692 } else { 00693 nx = 128; 00694 } 00695 } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { 00696 if (sname) { 00697 nx = 128; 00698 } else { 00699 nx = 128; 00700 } 00701 } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { 00702 if (sname) { 00703 nx = 128; 00704 } else { 00705 nx = 128; 00706 } 00707 } 00708 } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { 00709 if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { 00710 nx = 32; 00711 } 00712 } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { 00713 if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { 00714 nx = 32; 00715 } 00716 } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { 00717 if (*(unsigned char *)c3 == 'G') { 00718 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 00719 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( 00720 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == 00721 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( 00722 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( 00723 ftnlen)2, (ftnlen)2) == 0) { 00724 nx = 128; 00725 } 00726 } 00727 } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { 00728 if (*(unsigned char *)c3 == 'G') { 00729 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 00730 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( 00731 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == 00732 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( 00733 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( 00734 ftnlen)2, (ftnlen)2) == 0) { 00735 nx = 128; 00736 } 00737 } 00738 } 00739 ret_val = nx; 00740 return ret_val; 00741 00742 L400: 00743 00744 /* ISPEC = 4: number of shifts (used by xHSEQR) */ 00745 00746 ret_val = 6; 00747 return ret_val; 00748 00749 L500: 00750 00751 /* ISPEC = 5: minimum column dimension (not used) */ 00752 00753 ret_val = 2; 00754 return ret_val; 00755 00756 L600: 00757 00758 /* ISPEC = 6: crossover point for SVD (used by xGELSS and 
xGESVD) */ 00759 00760 ret_val = (integer) ((real) f2cmin(*n1,*n2) * 1.6f); 00761 return ret_val; 00762 00763 L700: 00764 00765 /* ISPEC = 7: number of processors (not used) */ 00766 00767 ret_val = 1; 00768 return ret_val; 00769 00770 L800: 00771 00772 /* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ 00773 00774 ret_val = 50; 00775 return ret_val; 00776 00777 L900: 00778 00779 /* ISPEC = 9: maximum size of the subproblems at the bottom of the 00780 computation tree in the divide-and-conquer algorithm 00781 (used by xGELSD and xGESDD) */ 00782 00783 ret_val = 25; 00784 return ret_val; 00785 00786 L1000: 00787 00788 /* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap 00789 00790 ILAENV = 0 */ 00791 ret_val = 1; 00792 if (ret_val == 1) { 00793 ret_val = ieeeck_(&c__0, &c_b162, &c_b163); 00794 } 00795 return ret_val; 00796 00797 L1100: 00798 00799 /* ISPEC = 11: infinity arithmetic can be trusted not to trap 00800 00801 ILAENV = 0 */ 00802 ret_val = 1; 00803 if (ret_val == 1) { 00804 ret_val = ieeeck_(&c__1, &c_b162, &c_b163); 00805 } 00806 return ret_val; 00807 00808 /* End of ILAENV */ 00809 00810 } /* ilaenv_ */
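The Fortran-style example in the comments above, NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ), translates into C as below. This is a sketch under the assumption that lapackblas.h exposes the prototype; the routine name and problem sizes are made up, and the two trailing ftnlen arguments carry the lengths of the NAME and OPTS strings:

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer ispec = 1;                    /* 1 = optimal block size */
    integer m = 1000, n = 1000, nm1 = -1; /* unused dimensions are passed as -1 */
    integer nb = ilaenv_(&ispec, "SGEQRF", " ", &m, &n, &nm1, &nm1,
                         (ftnlen) 6, (ftnlen) 1);
    std::printf("suggested block size for SGEQRF: %ld\n", (long) nb);
    return 0;
}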
integer isamax_ (integer *n, real *sx, integer *incx)
Definition at line 14323 of file lapackblas.cpp.
Referenced by slaed2_(), and slaed8_().
14324 { 14325 /* System generated locals */ 14326 integer ret_val, i__1; 14327 real r__1; 14328 /* Local variables */ 14329 static real smax; 14330 static integer i__, ix; 14331 /* finds the index of element having max. absolute value. 14332 jack dongarra, linpack, 3/11/78. 14333 modified 3/93 to return if incx .le. 0. 14334 modified 12/3/93, array(1) declarations changed to array(*) 14335 Parameter adjustments */ 14336 --sx; 14337 /* Function Body */ 14338 ret_val = 0; 14339 if (*n < 1 || *incx <= 0) { 14340 return ret_val; 14341 } 14342 ret_val = 1; 14343 if (*n == 1) { 14344 return ret_val; 14345 } 14346 if (*incx == 1) { 14347 goto L20; 14348 } 14349 /* code for increment not equal to 1 */ 14350 ix = 1; 14351 smax = dabs(sx[1]); 14352 ix += *incx; 14353 i__1 = *n; 14354 for (i__ = 2; i__ <= i__1; ++i__) { 14355 if ((r__1 = sx[ix], dabs(r__1)) <= smax) { 14356 goto L5; 14357 } 14358 ret_val = i__; 14359 smax = (r__1 = sx[ix], dabs(r__1)); 14360 L5: 14361 ix += *incx; 14362 /* L10: */ 14363 } 14364 return ret_val; 14365 /* code for increment equal to 1 */ 14366 L20: 14367 smax = dabs(sx[1]); 14368 i__1 = *n; 14369 for (i__ = 2; i__ <= i__1; ++i__) { 14370 if ((r__1 = sx[i__], dabs(r__1)) <= smax) { 14371 goto L30; 14372 } 14373 ret_val = i__; 14374 smax = (r__1 = sx[i__], dabs(r__1)); 14375 L30: 14376 ; 14377 } 14378 return ret_val; 14379 } /* isamax_ */
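isamax_ returns a 1-based, Fortran-style index rather than a 0-based C index, and on ties it keeps the first occurrence. A small hedged example, assuming the prototype is visible through lapackblas.h:

#include <cstdio>
#include "lapackblas.h"

int main() {
    real x[5] = { 1.f, -7.f, 3.f, 7.f, -2.f };
    integer n = 5, incx = 1;
    /* First element with the largest absolute value: the -7 at position 2. */
    integer idx = isamax_(&n, x, &incx);
    std::printf("index of max |x(i)|: %ld\n", (long) idx);  /* prints 2 */
    return 0;
}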
logical lsame_ (const char *ca, const char *cb)
Definition at line 814 of file lapackblas.cpp.
References integer.
Referenced by sbdsqr_(), sgemm_(), sgemv_(), sgesvd_(), slacpy_(), slamch_(), slange_(), slanst_(), slansy_(), slarf_(), slarfb_(), slarft_(), slascl_(), slaset_(), slasr_(), slasrt_(), slatrd_(), sorgbr_(), sorgtr_(), sorm2r_(), sormbr_(), sorml2_(), sormlq_(), sormqr_(), sstedc_(), ssteqr_(), sstevd_(), ssyev_(), ssymv_(), ssyr2_(), ssyr2k_(), ssytd2_(), ssytrd_(), strmm_(), and strmv_().
00815 { 00816 /* -- LAPACK auxiliary routine (version 3.0) -- 00817 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00818 Courant Institute, Argonne National Lab, and Rice University 00819 September 30, 1994 00820 00821 00822 Purpose 00823 ======= 00824 00825 LSAME returns .TRUE. if CA is the same letter as CB regardless of 00826 case. 00827 00828 Arguments 00829 ========= 00830 00831 CA (input) CHARACTER*1 00832 CB (input) CHARACTER*1 00833 CA and CB specify the single characters to be compared. 00834 00835 ===================================================================== 00836 00837 00838 00839 Test if the characters are equal */ 00840 /* System generated locals */ 00841 logical ret_val; 00842 /* Local variables */ 00843 static integer inta, intb, zcode; 00844 00845 00846 ret_val = *(unsigned char *)ca == *(unsigned char *)cb; 00847 if (ret_val) { 00848 return ret_val; 00849 } 00850 00851 /* Now test for equivalence if both characters are alphabetic. */ 00852 00853 zcode = 'Z'; 00854 00855 /* Use 'Z' rather than 'A' so that ASCII can be detected on Prime 00856 machines, on which ICHAR returns a value with bit 8 set. 00857 ICHAR('A') on Prime machines returns 193 which is the same as 00858 ICHAR('A') on an EBCDIC machine. */ 00859 00860 inta = *(unsigned char *)ca; 00861 intb = *(unsigned char *)cb; 00862 00863 if (zcode == 90 || zcode == 122) { 00864 00865 /* ASCII is assumed - ZCODE is the ASCII code of either lower o 00866 r 00867 upper case 'Z'. */ 00868 00869 if (inta >= 97 && inta <= 122) { 00870 inta += -32; 00871 } 00872 if (intb >= 97 && intb <= 122) { 00873 intb += -32; 00874 } 00875 00876 } else if (zcode == 233 || zcode == 169) { 00877 00878 /* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower 00879 or 00880 upper case 'Z'. */ 00881 00882 if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta 00883 >= 162 && inta <= 169) { 00884 inta += 64; 00885 } 00886 if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb 00887 >= 162 && intb <= 169) { 00888 intb += 64; 00889 } 00890 00891 } else if (zcode == 218 || zcode == 250) { 00892 00893 /* ASCII is assumed, on Prime machines - ZCODE is the ASCII cod 00894 e 00895 plus 128 of either lower or upper case 'Z'. */ 00896 00897 if (inta >= 225 && inta <= 250) { 00898 inta += -32; 00899 } 00900 if (intb >= 225 && intb <= 250) { 00901 intb += -32; 00902 } 00903 } 00904 ret_val = inta == intb; 00905 00906 /* RETURN 00907 00908 End of LSAME */ 00909 00910 return ret_val; 00911 } /* lsame_ */
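lsame_ is the case-insensitive single-character test used for every option argument (UPLO, TRANS, JOBZ, ...) in this file. A hedged sketch of its use, with the prototype assumed to come from lapackblas.h:

#include <cstdio>
#include "lapackblas.h"

int main() {
    std::printf("%d %d\n",
                (int) lsame_("u", "U"),   /* 1: same letter, case ignored */
                (int) lsame_("N", "T"));  /* 0: different letters */
    return 0;
}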
integer pow_ii (integer *ap, integer *bp)
Definition at line 952 of file lapackblas.cpp.
Referenced by slaed0_(), slaed7_(), slaeda_(), and sstedc_().
00954 { 00955 integer pow, x, n; 00956 unsigned long u; 00957 00958 x = *ap; 00959 n = *bp; 00960 00961 if (n <= 0) { 00962 if (n == 0 || x == 1) 00963 return 1; 00964 if (x != -1) 00965 return x != 0 ? 1/x : 0; 00966 n = -n; 00967 } 00968 u = n; 00969 for(pow = 1; ; ) 00970 { 00971 if(u & 01) 00972 pow *= x; 00973 if(u >>= 1) 00974 x *= x; 00975 else 00976 break; 00977 } 00978 return(pow); 00979 }
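pow_ii raises an integer to an integer power by repeated squaring, consuming one bit of the exponent per iteration; negative exponents collapse to 0, 1 or -1 because the reciprocal is taken in integer arithmetic. A brief sketch (prototype assumed from lapackblas.h):

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer base = 2, exp = 10;
    /* Computes 2**10 = 1024 in O(log exp) multiplications. */
    std::printf("%ld\n", (long) pow_ii(&base, &exp));
    return 0;
}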
double pow_ri (real *ap, integer *bp)
Definition at line 918 of file lapackblas.cpp.
Referenced by slaed6_(), slamc2_(), slamch_(), and slartg_().
00920 { 00921 double pow, x; 00922 integer n; 00923 unsigned long u; 00924 00925 pow = 1; 00926 x = *ap; 00927 n = *bp; 00928 00929 if(n != 0) 00930 { 00931 if(n < 0) 00932 { 00933 n = -n; 00934 x = 1/x; 00935 } 00936 for(u = n; ; ) 00937 { 00938 if(u & 01) 00939 pow *= x; 00940 if(u >>= 1) 00941 x *= x; 00942 else 00943 break; 00944 } 00945 } 00946 return(pow); 00947 }
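pow_ri is the same binary-exponentiation loop for a real base, with a negative exponent handled by inverting the base up front; note that it accumulates in double even though the base is a real. A hedged example (prototype assumed from lapackblas.h):

#include <cstdio>
#include "lapackblas.h"

int main() {
    real base = 0.5f;
    integer exp = -3;
    /* (0.5)**(-3) = 8: the base is inverted once, then squared repeatedly. */
    std::printf("%g\n", pow_ri(&base, &exp));
    return 0;
}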
double r_sign (real *a, real *b)

int s_cat (char *lp, const char **rpp, integer *rnp, integer *np, ftnlen ll)
Definition at line 37 of file lapackblas.cpp.
Referenced by sgesvd_(), sormbr_(), sormlq_(), and sormqr_().
00039 { 00040 ftnlen i, n, nc; 00041 const char *f__rp; 00042 00043 n = (int)*np; 00044 for(i = 0 ; i < n ; ++i) { 00045 nc = ll; 00046 if(rnp[i] < nc) nc = rnp[i]; 00047 ll -= nc; 00048 f__rp = rpp[i]; 00049 while(--nc >= 0) *lp++ = *f__rp++; 00050 } 00051 while(--ll >= 0) 00052 *lp++ = ' '; 00053 return 0; 00054 }
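s_cat implements Fortran's fixed-length string concatenation: each source piece is given by a pointer and a length (no NUL terminators are involved), the pieces are copied in order, and whatever remains of the destination is blank-padded. A hedged sketch, assuming lapackblas.h provides the prototype; the option strings are illustrative:

#include <cstdio>
#include "lapackblas.h"

int main() {
    const char *pieces[2] = { "LT", "N" };  /* e.g. SIDE//TRANS pieces for an OPTS string */
    integer lengths[2] = { 2, 1 };
    integer npieces = 2;
    char dest[6];
    s_cat(dest, pieces, lengths, &npieces, (ftnlen) 6);
    std::printf("[%.6s]\n", dest);  /* prints [LTN   ] : result is blank-padded to length 6 */
    return 0;
}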
integer s_cmp (char *a0, const char *b0, ftnlen la, ftnlen lb)
Definition at line 1071 of file lapackblas.cpp.
Referenced by dcsrch_(), ilaenv_(), lnsrlb_(), mainlb_(), prn3lb_(), and setulb_().
01073 { 01074 register unsigned char *a, *aend, *b, *bend; 01075 a = (unsigned char *)a0; 01076 b = (unsigned char *)b0; 01077 aend = a + la; 01078 bend = b + lb; 01079 01080 if(la <= lb) 01081 { 01082 while(a < aend) 01083 if(*a != *b) 01084 return( *a - *b ); 01085 else 01086 { ++a; ++b; } 01087 01088 while(b < bend) 01089 if(*b != ' ') 01090 return( ' ' - *b ); 01091 else ++b; 01092 } 01093 01094 else 01095 { 01096 while(b < bend) 01097 if(*a == *b) 01098 { ++a; ++b; } 01099 else 01100 return( *a - *b ); 01101 while(a < aend) 01102 if(*a != ' ') 01103 return(*a - ' '); 01104 else ++a; 01105 } 01106 return(0); 01107 }
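s_cmp is Fortran-style comparison of two fixed-length strings: the shorter operand is treated as if blank-padded to the longer length, 0 means equal, and otherwise a signed character difference is returned. A hedged example (prototype assumed from lapackblas.h):

#include <cstdio>
#include "lapackblas.h"

int main() {
    char a[3] = { 'G', 'E', ' ' };   /* "GE " : trailing blank is ignored */
    integer r = s_cmp(a, "GE", (ftnlen) 3, (ftnlen) 2);
    std::printf("%ld\n", (long) r);  /* 0 : the strings compare equal */
    return 0;
}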
void s_copy (char *a, const char *b, ftnlen la, ftnlen lb)
Definition at line 1121 of file lapackblas.cpp.
Referenced by dcsrch_(), errclb_(), ilaenv_(), lnsrlb_(), mainlb_(), and prn2lb_().
01123 { 01124 register char *aend; 01125 const register char *bend; 01126 01127 aend = a + la; 01128 01129 if(la <= lb) 01130 #ifndef NO_OVERWRITE 01131 if (a <= b || a >= b + la) 01132 #endif 01133 while(a < aend) 01134 *a++ = *b++; 01135 #ifndef NO_OVERWRITE 01136 else 01137 for(b += la; a < aend; ) 01138 *--aend = *--b; 01139 #endif 01140 01141 else { 01142 bend = b + lb; 01143 #ifndef NO_OVERWRITE 01144 if (a <= b || a >= bend) 01145 #endif 01146 while(b < bend) 01147 *a++ = *b++; 01148 #ifndef NO_OVERWRITE 01149 else { 01150 a += lb; 01151 while(b < bend) 01152 *--a = *--bend; 01153 a += lb; 01154 } 01155 #endif 01156 while(a < aend) 01157 *a++ = ' '; 01158 } 01159 }
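s_copy likewise copies a fixed-length Fortran string, blank-padding rather than NUL-terminating the destination, and it tolerates overlapping operands unless NO_OVERWRITE is defined. A hedged sketch (prototype assumed from lapackblas.h):

#include <cstdio>
#include "lapackblas.h"

int main() {
    char dest[6];
    s_copy(dest, "SY", (ftnlen) 6, (ftnlen) 2);
    std::printf("[%.6s]\n", dest);  /* prints [SY    ] : remainder is filled with blanks */
    return 0;
}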
int saxpy_ (integer *n, real *sa, real *sx, integer *incx, real *sy, integer *incy)
Definition at line 994 of file lapackblas.cpp.
References integer.
Referenced by EMAN::PCA::dopca_ooc(), EMAN::PCA::Lanczos(), EMAN::PCAlarge::Lanczos(), EMAN::PCA::Lanczos_ooc(), slatrd_(), and ssytd2_().
00996 { 00997 /* System generated locals */ 00998 integer i__1; 00999 /* Local variables */ 01000 static integer i__, m, ix, iy, mp1; 01001 /* constant times a vector plus a vector. 01002 uses unrolled loop for increments equal to one. 01003 jack dongarra, linpack, 3/11/78. 01004 modified 12/3/93, array(1) declarations changed to array(*) 01005 Parameter adjustments */ 01006 --sy; 01007 --sx; 01008 /* Function Body */ 01009 if (*n <= 0) { 01010 return 0; 01011 } 01012 if (*sa == 0.f) { 01013 return 0; 01014 } 01015 if (*incx == 1 && *incy == 1) { 01016 goto L20; 01017 } 01018 /* code for unequal increments or equal increments 01019 not equal to 1 */ 01020 ix = 1; 01021 iy = 1; 01022 if (*incx < 0) { 01023 ix = (-(*n) + 1) * *incx + 1; 01024 } 01025 if (*incy < 0) { 01026 iy = (-(*n) + 1) * *incy + 1; 01027 } 01028 i__1 = *n; 01029 for (i__ = 1; i__ <= i__1; ++i__) { 01030 sy[iy] += *sa * sx[ix]; 01031 ix += *incx; 01032 iy += *incy; 01033 /* L10: */ 01034 } 01035 return 0; 01036 /* code for both increments equal to 1 01037 clean-up loop */ 01038 L20: 01039 m = *n % 4; 01040 if (m == 0) { 01041 goto L40; 01042 } 01043 i__1 = m; 01044 for (i__ = 1; i__ <= i__1; ++i__) { 01045 sy[i__] += *sa * sx[i__]; 01046 /* L30: */ 01047 } 01048 if (*n < 4) { 01049 return 0; 01050 } 01051 L40: 01052 mp1 = m + 1; 01053 i__1 = *n; 01054 for (i__ = mp1; i__ <= i__1; i__ += 4) { 01055 sy[i__] += *sa * sx[i__]; 01056 sy[i__ + 1] += *sa * sx[i__ + 1]; 01057 sy[i__ + 2] += *sa * sx[i__ + 2]; 01058 sy[i__ + 3] += *sa * sx[i__ + 3]; 01059 /* L50: */ 01060 } 01061 return 0; 01062 } /* saxpy_ */
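saxpy_ computes y := sa*x + y, using a 4-way unrolled loop when both strides are 1 and a strided loop otherwise. A hedged example with unit strides (prototype assumed from lapackblas.h):

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer n = 4, inc = 1;
    real a = 2.f;
    real x[4] = { 1.f, 2.f, 3.f, 4.f };
    real y[4] = { 10.f, 10.f, 10.f, 10.f };
    saxpy_(&n, &a, x, &inc, y, &inc);
    for (int i = 0; i < 4; ++i) std::printf("%g ", y[i]);  /* 12 14 16 18 */
    std::printf("\n");
    return 0;
}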
int sbdsqr_ (const char *uplo, integer *n, integer *ncvt, integer *nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real *u, integer *ldu, real *c__, integer *ldc, real *work, integer *info)
Definition at line 22954 of file lapackblas.cpp.
References c___ref, dabs, df2cmax, df2cmin, f2cmax, integer, lsame_(), r_sign(), slamch_(), slartg_(), slas2_(), slasq1_(), slasr_(), slasv2_(), sqrt(), srot_(), sscal_(), sswap_(), u_ref, vt_ref, and xerbla_().
Referenced by sgesvd_().
22957 { 22958 /* System generated locals */ 22959 integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, 22960 i__2; 22961 real r__1, r__2, r__3, r__4; 22962 doublereal d__1; 22963 22964 /* Builtin functions */ 22965 // double pow_dd(doublereal *, doublereal *), sqrt(doublereal), r_sign(real *, real *); 22966 22967 /* Local variables */ 22968 static real abse; 22969 static integer idir; 22970 static real abss; 22971 static integer oldm; 22972 static real cosl; 22973 static integer isub, iter; 22974 static real unfl, sinl, cosr, smin, smax, sinr; 22975 extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 22976 integer *, real *, real *), slas2_(real *, real *, real *, real *, 22977 real *); 22978 static real f, g, h__; 22979 static integer i__, j, m; 22980 static real r__; 22981 extern logical lsame_(const char *, const char *); 22982 static real oldcs; 22983 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); 22984 static integer oldll; 22985 static real shift, sigmn, oldsn; 22986 static integer maxit; 22987 static real sminl; 22988 extern /* Subroutine */ int slasr_(const char *, const char *, const char *, integer *, 22989 integer *, real *, real *, real *, integer *); 22990 static real sigmx; 22991 static logical lower; 22992 extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 22993 integer *), slasq1_(integer *, real *, real *, real *, integer *), 22994 slasv2_(real *, real *, real *, real *, real *, real *, real *, 22995 real *, real *); 22996 static real cs; 22997 static integer ll; 22998 static real sn, mu; 22999 extern doublereal slamch_(const char *); 23000 extern /* Subroutine */ int xerbla_(const char *, integer *); 23001 static real sminoa; 23002 extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * 23003 ); 23004 static real thresh; 23005 static logical rotate; 23006 static real sminlo; 23007 static integer nm1; 23008 static real tolmul; 23009 static integer nm12, nm13, lll; 23010 static real eps, sll, tol; 23011 23012 23013 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] 23014 #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] 23015 #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] 23016 23017 23018 /* -- LAPACK routine (version 3.0) -- 23019 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 23020 Courant Institute, Argonne National Lab, and Rice University 23021 October 31, 1999 23022 23023 23024 Purpose 23025 ======= 23026 23027 SBDSQR computes the singular value decomposition (SVD) of a real 23028 N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' 23029 denotes the transpose of P), where S is a diagonal matrix with 23030 non-negative diagonal elements (the singular values of B), and Q 23031 and P are orthogonal matrices. 23032 23033 The routine computes S, and optionally computes U * Q, P' * VT, 23034 or Q' * C, for given real input matrices U, VT, and C. 23035 23036 See "Computing Small Singular Values of Bidiagonal Matrices With 23037 Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, 23038 LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, 23039 no. 5, pp. 873-912, Sept 1990) and 23040 "Accurate singular values and differential qd algorithms," by 23041 B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics 23042 Department, University of California at Berkeley, July 1992 23043 for a detailed description of the algorithm. 
23044 23045 Arguments 23046 ========= 23047 23048 UPLO (input) CHARACTER*1 23049 = 'U': B is upper bidiagonal; 23050 = 'L': B is lower bidiagonal. 23051 23052 N (input) INTEGER 23053 The order of the matrix B. N >= 0. 23054 23055 NCVT (input) INTEGER 23056 The number of columns of the matrix VT. NCVT >= 0. 23057 23058 NRU (input) INTEGER 23059 The number of rows of the matrix U. NRU >= 0. 23060 23061 NCC (input) INTEGER 23062 The number of columns of the matrix C. NCC >= 0. 23063 23064 D (input/output) REAL array, dimension (N) 23065 On entry, the n diagonal elements of the bidiagonal matrix B. 23066 On exit, if INFO=0, the singular values of B in decreasing 23067 order. 23068 23069 E (input/output) REAL array, dimension (N) 23070 On entry, the elements of E contain the 23071 offdiagonal elements of the bidiagonal matrix whose SVD 23072 is desired. On normal exit (INFO = 0), E is destroyed. 23073 If the algorithm does not converge (INFO > 0), D and E 23074 will contain the diagonal and superdiagonal elements of a 23075 bidiagonal matrix orthogonally equivalent to the one given 23076 as input. E(N) is used for workspace. 23077 23078 VT (input/output) REAL array, dimension (LDVT, NCVT) 23079 On entry, an N-by-NCVT matrix VT. 23080 On exit, VT is overwritten by P' * VT. 23081 VT is not referenced if NCVT = 0. 23082 23083 LDVT (input) INTEGER 23084 The leading dimension of the array VT. 23085 LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. 23086 23087 U (input/output) REAL array, dimension (LDU, N) 23088 On entry, an NRU-by-N matrix U. 23089 On exit, U is overwritten by U * Q. 23090 U is not referenced if NRU = 0. 23091 23092 LDU (input) INTEGER 23093 The leading dimension of the array U. LDU >= max(1,NRU). 23094 23095 C (input/output) REAL array, dimension (LDC, NCC) 23096 On entry, an N-by-NCC matrix C. 23097 On exit, C is overwritten by Q' * C. 23098 C is not referenced if NCC = 0. 23099 23100 LDC (input) INTEGER 23101 The leading dimension of the array C. 23102 LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. 23103 23104 WORK (workspace) REAL array, dimension (4*N) 23105 23106 INFO (output) INTEGER 23107 = 0: successful exit 23108 < 0: If INFO = -i, the i-th argument had an illegal value 23109 > 0: the algorithm did not converge; D and E contain the 23110 elements of a bidiagonal matrix which is orthogonally 23111 similar to the input matrix B; if INFO = i, i 23112 elements of E have not converged to zero. 23113 23114 Internal Parameters 23115 =================== 23116 23117 TOLMUL REAL, default = max(10,min(100,EPS**(-1/8))) 23118 TOLMUL controls the convergence criterion of the QR loop. 23119 If it is positive, TOLMUL*EPS is the desired relative 23120 precision in the computed singular values. 23121 If it is negative, abs(TOLMUL*EPS*sigma_max) is the 23122 desired absolute accuracy in the computed singular 23123 values (corresponds to relative accuracy 23124 abs(TOLMUL*EPS) in the largest singular value. 23125 abs(TOLMUL) should be between 1 and 1/EPS, and preferably 23126 between 10 (for fast convergence) and .1/EPS 23127 (for there to be some accuracy in the results). 23128 Default is to lose at either one eighth or 2 of the 23129 available decimal digits in each computed singular value 23130 (whichever is smaller). 23131 23132 MAXITR INTEGER, default = 6 23133 MAXITR controls the maximum number of passes of the 23134 algorithm through its inner loop. The algorithms stops 23135 (and so fails to converge) if the number of passes 23136 through the inner loop exceeds MAXITR*N**2. 
23137 23138 ===================================================================== 23139 23140 23141 Test the input parameters. 23142 23143 Parameter adjustments */ 23144 --d__; 23145 --e; 23146 vt_dim1 = *ldvt; 23147 vt_offset = 1 + vt_dim1 * 1; 23148 vt -= vt_offset; 23149 u_dim1 = *ldu; 23150 u_offset = 1 + u_dim1 * 1; 23151 u -= u_offset; 23152 c_dim1 = *ldc; 23153 c_offset = 1 + c_dim1 * 1; 23154 c__ -= c_offset; 23155 --work; 23156 23157 /* Function Body */ 23158 *info = 0; 23159 lower = lsame_(uplo, "L"); 23160 if (! lsame_(uplo, "U") && ! lower) { 23161 *info = -1; 23162 } else if (*n < 0) { 23163 *info = -2; 23164 } else if (*ncvt < 0) { 23165 *info = -3; 23166 } else if (*nru < 0) { 23167 *info = -4; 23168 } else if (*ncc < 0) { 23169 *info = -5; 23170 } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < f2cmax(1,*n)) { 23171 *info = -9; 23172 } else if (*ldu < f2cmax(1,*nru)) { 23173 *info = -11; 23174 } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < f2cmax(1,*n)) { 23175 *info = -13; 23176 } 23177 if (*info != 0) { 23178 i__1 = -(*info); 23179 xerbla_("SBDSQR", &i__1); 23180 return 0; 23181 } 23182 if (*n == 0) { 23183 return 0; 23184 } 23185 if (*n == 1) { 23186 goto L160; 23187 } 23188 23189 /* ROTATE is true if any singular vectors desired, false otherwise */ 23190 23191 rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; 23192 23193 /* If no singular vectors desired, use qd algorithm */ 23194 23195 if (! rotate) { 23196 slasq1_(n, &d__[1], &e[1], &work[1], info); 23197 return 0; 23198 } 23199 23200 nm1 = *n - 1; 23201 nm12 = nm1 + nm1; 23202 nm13 = nm12 + nm1; 23203 idir = 0; 23204 23205 /* Get machine constants */ 23206 23207 eps = slamch_("Epsilon"); 23208 unfl = slamch_("Safe minimum"); 23209 23210 /* If matrix lower bidiagonal, rotate to be upper bidiagonal 23211 by applying Givens rotations on the left */ 23212 23213 if (lower) { 23214 i__1 = *n - 1; 23215 for (i__ = 1; i__ <= i__1; ++i__) { 23216 slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); 23217 d__[i__] = r__; 23218 e[i__] = sn * d__[i__ + 1]; 23219 d__[i__ + 1] = cs * d__[i__ + 1]; 23220 work[i__] = cs; 23221 work[nm1 + i__] = sn; 23222 /* L10: */ 23223 } 23224 23225 /* Update singular vectors if desired */ 23226 23227 if (*nru > 0) { 23228 slasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], 23229 ldu); 23230 } 23231 if (*ncc > 0) { 23232 slasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], 23233 ldc); 23234 } 23235 } 23236 23237 /* Compute singular values to relative accuracy TOL 23238 (By setting TOL to be negative, algorithm will compute 23239 singular values to absolute accuracy ABS(TOL)*norm(input matrix)) 23240 23241 Computing MAX 23242 Computing MIN */ 23243 d__1 = (doublereal) eps; 23244 //chao changed pow_dd to pow 23245 r__3 = 100.f, r__4 = pow(d__1, c_b15); 23246 r__1 = 10.f, r__2 = df2cmin(r__3,r__4); 23247 tolmul = df2cmax(r__1,r__2); 23248 tol = tolmul * eps; 23249 23250 /* Compute approximate maximum, minimum singular values */ 23251 23252 smax = 0.f; 23253 i__1 = *n; 23254 for (i__ = 1; i__ <= i__1; ++i__) { 23255 /* Computing MAX */ 23256 r__2 = smax, r__3 = (r__1 = d__[i__], dabs(r__1)); 23257 smax = df2cmax(r__2,r__3); 23258 /* L20: */ 23259 } 23260 i__1 = *n - 1; 23261 for (i__ = 1; i__ <= i__1; ++i__) { 23262 /* Computing MAX */ 23263 r__2 = smax, r__3 = (r__1 = e[i__], dabs(r__1)); 23264 smax = df2cmax(r__2,r__3); 23265 /* L30: */ 23266 } 23267 sminl = 0.f; 23268 if (tol >= 0.f) { 23269 23270 /* Relative accuracy desired */ 23271 23272 sminoa = dabs(d__[1]); 23273 
if (sminoa == 0.f) { 23274 goto L50; 23275 } 23276 mu = sminoa; 23277 i__1 = *n; 23278 for (i__ = 2; i__ <= i__1; ++i__) { 23279 mu = (r__2 = d__[i__], dabs(r__2)) * (mu / (mu + (r__1 = e[i__ - 23280 1], dabs(r__1)))); 23281 sminoa = df2cmin(sminoa,mu); 23282 if (sminoa == 0.f) { 23283 goto L50; 23284 } 23285 /* L40: */ 23286 } 23287 L50: 23288 sminoa /= sqrt((real) (*n)); 23289 /* Computing MAX */ 23290 r__1 = tol * sminoa, r__2 = *n * 6 * *n * unfl; 23291 thresh = df2cmax(r__1,r__2); 23292 } else { 23293 23294 /* Absolute accuracy desired 23295 23296 Computing MAX */ 23297 r__1 = dabs(tol) * smax, r__2 = *n * 6 * *n * unfl; 23298 thresh = df2cmax(r__1,r__2); 23299 } 23300 23301 /* Prepare for main iteration loop for the singular values 23302 (MAXIT is the maximum number of passes through the inner 23303 loop permitted before nonconvergence signalled.) */ 23304 23305 maxit = *n * 6 * *n; 23306 iter = 0; 23307 oldll = -1; 23308 oldm = -1; 23309 23310 /* M points to last element of unconverged part of matrix */ 23311 23312 m = *n; 23313 23314 /* Begin main iteration loop */ 23315 23316 L60: 23317 23318 /* Check for convergence or exceeding iteration count */ 23319 23320 if (m <= 1) { 23321 goto L160; 23322 } 23323 if (iter > maxit) { 23324 goto L200; 23325 } 23326 23327 /* Find diagonal block of matrix to work on */ 23328 23329 if (tol < 0.f && (r__1 = d__[m], dabs(r__1)) <= thresh) { 23330 d__[m] = 0.f; 23331 } 23332 smax = (r__1 = d__[m], dabs(r__1)); 23333 smin = smax; 23334 i__1 = m - 1; 23335 for (lll = 1; lll <= i__1; ++lll) { 23336 ll = m - lll; 23337 abss = (r__1 = d__[ll], dabs(r__1)); 23338 abse = (r__1 = e[ll], dabs(r__1)); 23339 if (tol < 0.f && abss <= thresh) { 23340 d__[ll] = 0.f; 23341 } 23342 if (abse <= thresh) { 23343 goto L80; 23344 } 23345 smin = df2cmin(smin,abss); 23346 /* Computing MAX */ 23347 r__1 = f2cmax(smax,abss); 23348 smax = df2cmax(r__1,abse); 23349 /* L70: */ 23350 } 23351 ll = 0; 23352 goto L90; 23353 L80: 23354 e[ll] = 0.f; 23355 23356 /* Matrix splits since E(LL) = 0 */ 23357 23358 if (ll == m - 1) { 23359 23360 /* Convergence of bottom singular value, return to top of loop */ 23361 23362 --m; 23363 goto L60; 23364 } 23365 L90: 23366 ++ll; 23367 23368 /* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ 23369 23370 if (ll == m - 1) { 23371 23372 /* 2 by 2 block, handle separately */ 23373 23374 slasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, 23375 &sinl, &cosl); 23376 d__[m - 1] = sigmx; 23377 e[m - 1] = 0.f; 23378 d__[m] = sigmn; 23379 23380 /* Compute singular vectors, if desired */ 23381 23382 if (*ncvt > 0) { 23383 srot_(ncvt, &vt_ref(m - 1, 1), ldvt, &vt_ref(m, 1), ldvt, &cosr, & 23384 sinr); 23385 } 23386 if (*nru > 0) { 23387 srot_(nru, &u_ref(1, m - 1), &c__1, &u_ref(1, m), &c__1, &cosl, & 23388 sinl); 23389 } 23390 if (*ncc > 0) { 23391 srot_(ncc, &c___ref(m - 1, 1), ldc, &c___ref(m, 1), ldc, &cosl, & 23392 sinl); 23393 } 23394 m += -2; 23395 goto L60; 23396 } 23397 23398 /* If working on new submatrix, choose shift direction 23399 (from larger end diagonal element towards smaller) */ 23400 23401 if (ll > oldm || m < oldll) { 23402 if ((r__1 = d__[ll], dabs(r__1)) >= (r__2 = d__[m], dabs(r__2))) { 23403 23404 /* Chase bulge from top (big end) to bottom (small end) */ 23405 23406 idir = 1; 23407 } else { 23408 23409 /* Chase bulge from bottom (big end) to top (small end) */ 23410 23411 idir = 2; 23412 } 23413 } 23414 23415 /* Apply convergence tests */ 23416 23417 if (idir == 1) { 23418 23419 /* Run convergence test in 
forward direction 23420 First apply standard test to bottom of matrix */ 23421 23422 if ((r__2 = e[m - 1], dabs(r__2)) <= dabs(tol) * (r__1 = d__[m], dabs( 23423 r__1)) || tol < 0.f && (r__3 = e[m - 1], dabs(r__3)) <= 23424 thresh) { 23425 e[m - 1] = 0.f; 23426 goto L60; 23427 } 23428 23429 if (tol >= 0.f) { 23430 23431 /* If relative accuracy desired, 23432 apply convergence criterion forward */ 23433 23434 mu = (r__1 = d__[ll], dabs(r__1)); 23435 sminl = mu; 23436 i__1 = m - 1; 23437 for (lll = ll; lll <= i__1; ++lll) { 23438 if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) { 23439 e[lll] = 0.f; 23440 goto L60; 23441 } 23442 sminlo = sminl; 23443 mu = (r__2 = d__[lll + 1], dabs(r__2)) * (mu / (mu + (r__1 = 23444 e[lll], dabs(r__1)))); 23445 sminl = df2cmin(sminl,mu); 23446 /* L100: */ 23447 } 23448 } 23449 23450 } else { 23451 23452 /* Run convergence test in backward direction 23453 First apply standard test to top of matrix */ 23454 23455 if ((r__2 = e[ll], dabs(r__2)) <= dabs(tol) * (r__1 = d__[ll], dabs( 23456 r__1)) || tol < 0.f && (r__3 = e[ll], dabs(r__3)) <= thresh) { 23457 e[ll] = 0.f; 23458 goto L60; 23459 } 23460 23461 if (tol >= 0.f) { 23462 23463 /* If relative accuracy desired, 23464 apply convergence criterion backward */ 23465 23466 mu = (r__1 = d__[m], dabs(r__1)); 23467 sminl = mu; 23468 i__1 = ll; 23469 for (lll = m - 1; lll >= i__1; --lll) { 23470 if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) { 23471 e[lll] = 0.f; 23472 goto L60; 23473 } 23474 sminlo = sminl; 23475 mu = (r__2 = d__[lll], dabs(r__2)) * (mu / (mu + (r__1 = e[ 23476 lll], dabs(r__1)))); 23477 sminl = df2cmin(sminl,mu); 23478 /* L110: */ 23479 } 23480 } 23481 } 23482 oldll = ll; 23483 oldm = m; 23484 23485 /* Compute shift. First, test if shifting would ruin relative 23486 accuracy, and if so set the shift to zero. 
23487 23488 Computing MAX */ 23489 r__1 = eps, r__2 = tol * .01f; 23490 if (tol >= 0.f && *n * tol * (sminl / smax) <= df2cmax(r__1,r__2)) { 23491 23492 /* Use a zero shift to avoid loss of relative accuracy */ 23493 23494 shift = 0.f; 23495 } else { 23496 23497 /* Compute the shift from 2-by-2 block at end of matrix */ 23498 23499 if (idir == 1) { 23500 sll = (r__1 = d__[ll], dabs(r__1)); 23501 slas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); 23502 } else { 23503 sll = (r__1 = d__[m], dabs(r__1)); 23504 slas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); 23505 } 23506 23507 /* Test if shift negligible, and if so set to zero */ 23508 23509 if (sll > 0.f) { 23510 /* Computing 2nd power */ 23511 r__1 = shift / sll; 23512 if (r__1 * r__1 < eps) { 23513 shift = 0.f; 23514 } 23515 } 23516 } 23517 23518 /* Increment iteration count */ 23519 23520 iter = iter + m - ll; 23521 23522 /* If SHIFT = 0, do simplified QR iteration */ 23523 23524 if (shift == 0.f) { 23525 if (idir == 1) { 23526 23527 /* Chase bulge from top to bottom 23528 Save cosines and sines for later singular vector updates */ 23529 23530 cs = 1.f; 23531 oldcs = 1.f; 23532 i__1 = m - 1; 23533 for (i__ = ll; i__ <= i__1; ++i__) { 23534 r__1 = d__[i__] * cs; 23535 slartg_(&r__1, &e[i__], &cs, &sn, &r__); 23536 if (i__ > ll) { 23537 e[i__ - 1] = oldsn * r__; 23538 } 23539 r__1 = oldcs * r__; 23540 r__2 = d__[i__ + 1] * sn; 23541 slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]); 23542 work[i__ - ll + 1] = cs; 23543 work[i__ - ll + 1 + nm1] = sn; 23544 work[i__ - ll + 1 + nm12] = oldcs; 23545 work[i__ - ll + 1 + nm13] = oldsn; 23546 /* L120: */ 23547 } 23548 h__ = d__[m] * cs; 23549 d__[m] = h__ * oldcs; 23550 e[m - 1] = h__ * oldsn; 23551 23552 /* Update singular vectors */ 23553 23554 if (*ncvt > 0) { 23555 i__1 = m - ll + 1; 23556 slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], & 23557 vt_ref(ll, 1), ldvt); 23558 } 23559 if (*nru > 0) { 23560 i__1 = m - ll + 1; 23561 slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 23562 + 1], &u_ref(1, ll), ldu); 23563 } 23564 if (*ncc > 0) { 23565 i__1 = m - ll + 1; 23566 slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 23567 + 1], &c___ref(ll, 1), ldc); 23568 } 23569 23570 /* Test convergence */ 23571 23572 if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) { 23573 e[m - 1] = 0.f; 23574 } 23575 23576 } else { 23577 23578 /* Chase bulge from bottom to top 23579 Save cosines and sines for later singular vector updates */ 23580 23581 cs = 1.f; 23582 oldcs = 1.f; 23583 i__1 = ll + 1; 23584 for (i__ = m; i__ >= i__1; --i__) { 23585 r__1 = d__[i__] * cs; 23586 slartg_(&r__1, &e[i__ - 1], &cs, &sn, &r__); 23587 if (i__ < m) { 23588 e[i__] = oldsn * r__; 23589 } 23590 r__1 = oldcs * r__; 23591 r__2 = d__[i__ - 1] * sn; 23592 slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]); 23593 work[i__ - ll] = cs; 23594 work[i__ - ll + nm1] = -sn; 23595 work[i__ - ll + nm12] = oldcs; 23596 work[i__ - ll + nm13] = -oldsn; 23597 /* L130: */ 23598 } 23599 h__ = d__[ll] * cs; 23600 d__[ll] = h__ * oldcs; 23601 e[ll] = h__ * oldsn; 23602 23603 /* Update singular vectors */ 23604 23605 if (*ncvt > 0) { 23606 i__1 = m - ll + 1; 23607 slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ 23608 nm13 + 1], &vt_ref(ll, 1), ldvt); 23609 } 23610 if (*nru > 0) { 23611 i__1 = m - ll + 1; 23612 slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u_ref( 23613 1, ll), ldu); 23614 } 23615 if (*ncc > 0) { 23616 i__1 = m - ll + 1; 23617 slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], & 
23618 c___ref(ll, 1), ldc); 23619 } 23620 23621 /* Test convergence */ 23622 23623 if ((r__1 = e[ll], dabs(r__1)) <= thresh) { 23624 e[ll] = 0.f; 23625 } 23626 } 23627 } else { 23628 23629 /* Use nonzero shift */ 23630 23631 if (idir == 1) { 23632 23633 /* Chase bulge from top to bottom 23634 Save cosines and sines for later singular vector updates */ 23635 23636 f = ((r__1 = d__[ll], dabs(r__1)) - shift) * (r_sign(&c_b49, &d__[ 23637 ll]) + shift / d__[ll]); 23638 g = e[ll]; 23639 i__1 = m - 1; 23640 for (i__ = ll; i__ <= i__1; ++i__) { 23641 slartg_(&f, &g, &cosr, &sinr, &r__); 23642 if (i__ > ll) { 23643 e[i__ - 1] = r__; 23644 } 23645 f = cosr * d__[i__] + sinr * e[i__]; 23646 e[i__] = cosr * e[i__] - sinr * d__[i__]; 23647 g = sinr * d__[i__ + 1]; 23648 d__[i__ + 1] = cosr * d__[i__ + 1]; 23649 slartg_(&f, &g, &cosl, &sinl, &r__); 23650 d__[i__] = r__; 23651 f = cosl * e[i__] + sinl * d__[i__ + 1]; 23652 d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; 23653 if (i__ < m - 1) { 23654 g = sinl * e[i__ + 1]; 23655 e[i__ + 1] = cosl * e[i__ + 1]; 23656 } 23657 work[i__ - ll + 1] = cosr; 23658 work[i__ - ll + 1 + nm1] = sinr; 23659 work[i__ - ll + 1 + nm12] = cosl; 23660 work[i__ - ll + 1 + nm13] = sinl; 23661 /* L140: */ 23662 } 23663 e[m - 1] = f; 23664 23665 /* Update singular vectors */ 23666 23667 if (*ncvt > 0) { 23668 i__1 = m - ll + 1; 23669 slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], & 23670 vt_ref(ll, 1), ldvt); 23671 } 23672 if (*nru > 0) { 23673 i__1 = m - ll + 1; 23674 slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 23675 + 1], &u_ref(1, ll), ldu); 23676 } 23677 if (*ncc > 0) { 23678 i__1 = m - ll + 1; 23679 slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 23680 + 1], &c___ref(ll, 1), ldc); 23681 } 23682 23683 /* Test convergence */ 23684 23685 if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) { 23686 e[m - 1] = 0.f; 23687 } 23688 23689 } else { 23690 23691 /* Chase bulge from bottom to top 23692 Save cosines and sines for later singular vector updates */ 23693 23694 f = ((r__1 = d__[m], dabs(r__1)) - shift) * (r_sign(&c_b49, &d__[ 23695 m]) + shift / d__[m]); 23696 g = e[m - 1]; 23697 i__1 = ll + 1; 23698 for (i__ = m; i__ >= i__1; --i__) { 23699 slartg_(&f, &g, &cosr, &sinr, &r__); 23700 if (i__ < m) { 23701 e[i__] = r__; 23702 } 23703 f = cosr * d__[i__] + sinr * e[i__ - 1]; 23704 e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; 23705 g = sinr * d__[i__ - 1]; 23706 d__[i__ - 1] = cosr * d__[i__ - 1]; 23707 slartg_(&f, &g, &cosl, &sinl, &r__); 23708 d__[i__] = r__; 23709 f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; 23710 d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; 23711 if (i__ > ll + 1) { 23712 g = sinl * e[i__ - 2]; 23713 e[i__ - 2] = cosl * e[i__ - 2]; 23714 } 23715 work[i__ - ll] = cosr; 23716 work[i__ - ll + nm1] = -sinr; 23717 work[i__ - ll + nm12] = cosl; 23718 work[i__ - ll + nm13] = -sinl; 23719 /* L150: */ 23720 } 23721 e[ll] = f; 23722 23723 /* Test convergence */ 23724 23725 if ((r__1 = e[ll], dabs(r__1)) <= thresh) { 23726 e[ll] = 0.f; 23727 } 23728 23729 /* Update singular vectors if desired */ 23730 23731 if (*ncvt > 0) { 23732 i__1 = m - ll + 1; 23733 slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ 23734 nm13 + 1], &vt_ref(ll, 1), ldvt); 23735 } 23736 if (*nru > 0) { 23737 i__1 = m - ll + 1; 23738 slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u_ref( 23739 1, ll), ldu); 23740 } 23741 if (*ncc > 0) { 23742 i__1 = m - ll + 1; 23743 slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], & 
23744 c___ref(ll, 1), ldc); 23745 } 23746 } 23747 } 23748 23749 /* QR iteration finished, go back and check convergence */ 23750 23751 goto L60; 23752 23753 /* All singular values converged, so make them positive */ 23754 23755 L160: 23756 i__1 = *n; 23757 for (i__ = 1; i__ <= i__1; ++i__) { 23758 if (d__[i__] < 0.f) { 23759 d__[i__] = -d__[i__]; 23760 23761 /* Change sign of singular vectors, if desired */ 23762 23763 if (*ncvt > 0) { 23764 sscal_(ncvt, &c_b72, &vt_ref(i__, 1), ldvt); 23765 } 23766 } 23767 /* L170: */ 23768 } 23769 23770 /* Sort the singular values into decreasing order (insertion sort on 23771 singular values, but only one transposition per singular vector) */ 23772 23773 i__1 = *n - 1; 23774 for (i__ = 1; i__ <= i__1; ++i__) { 23775 23776 /* Scan for smallest D(I) */ 23777 23778 isub = 1; 23779 smin = d__[1]; 23780 i__2 = *n + 1 - i__; 23781 for (j = 2; j <= i__2; ++j) { 23782 if (d__[j] <= smin) { 23783 isub = j; 23784 smin = d__[j]; 23785 } 23786 /* L180: */ 23787 } 23788 if (isub != *n + 1 - i__) { 23789 23790 /* Swap singular values and vectors */ 23791 23792 d__[isub] = d__[*n + 1 - i__]; 23793 d__[*n + 1 - i__] = smin; 23794 if (*ncvt > 0) { 23795 sswap_(ncvt, &vt_ref(isub, 1), ldvt, &vt_ref(*n + 1 - i__, 1), 23796 ldvt); 23797 } 23798 if (*nru > 0) { 23799 sswap_(nru, &u_ref(1, isub), &c__1, &u_ref(1, *n + 1 - i__), & 23800 c__1); 23801 } 23802 if (*ncc > 0) { 23803 sswap_(ncc, &c___ref(isub, 1), ldc, &c___ref(*n + 1 - i__, 1), 23804 ldc); 23805 } 23806 } 23807 /* L190: */ 23808 } 23809 goto L220; 23810 23811 /* Maximum number of iterations exceeded, failure to converge */ 23812 23813 L200: 23814 *info = 0; 23815 i__1 = *n - 1; 23816 for (i__ = 1; i__ <= i__1; ++i__) { 23817 if (e[i__] != 0.f) { 23818 ++(*info); 23819 } 23820 /* L210: */ 23821 } 23822 L220: 23823 return 0; 23824 23825 /* End of SBDSQR */ 23826 23827 } /* sbdsqr_ */
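Usage sketch (illustrative only, not part of the generated documentation): the call below computes only the singular values of a small upper bidiagonal matrix, so NCVT = NRU = NCC = 0 and the routine falls through to the qd algorithm (slasq1_) as described above. It assumes the f2c-style typedefs (integer, real) and the sbdsqr_ prototype supplied by lapackblas.h; the one-element arrays merely satisfy the unreferenced VT, U and C arguments.

#include <cstdio>
#include "lapackblas.h"

int main()
{
    integer n = 3, ncvt = 0, nru = 0, ncc = 0;
    integer ldvt = 1, ldu = 1, ldc = 1, info = 0;
    char uplo[2] = "U";
    real d[3] = {3.f, 2.f, 1.f};    /* diagonal of B */
    real e[3] = {0.5f, 0.5f, 0.f};  /* superdiagonal; E(N) is workspace */
    real vt[1], u[1], c[1];         /* not referenced when NCVT = NRU = NCC = 0 */
    real work[12];                  /* 4*N */

    sbdsqr_(uplo, &n, &ncvt, &nru, &ncc, d, e, vt, &ldvt, u, &ldu, c, &ldc, work, &info);
    if (info == 0)
        printf("singular values: %g %g %g\n", d[0], d[1], d[2]);
    return 0;
}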
Definition at line 1163 of file lapackblas.cpp.
References integer.
Referenced by EMAN::PCA::Lanczos_ooc(), slaed0_(), slaed1_(), slaed2_(), slaed3_(), slaed8_(), slaed9_(), slaeda_(), slarfb_(), and slasq1_().
01165 { 01166 /* System generated locals */ 01167 integer i__1; 01168 /* Local variables */ 01169 static integer i__, m, ix, iy, mp1; 01170 /* copies a vector, x, to a vector, y. 01171 uses unrolled loops for increments equal to 1. 01172 jack dongarra, linpack, 3/11/78. 01173 modified 12/3/93, array(1) declarations changed to array(*) 01174 Parameter adjustments */ 01175 --sy; 01176 --sx; 01177 /* Function Body */ 01178 if (*n <= 0) { 01179 return 0; 01180 } 01181 if (*incx == 1 && *incy == 1) { 01182 goto L20; 01183 } 01184 /* code for unequal increments or equal increments 01185 not equal to 1 */ 01186 ix = 1; 01187 iy = 1; 01188 if (*incx < 0) { 01189 ix = (-(*n) + 1) * *incx + 1; 01190 } 01191 if (*incy < 0) { 01192 iy = (-(*n) + 1) * *incy + 1; 01193 } 01194 i__1 = *n; 01195 for (i__ = 1; i__ <= i__1; ++i__) { 01196 sy[iy] = sx[ix]; 01197 ix += *incx; 01198 iy += *incy; 01199 /* L10: */ 01200 } 01201 return 0; 01202 /* code for both increments equal to 1 01203 clean-up loop */ 01204 L20: 01205 m = *n % 7; 01206 if (m == 0) { 01207 goto L40; 01208 } 01209 i__1 = m; 01210 for (i__ = 1; i__ <= i__1; ++i__) { 01211 sy[i__] = sx[i__]; 01212 /* L30: */ 01213 } 01214 if (*n < 7) { 01215 return 0; 01216 } 01217 L40: 01218 mp1 = m + 1; 01219 i__1 = *n; 01220 for (i__ = mp1; i__ <= i__1; i__ += 7) { 01221 sy[i__] = sx[i__]; 01222 sy[i__ + 1] = sx[i__ + 1]; 01223 sy[i__ + 2] = sx[i__ + 2]; 01224 sy[i__ + 3] = sx[i__ + 3]; 01225 sy[i__ + 4] = sx[i__ + 4]; 01226 sy[i__ + 5] = sx[i__ + 5]; 01227 sy[i__ + 6] = sx[i__ + 6]; 01228 /* L50: */ 01229 } 01230 return 0; 01231 } /* scopy_ */
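Usage sketch (illustrative only, assuming the integer/real typedefs and the scopy_ prototype from lapackblas.h): copy one contiguous vector into another with unit increments.

#include <cstdio>
#include "lapackblas.h"

int main()
{
    integer n = 5, incx = 1, incy = 1;
    real x[5] = {1.f, 2.f, 3.f, 4.f, 5.f};
    real y[5] = {0.f, 0.f, 0.f, 0.f, 0.f};

    scopy_(&n, x, &incx, y, &incy);            /* y := x */
    for (int i = 0; i < 5; ++i) printf("%g ", y[i]);
    printf("\n");
    return 0;
}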
doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy)
Definition at line 1236 of file lapackblas.cpp.
References integer.
Referenced by EMAN::PCA::Lanczos(), EMAN::PCAlarge::Lanczos(), EMAN::PCA::Lanczos_ooc(), slatrd_(), and ssytd2_().
01237 { 01238 /* System generated locals */ 01239 integer i__1; 01240 real ret_val; 01241 /* Local variables */ 01242 static integer i__, m; 01243 static real stemp; 01244 static integer ix, iy, mp1; 01245 /* forms the dot product of two vectors. 01246 uses unrolled loops for increments equal to one. 01247 jack dongarra, linpack, 3/11/78. 01248 modified 12/3/93, array(1) declarations changed to array(*) 01249 Parameter adjustments */ 01250 --sy; 01251 --sx; 01252 /* Function Body */ 01253 stemp = 0.f; 01254 ret_val = 0.f; 01255 if (*n <= 0) { 01256 return ret_val; 01257 } 01258 if (*incx == 1 && *incy == 1) { 01259 goto L20; 01260 } 01261 /* code for unequal increments or equal increments 01262 not equal to 1 */ 01263 ix = 1; 01264 iy = 1; 01265 if (*incx < 0) { 01266 ix = (-(*n) + 1) * *incx + 1; 01267 } 01268 if (*incy < 0) { 01269 iy = (-(*n) + 1) * *incy + 1; 01270 } 01271 i__1 = *n; 01272 for (i__ = 1; i__ <= i__1; ++i__) { 01273 stemp += sx[ix] * sy[iy]; 01274 ix += *incx; 01275 iy += *incy; 01276 /* L10: */ 01277 } 01278 ret_val = stemp; 01279 return ret_val; 01280 /* code for both increments equal to 1 01281 clean-up loop */ 01282 L20: 01283 m = *n % 5; 01284 if (m == 0) { 01285 goto L40; 01286 } 01287 i__1 = m; 01288 for (i__ = 1; i__ <= i__1; ++i__) { 01289 stemp += sx[i__] * sy[i__]; 01290 /* L30: */ 01291 } 01292 if (*n < 5) { 01293 goto L60; 01294 } 01295 L40: 01296 mp1 = m + 1; 01297 i__1 = *n; 01298 for (i__ = mp1; i__ <= i__1; i__ += 5) { 01299 stemp = stemp + sx[i__] * sy[i__] + sx[i__ + 1] * sy[i__ + 1] + sx[ 01300 i__ + 2] * sy[i__ + 2] + sx[i__ + 3] * sy[i__ + 3] + sx[i__ + 01301 4] * sy[i__ + 4]; 01302 /* L50: */ 01303 } 01304 L60: 01305 ret_val = stemp; 01306 return ret_val; 01307 } /* sdot_ */
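Usage sketch (illustrative only, assuming the typedefs and prototype from lapackblas.h): the dot product of two unit-stride vectors.

#include <cstdio>
#include "lapackblas.h"

int main()
{
    integer n = 3, inc = 1;
    real x[3] = {1.f, 2.f, 3.f};
    real y[3] = {4.f, 5.f, 6.f};

    doublereal d = sdot_(&n, x, &inc, y, &inc);  /* 1*4 + 2*5 + 3*6 = 32 */
    printf("dot = %g\n", (double) d);
    return 0;
}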
int sgebd2_(integer *m, integer *n, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *work, integer *info)
Definition at line 21343 of file lapackblas.cpp.
References a_ref, f2cmax, f2cmin, integer, slarf_(), slarfg_(), and xerbla_().
Referenced by sgebrd_().
21345 { 21346 /* -- LAPACK routine (version 3.0) -- 21347 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 21348 Courant Institute, Argonne National Lab, and Rice University 21349 February 29, 1992 21350 21351 21352 Purpose 21353 ======= 21354 21355 SGEBD2 reduces a real general m by n matrix A to upper or lower 21356 bidiagonal form B by an orthogonal transformation: Q' * A * P = B. 21357 21358 If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. 21359 21360 Arguments 21361 ========= 21362 21363 M (input) INTEGER 21364 The number of rows in the matrix A. M >= 0. 21365 21366 N (input) INTEGER 21367 The number of columns in the matrix A. N >= 0. 21368 21369 A (input/output) REAL array, dimension (LDA,N) 21370 On entry, the m by n general matrix to be reduced. 21371 On exit, 21372 if m >= n, the diagonal and the first superdiagonal are 21373 overwritten with the upper bidiagonal matrix B; the 21374 elements below the diagonal, with the array TAUQ, represent 21375 the orthogonal matrix Q as a product of elementary 21376 reflectors, and the elements above the first superdiagonal, 21377 with the array TAUP, represent the orthogonal matrix P as 21378 a product of elementary reflectors; 21379 if m < n, the diagonal and the first subdiagonal are 21380 overwritten with the lower bidiagonal matrix B; the 21381 elements below the first subdiagonal, with the array TAUQ, 21382 represent the orthogonal matrix Q as a product of 21383 elementary reflectors, and the elements above the diagonal, 21384 with the array TAUP, represent the orthogonal matrix P as 21385 a product of elementary reflectors. 21386 See Further Details. 21387 21388 LDA (input) INTEGER 21389 The leading dimension of the array A. LDA >= max(1,M). 21390 21391 D (output) REAL array, dimension (min(M,N)) 21392 The diagonal elements of the bidiagonal matrix B: 21393 D(i) = A(i,i). 21394 21395 E (output) REAL array, dimension (min(M,N)-1) 21396 The off-diagonal elements of the bidiagonal matrix B: 21397 if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; 21398 if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. 21399 21400 TAUQ (output) REAL array dimension (min(M,N)) 21401 The scalar factors of the elementary reflectors which 21402 represent the orthogonal matrix Q. See Further Details. 21403 21404 TAUP (output) REAL array, dimension (min(M,N)) 21405 The scalar factors of the elementary reflectors which 21406 represent the orthogonal matrix P. See Further Details. 21407 21408 WORK (workspace) REAL array, dimension (max(M,N)) 21409 21410 INFO (output) INTEGER 21411 = 0: successful exit. 21412 < 0: if INFO = -i, the i-th argument had an illegal value. 21413 21414 Further Details 21415 =============== 21416 21417 The matrices Q and P are represented as products of elementary 21418 reflectors: 21419 21420 If m >= n, 21421 21422 Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) 21423 21424 Each H(i) and G(i) has the form: 21425 21426 H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' 21427 21428 where tauq and taup are real scalars, and v and u are real vectors; 21429 v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); 21430 u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); 21431 tauq is stored in TAUQ(i) and taup in TAUP(i). 21432 21433 If m < n, 21434 21435 Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . 
G(m) 21436 21437 Each H(i) and G(i) has the form: 21438 21439 H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' 21440 21441 where tauq and taup are real scalars, and v and u are real vectors; 21442 v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); 21443 u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); 21444 tauq is stored in TAUQ(i) and taup in TAUP(i). 21445 21446 The contents of A on exit are illustrated by the following examples: 21447 21448 m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): 21449 21450 ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) 21451 ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) 21452 ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) 21453 ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) 21454 ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) 21455 ( v1 v2 v3 v4 v5 ) 21456 21457 where d and e denote diagonal and off-diagonal elements of B, vi 21458 denotes an element of the vector defining H(i), and ui an element of 21459 the vector defining G(i). 21460 21461 ===================================================================== 21462 21463 21464 Test the input parameters 21465 21466 Parameter adjustments */ 21467 /* Table of constant values */ 21468 static integer c__1 = 1; 21469 21470 /* System generated locals */ 21471 integer a_dim1, a_offset, i__1, i__2, i__3, i__4; 21472 /* Local variables */ 21473 static integer i__; 21474 extern /* Subroutine */ int slarf_(const char *, integer *, integer *, real *, 21475 integer *, real *, real *, integer *, real *), xerbla_( 21476 const char *, integer *), slarfg_(integer *, real *, real *, 21477 integer *, real *); 21478 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 21479 21480 21481 a_dim1 = *lda; 21482 a_offset = 1 + a_dim1 * 1; 21483 a -= a_offset; 21484 --d__; 21485 --e; 21486 --tauq; 21487 --taup; 21488 --work; 21489 21490 /* Function Body */ 21491 *info = 0; 21492 if (*m < 0) { 21493 *info = -1; 21494 } else if (*n < 0) { 21495 *info = -2; 21496 } else if (*lda < f2cmax(1,*m)) { 21497 *info = -4; 21498 } 21499 if (*info < 0) { 21500 i__1 = -(*info); 21501 xerbla_("SGEBD2", &i__1); 21502 return 0; 21503 } 21504 21505 if (*m >= *n) { 21506 21507 /* Reduce to upper bidiagonal form */ 21508 21509 i__1 = *n; 21510 for (i__ = 1; i__ <= i__1; ++i__) { 21511 21512 /* Generate elementary reflector H(i) to annihilate A(i+1:m,i) 21513 21514 Computing MIN */ 21515 i__2 = i__ + 1; 21516 i__3 = *m - i__ + 1; 21517 slarfg_(&i__3, &a_ref(i__, i__), &a_ref(f2cmin(i__2,*m), i__), &c__1, 21518 &tauq[i__]); 21519 d__[i__] = a_ref(i__, i__); 21520 a_ref(i__, i__) = 1.f; 21521 21522 /* Apply H(i) to A(i:m,i+1:n) from the left */ 21523 21524 i__2 = *m - i__ + 1; 21525 i__3 = *n - i__; 21526 slarf_("Left", &i__2, &i__3, &a_ref(i__, i__), &c__1, &tauq[i__], 21527 &a_ref(i__, i__ + 1), lda, &work[1]); 21528 a_ref(i__, i__) = d__[i__]; 21529 21530 if (i__ < *n) { 21531 21532 /* Generate elementary reflector G(i) to annihilate 21533 A(i,i+2:n) 21534 21535 Computing MIN */ 21536 i__2 = i__ + 2; 21537 i__3 = *n - i__; 21538 slarfg_(&i__3, &a_ref(i__, i__ + 1), &a_ref(i__, f2cmin(i__2,*n)) 21539 , lda, &taup[i__]); 21540 e[i__] = a_ref(i__, i__ + 1); 21541 a_ref(i__, i__ + 1) = 1.f; 21542 21543 /* Apply G(i) to A(i+1:m,i+1:n) from the right */ 21544 21545 i__2 = *m - i__; 21546 i__3 = *n - i__; 21547 slarf_("Right", &i__2, &i__3, &a_ref(i__, i__ + 1), lda, & 21548 taup[i__], &a_ref(i__ + 1, i__ + 1), lda, &work[1]); 21549 a_ref(i__, i__ + 1) = e[i__]; 21550 } else { 21551 taup[i__] = 0.f; 21552 } 21553 /* L10: */ 21554 } 21555 } else { 21556 21557 /* Reduce 
to lower bidiagonal form */ 21558 21559 i__1 = *m; 21560 for (i__ = 1; i__ <= i__1; ++i__) { 21561 21562 /* Generate elementary reflector G(i) to annihilate A(i,i+1:n) 21563 21564 Computing MIN */ 21565 i__2 = i__ + 1; 21566 i__3 = *n - i__ + 1; 21567 slarfg_(&i__3, &a_ref(i__, i__), &a_ref(i__, f2cmin(i__2,*n)), lda, & 21568 taup[i__]); 21569 d__[i__] = a_ref(i__, i__); 21570 a_ref(i__, i__) = 1.f; 21571 21572 /* Apply G(i) to A(i+1:m,i:n) from the right 21573 21574 Computing MIN */ 21575 i__2 = i__ + 1; 21576 i__3 = *m - i__; 21577 i__4 = *n - i__ + 1; 21578 slarf_("Right", &i__3, &i__4, &a_ref(i__, i__), lda, &taup[i__], & 21579 a_ref(f2cmin(i__2,*m), i__), lda, &work[1]); 21580 a_ref(i__, i__) = d__[i__]; 21581 21582 if (i__ < *m) { 21583 21584 /* Generate elementary reflector H(i) to annihilate 21585 A(i+2:m,i) 21586 21587 Computing MIN */ 21588 i__2 = i__ + 2; 21589 i__3 = *m - i__; 21590 slarfg_(&i__3, &a_ref(i__ + 1, i__), &a_ref(f2cmin(i__2,*m), i__) 21591 , &c__1, &tauq[i__]); 21592 e[i__] = a_ref(i__ + 1, i__); 21593 a_ref(i__ + 1, i__) = 1.f; 21594 21595 /* Apply H(i) to A(i+1:m,i+1:n) from the left */ 21596 21597 i__2 = *m - i__; 21598 i__3 = *n - i__; 21599 slarf_("Left", &i__2, &i__3, &a_ref(i__ + 1, i__), &c__1, & 21600 tauq[i__], &a_ref(i__ + 1, i__ + 1), lda, &work[1]); 21601 a_ref(i__ + 1, i__) = e[i__]; 21602 } else { 21603 tauq[i__] = 0.f; 21604 } 21605 /* L20: */ 21606 } 21607 } 21608 return 0; 21609 21610 /* End of SGEBD2 */ 21611 21612 } /* sgebd2_ */
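Usage sketch (illustrative only, assuming the typedefs and the sgebd2_ prototype from lapackblas.h): unblocked bidiagonal reduction of a 3-by-3 column-major matrix; D and E receive the bidiagonal of B, TAUQ and TAUP the reflector scalars.

#include <cstdio>
#include "lapackblas.h"

int main()
{
    integer m = 3, n = 3, lda = 3, info = 0;
    /* column-major A = [4 1 0; 1 4 1; 0 1 4] */
    real a[9] = {4.f, 1.f, 0.f,  1.f, 4.f, 1.f,  0.f, 1.f, 4.f};
    real d[3], e[2], tauq[3], taup[3];
    real work[3];                       /* max(M,N) */

    sgebd2_(&m, &n, a, &lda, d, e, tauq, taup, work, &info);
    if (info == 0)
        printf("diag: %g %g %g   offdiag: %g %g\n", d[0], d[1], d[2], e[0], e[1]);
    return 0;
}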
int sgebrd_(integer *m, integer *n, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *work, integer *lwork, integer *info)
Definition at line 21027 of file lapackblas.cpp.
References a_ref, c__3, f2cmax, f2cmin, ilaenv_(), integer, nx, sgebd2_(), sgemm_(), slabrd_(), and xerbla_().
Referenced by sgesvd_().
21030 { 21031 /* -- LAPACK routine (version 3.0) -- 21032 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 21033 Courant Institute, Argonne National Lab, and Rice University 21034 June 30, 1999 21035 21036 21037 Purpose 21038 ======= 21039 21040 SGEBRD reduces a general real M-by-N matrix A to upper or lower 21041 bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. 21042 21043 If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. 21044 21045 Arguments 21046 ========= 21047 21048 M (input) INTEGER 21049 The number of rows in the matrix A. M >= 0. 21050 21051 N (input) INTEGER 21052 The number of columns in the matrix A. N >= 0. 21053 21054 A (input/output) REAL array, dimension (LDA,N) 21055 On entry, the M-by-N general matrix to be reduced. 21056 On exit, 21057 if m >= n, the diagonal and the first superdiagonal are 21058 overwritten with the upper bidiagonal matrix B; the 21059 elements below the diagonal, with the array TAUQ, represent 21060 the orthogonal matrix Q as a product of elementary 21061 reflectors, and the elements above the first superdiagonal, 21062 with the array TAUP, represent the orthogonal matrix P as 21063 a product of elementary reflectors; 21064 if m < n, the diagonal and the first subdiagonal are 21065 overwritten with the lower bidiagonal matrix B; the 21066 elements below the first subdiagonal, with the array TAUQ, 21067 represent the orthogonal matrix Q as a product of 21068 elementary reflectors, and the elements above the diagonal, 21069 with the array TAUP, represent the orthogonal matrix P as 21070 a product of elementary reflectors. 21071 See Further Details. 21072 21073 LDA (input) INTEGER 21074 The leading dimension of the array A. LDA >= max(1,M). 21075 21076 D (output) REAL array, dimension (min(M,N)) 21077 The diagonal elements of the bidiagonal matrix B: 21078 D(i) = A(i,i). 21079 21080 E (output) REAL array, dimension (min(M,N)-1) 21081 The off-diagonal elements of the bidiagonal matrix B: 21082 if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; 21083 if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. 21084 21085 TAUQ (output) REAL array dimension (min(M,N)) 21086 The scalar factors of the elementary reflectors which 21087 represent the orthogonal matrix Q. See Further Details. 21088 21089 TAUP (output) REAL array, dimension (min(M,N)) 21090 The scalar factors of the elementary reflectors which 21091 represent the orthogonal matrix P. See Further Details. 21092 21093 WORK (workspace/output) REAL array, dimension (LWORK) 21094 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 21095 21096 LWORK (input) INTEGER 21097 The length of the array WORK. LWORK >= max(1,M,N). 21098 For optimum performance LWORK >= (M+N)*NB, where NB 21099 is the optimal blocksize. 21100 21101 If LWORK = -1, then a workspace query is assumed; the routine 21102 only calculates the optimal size of the WORK array, returns 21103 this value as the first entry of the WORK array, and no error 21104 message related to LWORK is issued by XERBLA. 21105 21106 INFO (output) INTEGER 21107 = 0: successful exit 21108 < 0: if INFO = -i, the i-th argument had an illegal value. 21109 21110 Further Details 21111 =============== 21112 21113 The matrices Q and P are represented as products of elementary 21114 reflectors: 21115 21116 If m >= n, 21117 21118 Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . 
G(n-1) 21119 21120 Each H(i) and G(i) has the form: 21121 21122 H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' 21123 21124 where tauq and taup are real scalars, and v and u are real vectors; 21125 v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); 21126 u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); 21127 tauq is stored in TAUQ(i) and taup in TAUP(i). 21128 21129 If m < n, 21130 21131 Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) 21132 21133 Each H(i) and G(i) has the form: 21134 21135 H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' 21136 21137 where tauq and taup are real scalars, and v and u are real vectors; 21138 v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); 21139 u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); 21140 tauq is stored in TAUQ(i) and taup in TAUP(i). 21141 21142 The contents of A on exit are illustrated by the following examples: 21143 21144 m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): 21145 21146 ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) 21147 ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) 21148 ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) 21149 ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) 21150 ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) 21151 ( v1 v2 v3 v4 v5 ) 21152 21153 where d and e denote diagonal and off-diagonal elements of B, vi 21154 denotes an element of the vector defining H(i), and ui an element of 21155 the vector defining G(i). 21156 21157 ===================================================================== 21158 21159 21160 Test the input parameters 21161 21162 Parameter adjustments */ 21163 /* Table of constant values */ 21164 static integer c__1 = 1; 21165 static integer c_n1 = -1; 21166 static integer c__3 = 3; 21167 static integer c__2 = 2; 21168 static real c_b21 = -1.f; 21169 static real c_b22 = 1.f; 21170 21171 /* System generated locals */ 21172 integer a_dim1, a_offset, i__1, i__2, i__3, i__4; 21173 /* Local variables */ 21174 static integer i__, j, nbmin, iinfo; 21175 extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *, 21176 integer *, real *, real *, integer *, real *, integer *, real *, 21177 real *, integer *); 21178 static integer minmn; 21179 extern /* Subroutine */ int sgebd2_(integer *, integer *, real *, integer 21180 *, real *, real *, real *, real *, real *, integer *); 21181 static integer nb, nx; 21182 extern /* Subroutine */ int slabrd_(integer *, integer *, integer *, real 21183 *, integer *, real *, real *, real *, real *, real *, integer *, 21184 real *, integer *); 21185 static real ws; 21186 extern /* Subroutine */ int xerbla_(const char *, integer *); 21187 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 21188 integer *, integer *, ftnlen, ftnlen); 21189 static integer ldwrkx, ldwrky, lwkopt; 21190 static logical lquery; 21191 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 21192 21193 21194 a_dim1 = *lda; 21195 a_offset = 1 + a_dim1 * 1; 21196 a -= a_offset; 21197 --d__; 21198 --e; 21199 --tauq; 21200 --taup; 21201 --work; 21202 21203 /* Function Body */ 21204 *info = 0; 21205 /* Computing MAX */ 21206 i__1 = 1, i__2 = ilaenv_(&c__1, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( 21207 ftnlen)6, (ftnlen)1); 21208 nb = f2cmax(i__1,i__2); 21209 lwkopt = (*m + *n) * nb; 21210 work[1] = (real) lwkopt; 21211 lquery = *lwork == -1; 21212 if (*m < 0) { 21213 *info = -1; 21214 } else if (*n < 0) { 21215 *info = -2; 21216 } else if (*lda < f2cmax(1,*m)) { 21217 *info = -4; 21218 } else /* 
if(complicated condition) */ { 21219 /* Computing MAX */ 21220 i__1 = f2cmax(1,*m); 21221 if (*lwork < f2cmax(i__1,*n) && ! lquery) { 21222 *info = -10; 21223 } 21224 } 21225 if (*info < 0) { 21226 i__1 = -(*info); 21227 xerbla_("SGEBRD", &i__1); 21228 return 0; 21229 } else if (lquery) { 21230 return 0; 21231 } 21232 21233 /* Quick return if possible */ 21234 21235 minmn = f2cmin(*m,*n); 21236 if (minmn == 0) { 21237 work[1] = 1.f; 21238 return 0; 21239 } 21240 21241 ws = (real) f2cmax(*m,*n); 21242 ldwrkx = *m; 21243 ldwrky = *n; 21244 21245 if (nb > 1 && nb < minmn) { 21246 21247 /* Set the crossover point NX. 21248 21249 Computing MAX */ 21250 i__1 = nb, i__2 = ilaenv_(&c__3, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( 21251 ftnlen)6, (ftnlen)1); 21252 nx = f2cmax(i__1,i__2); 21253 21254 /* Determine when to switch from blocked to unblocked code. */ 21255 21256 if (nx < minmn) { 21257 ws = (real) ((*m + *n) * nb); 21258 if ((real) (*lwork) < ws) { 21259 21260 /* Not enough work space for the optimal NB, consider using 21261 a smaller block size. */ 21262 21263 nbmin = ilaenv_(&c__2, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( 21264 ftnlen)6, (ftnlen)1); 21265 if (*lwork >= (*m + *n) * nbmin) { 21266 nb = *lwork / (*m + *n); 21267 } else { 21268 nb = 1; 21269 nx = minmn; 21270 } 21271 } 21272 } 21273 } else { 21274 nx = minmn; 21275 } 21276 21277 i__1 = minmn - nx; 21278 i__2 = nb; 21279 for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { 21280 21281 /* Reduce rows and columns i:i+nb-1 to bidiagonal form and return 21282 the matrices X and Y which are needed to update the unreduced 21283 part of the matrix */ 21284 21285 i__3 = *m - i__ + 1; 21286 i__4 = *n - i__ + 1; 21287 slabrd_(&i__3, &i__4, &nb, &a_ref(i__, i__), lda, &d__[i__], &e[i__], 21288 &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx * nb 21289 + 1], &ldwrky); 21290 21291 /* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update 21292 of the form A := A - V*Y' - X*U' */ 21293 21294 i__3 = *m - i__ - nb + 1; 21295 i__4 = *n - i__ - nb + 1; 21296 sgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b21, &a_ref( 21297 i__ + nb, i__), lda, &work[ldwrkx * nb + nb + 1], &ldwrky, & 21298 c_b22, &a_ref(i__ + nb, i__ + nb), lda) 21299 ; 21300 i__3 = *m - i__ - nb + 1; 21301 i__4 = *n - i__ - nb + 1; 21302 sgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b21, & 21303 work[nb + 1], &ldwrkx, &a_ref(i__, i__ + nb), lda, &c_b22, & 21304 a_ref(i__ + nb, i__ + nb), lda); 21305 21306 /* Copy diagonal and off-diagonal elements of B back into A */ 21307 21308 if (*m >= *n) { 21309 i__3 = i__ + nb - 1; 21310 for (j = i__; j <= i__3; ++j) { 21311 a_ref(j, j) = d__[j]; 21312 a_ref(j, j + 1) = e[j]; 21313 /* L10: */ 21314 } 21315 } else { 21316 i__3 = i__ + nb - 1; 21317 for (j = i__; j <= i__3; ++j) { 21318 a_ref(j, j) = d__[j]; 21319 a_ref(j + 1, j) = e[j]; 21320 /* L20: */ 21321 } 21322 } 21323 /* L30: */ 21324 } 21325 21326 /* Use unblocked code to reduce the remainder of the matrix */ 21327 21328 i__2 = *m - i__ + 1; 21329 i__1 = *n - i__ + 1; 21330 sgebd2_(&i__2, &i__1, &a_ref(i__, i__), lda, &d__[i__], &e[i__], &tauq[ 21331 i__], &taup[i__], &work[1], &iinfo); 21332 work[1] = ws; 21333 return 0; 21334 21335 /* End of SGEBRD */ 21336 21337 } /* sgebrd_ */
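Usage sketch (illustrative only, assuming the typedefs and the sgebrd_ prototype from lapackblas.h): the first call is the LWORK = -1 workspace query described above, the second performs the blocked reduction with the returned optimal workspace.

#include <cstdio>
#include <cstdlib>
#include "lapackblas.h"

int main()
{
    integer m = 4, n = 3, lda = 4, lwork = -1, info = 0;
    real a[12] = {1.f, 2.f, 3.f, 4.f,     /* column-major 4x3 matrix */
                  5.f, 6.f, 7.f, 8.f,
                  9.f, 1.f, 2.f, 3.f};
    real d[3], e[2], tauq[3], taup[3], wquery;

    sgebrd_(&m, &n, a, &lda, d, e, tauq, taup, &wquery, &lwork, &info);  /* query */
    lwork = (integer) wquery;
    real *work = (real *) malloc(sizeof(real) * lwork);

    sgebrd_(&m, &n, a, &lda, d, e, tauq, taup, work, &lwork, &info);     /* reduce */
    printf("info = %ld, diag: %g %g %g\n", (long) info, d[0], d[1], d[2]);
    free(work);
    return 0;
}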
int sgelq2_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *info)
Definition at line 22808 of file lapackblas.cpp.
References a_ref, f2cmax, f2cmin, integer, slarf_(), slarfg_(), and xerbla_().
Referenced by sgelqf_().
22810 { 22811 /* -- LAPACK routine (version 3.0) -- 22812 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 22813 Courant Institute, Argonne National Lab, and Rice University 22814 February 29, 1992 22815 22816 22817 Purpose 22818 ======= 22819 22820 SGELQ2 computes an LQ factorization of a real m by n matrix A: 22821 A = L * Q. 22822 22823 Arguments 22824 ========= 22825 22826 M (input) INTEGER 22827 The number of rows of the matrix A. M >= 0. 22828 22829 N (input) INTEGER 22830 The number of columns of the matrix A. N >= 0. 22831 22832 A (input/output) REAL array, dimension (LDA,N) 22833 On entry, the m by n matrix A. 22834 On exit, the elements on and below the diagonal of the array 22835 contain the m by min(m,n) lower trapezoidal matrix L (L is 22836 lower triangular if m <= n); the elements above the diagonal, 22837 with the array TAU, represent the orthogonal matrix Q as a 22838 product of elementary reflectors (see Further Details). 22839 22840 LDA (input) INTEGER 22841 The leading dimension of the array A. LDA >= max(1,M). 22842 22843 TAU (output) REAL array, dimension (min(M,N)) 22844 The scalar factors of the elementary reflectors (see Further 22845 Details). 22846 22847 WORK (workspace) REAL array, dimension (M) 22848 22849 INFO (output) INTEGER 22850 = 0: successful exit 22851 < 0: if INFO = -i, the i-th argument had an illegal value 22852 22853 Further Details 22854 =============== 22855 22856 The matrix Q is represented as a product of elementary reflectors 22857 22858 Q = H(k) . . . H(2) H(1), where k = min(m,n). 22859 22860 Each H(i) has the form 22861 22862 H(i) = I - tau * v * v' 22863 22864 where tau is a real scalar, and v is a real vector with 22865 v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), 22866 and tau in TAU(i). 
22867 22868 ===================================================================== 22869 22870 22871 Test the input arguments 22872 22873 Parameter adjustments */ 22874 /* System generated locals */ 22875 integer a_dim1, a_offset, i__1, i__2, i__3; 22876 /* Local variables */ 22877 static integer i__, k; 22878 extern /* Subroutine */ int slarf_(const char *, integer *, integer *, real *, 22879 integer *, real *, real *, integer *, real *), xerbla_( 22880 const char *, integer *), slarfg_(integer *, real *, real *, 22881 integer *, real *); 22882 static real aii; 22883 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 22884 22885 a_dim1 = *lda; 22886 a_offset = 1 + a_dim1 * 1; 22887 a -= a_offset; 22888 --tau; 22889 --work; 22890 22891 /* Function Body */ 22892 *info = 0; 22893 if (*m < 0) { 22894 *info = -1; 22895 } else if (*n < 0) { 22896 *info = -2; 22897 } else if (*lda < f2cmax(1,*m)) { 22898 *info = -4; 22899 } 22900 if (*info != 0) { 22901 i__1 = -(*info); 22902 xerbla_("SGELQ2", &i__1); 22903 return 0; 22904 } 22905 22906 k = f2cmin(*m,*n); 22907 22908 i__1 = k; 22909 for (i__ = 1; i__ <= i__1; ++i__) { 22910 22911 /* Generate elementary reflector H(i) to annihilate A(i,i+1:n) 22912 22913 Computing MIN */ 22914 i__2 = i__ + 1; 22915 i__3 = *n - i__ + 1; 22916 slarfg_(&i__3, &a_ref(i__, i__), &a_ref(i__, f2cmin(i__2,*n)), lda, &tau[ 22917 i__]); 22918 if (i__ < *m) { 22919 22920 /* Apply H(i) to A(i+1:m,i:n) from the right */ 22921 22922 aii = a_ref(i__, i__); 22923 a_ref(i__, i__) = 1.f; 22924 i__2 = *m - i__; 22925 i__3 = *n - i__ + 1; 22926 slarf_("Right", &i__2, &i__3, &a_ref(i__, i__), lda, &tau[i__], & 22927 a_ref(i__ + 1, i__), lda, &work[1]); 22928 a_ref(i__, i__) = aii; 22929 } 22930 /* L10: */ 22931 } 22932 return 0; 22933 22934 /* End of SGELQ2 */ 22935 22936 } /* sgelq2_ */
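Usage sketch (illustrative only, assuming the typedefs and the sgelq2_ prototype from lapackblas.h): LQ factorization of a 2-by-3 matrix; on return the lower trapezoid of A holds L, and the part above the diagonal, together with TAU, encodes Q.

#include <cstdio>
#include "lapackblas.h"

int main()
{
    integer m = 2, n = 3, lda = 2, info = 0;
    /* column-major 2x3 matrix with rows (1 2 3) and (4 5 6) */
    real a[6] = {1.f, 4.f, 2.f, 5.f, 3.f, 6.f};
    real tau[2];     /* min(M,N) */
    real work[2];    /* M */

    sgelq2_(&m, &n, a, &lda, tau, work, &info);
    printf("info = %ld, L(1,1) = %g, L(2,1) = %g\n", (long) info, a[0], a[1]);
    return 0;
}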
int sgelqf_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info)
Definition at line 21958 of file lapackblas.cpp.
References a_ref, c__3, f2cmax, f2cmin, ilaenv_(), integer, nx, sgelq2_(), slarfb_(), slarft_(), and xerbla_().
Referenced by sgesvd_().
21960 { 21961 /* -- LAPACK routine (version 3.0) -- 21962 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 21963 Courant Institute, Argonne National Lab, and Rice University 21964 June 30, 1999 21965 21966 21967 Purpose 21968 ======= 21969 21970 SGELQF computes an LQ factorization of a real M-by-N matrix A: 21971 A = L * Q. 21972 21973 Arguments 21974 ========= 21975 21976 M (input) INTEGER 21977 The number of rows of the matrix A. M >= 0. 21978 21979 N (input) INTEGER 21980 The number of columns of the matrix A. N >= 0. 21981 21982 A (input/output) REAL array, dimension (LDA,N) 21983 On entry, the M-by-N matrix A. 21984 On exit, the elements on and below the diagonal of the array 21985 contain the m-by-min(m,n) lower trapezoidal matrix L (L is 21986 lower triangular if m <= n); the elements above the diagonal, 21987 with the array TAU, represent the orthogonal matrix Q as a 21988 product of elementary reflectors (see Further Details). 21989 21990 LDA (input) INTEGER 21991 The leading dimension of the array A. LDA >= max(1,M). 21992 21993 TAU (output) REAL array, dimension (min(M,N)) 21994 The scalar factors of the elementary reflectors (see Further 21995 Details). 21996 21997 WORK (workspace/output) REAL array, dimension (LWORK) 21998 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 21999 22000 LWORK (input) INTEGER 22001 The dimension of the array WORK. LWORK >= max(1,M). 22002 For optimum performance LWORK >= M*NB, where NB is the 22003 optimal blocksize. 22004 22005 If LWORK = -1, then a workspace query is assumed; the routine 22006 only calculates the optimal size of the WORK array, returns 22007 this value as the first entry of the WORK array, and no error 22008 message related to LWORK is issued by XERBLA. 22009 22010 INFO (output) INTEGER 22011 = 0: successful exit 22012 < 0: if INFO = -i, the i-th argument had an illegal value 22013 22014 Further Details 22015 =============== 22016 22017 The matrix Q is represented as a product of elementary reflectors 22018 22019 Q = H(k) . . . H(2) H(1), where k = min(m,n). 22020 22021 Each H(i) has the form 22022 22023 H(i) = I - tau * v * v' 22024 22025 where tau is a real scalar, and v is a real vector with 22026 v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), 22027 and tau in TAU(i). 
22028 22029 ===================================================================== 22030 22031 22032 Test the input arguments 22033 22034 Parameter adjustments */ 22035 /* Table of constant values */ 22036 static integer c__1 = 1; 22037 static integer c_n1 = -1; 22038 static integer c__3 = 3; 22039 static integer c__2 = 2; 22040 22041 /* System generated locals */ 22042 integer a_dim1, a_offset, i__1, i__2, i__3, i__4; 22043 /* Local variables */ 22044 static integer i__, k, nbmin, iinfo; 22045 extern /* Subroutine */ int sgelq2_(integer *, integer *, real *, integer 22046 *, real *, real *, integer *); 22047 static integer ib, nb, nx; 22048 extern /* Subroutine */ int slarfb_(const char *, const char *, const char *, const char *, 22049 integer *, integer *, integer *, real *, integer *, real *, 22050 integer *, real *, integer *, real *, integer *), xerbla_(const char *, integer *); 22051 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 22052 integer *, integer *, ftnlen, ftnlen); 22053 extern /* Subroutine */ int slarft_(const char *, const char *, integer *, integer *, 22054 real *, integer *, real *, real *, integer *); 22055 static integer ldwork, lwkopt; 22056 static logical lquery; 22057 static integer iws; 22058 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 22059 22060 22061 a_dim1 = *lda; 22062 a_offset = 1 + a_dim1 * 1; 22063 a -= a_offset; 22064 --tau; 22065 --work; 22066 22067 /* Function Body */ 22068 *info = 0; 22069 nb = ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) 22070 1); 22071 lwkopt = *m * nb; 22072 work[1] = (real) lwkopt; 22073 lquery = *lwork == -1; 22074 if (*m < 0) { 22075 *info = -1; 22076 } else if (*n < 0) { 22077 *info = -2; 22078 } else if (*lda < f2cmax(1,*m)) { 22079 *info = -4; 22080 } else if (*lwork < f2cmax(1,*m) && ! lquery) { 22081 *info = -7; 22082 } 22083 if (*info != 0) { 22084 i__1 = -(*info); 22085 xerbla_("SGELQF", &i__1); 22086 return 0; 22087 } else if (lquery) { 22088 return 0; 22089 } 22090 22091 /* Quick return if possible */ 22092 22093 k = f2cmin(*m,*n); 22094 if (k == 0) { 22095 work[1] = 1.f; 22096 return 0; 22097 } 22098 22099 nbmin = 2; 22100 nx = 0; 22101 iws = *m; 22102 if (nb > 1 && nb < k) { 22103 22104 /* Determine when to cross over from blocked to unblocked code. 22105 22106 Computing MAX */ 22107 i__1 = 0, i__2 = ilaenv_(&c__3, "SGELQF", " ", m, n, &c_n1, &c_n1, ( 22108 ftnlen)6, (ftnlen)1); 22109 nx = f2cmax(i__1,i__2); 22110 if (nx < k) { 22111 22112 /* Determine if workspace is large enough for blocked code. */ 22113 22114 ldwork = *m; 22115 iws = ldwork * nb; 22116 if (*lwork < iws) { 22117 22118 /* Not enough workspace to use optimal NB: reduce NB and 22119 determine the minimum value of NB. */ 22120 22121 nb = *lwork / ldwork; 22122 /* Computing MAX */ 22123 i__1 = 2, i__2 = ilaenv_(&c__2, "SGELQF", " ", m, n, &c_n1, & 22124 c_n1, (ftnlen)6, (ftnlen)1); 22125 nbmin = f2cmax(i__1,i__2); 22126 } 22127 } 22128 } 22129 22130 if (nb >= nbmin && nb < k && nx < k) { 22131 22132 /* Use blocked code initially */ 22133 22134 i__1 = k - nx; 22135 i__2 = nb; 22136 for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { 22137 /* Computing MIN */ 22138 i__3 = k - i__ + 1; 22139 ib = f2cmin(i__3,nb); 22140 22141 /* Compute the LQ factorization of the current block 22142 A(i:i+ib-1,i:n) */ 22143 22144 i__3 = *n - i__ + 1; 22145 sgelq2_(&ib, &i__3, &a_ref(i__, i__), lda, &tau[i__], &work[1], & 22146 iinfo); 22147 if (i__ + ib <= *m) { 22148 22149 /* Form the triangular factor of the block reflector 22150 H = H(i) H(i+1) . . . H(i+ib-1) */ 22151 22152 i__3 = *n - i__ + 1; 22153 slarft_("Forward", "Rowwise", &i__3, &ib, &a_ref(i__, i__), 22154 lda, &tau[i__], &work[1], &ldwork); 22155 22156 /* Apply H to A(i+ib:m,i:n) from the right */ 22157 22158 i__3 = *m - i__ - ib + 1; 22159 i__4 = *n - i__ + 1; 22160 slarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, 22161 &i__4, &ib, &a_ref(i__, i__), lda, &work[1], &ldwork, 22162 &a_ref(i__ + ib, i__), lda, &work[ib + 1], &ldwork); 22163 } 22164 /* L10: */ 22165 } 22166 } else { 22167 i__ = 1; 22168 } 22169 22170 /* Use unblocked code to factor the last or only block. */ 22171 22172 if (i__ <= k) { 22173 i__2 = *m - i__ + 1; 22174 i__1 = *n - i__ + 1; 22175 sgelq2_(&i__2, &i__1, &a_ref(i__, i__), lda, &tau[i__], &work[1], & 22176 iinfo); 22177 } 22178 22179 work[1] = (real) iws; 22180 return 0; 22181 22182 /* End of SGELQF */ 22183 22184 } /* sgelqf_ */
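Usage sketch (illustrative only, assuming the typedefs and the sgelqf_ prototype from lapackblas.h): blocked LQ factorization preceded by the LWORK = -1 workspace query.

#include <cstdio>
#include <cstdlib>
#include "lapackblas.h"

int main()
{
    integer m = 3, n = 5, lda = 3, lwork = -1, info = 0;
    real a[15];
    for (int i = 0; i < 15; ++i) a[i] = (real) (i + 1);    /* column-major 3x5 */
    real tau[3], wquery;

    sgelqf_(&m, &n, a, &lda, tau, &wquery, &lwork, &info); /* workspace query */
    lwork = (integer) wquery;
    real *work = (real *) malloc(sizeof(real) * lwork);

    sgelqf_(&m, &n, a, &lda, tau, work, &lwork, &info);
    printf("info = %ld\n", (long) info);
    free(work);
    return 0;
}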
int sgemm_(const char *transa, const char *transb, integer *m, integer *n, integer *k, real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, real *c__, integer *ldc)
Definition at line 1312 of file lapackblas.cpp.
References a_ref, b_ref, c___ref, f2cmax, integer, lsame_(), and xerbla_().
Referenced by sgebrd_(), sgesvd_(), slaed0_(), slaed3_(), slaed7_(), slarfb_(), and sstedc_().
01315 { 01316 /* System generated locals */ 01317 integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 01318 i__3; 01319 /* Local variables */ 01320 static integer info; 01321 static logical nota, notb; 01322 static real temp; 01323 static integer i__, j, l, ncola; 01324 extern logical lsame_(const char *, const char *); 01325 static integer nrowa, nrowb; 01326 extern /* Subroutine */ int xerbla_(const char *, integer *); 01327 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 01328 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] 01329 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] 01330 /* Purpose 01331 ======= 01332 SGEMM performs one of the matrix-matrix operations 01333 C := alpha*op( A )*op( B ) + beta*C, 01334 where op( X ) is one of 01335 op( X ) = X or op( X ) = X', 01336 alpha and beta are scalars, and A, B and C are matrices, with op( A ) 01337 an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. 01338 Parameters 01339 ========== 01340 TRANSA - CHARACTER*1. 01341 On entry, TRANSA specifies the form of op( A ) to be used in 01342 the matrix multiplication as follows: 01343 TRANSA = 'N' or 'n', op( A ) = A. 01344 TRANSA = 'T' or 't', op( A ) = A'. 01345 TRANSA = 'C' or 'c', op( A ) = A'. 01346 Unchanged on exit. 01347 TRANSB - CHARACTER*1. 01348 On entry, TRANSB specifies the form of op( B ) to be used in 01349 the matrix multiplication as follows: 01350 TRANSB = 'N' or 'n', op( B ) = B. 01351 TRANSB = 'T' or 't', op( B ) = B'. 01352 TRANSB = 'C' or 'c', op( B ) = B'. 01353 Unchanged on exit. 01354 M - INTEGER. 01355 On entry, M specifies the number of rows of the matrix 01356 op( A ) and of the matrix C. M must be at least zero. 01357 Unchanged on exit. 01358 N - INTEGER. 01359 On entry, N specifies the number of columns of the matrix 01360 op( B ) and the number of columns of the matrix C. N must be 01361 at least zero. 01362 Unchanged on exit. 01363 K - INTEGER. 01364 On entry, K specifies the number of columns of the matrix 01365 op( A ) and the number of rows of the matrix op( B ). K must 01366 be at least zero. 01367 Unchanged on exit. 01368 ALPHA - REAL . 01369 On entry, ALPHA specifies the scalar alpha. 01370 Unchanged on exit. 01371 A - REAL array of DIMENSION ( LDA, ka ), where ka is 01372 k when TRANSA = 'N' or 'n', and is m otherwise. 01373 Before entry with TRANSA = 'N' or 'n', the leading m by k 01374 part of the array A must contain the matrix A, otherwise 01375 the leading k by m part of the array A must contain the 01376 matrix A. 01377 Unchanged on exit. 01378 LDA - INTEGER. 01379 On entry, LDA specifies the first dimension of A as declared 01380 in the calling (sub) program. When TRANSA = 'N' or 'n' then 01381 LDA must be at least f2cmax( 1, m ), otherwise LDA must be at 01382 least f2cmax( 1, k ). 01383 Unchanged on exit. 01384 B - REAL array of DIMENSION ( LDB, kb ), where kb is 01385 n when TRANSB = 'N' or 'n', and is k otherwise. 01386 Before entry with TRANSB = 'N' or 'n', the leading k by n 01387 part of the array B must contain the matrix B, otherwise 01388 the leading n by k part of the array B must contain the 01389 matrix B. 01390 Unchanged on exit. 01391 LDB - INTEGER. 01392 On entry, LDB specifies the first dimension of B as declared 01393 in the calling (sub) program. When TRANSB = 'N' or 'n' then 01394 LDB must be at least f2cmax( 1, k ), otherwise LDB must be at 01395 least f2cmax( 1, n ). 01396 Unchanged on exit. 01397 BETA - REAL . 01398 On entry, BETA specifies the scalar beta. 
When BETA is 01399 supplied as zero then C need not be set on input. 01400 Unchanged on exit. 01401 C - REAL array of DIMENSION ( LDC, n ). 01402 Before entry, the leading m by n part of the array C must 01403 contain the matrix C, except when beta is zero, in which 01404 case C need not be set on entry. 01405 On exit, the array C is overwritten by the m by n matrix 01406 ( alpha*op( A )*op( B ) + beta*C ). 01407 LDC - INTEGER. 01408 On entry, LDC specifies the first dimension of C as declared 01409 in the calling (sub) program. LDC must be at least 01410 f2cmax( 1, m ). 01411 Unchanged on exit. 01412 Level 3 Blas routine. 01413 -- Written on 8-February-1989. 01414 Jack Dongarra, Argonne National Laboratory. 01415 Iain Duff, AERE Harwell. 01416 Jeremy Du Croz, Numerical Algorithms Group Ltd. 01417 Sven Hammarling, Numerical Algorithms Group Ltd. 01418 Set NOTA and NOTB as true if A and B respectively are not 01419 transposed and set NROWA, NCOLA and NROWB as the number of rows 01420 and columns of A and the number of rows of B respectively. 01421 Parameter adjustments */ 01422 a_dim1 = *lda; 01423 a_offset = 1 + a_dim1 * 1; 01424 a -= a_offset; 01425 b_dim1 = *ldb; 01426 b_offset = 1 + b_dim1 * 1; 01427 b -= b_offset; 01428 c_dim1 = *ldc; 01429 c_offset = 1 + c_dim1 * 1; 01430 c__ -= c_offset; 01431 /* Function Body */ 01432 nota = lsame_(transa, "N"); 01433 notb = lsame_(transb, "N"); 01434 if (nota) { 01435 nrowa = *m; 01436 ncola = *k; 01437 } else { 01438 nrowa = *k; 01439 ncola = *m; 01440 } 01441 if (notb) { 01442 nrowb = *k; 01443 } else { 01444 nrowb = *n; 01445 } 01446 /* Test the input parameters. */ 01447 info = 0; 01448 if (! nota && ! lsame_(transa, "C") && ! lsame_( 01449 transa, "T")) { 01450 info = 1; 01451 } else if (! notb && ! lsame_(transb, "C") && ! 01452 lsame_(transb, "T")) { 01453 info = 2; 01454 } else if (*m < 0) { 01455 info = 3; 01456 } else if (*n < 0) { 01457 info = 4; 01458 } else if (*k < 0) { 01459 info = 5; 01460 } else if (*lda < f2cmax(1,nrowa)) { 01461 info = 8; 01462 } else if (*ldb < f2cmax(1,nrowb)) { 01463 info = 10; 01464 } else if (*ldc < f2cmax(1,*m)) { 01465 info = 13; 01466 } 01467 if (info != 0) { 01468 xerbla_("SGEMM ", &info); 01469 return 0; 01470 } 01471 /* Quick return if possible. */ 01472 if (*m == 0 || *n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { 01473 return 0; 01474 } 01475 /* And if alpha.eq.zero. */ 01476 if (*alpha == 0.f) { 01477 if (*beta == 0.f) { 01478 i__1 = *n; 01479 for (j = 1; j <= i__1; ++j) { 01480 i__2 = *m; 01481 for (i__ = 1; i__ <= i__2; ++i__) { 01482 c___ref(i__, j) = 0.f; 01483 /* L10: */ 01484 } 01485 /* L20: */ 01486 } 01487 } else { 01488 i__1 = *n; 01489 for (j = 1; j <= i__1; ++j) { 01490 i__2 = *m; 01491 for (i__ = 1; i__ <= i__2; ++i__) { 01492 c___ref(i__, j) = *beta * c___ref(i__, j); 01493 /* L30: */ 01494 } 01495 /* L40: */ 01496 } 01497 } 01498 return 0; 01499 } 01500 /* Start the operations. */ 01501 if (notb) { 01502 if (nota) { 01503 /* Form C := alpha*A*B + beta*C. 
*/ 01504 i__1 = *n; 01505 for (j = 1; j <= i__1; ++j) { 01506 if (*beta == 0.f) { 01507 i__2 = *m; 01508 for (i__ = 1; i__ <= i__2; ++i__) { 01509 c___ref(i__, j) = 0.f; 01510 /* L50: */ 01511 } 01512 } else if (*beta != 1.f) { 01513 i__2 = *m; 01514 for (i__ = 1; i__ <= i__2; ++i__) { 01515 c___ref(i__, j) = *beta * c___ref(i__, j); 01516 /* L60: */ 01517 } 01518 } 01519 i__2 = *k; 01520 for (l = 1; l <= i__2; ++l) { 01521 if (b_ref(l, j) != 0.f) { 01522 temp = *alpha * b_ref(l, j); 01523 i__3 = *m; 01524 for (i__ = 1; i__ <= i__3; ++i__) { 01525 c___ref(i__, j) = c___ref(i__, j) + temp * a_ref( 01526 i__, l); 01527 /* L70: */ 01528 } 01529 } 01530 /* L80: */ 01531 } 01532 /* L90: */ 01533 } 01534 } else { 01535 /* Form C := alpha*A'*B + beta*C */ 01536 i__1 = *n; 01537 for (j = 1; j <= i__1; ++j) { 01538 i__2 = *m; 01539 for (i__ = 1; i__ <= i__2; ++i__) { 01540 temp = 0.f; 01541 i__3 = *k; 01542 for (l = 1; l <= i__3; ++l) { 01543 temp += a_ref(l, i__) * b_ref(l, j); 01544 /* L100: */ 01545 } 01546 if (*beta == 0.f) { 01547 c___ref(i__, j) = *alpha * temp; 01548 } else { 01549 c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__, 01550 j); 01551 } 01552 /* L110: */ 01553 } 01554 /* L120: */ 01555 } 01556 } 01557 } else { 01558 if (nota) { 01559 /* Form C := alpha*A*B' + beta*C */ 01560 i__1 = *n; 01561 for (j = 1; j <= i__1; ++j) { 01562 if (*beta == 0.f) { 01563 i__2 = *m; 01564 for (i__ = 1; i__ <= i__2; ++i__) { 01565 c___ref(i__, j) = 0.f; 01566 /* L130: */ 01567 } 01568 } else if (*beta != 1.f) { 01569 i__2 = *m; 01570 for (i__ = 1; i__ <= i__2; ++i__) { 01571 c___ref(i__, j) = *beta * c___ref(i__, j); 01572 /* L140: */ 01573 } 01574 } 01575 i__2 = *k; 01576 for (l = 1; l <= i__2; ++l) { 01577 if (b_ref(j, l) != 0.f) { 01578 temp = *alpha * b_ref(j, l); 01579 i__3 = *m; 01580 for (i__ = 1; i__ <= i__3; ++i__) { 01581 c___ref(i__, j) = c___ref(i__, j) + temp * a_ref( 01582 i__, l); 01583 /* L150: */ 01584 } 01585 } 01586 /* L160: */ 01587 } 01588 /* L170: */ 01589 } 01590 } else { 01591 /* Form C := alpha*A'*B' + beta*C */ 01592 i__1 = *n; 01593 for (j = 1; j <= i__1; ++j) { 01594 i__2 = *m; 01595 for (i__ = 1; i__ <= i__2; ++i__) { 01596 temp = 0.f; 01597 i__3 = *k; 01598 for (l = 1; l <= i__3; ++l) { 01599 temp += a_ref(l, i__) * b_ref(j, l); 01600 /* L180: */ 01601 } 01602 if (*beta == 0.f) { 01603 c___ref(i__, j) = *alpha * temp; 01604 } else { 01605 c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__, 01606 j); 01607 } 01608 /* L190: */ 01609 } 01610 /* L200: */ 01611 } 01612 } 01613 } 01614 return 0; 01615 /* End of SGEMM . */ 01616 } /* sgemm_ */
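A minimal calling sketch for sgemm_ (not part of the original file), assuming lapackblas.h declares sgemm_ together with the integer and real typedefs, and that matrices are stored column-major as the routine expects:

#include <cstdio>
#include "lapackblas.h"   // assumed to declare sgemm_, integer, real

int main()
{
    integer m = 2, n = 2, k = 3;
    real alpha = 1.f, beta = 0.f;
    real a[6] = {1,2, 3,4, 5,6};       // 2x3 column-major, lda = 2
    real b[6] = {1,0,0, 1,1,1};        // 3x2 column-major: columns (1,0,0) and (1,1,1), ldb = 3
    real c[4] = {0,0, 0,0};            // 2x2 result, ldc = 2
    integer lda = 2, ldb = 3, ldc = 2;
    // C := alpha*A*B + beta*C with no transposition
    sgemm_("N", "N", &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc);
    std::printf("C = [%g %g; %g %g]\n", c[0], c[2], c[1], c[3]);  // expected [1 9; 2 12]
    return 0;
}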
int sgemv_ ( const char * trans, integer * m, integer * n, real * alpha, real * a, integer * lda, real * x, integer * incx, real * beta, real * y, integer * incy )
Definition at line 1624 of file lapackblas.cpp.
References a_ref, f2cmax, integer, lsame_(), and xerbla_().
Referenced by EMAN::PCAlarge::analyze(), EMAN::PCA::dopca_lan(), EMAN::PCA::Lanczos(), EMAN::PCAlarge::Lanczos(), slabrd_(), slaeda_(), slarf_(), slarft_(), and slatrd_().
01627 { 01628 /* System generated locals */ 01629 integer a_dim1, a_offset, i__1, i__2; 01630 /* Local variables */ 01631 static integer info; 01632 static real temp; 01633 static integer lenx, leny, i__, j; 01634 extern logical lsame_(const char *, const char *); 01635 static integer ix, iy, jx, jy, kx, ky; 01636 extern /* Subroutine */ int xerbla_(const char *, integer *); 01637 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 01638 /* Purpose 01639 ======= 01640 SGEMV performs one of the matrix-vector operations 01641 y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, 01642 where alpha and beta are scalars, x and y are vectors and A is an 01643 m by n matrix. 01644 Parameters 01645 ========== 01646 TRANS - CHARACTER*1. 01647 On entry, TRANS specifies the operation to be performed as 01648 follows: 01649 TRANS = 'N' or 'n' y := alpha*A*x + beta*y. 01650 TRANS = 'T' or 't' y := alpha*A'*x + beta*y. 01651 TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. 01652 Unchanged on exit. 01653 M - INTEGER. 01654 On entry, M specifies the number of rows of the matrix A. 01655 M must be at least zero. 01656 Unchanged on exit. 01657 N - INTEGER. 01658 On entry, N specifies the number of columns of the matrix A. 01659 N must be at least zero. 01660 Unchanged on exit. 01661 ALPHA - REAL . 01662 On entry, ALPHA specifies the scalar alpha. 01663 Unchanged on exit. 01664 A - REAL array of DIMENSION ( LDA, n ). 01665 Before entry, the leading m by n part of the array A must 01666 contain the matrix of coefficients. 01667 Unchanged on exit. 01668 LDA - INTEGER. 01669 On entry, LDA specifies the first dimension of A as declared 01670 in the calling (sub) program. LDA must be at least 01671 f2cmax( 1, m ). 01672 Unchanged on exit. 01673 X - REAL array of DIMENSION at least 01674 ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' 01675 and at least 01676 ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. 01677 Before entry, the incremented array X must contain the 01678 vector x. 01679 Unchanged on exit. 01680 INCX - INTEGER. 01681 On entry, INCX specifies the increment for the elements of 01682 X. INCX must not be zero. 01683 Unchanged on exit. 01684 BETA - REAL . 01685 On entry, BETA specifies the scalar beta. When BETA is 01686 supplied as zero then Y need not be set on input. 01687 Unchanged on exit. 01688 Y - REAL array of DIMENSION at least 01689 ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' 01690 and at least 01691 ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. 01692 Before entry with BETA non-zero, the incremented array Y 01693 must contain the vector y. On exit, Y is overwritten by the 01694 updated vector y. 01695 INCY - INTEGER. 01696 On entry, INCY specifies the increment for the elements of 01697 Y. INCY must not be zero. 01698 Unchanged on exit. 01699 Level 2 Blas routine. 01700 -- Written on 22-October-1986. 01701 Jack Dongarra, Argonne National Lab. 01702 Jeremy Du Croz, Nag Central Office. 01703 Sven Hammarling, Nag Central Office. 01704 Richard Hanson, Sandia National Labs. 01705 Test the input parameters. 01706 Parameter adjustments */ 01707 a_dim1 = *lda; 01708 a_offset = 1 + a_dim1 * 1; 01709 a -= a_offset; 01710 --x; 01711 --y; 01712 /* Function Body */ 01713 info = 0; 01714 if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! 
lsame_(trans, "C") 01715 ) { 01716 info = 1; 01717 } else if (*m < 0) { 01718 info = 2; 01719 } else if (*n < 0) { 01720 info = 3; 01721 } else if (*lda < f2cmax(1,*m)) { 01722 info = 6; 01723 } else if (*incx == 0) { 01724 info = 8; 01725 } else if (*incy == 0) { 01726 info = 11; 01727 } 01728 if (info != 0) { 01729 xerbla_("SGEMV ", &info); 01730 return 0; 01731 } 01732 /* Quick return if possible. */ 01733 if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) { 01734 return 0; 01735 } 01736 /* Set LENX and LENY, the lengths of the vectors x and y, and set 01737 up the start points in X and Y. */ 01738 if (lsame_(trans, "N")) { 01739 lenx = *n; 01740 leny = *m; 01741 } else { 01742 lenx = *m; 01743 leny = *n; 01744 } 01745 if (*incx > 0) { 01746 kx = 1; 01747 } else { 01748 kx = 1 - (lenx - 1) * *incx; 01749 } 01750 if (*incy > 0) { 01751 ky = 1; 01752 } else { 01753 ky = 1 - (leny - 1) * *incy; 01754 } 01755 /* Start the operations. In this version the elements of A are 01756 accessed sequentially with one pass through A. 01757 First form y := beta*y. */ 01758 if (*beta != 1.f) { 01759 if (*incy == 1) { 01760 if (*beta == 0.f) { 01761 i__1 = leny; 01762 for (i__ = 1; i__ <= i__1; ++i__) { 01763 y[i__] = 0.f; 01764 /* L10: */ 01765 } 01766 } else { 01767 i__1 = leny; 01768 for (i__ = 1; i__ <= i__1; ++i__) { 01769 y[i__] = *beta * y[i__]; 01770 /* L20: */ 01771 } 01772 } 01773 } else { 01774 iy = ky; 01775 if (*beta == 0.f) { 01776 i__1 = leny; 01777 for (i__ = 1; i__ <= i__1; ++i__) { 01778 y[iy] = 0.f; 01779 iy += *incy; 01780 /* L30: */ 01781 } 01782 } else { 01783 i__1 = leny; 01784 for (i__ = 1; i__ <= i__1; ++i__) { 01785 y[iy] = *beta * y[iy]; 01786 iy += *incy; 01787 /* L40: */ 01788 } 01789 } 01790 } 01791 } 01792 if (*alpha == 0.f) { 01793 return 0; 01794 } 01795 if (lsame_(trans, "N")) { 01796 /* Form y := alpha*A*x + y. */ 01797 jx = kx; 01798 if (*incy == 1) { 01799 i__1 = *n; 01800 for (j = 1; j <= i__1; ++j) { 01801 if (x[jx] != 0.f) { 01802 temp = *alpha * x[jx]; 01803 i__2 = *m; 01804 for (i__ = 1; i__ <= i__2; ++i__) { 01805 y[i__] += temp * a_ref(i__, j); 01806 /* L50: */ 01807 } 01808 } 01809 jx += *incx; 01810 /* L60: */ 01811 } 01812 } else { 01813 i__1 = *n; 01814 for (j = 1; j <= i__1; ++j) { 01815 if (x[jx] != 0.f) { 01816 temp = *alpha * x[jx]; 01817 iy = ky; 01818 i__2 = *m; 01819 for (i__ = 1; i__ <= i__2; ++i__) { 01820 y[iy] += temp * a_ref(i__, j); 01821 iy += *incy; 01822 /* L70: */ 01823 } 01824 } 01825 jx += *incx; 01826 /* L80: */ 01827 } 01828 } 01829 } else { 01830 /* Form y := alpha*A'*x + y. */ 01831 jy = ky; 01832 if (*incx == 1) { 01833 i__1 = *n; 01834 for (j = 1; j <= i__1; ++j) { 01835 temp = 0.f; 01836 i__2 = *m; 01837 for (i__ = 1; i__ <= i__2; ++i__) { 01838 temp += a_ref(i__, j) * x[i__]; 01839 /* L90: */ 01840 } 01841 y[jy] += *alpha * temp; 01842 jy += *incy; 01843 /* L100: */ 01844 } 01845 } else { 01846 i__1 = *n; 01847 for (j = 1; j <= i__1; ++j) { 01848 temp = 0.f; 01849 ix = kx; 01850 i__2 = *m; 01851 for (i__ = 1; i__ <= i__2; ++i__) { 01852 temp += a_ref(i__, j) * x[ix]; 01853 ix += *incx; 01854 /* L110: */ 01855 } 01856 y[jy] += *alpha * temp; 01857 jy += *incy; 01858 /* L120: */ 01859 } 01860 } 01861 } 01862 return 0; 01863 /* End of SGEMV . */ 01864 } /* sgemv_ */
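A minimal calling sketch for sgemv_ (not from the original source), assuming the same lapackblas.h typedefs; it computes y := A*x for a 3x2 column-major A:

#include <cstdio>
#include "lapackblas.h"   // assumed to declare sgemv_, integer, real

int main()
{
    integer m = 3, n = 2, lda = 3, incx = 1, incy = 1;
    real alpha = 1.f, beta = 0.f;
    real a[6] = {1,2,3, 4,5,6};   // columns of A: (1,2,3) and (4,5,6)
    real x[2] = {1.f, 1.f};
    real y[3];
    // beta = 0, so y need not be initialized on entry
    sgemv_("N", &m, &n, &alpha, a, &lda, x, &incx, &beta, y, &incy);
    std::printf("y = (%g, %g, %g)\n", y[0], y[1], y[2]);  // expected (5, 7, 9)
    return 0;
}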
int sgeqr2_ ( integer * m, integer * n, real * a, integer * lda, real * tau, real * work, integer * info )
Definition at line 24684 of file lapackblas.cpp.
References a_ref, f2cmax, f2cmin, integer, slarf_(), slarfg_(), and xerbla_().
Referenced by sgeqrf_().
24686 { 24687 /* -- LAPACK routine (version 3.0) -- 24688 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 24689 Courant Institute, Argonne National Lab, and Rice University 24690 February 29, 1992 24691 24692 24693 Purpose 24694 ======= 24695 24696 SGEQR2 computes a QR factorization of a real m by n matrix A: 24697 A = Q * R. 24698 24699 Arguments 24700 ========= 24701 24702 M (input) INTEGER 24703 The number of rows of the matrix A. M >= 0. 24704 24705 N (input) INTEGER 24706 The number of columns of the matrix A. N >= 0. 24707 24708 A (input/output) REAL array, dimension (LDA,N) 24709 On entry, the m by n matrix A. 24710 On exit, the elements on and above the diagonal of the array 24711 contain the min(m,n) by n upper trapezoidal matrix R (R is 24712 upper triangular if m >= n); the elements below the diagonal, 24713 with the array TAU, represent the orthogonal matrix Q as a 24714 product of elementary reflectors (see Further Details). 24715 24716 LDA (input) INTEGER 24717 The leading dimension of the array A. LDA >= max(1,M). 24718 24719 TAU (output) REAL array, dimension (min(M,N)) 24720 The scalar factors of the elementary reflectors (see Further 24721 Details). 24722 24723 WORK (workspace) REAL array, dimension (N) 24724 24725 INFO (output) INTEGER 24726 = 0: successful exit 24727 < 0: if INFO = -i, the i-th argument had an illegal value 24728 24729 Further Details 24730 =============== 24731 24732 The matrix Q is represented as a product of elementary reflectors 24733 24734 Q = H(1) H(2) . . . H(k), where k = min(m,n). 24735 24736 Each H(i) has the form 24737 24738 H(i) = I - tau * v * v' 24739 24740 where tau is a real scalar, and v is a real vector with 24741 v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), 24742 and tau in TAU(i). 
24743 24744 ===================================================================== 24745 24746 24747 Test the input arguments 24748 24749 Parameter adjustments */ 24750 /* Table of constant values */ 24751 static integer c__1 = 1; 24752 24753 /* System generated locals */ 24754 integer a_dim1, a_offset, i__1, i__2, i__3; 24755 /* Local variables */ 24756 static integer i__, k; 24757 extern /* Subroutine */ int slarf_(const char *, integer *, integer *, real *, 24758 integer *, real *, real *, integer *, real *), xerbla_( 24759 const char *, integer *), slarfg_(integer *, real *, real *, 24760 integer *, real *); 24761 static real aii; 24762 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 24763 24764 24765 a_dim1 = *lda; 24766 a_offset = 1 + a_dim1 * 1; 24767 a -= a_offset; 24768 --tau; 24769 --work; 24770 24771 /* Function Body */ 24772 *info = 0; 24773 if (*m < 0) { 24774 *info = -1; 24775 } else if (*n < 0) { 24776 *info = -2; 24777 } else if (*lda < f2cmax(1,*m)) { 24778 *info = -4; 24779 } 24780 if (*info != 0) { 24781 i__1 = -(*info); 24782 xerbla_("SGEQR2", &i__1); 24783 return 0; 24784 } 24785 24786 k = f2cmin(*m,*n); 24787 24788 i__1 = k; 24789 for (i__ = 1; i__ <= i__1; ++i__) { 24790 24791 /* Generate elementary reflector H(i) to annihilate A(i+1:m,i) 24792 24793 Computing MIN */ 24794 i__2 = i__ + 1; 24795 i__3 = *m - i__ + 1; 24796 slarfg_(&i__3, &a_ref(i__, i__), &a_ref(f2cmin(i__2,*m), i__), &c__1, & 24797 tau[i__]); 24798 if (i__ < *n) { 24799 24800 /* Apply H(i) to A(i:m,i+1:n) from the left */ 24801 24802 aii = a_ref(i__, i__); 24803 a_ref(i__, i__) = 1.f; 24804 i__2 = *m - i__ + 1; 24805 i__3 = *n - i__; 24806 slarf_("Left", &i__2, &i__3, &a_ref(i__, i__), &c__1, &tau[i__], & 24807 a_ref(i__, i__ + 1), lda, &work[1]); 24808 a_ref(i__, i__) = aii; 24809 } 24810 /* L10: */ 24811 } 24812 return 0; 24813 24814 /* End of SGEQR2 */ 24815 24816 } /* sgeqr2_ */
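A minimal calling sketch for sgeqr2_ (not from the original source), assuming the lapackblas.h typedefs. On return, R sits on and above the diagonal of a, the Householder vectors below it, and the reflector scalars in tau, as described above:

#include "lapackblas.h"   // assumed to declare sgeqr2_, integer, real

int main()
{
    integer m = 3, n = 2, lda = 3, info = 0;
    real a[6]  = {1,1,1, 1,2,3};   // column-major 3x2: columns (1,1,1) and (1,2,3)
    real tau[2];                   // min(m,n) reflector scalars
    real work[2];                  // workspace of length n
    sgeqr2_(&m, &n, a, &lda, tau, work, &info);
    return info ? 1 : 0;           // info < 0 flags an illegal argument
}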
int sgeqrf_ ( integer * m, integer * n, real * a, integer * lda, real * tau, real * work, integer * lwork, integer * info )
Definition at line 23835 of file lapackblas.cpp.
References a_ref, c__3, f2cmax, f2cmin, ilaenv_(), integer, nx, sgeqr2_(), slarfb_(), slarft_(), and xerbla_().
Referenced by sgesvd_().
23837 { 23838 /* -- LAPACK routine (version 3.0) -- 23839 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 23840 Courant Institute, Argonne National Lab, and Rice University 23841 June 30, 1999 23842 23843 23844 Purpose 23845 ======= 23846 23847 SGEQRF computes a QR factorization of a real M-by-N matrix A: 23848 A = Q * R. 23849 23850 Arguments 23851 ========= 23852 23853 M (input) INTEGER 23854 The number of rows of the matrix A. M >= 0. 23855 23856 N (input) INTEGER 23857 The number of columns of the matrix A. N >= 0. 23858 23859 A (input/output) REAL array, dimension (LDA,N) 23860 On entry, the M-by-N matrix A. 23861 On exit, the elements on and above the diagonal of the array 23862 contain the min(M,N)-by-N upper trapezoidal matrix R (R is 23863 upper triangular if m >= n); the elements below the diagonal, 23864 with the array TAU, represent the orthogonal matrix Q as a 23865 product of min(m,n) elementary reflectors (see Further 23866 Details). 23867 23868 LDA (input) INTEGER 23869 The leading dimension of the array A. LDA >= max(1,M). 23870 23871 TAU (output) REAL array, dimension (min(M,N)) 23872 The scalar factors of the elementary reflectors (see Further 23873 Details). 23874 23875 WORK (workspace/output) REAL array, dimension (LWORK) 23876 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 23877 23878 LWORK (input) INTEGER 23879 The dimension of the array WORK. LWORK >= max(1,N). 23880 For optimum performance LWORK >= N*NB, where NB is 23881 the optimal blocksize. 23882 23883 If LWORK = -1, then a workspace query is assumed; the routine 23884 only calculates the optimal size of the WORK array, returns 23885 this value as the first entry of the WORK array, and no error 23886 message related to LWORK is issued by XERBLA. 23887 23888 INFO (output) INTEGER 23889 = 0: successful exit 23890 < 0: if INFO = -i, the i-th argument had an illegal value 23891 23892 Further Details 23893 =============== 23894 23895 The matrix Q is represented as a product of elementary reflectors 23896 23897 Q = H(1) H(2) . . . H(k), where k = min(m,n). 23898 23899 Each H(i) has the form 23900 23901 H(i) = I - tau * v * v' 23902 23903 where tau is a real scalar, and v is a real vector with 23904 v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), 23905 and tau in TAU(i). 
23906 23907 ===================================================================== 23908 23909 23910 Test the input arguments 23911 23912 Parameter adjustments */ 23913 /* Table of constant values */ 23914 static integer c__1 = 1; 23915 static integer c_n1 = -1; 23916 static integer c__3 = 3; 23917 static integer c__2 = 2; 23918 23919 /* System generated locals */ 23920 integer a_dim1, a_offset, i__1, i__2, i__3, i__4; 23921 /* Local variables */ 23922 static integer i__, k, nbmin, iinfo; 23923 extern /* Subroutine */ int sgeqr2_(integer *, integer *, real *, integer 23924 *, real *, real *, integer *); 23925 static integer ib, nb, nx; 23926 extern /* Subroutine */ int slarfb_(const char *, const char *, const char *, const char *, 23927 integer *, integer *, integer *, real *, integer *, real *, 23928 integer *, real *, integer *, real *, integer *), xerbla_(const char *, integer *); 23929 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 23930 integer *, integer *, ftnlen, ftnlen); 23931 extern /* Subroutine */ int slarft_(const char *, const char *, integer *, integer *, 23932 real *, integer *, real *, real *, integer *); 23933 static integer ldwork, lwkopt; 23934 static logical lquery; 23935 static integer iws; 23936 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 23937 23938 23939 a_dim1 = *lda; 23940 a_offset = 1 + a_dim1 * 1; 23941 a -= a_offset; 23942 --tau; 23943 --work; 23944 23945 /* Function Body */ 23946 *info = 0; 23947 nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) 23948 1); 23949 lwkopt = *n * nb; 23950 work[1] = (real) lwkopt; 23951 lquery = *lwork == -1; 23952 if (*m < 0) { 23953 *info = -1; 23954 } else if (*n < 0) { 23955 *info = -2; 23956 } else if (*lda < f2cmax(1,*m)) { 23957 *info = -4; 23958 } else if (*lwork < f2cmax(1,*n) && ! lquery) { 23959 *info = -7; 23960 } 23961 if (*info != 0) { 23962 i__1 = -(*info); 23963 xerbla_("SGEQRF", &i__1); 23964 return 0; 23965 } else if (lquery) { 23966 return 0; 23967 } 23968 23969 /* Quick return if possible */ 23970 23971 k = f2cmin(*m,*n); 23972 if (k == 0) { 23973 work[1] = 1.f; 23974 return 0; 23975 } 23976 23977 nbmin = 2; 23978 nx = 0; 23979 iws = *n; 23980 if (nb > 1 && nb < k) { 23981 23982 /* Determine when to cross over from blocked to unblocked code. 23983 23984 Computing MAX */ 23985 i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", m, n, &c_n1, &c_n1, ( 23986 ftnlen)6, (ftnlen)1); 23987 nx = f2cmax(i__1,i__2); 23988 if (nx < k) { 23989 23990 /* Determine if workspace is large enough for blocked code. */ 23991 23992 ldwork = *n; 23993 iws = ldwork * nb; 23994 if (*lwork < iws) { 23995 23996 /* Not enough workspace to use optimal NB: reduce NB and 23997 determine the minimum value of NB. */ 23998 23999 nb = *lwork / ldwork; 24000 /* Computing MAX */ 24001 i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", m, n, &c_n1, & 24002 c_n1, (ftnlen)6, (ftnlen)1); 24003 nbmin = f2cmax(i__1,i__2); 24004 } 24005 } 24006 } 24007 24008 if (nb >= nbmin && nb < k && nx < k) { 24009 24010 /* Use blocked code initially */ 24011 24012 i__1 = k - nx; 24013 i__2 = nb; 24014 for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { 24015 /* Computing MIN */ 24016 i__3 = k - i__ + 1; 24017 ib = f2cmin(i__3,nb); 24018 24019 /* Compute the QR factorization of the current block 24020 A(i:m,i:i+ib-1) */ 24021 24022 i__3 = *m - i__ + 1; 24023 sgeqr2_(&i__3, &ib, &a_ref(i__, i__), lda, &tau[i__], &work[1], & 24024 iinfo); 24025 if (i__ + ib <= *n) { 24026 24027 /* Form the triangular factor of the block reflector 24028 H = H(i) H(i+1) . . . H(i+ib-1) */ 24029 24030 i__3 = *m - i__ + 1; 24031 slarft_("Forward", "Columnwise", &i__3, &ib, &a_ref(i__, i__), 24032 lda, &tau[i__], &work[1], &ldwork); 24033 24034 /* Apply H' to A(i:m,i+ib:n) from the left */ 24035 24036 i__3 = *m - i__ + 1; 24037 i__4 = *n - i__ - ib + 1; 24038 slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & 24039 i__4, &ib, &a_ref(i__, i__), lda, &work[1], &ldwork, & 24040 a_ref(i__, i__ + ib), lda, &work[ib + 1], &ldwork); 24041 } 24042 /* L10: */ 24043 } 24044 } else { 24045 i__ = 1; 24046 } 24047 24048 /* Use unblocked code to factor the last or only block. */ 24049 24050 if (i__ <= k) { 24051 i__2 = *m - i__ + 1; 24052 i__1 = *n - i__ + 1; 24053 sgeqr2_(&i__2, &i__1, &a_ref(i__, i__), lda, &tau[i__], &work[1], & 24054 iinfo); 24055 } 24056 24057 work[1] = (real) iws; 24058 return 0; 24059 24060 /* End of SGEQRF */ 24061 24062 } /* sgeqrf_ */
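A minimal calling sketch for sgeqrf_ (not from the original source), assuming the lapackblas.h typedefs. It uses the LWORK = -1 workspace query documented above before doing the blocked factorization:

#include <vector>
#include "lapackblas.h"   // assumed to declare sgeqrf_, integer, real

int main()
{
    integer m = 4, n = 3, lda = 4, info = 0;
    real a[12] = {1,2,3,4, 2,0,1,1, 0,1,0,2};   // column-major 4x3
    real tau[3];                                // min(m,n) reflector scalars
    real wkopt;
    integer lwork = -1;
    sgeqrf_(&m, &n, a, &lda, tau, &wkopt, &lwork, &info);   // query optimal LWORK
    lwork = (integer) wkopt;
    std::vector<real> work(lwork);
    sgeqrf_(&m, &n, a, &lda, tau, work.data(), &lwork, &info);
    return info ? 1 : 0;
}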
int sger_ ( integer * m, integer * n, real * alpha, real * x, integer * incx, real * y, integer * incy, real * a, integer * lda )
Definition at line 1870 of file lapackblas.cpp.
References a_ref, f2cmax, integer, and xerbla_().
Referenced by slarf_().
01872 { 01873 /* System generated locals */ 01874 integer a_dim1, a_offset, i__1, i__2; 01875 /* Local variables */ 01876 static integer info; 01877 static real temp; 01878 static integer i__, j, ix, jy, kx; 01879 extern /* Subroutine */ int xerbla_(const char *, integer *); 01880 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 01881 /* Purpose 01882 ======= 01883 SGER performs the rank 1 operation 01884 A := alpha*x*y' + A, 01885 where alpha is a scalar, x is an m element vector, y is an n element 01886 vector and A is an m by n matrix. 01887 Parameters 01888 ========== 01889 M - INTEGER. 01890 On entry, M specifies the number of rows of the matrix A. 01891 M must be at least zero. 01892 Unchanged on exit. 01893 N - INTEGER. 01894 On entry, N specifies the number of columns of the matrix A. 01895 N must be at least zero. 01896 Unchanged on exit. 01897 ALPHA - REAL . 01898 On entry, ALPHA specifies the scalar alpha. 01899 Unchanged on exit. 01900 X - REAL array of dimension at least 01901 ( 1 + ( m - 1 )*abs( INCX ) ). 01902 Before entry, the incremented array X must contain the m 01903 element vector x. 01904 Unchanged on exit. 01905 INCX - INTEGER. 01906 On entry, INCX specifies the increment for the elements of 01907 X. INCX must not be zero. 01908 Unchanged on exit. 01909 Y - REAL array of dimension at least 01910 ( 1 + ( n - 1 )*abs( INCY ) ). 01911 Before entry, the incremented array Y must contain the n 01912 element vector y. 01913 Unchanged on exit. 01914 INCY - INTEGER. 01915 On entry, INCY specifies the increment for the elements of 01916 Y. INCY must not be zero. 01917 Unchanged on exit. 01918 A - REAL array of DIMENSION ( LDA, n ). 01919 Before entry, the leading m by n part of the array A must 01920 contain the matrix of coefficients. On exit, A is 01921 overwritten by the updated matrix. 01922 LDA - INTEGER. 01923 On entry, LDA specifies the first dimension of A as declared 01924 in the calling (sub) program. LDA must be at least 01925 f2cmax( 1, m ). 01926 Unchanged on exit. 01927 Level 2 Blas routine. 01928 -- Written on 22-October-1986. 01929 Jack Dongarra, Argonne National Lab. 01930 Jeremy Du Croz, Nag Central Office. 01931 Sven Hammarling, Nag Central Office. 01932 Richard Hanson, Sandia National Labs. 01933 Test the input parameters. 01934 Parameter adjustments */ 01935 --x; 01936 --y; 01937 a_dim1 = *lda; 01938 a_offset = 1 + a_dim1 * 1; 01939 a -= a_offset; 01940 /* Function Body */ 01941 info = 0; 01942 if (*m < 0) { 01943 info = 1; 01944 } else if (*n < 0) { 01945 info = 2; 01946 } else if (*incx == 0) { 01947 info = 5; 01948 } else if (*incy == 0) { 01949 info = 7; 01950 } else if (*lda < f2cmax(1,*m)) { 01951 info = 9; 01952 } 01953 if (info != 0) { 01954 xerbla_("SGER ", &info); 01955 return 0; 01956 } 01957 /* Quick return if possible. */ 01958 if (*m == 0 || *n == 0 || *alpha == 0.f) { 01959 return 0; 01960 } 01961 /* Start the operations. In this version the elements of A are 01962 accessed sequentially with one pass through A. 
*/ 01963 if (*incy > 0) { 01964 jy = 1; 01965 } else { 01966 jy = 1 - (*n - 1) * *incy; 01967 } 01968 if (*incx == 1) { 01969 i__1 = *n; 01970 for (j = 1; j <= i__1; ++j) { 01971 if (y[jy] != 0.f) { 01972 temp = *alpha * y[jy]; 01973 i__2 = *m; 01974 for (i__ = 1; i__ <= i__2; ++i__) { 01975 a_ref(i__, j) = a_ref(i__, j) + x[i__] * temp; 01976 /* L10: */ 01977 } 01978 } 01979 jy += *incy; 01980 /* L20: */ 01981 } 01982 } else { 01983 if (*incx > 0) { 01984 kx = 1; 01985 } else { 01986 kx = 1 - (*m - 1) * *incx; 01987 } 01988 i__1 = *n; 01989 for (j = 1; j <= i__1; ++j) { 01990 if (y[jy] != 0.f) { 01991 temp = *alpha * y[jy]; 01992 ix = kx; 01993 i__2 = *m; 01994 for (i__ = 1; i__ <= i__2; ++i__) { 01995 a_ref(i__, j) = a_ref(i__, j) + x[ix] * temp; 01996 ix += *incx; 01997 /* L30: */ 01998 } 01999 } 02000 jy += *incy; 02001 /* L40: */ 02002 } 02003 } 02004 return 0; 02005 /* End of SGER . */ 02006 } /* sger_ */
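A minimal calling sketch for sger_ (not from the original source), assuming the lapackblas.h typedefs; it performs the rank-1 update A := alpha*x*y' + A on a 2x3 column-major matrix:

#include "lapackblas.h"   // assumed to declare sger_, integer, real

int main()
{
    integer m = 2, n = 3, lda = 2, incx = 1, incy = 1;
    real alpha = 2.f;
    real x[2] = {1.f, 2.f};
    real y[3] = {1.f, 0.f, 1.f};
    real a[6] = {0,0, 0,0, 0,0};   // 2x3 column-major, starts at zero
    sger_(&m, &n, &alpha, x, &incx, y, &incy, a, &lda);
    // a now holds 2*x*y': columns (2,4), (0,0), (2,4)
    return 0;
}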
int sgesvd_ ( char * jobu, char * jobvt, integer * m, integer * n, real * a, integer * lda, real * s, real * u, integer * ldu, real * vt, integer * ldvt, real * work, integer * lwork, integer * info )
Definition at line 16378 of file lapackblas.cpp.
References a_ref, f2cmax, f2cmin, ierr, ilaenv_(), integer, lsame_(), s_cat(), sbdsqr_(), sgebrd_(), sgelqf_(), sgemm_(), sgeqrf_(), slacpy_(), slamch_(), slange_(), slascl_(), slaset_(), sorgbr_(), sorglq_(), sorgqr_(), sormbr_(), sqrt(), u_ref, vt_ref, and xerbla_().
Referenced by svd().
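A minimal calling sketch for sgesvd_ (not from the original source), assuming the lapackblas.h typedefs. It requests all singular vectors (JOBU = JOBVT = 'A') and uses the LWORK = -1 workspace query documented below; note that JOBU and JOBVT are taken as non-const char* in this translation:

#include <vector>
#include "lapackblas.h"   // assumed to declare sgesvd_, integer, real

int main()
{
    integer m = 3, n = 2, lda = 3, ldu = 3, ldvt = 2, info = 0;
    real a[6] = {1,0,1, 0,1,1};    // column-major 3x2
    real s[2], u[9], vt[4];        // s: min(m,n), u: m x m, vt: n x n
    char jobu[] = "A", jobvt[] = "A";
    real wkopt;
    integer lwork = -1;
    sgesvd_(jobu, jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &wkopt, &lwork, &info);  // query
    lwork = (integer) wkopt;
    std::vector<real> work(lwork);
    sgesvd_(jobu, jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work.data(), &lwork, &info);
    return info ? 1 : 0;           // info > 0 means SBDSQR did not converge
}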
16381 { 16382 /* System generated locals */ 16383 typedef const char *address; 16384 16385 address a__1[2]; 16386 integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2], 16387 i__2, i__3, i__4; 16388 char ch__1[2]; 16389 16390 /* Builtin functions 16391 Subroutine */ int s_cat(char *, const char **, integer *, integer *, ftnlen); 16392 //double sqrt(doublereal); 16393 16394 /* Local variables */ 16395 static integer iscl; 16396 static real anrm; 16397 static integer ierr, itau, ncvt, nrvt, i__; 16398 extern logical lsame_(const char *, const char *); 16399 static integer chunk; 16400 extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *, 16401 integer *, real *, real *, integer *, real *, integer *, real *, 16402 real *, integer *); 16403 static integer minmn, wrkbl, itaup, itauq, mnthr, iwork; 16404 static logical wntua, wntva, wntun, wntuo, wntvn, wntvo, wntus, wntvs; 16405 static integer ie, ir, bdspac, iu; 16406 extern /* Subroutine */ int sgebrd_(integer *, integer *, real *, integer 16407 *, real *, real *, real *, real *, real *, integer *, integer *); 16408 extern doublereal slamch_(const char *), slange_(const char *, integer *, 16409 integer *, real *, integer *, real *); 16410 extern /* Subroutine */ int xerbla_(const char *, integer *); 16411 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 16412 integer *, integer *, ftnlen, ftnlen); 16413 static real bignum; 16414 extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer 16415 *, real *, real *, integer *, integer *), slascl_(const char *, integer 16416 *, integer *, real *, real *, integer *, integer *, real *, 16417 integer *, integer *), sgeqrf_(integer *, integer *, real 16418 *, integer *, real *, real *, integer *, integer *), slacpy_(const char 16419 *, integer *, integer *, real *, integer *, real *, integer *), slaset_(const char *, integer *, integer *, real *, real *, 16420 real *, integer *), sbdsqr_(const char *, integer *, integer *, 16421 integer *, integer *, real *, real *, real *, integer *, real *, 16422 integer *, real *, integer *, real *, integer *), sorgbr_( 16423 const char *, integer *, integer *, integer *, real *, integer *, real * 16424 , real *, integer *, integer *), sormbr_(const char *, const char *, 16425 const char *, integer *, integer *, integer *, real *, integer *, real * 16426 , real *, integer *, real *, integer *, integer *); 16427 static integer ldwrkr, minwrk, ldwrku, maxwrk; 16428 extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real 16429 *, integer *, real *, real *, integer *, integer *); 16430 static real smlnum; 16431 extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real 16432 *, integer *, real *, real *, integer *, integer *); 16433 static logical lquery, wntuas, wntvas; 16434 static integer blk, ncu; 16435 static real dum[1], eps; 16436 static integer nru; 16437 16438 16439 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 16440 #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] 16441 #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] 16442 16443 16444 /* -- LAPACK driver routine (version 3.0) -- 16445 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 16446 Courant Institute, Argonne National Lab, and Rice University 16447 October 31, 1999 16448 16449 16450 Purpose 16451 ======= 16452 16453 SGESVD computes the singular value decomposition (SVD) of a real 16454 M-by-N matrix A, optionally computing the left and/or right singular 16455 vectors. 
The SVD is written 16456 16457 A = U * SIGMA * transpose(V) 16458 16459 where SIGMA is an M-by-N matrix which is zero except for its 16460 min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and 16461 V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA 16462 are the singular values of A; they are real and non-negative, and 16463 are returned in descending order. The first min(m,n) columns of 16464 U and V are the left and right singular vectors of A. 16465 16466 Note that the routine returns V**T, not V. 16467 16468 Arguments 16469 ========= 16470 16471 JOBU (input) CHARACTER*1 16472 Specifies options for computing all or part of the matrix U: 16473 = 'A': all M columns of U are returned in array U: 16474 = 'S': the first min(m,n) columns of U (the left singular 16475 vectors) are returned in the array U; 16476 = 'O': the first min(m,n) columns of U (the left singular 16477 vectors) are overwritten on the array A; 16478 = 'N': no columns of U (no left singular vectors) are 16479 computed. 16480 16481 JOBVT (input) CHARACTER*1 16482 Specifies options for computing all or part of the matrix 16483 V**T: 16484 = 'A': all N rows of V**T are returned in the array VT; 16485 = 'S': the first min(m,n) rows of V**T (the right singular 16486 vectors) are returned in the array VT; 16487 = 'O': the first min(m,n) rows of V**T (the right singular 16488 vectors) are overwritten on the array A; 16489 = 'N': no rows of V**T (no right singular vectors) are 16490 computed. 16491 16492 JOBVT and JOBU cannot both be 'O'. 16493 16494 M (input) INTEGER 16495 The number of rows of the input matrix A. M >= 0. 16496 16497 N (input) INTEGER 16498 The number of columns of the input matrix A. N >= 0. 16499 16500 A (input/output) REAL array, dimension (LDA,N) 16501 On entry, the M-by-N matrix A. 16502 On exit, 16503 if JOBU = 'O', A is overwritten with the first min(m,n) 16504 columns of U (the left singular vectors, 16505 stored columnwise); 16506 if JOBVT = 'O', A is overwritten with the first min(m,n) 16507 rows of V**T (the right singular vectors, 16508 stored rowwise); 16509 if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A 16510 are destroyed. 16511 16512 LDA (input) INTEGER 16513 The leading dimension of the array A. LDA >= max(1,M). 16514 16515 S (output) REAL array, dimension (min(M,N)) 16516 The singular values of A, sorted so that S(i) >= S(i+1). 16517 16518 U (output) REAL array, dimension (LDU,UCOL) 16519 (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. 16520 If JOBU = 'A', U contains the M-by-M orthogonal matrix U; 16521 if JOBU = 'S', U contains the first min(m,n) columns of U 16522 (the left singular vectors, stored columnwise); 16523 if JOBU = 'N' or 'O', U is not referenced. 16524 16525 LDU (input) INTEGER 16526 The leading dimension of the array U. LDU >= 1; if 16527 JOBU = 'S' or 'A', LDU >= M. 16528 16529 VT (output) REAL array, dimension (LDVT,N) 16530 If JOBVT = 'A', VT contains the N-by-N orthogonal matrix 16531 V**T; 16532 if JOBVT = 'S', VT contains the first min(m,n) rows of 16533 V**T (the right singular vectors, stored rowwise); 16534 if JOBVT = 'N' or 'O', VT is not referenced. 16535 16536 LDVT (input) INTEGER 16537 The leading dimension of the array VT. LDVT >= 1; if 16538 JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). 
16539 16540 WORK (workspace/output) REAL array, dimension (LWORK) 16541 On exit, if INFO = 0, WORK(1) returns the optimal LWORK; 16542 if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged 16543 superdiagonal elements of an upper bidiagonal matrix B 16544 whose diagonal is in S (not necessarily sorted). B 16545 satisfies A = U * B * VT, so it has the same singular values 16546 as A, and singular vectors related by U and VT. 16547 16548 LWORK (input) INTEGER 16549 The dimension of the array WORK. LWORK >= 1. 16550 LWORK >= MAX(3*MIN(M,N)+MAX(M,N),5*MIN(M,N)). 16551 For good performance, LWORK should generally be larger. 16552 16553 If LWORK = -1, then a workspace query is assumed; the routine 16554 only calculates the optimal size of the WORK array, returns 16555 this value as the first entry of the WORK array, and no error 16556 message related to LWORK is issued by XERBLA. 16557 16558 INFO (output) INTEGER 16559 = 0: successful exit. 16560 < 0: if INFO = -i, the i-th argument had an illegal value. 16561 > 0: if SBDSQR did not converge, INFO specifies how many 16562 superdiagonals of an intermediate bidiagonal form B 16563 did not converge to zero. See the description of WORK 16564 above for details. 16565 16566 ===================================================================== 16567 16568 16569 Test the input arguments 16570 16571 Parameter adjustments */ 16572 a_dim1 = *lda; 16573 a_offset = 1 + a_dim1 * 1; 16574 a -= a_offset; 16575 --s; 16576 u_dim1 = *ldu; 16577 u_offset = 1 + u_dim1 * 1; 16578 u -= u_offset; 16579 vt_dim1 = *ldvt; 16580 vt_offset = 1 + vt_dim1 * 1; 16581 vt -= vt_offset; 16582 --work; 16583 16584 /* Function Body */ 16585 *info = 0; 16586 minmn = f2cmin(*m,*n); 16587 /* Writing concatenation */ 16588 i__1[0] = 1, a__1[0] = jobu; 16589 i__1[1] = 1, a__1[1] = jobvt; 16590 s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); 16591 mnthr = ilaenv_(&c__6, "SGESVD", ch__1, m, n, &c__0, &c__0, (ftnlen)6, ( 16592 ftnlen)2); 16593 wntua = lsame_(jobu, "A"); 16594 wntus = lsame_(jobu, "S"); 16595 wntuas = wntua || wntus; 16596 wntuo = lsame_(jobu, "O"); 16597 wntun = lsame_(jobu, "N"); 16598 wntva = lsame_(jobvt, "A"); 16599 wntvs = lsame_(jobvt, "S"); 16600 wntvas = wntva || wntvs; 16601 wntvo = lsame_(jobvt, "O"); 16602 wntvn = lsame_(jobvt, "N"); 16603 minwrk = 1; 16604 lquery = *lwork == -1; 16605 16606 if (! (wntua || wntus || wntuo || wntun)) { 16607 *info = -1; 16608 } else if (! (wntva || wntvs || wntvo || wntvn) || wntvo && wntuo) { 16609 *info = -2; 16610 } else if (*m < 0) { 16611 *info = -3; 16612 } else if (*n < 0) { 16613 *info = -4; 16614 } else if (*lda < f2cmax(1,*m)) { 16615 *info = -6; 16616 } else if (*ldu < 1 || wntuas && *ldu < *m) { 16617 *info = -9; 16618 } else if (*ldvt < 1 || wntva && *ldvt < *n || wntvs && *ldvt < minmn) { 16619 *info = -11; 16620 } 16621 16622 /* Compute workspace 16623 (Note: Comments in the code beginning "Workspace:" describe the 16624 minimal amount of workspace needed at that point in the code, 16625 as well as the preferred amount for good performance. 16626 NB refers to the optimal block size for the immediately 16627 following subroutine, as returned by ILAENV.) 
*/ 16628 16629 if (*info == 0 && (*lwork >= 1 || lquery) && *m > 0 && *n > 0) { 16630 if (*m >= *n) { 16631 16632 /* Compute space needed for SBDSQR */ 16633 16634 bdspac = *n * 5; 16635 if (*m >= mnthr) { 16636 if (wntun) { 16637 16638 /* Path 1 (M much larger than N, JOBU='N') */ 16639 16640 maxwrk = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & 16641 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 16642 /* Computing MAX */ 16643 i__2 = maxwrk, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 16644 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( 16645 ftnlen)1); 16646 maxwrk = f2cmax(i__2,i__3); 16647 if (wntvo || wntvas) { 16648 /* Computing MAX */ 16649 i__2 = maxwrk, i__3 = *n * 3 + (*n - 1) * ilaenv_(& 16650 c__1, "SORGBR", "P", n, n, n, &c_n1, (ftnlen) 16651 6, (ftnlen)1); 16652 maxwrk = f2cmax(i__2,i__3); 16653 } 16654 maxwrk = f2cmax(maxwrk,bdspac); 16655 /* Computing MAX */ 16656 i__2 = *n << 2; 16657 minwrk = f2cmax(i__2,bdspac); 16658 maxwrk = f2cmax(maxwrk,minwrk); 16659 } else if (wntuo && wntvn) { 16660 16661 /* Path 2 (M much larger than N, JOBU='O', JOBVT='N') */ 16662 16663 wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & 16664 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 16665 /* Computing MAX */ 16666 i__2 = wrkbl, i__3 = *n + *n * ilaenv_(&c__1, "SORGQR", 16667 " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); 16668 wrkbl = f2cmax(i__2,i__3); 16669 /* Computing MAX */ 16670 i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 16671 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( 16672 ftnlen)1); 16673 wrkbl = f2cmax(i__2,i__3); 16674 /* Computing MAX */ 16675 i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR" 16676 , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1); 16677 wrkbl = f2cmax(i__2,i__3); 16678 wrkbl = f2cmax(wrkbl,bdspac); 16679 /* Computing MAX */ 16680 i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n; 16681 maxwrk = f2cmax(i__2,i__3); 16682 /* Computing MAX */ 16683 i__2 = *n * 3 + *m; 16684 minwrk = f2cmax(i__2,bdspac); 16685 maxwrk = f2cmax(maxwrk,minwrk); 16686 } else if (wntuo && wntvas) { 16687 16688 /* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 16689 'A') */ 16690 16691 wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & 16692 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 16693 /* Computing MAX */ 16694 i__2 = wrkbl, i__3 = *n + *n * ilaenv_(&c__1, "SORGQR", 16695 " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); 16696 wrkbl = f2cmax(i__2,i__3); 16697 /* Computing MAX */ 16698 i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 16699 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( 16700 ftnlen)1); 16701 wrkbl = f2cmax(i__2,i__3); 16702 /* Computing MAX */ 16703 i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR" 16704 , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1); 16705 wrkbl = f2cmax(i__2,i__3); 16706 /* Computing MAX */ 16707 i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1, 16708 "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) 16709 1); 16710 wrkbl = f2cmax(i__2,i__3); 16711 wrkbl = f2cmax(wrkbl,bdspac); 16712 /* Computing MAX */ 16713 i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n; 16714 maxwrk = f2cmax(i__2,i__3); 16715 /* Computing MAX */ 16716 i__2 = *n * 3 + *m; 16717 minwrk = f2cmax(i__2,bdspac); 16718 maxwrk = f2cmax(maxwrk,minwrk); 16719 } else if (wntus && wntvn) { 16720 16721 /* Path 4 (M much larger than N, JOBU='S', JOBVT='N') */ 16722 16723 wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & 16724 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 16725 /* Computing MAX */ 16726 i__2 = wrkbl, i__3 = *n + *n * ilaenv_(&c__1, "SORGQR", 
16727 " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); 16728 wrkbl = f2cmax(i__2,i__3); 16729 /* Computing MAX */ 16730 i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 16731 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( 16732 ftnlen)1); 16733 wrkbl = f2cmax(i__2,i__3); 16734 /* Computing MAX */ 16735 i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR" 16736 , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1); 16737 wrkbl = f2cmax(i__2,i__3); 16738 wrkbl = f2cmax(wrkbl,bdspac); 16739 maxwrk = *n * *n + wrkbl; 16740 /* Computing MAX */ 16741 i__2 = *n * 3 + *m; 16742 minwrk = f2cmax(i__2,bdspac); 16743 maxwrk = f2cmax(maxwrk,minwrk); 16744 } else if (wntus && wntvo) { 16745 16746 /* Path 5 (M much larger than N, JOBU='S', JOBVT='O') */ 16747 16748 wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & 16749 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 16750 /* Computing MAX */ 16751 i__2 = wrkbl, i__3 = *n + *n * ilaenv_(&c__1, "SORGQR", 16752 " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); 16753 wrkbl = f2cmax(i__2,i__3); 16754 /* Computing MAX */ 16755 i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 16756 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( 16757 ftnlen)1); 16758 wrkbl = f2cmax(i__2,i__3); 16759 /* Computing MAX */ 16760 i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR" 16761 , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1); 16762 wrkbl = f2cmax(i__2,i__3); 16763 /* Computing MAX */ 16764 i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1, 16765 "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) 16766 1); 16767 wrkbl = f2cmax(i__2,i__3); 16768 wrkbl = f2cmax(wrkbl,bdspac); 16769 maxwrk = (*n << 1) * *n + wrkbl; 16770 /* Computing MAX */ 16771 i__2 = *n * 3 + *m; 16772 minwrk = f2cmax(i__2,bdspac); 16773 maxwrk = f2cmax(maxwrk,minwrk); 16774 } else if (wntus && wntvas) { 16775 16776 /* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or 16777 'A') */ 16778 16779 wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & 16780 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 16781 /* Computing MAX */ 16782 i__2 = wrkbl, i__3 = *n + *n * ilaenv_(&c__1, "SORGQR", 16783 " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); 16784 wrkbl = f2cmax(i__2,i__3); 16785 /* Computing MAX */ 16786 i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 16787 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( 16788 ftnlen)1); 16789 wrkbl = f2cmax(i__2,i__3); 16790 /* Computing MAX */ 16791 i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR" 16792 , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1); 16793 wrkbl = f2cmax(i__2,i__3); 16794 /* Computing MAX */ 16795 i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1, 16796 "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) 16797 1); 16798 wrkbl = f2cmax(i__2,i__3); 16799 wrkbl = f2cmax(wrkbl,bdspac); 16800 maxwrk = *n * *n + wrkbl; 16801 /* Computing MAX */ 16802 i__2 = *n * 3 + *m; 16803 minwrk = f2cmax(i__2,bdspac); 16804 maxwrk = f2cmax(maxwrk,minwrk); 16805 } else if (wntua && wntvn) { 16806 16807 /* Path 7 (M much larger than N, JOBU='A', JOBVT='N') */ 16808 16809 wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & 16810 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 16811 /* Computing MAX */ 16812 i__2 = wrkbl, i__3 = *n + *m * ilaenv_(&c__1, "SORGQR", 16813 " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); 16814 wrkbl = f2cmax(i__2,i__3); 16815 /* Computing MAX */ 16816 i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 16817 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( 16818 ftnlen)1); 16819 wrkbl = f2cmax(i__2,i__3); 16820 /* Computing MAX */ 16821 i__2 
= wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR" 16822 , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1); 16823 wrkbl = f2cmax(i__2,i__3); 16824 wrkbl = f2cmax(wrkbl,bdspac); 16825 maxwrk = *n * *n + wrkbl; 16826 /* Computing MAX */ 16827 i__2 = *n * 3 + *m; 16828 minwrk = f2cmax(i__2,bdspac); 16829 maxwrk = f2cmax(maxwrk,minwrk); 16830 } else if (wntua && wntvo) { 16831 16832 /* Path 8 (M much larger than N, JOBU='A', JOBVT='O') */ 16833 16834 wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & 16835 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 16836 /* Computing MAX */ 16837 i__2 = wrkbl, i__3 = *n + *m * ilaenv_(&c__1, "SORGQR", 16838 " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); 16839 wrkbl = f2cmax(i__2,i__3); 16840 /* Computing MAX */ 16841 i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 16842 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( 16843 ftnlen)1); 16844 wrkbl = f2cmax(i__2,i__3); 16845 /* Computing MAX */ 16846 i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR" 16847 , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1); 16848 wrkbl = f2cmax(i__2,i__3); 16849 /* Computing MAX */ 16850 i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1, 16851 "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) 16852 1); 16853 wrkbl = f2cmax(i__2,i__3); 16854 wrkbl = f2cmax(wrkbl,bdspac); 16855 maxwrk = (*n << 1) * *n + wrkbl; 16856 /* Computing MAX */ 16857 i__2 = *n * 3 + *m; 16858 minwrk = f2cmax(i__2,bdspac); 16859 maxwrk = f2cmax(maxwrk,minwrk); 16860 } else if (wntua && wntvas) { 16861 16862 /* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or 16863 'A') */ 16864 16865 wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & 16866 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 16867 /* Computing MAX */ 16868 i__2 = wrkbl, i__3 = *n + *m * ilaenv_(&c__1, "SORGQR", 16869 " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); 16870 wrkbl = f2cmax(i__2,i__3); 16871 /* Computing MAX */ 16872 i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1, 16873 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( 16874 ftnlen)1); 16875 wrkbl = f2cmax(i__2,i__3); 16876 /* Computing MAX */ 16877 i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR" 16878 , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1); 16879 wrkbl = f2cmax(i__2,i__3); 16880 /* Computing MAX */ 16881 i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1, 16882 "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) 16883 1); 16884 wrkbl = f2cmax(i__2,i__3); 16885 wrkbl = f2cmax(wrkbl,bdspac); 16886 maxwrk = *n * *n + wrkbl; 16887 /* Computing MAX */ 16888 i__2 = *n * 3 + *m; 16889 minwrk = f2cmax(i__2,bdspac); 16890 maxwrk = f2cmax(maxwrk,minwrk); 16891 } 16892 } else { 16893 16894 /* Path 10 (M at least N, but not much larger) */ 16895 16896 maxwrk = *n * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m, 16897 n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 16898 if (wntus || wntuo) { 16899 /* Computing MAX */ 16900 i__2 = maxwrk, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORG" 16901 "BR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); 16902 maxwrk = f2cmax(i__2,i__3); 16903 } 16904 if (wntua) { 16905 /* Computing MAX */ 16906 i__2 = maxwrk, i__3 = *n * 3 + *m * ilaenv_(&c__1, "SORG" 16907 "BR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); 16908 maxwrk = f2cmax(i__2,i__3); 16909 } 16910 if (! 
wntvn) { 16911 /* Computing MAX */ 16912 i__2 = maxwrk, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1, 16913 "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) 16914 1); 16915 maxwrk = f2cmax(i__2,i__3); 16916 } 16917 maxwrk = f2cmax(maxwrk,bdspac); 16918 /* Computing MAX */ 16919 i__2 = *n * 3 + *m; 16920 minwrk = f2cmax(i__2,bdspac); 16921 maxwrk = f2cmax(maxwrk,minwrk); 16922 } 16923 } else { 16924 16925 /* Compute space needed for SBDSQR */ 16926 16927 bdspac = *m * 5; 16928 if (*n >= mnthr) { 16929 if (wntvn) { 16930 16931 /* Path 1t(N much larger than M, JOBVT='N') */ 16932 16933 maxwrk = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & 16934 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 16935 /* Computing MAX */ 16936 i__2 = maxwrk, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 16937 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( 16938 ftnlen)1); 16939 maxwrk = f2cmax(i__2,i__3); 16940 if (wntuo || wntuas) { 16941 /* Computing MAX */ 16942 i__2 = maxwrk, i__3 = *m * 3 + *m * ilaenv_(&c__1, 16943 "SORGBR", "Q", m, m, m, &c_n1, (ftnlen)6, ( 16944 ftnlen)1); 16945 maxwrk = f2cmax(i__2,i__3); 16946 } 16947 maxwrk = f2cmax(maxwrk,bdspac); 16948 /* Computing MAX */ 16949 i__2 = *m << 2; 16950 minwrk = f2cmax(i__2,bdspac); 16951 maxwrk = f2cmax(maxwrk,minwrk); 16952 } else if (wntvo && wntun) { 16953 16954 /* Path 2t(N much larger than M, JOBU='N', JOBVT='O') */ 16955 16956 wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & 16957 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 16958 /* Computing MAX */ 16959 i__2 = wrkbl, i__3 = *m + *m * ilaenv_(&c__1, "SORGLQ", 16960 " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); 16961 wrkbl = f2cmax(i__2,i__3); 16962 /* Computing MAX */ 16963 i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 16964 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( 16965 ftnlen)1); 16966 wrkbl = f2cmax(i__2,i__3); 16967 /* Computing MAX */ 16968 i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 16969 "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen) 16970 1); 16971 wrkbl = f2cmax(i__2,i__3); 16972 wrkbl = f2cmax(wrkbl,bdspac); 16973 /* Computing MAX */ 16974 i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m; 16975 maxwrk = f2cmax(i__2,i__3); 16976 /* Computing MAX */ 16977 i__2 = *m * 3 + *n; 16978 minwrk = f2cmax(i__2,bdspac); 16979 maxwrk = f2cmax(maxwrk,minwrk); 16980 } else if (wntvo && wntuas) { 16981 16982 /* Path 3t(N much larger than M, JOBU='S' or 'A', 16983 JOBVT='O') */ 16984 16985 wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & 16986 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 16987 /* Computing MAX */ 16988 i__2 = wrkbl, i__3 = *m + *m * ilaenv_(&c__1, "SORGLQ", 16989 " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); 16990 wrkbl = f2cmax(i__2,i__3); 16991 /* Computing MAX */ 16992 i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 16993 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( 16994 ftnlen)1); 16995 wrkbl = f2cmax(i__2,i__3); 16996 /* Computing MAX */ 16997 i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 16998 "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen) 16999 1); 17000 wrkbl = f2cmax(i__2,i__3); 17001 /* Computing MAX */ 17002 i__2 = wrkbl, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORGBR" 17003 , "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen)1); 17004 wrkbl = f2cmax(i__2,i__3); 17005 wrkbl = f2cmax(wrkbl,bdspac); 17006 /* Computing MAX */ 17007 i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m; 17008 maxwrk = f2cmax(i__2,i__3); 17009 /* Computing MAX */ 17010 i__2 = *m * 3 + *n; 17011 minwrk = f2cmax(i__2,bdspac); 17012 maxwrk = f2cmax(maxwrk,minwrk); 
17013 } else if (wntvs && wntun) { 17014 17015 /* Path 4t(N much larger than M, JOBU='N', JOBVT='S') */ 17016 17017 wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & 17018 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 17019 /* Computing MAX */ 17020 i__2 = wrkbl, i__3 = *m + *m * ilaenv_(&c__1, "SORGLQ", 17021 " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); 17022 wrkbl = f2cmax(i__2,i__3); 17023 /* Computing MAX */ 17024 i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 17025 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( 17026 ftnlen)1); 17027 wrkbl = f2cmax(i__2,i__3); 17028 /* Computing MAX */ 17029 i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 17030 "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen) 17031 1); 17032 wrkbl = f2cmax(i__2,i__3); 17033 wrkbl = f2cmax(wrkbl,bdspac); 17034 maxwrk = *m * *m + wrkbl; 17035 /* Computing MAX */ 17036 i__2 = *m * 3 + *n; 17037 minwrk = f2cmax(i__2,bdspac); 17038 maxwrk = f2cmax(maxwrk,minwrk); 17039 } else if (wntvs && wntuo) { 17040 17041 /* Path 5t(N much larger than M, JOBU='O', JOBVT='S') */ 17042 17043 wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & 17044 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 17045 /* Computing MAX */ 17046 i__2 = wrkbl, i__3 = *m + *m * ilaenv_(&c__1, "SORGLQ", 17047 " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); 17048 wrkbl = f2cmax(i__2,i__3); 17049 /* Computing MAX */ 17050 i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 17051 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( 17052 ftnlen)1); 17053 wrkbl = f2cmax(i__2,i__3); 17054 /* Computing MAX */ 17055 i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 17056 "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen) 17057 1); 17058 wrkbl = f2cmax(i__2,i__3); 17059 /* Computing MAX */ 17060 i__2 = wrkbl, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORGBR" 17061 , "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen)1); 17062 wrkbl = f2cmax(i__2,i__3); 17063 wrkbl = f2cmax(wrkbl,bdspac); 17064 maxwrk = (*m << 1) * *m + wrkbl; 17065 /* Computing MAX */ 17066 i__2 = *m * 3 + *n; 17067 minwrk = f2cmax(i__2,bdspac); 17068 maxwrk = f2cmax(maxwrk,minwrk); 17069 } else if (wntvs && wntuas) { 17070 17071 /* Path 6t(N much larger than M, JOBU='S' or 'A', 17072 JOBVT='S') */ 17073 17074 wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & 17075 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 17076 /* Computing MAX */ 17077 i__2 = wrkbl, i__3 = *m + *m * ilaenv_(&c__1, "SORGLQ", 17078 " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); 17079 wrkbl = f2cmax(i__2,i__3); 17080 /* Computing MAX */ 17081 i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 17082 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( 17083 ftnlen)1); 17084 wrkbl = f2cmax(i__2,i__3); 17085 /* Computing MAX */ 17086 i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 17087 "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen) 17088 1); 17089 wrkbl = f2cmax(i__2,i__3); 17090 /* Computing MAX */ 17091 i__2 = wrkbl, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORGBR" 17092 , "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen)1); 17093 wrkbl = f2cmax(i__2,i__3); 17094 wrkbl = f2cmax(wrkbl,bdspac); 17095 maxwrk = *m * *m + wrkbl; 17096 /* Computing MAX */ 17097 i__2 = *m * 3 + *n; 17098 minwrk = f2cmax(i__2,bdspac); 17099 maxwrk = f2cmax(maxwrk,minwrk); 17100 } else if (wntva && wntun) { 17101 17102 /* Path 7t(N much larger than M, JOBU='N', JOBVT='A') */ 17103 17104 wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & 17105 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 17106 /* Computing MAX */ 17107 i__2 = wrkbl, i__3 = *m + *n * ilaenv_(&c__1, "SORGLQ", 
17108 " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); 17109 wrkbl = f2cmax(i__2,i__3); 17110 /* Computing MAX */ 17111 i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 17112 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( 17113 ftnlen)1); 17114 wrkbl = f2cmax(i__2,i__3); 17115 /* Computing MAX */ 17116 i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 17117 "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen) 17118 1); 17119 wrkbl = f2cmax(i__2,i__3); 17120 wrkbl = f2cmax(wrkbl,bdspac); 17121 maxwrk = *m * *m + wrkbl; 17122 /* Computing MAX */ 17123 i__2 = *m * 3 + *n; 17124 minwrk = f2cmax(i__2,bdspac); 17125 maxwrk = f2cmax(maxwrk,minwrk); 17126 } else if (wntva && wntuo) { 17127 17128 /* Path 8t(N much larger than M, JOBU='O', JOBVT='A') */ 17129 17130 wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & 17131 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 17132 /* Computing MAX */ 17133 i__2 = wrkbl, i__3 = *m + *n * ilaenv_(&c__1, "SORGLQ", 17134 " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); 17135 wrkbl = f2cmax(i__2,i__3); 17136 /* Computing MAX */ 17137 i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 17138 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( 17139 ftnlen)1); 17140 wrkbl = f2cmax(i__2,i__3); 17141 /* Computing MAX */ 17142 i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 17143 "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen) 17144 1); 17145 wrkbl = f2cmax(i__2,i__3); 17146 /* Computing MAX */ 17147 i__2 = wrkbl, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORGBR" 17148 , "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen)1); 17149 wrkbl = f2cmax(i__2,i__3); 17150 wrkbl = f2cmax(wrkbl,bdspac); 17151 maxwrk = (*m << 1) * *m + wrkbl; 17152 /* Computing MAX */ 17153 i__2 = *m * 3 + *n; 17154 minwrk = f2cmax(i__2,bdspac); 17155 maxwrk = f2cmax(maxwrk,minwrk); 17156 } else if (wntva && wntuas) { 17157 17158 /* Path 9t(N much larger than M, JOBU='S' or 'A', 17159 JOBVT='A') */ 17160 17161 wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & 17162 c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 17163 /* Computing MAX */ 17164 i__2 = wrkbl, i__3 = *m + *n * ilaenv_(&c__1, "SORGLQ", 17165 " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); 17166 wrkbl = f2cmax(i__2,i__3); 17167 /* Computing MAX */ 17168 i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1, 17169 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( 17170 ftnlen)1); 17171 wrkbl = f2cmax(i__2,i__3); 17172 /* Computing MAX */ 17173 i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 17174 "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen) 17175 1); 17176 wrkbl = f2cmax(i__2,i__3); 17177 /* Computing MAX */ 17178 i__2 = wrkbl, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORGBR" 17179 , "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen)1); 17180 wrkbl = f2cmax(i__2,i__3); 17181 wrkbl = f2cmax(wrkbl,bdspac); 17182 maxwrk = *m * *m + wrkbl; 17183 /* Computing MAX */ 17184 i__2 = *m * 3 + *n; 17185 minwrk = f2cmax(i__2,bdspac); 17186 maxwrk = f2cmax(maxwrk,minwrk); 17187 } 17188 } else { 17189 17190 /* Path 10t(N greater than M, but not much larger) */ 17191 17192 maxwrk = *m * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m, 17193 n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); 17194 if (wntvs || wntvo) { 17195 /* Computing MAX */ 17196 i__2 = maxwrk, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORG" 17197 "BR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); 17198 maxwrk = f2cmax(i__2,i__3); 17199 } 17200 if (wntva) { 17201 /* Computing MAX */ 17202 i__2 = maxwrk, i__3 = *m * 3 + *n * ilaenv_(&c__1, "SORG" 17203 "BR", "P", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); 17204 maxwrk 
= f2cmax(i__2,i__3); 17205 } 17206 if (! wntun) { 17207 /* Computing MAX */ 17208 i__2 = maxwrk, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1, 17209 "SORGBR", "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen) 17210 1); 17211 maxwrk = f2cmax(i__2,i__3); 17212 } 17213 maxwrk = f2cmax(maxwrk,bdspac); 17214 /* Computing MAX */ 17215 i__2 = *m * 3 + *n; 17216 minwrk = f2cmax(i__2,bdspac); 17217 maxwrk = f2cmax(maxwrk,minwrk); 17218 } 17219 } 17220 work[1] = (real) maxwrk; 17221 } 17222 17223 if (*lwork < minwrk && ! lquery) { 17224 *info = -13; 17225 } 17226 if (*info != 0) { 17227 i__2 = -(*info); 17228 xerbla_("SGESVD", &i__2); 17229 return 0; 17230 } else if (lquery) { 17231 return 0; 17232 } 17233 17234 /* Quick return if possible */ 17235 17236 if (*m == 0 || *n == 0) { 17237 if (*lwork >= 1) { 17238 work[1] = 1.f; 17239 } 17240 return 0; 17241 } 17242 17243 /* Get machine constants */ 17244 17245 eps = slamch_("P"); 17246 smlnum = sqrt(slamch_("S")) / eps; 17247 bignum = 1.f / smlnum; 17248 17249 /* Scale A if max element outside range [SMLNUM,BIGNUM] */ 17250 17251 anrm = slange_("M", m, n, &a[a_offset], lda, dum); 17252 iscl = 0; 17253 if (anrm > 0.f && anrm < smlnum) { 17254 iscl = 1; 17255 slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & 17256 ierr); 17257 } else if (anrm > bignum) { 17258 iscl = 1; 17259 slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & 17260 ierr); 17261 } 17262 17263 if (*m >= *n) { 17264 17265 /* A has at least as many rows as columns. If A has sufficiently 17266 more rows than columns, first reduce using the QR 17267 decomposition (if sufficient workspace available) */ 17268 17269 if (*m >= mnthr) { 17270 17271 if (wntun) { 17272 17273 /* Path 1 (M much larger than N, JOBU='N') 17274 No left singular vectors to be computed */ 17275 17276 itau = 1; 17277 iwork = itau + *n; 17278 17279 /* Compute A=Q*R 17280 (Workspace: need 2*N, prefer N+N*NB) */ 17281 17282 i__2 = *lwork - iwork + 1; 17283 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], & 17284 i__2, &ierr); 17285 17286 /* Zero out below R */ 17287 17288 i__2 = *n - 1; 17289 i__3 = *n - 1; 17290 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &a_ref(2, 1), 17291 lda); 17292 ie = 1; 17293 itauq = ie + *n; 17294 itaup = itauq + *n; 17295 iwork = itaup + *n; 17296 17297 /* Bidiagonalize R in A 17298 (Workspace: need 4*N, prefer 3*N+2*N*NB) */ 17299 17300 i__2 = *lwork - iwork + 1; 17301 sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ 17302 itauq], &work[itaup], &work[iwork], &i__2, &ierr); 17303 ncvt = 0; 17304 if (wntvo || wntvas) { 17305 17306 /* If right singular vectors desired, generate P'. 
17307 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ 17308 17309 i__2 = *lwork - iwork + 1; 17310 sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], & 17311 work[iwork], &i__2, &ierr); 17312 ncvt = *n; 17313 } 17314 iwork = ie + *n; 17315 17316 /* Perform bidiagonal QR iteration, computing right 17317 singular vectors of A in A if desired 17318 (Workspace: need BDSPAC) */ 17319 17320 sbdsqr_("U", n, &ncvt, &c__0, &c__0, &s[1], &work[ie], &a[ 17321 a_offset], lda, dum, &c__1, dum, &c__1, &work[iwork], 17322 info); 17323 17324 /* If right singular vectors desired in VT, copy them there */ 17325 17326 if (wntvas) { 17327 slacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], 17328 ldvt); 17329 } 17330 17331 } else if (wntuo && wntvn) { 17332 17333 /* Path 2 (M much larger than N, JOBU='O', JOBVT='N') 17334 N left singular vectors to be overwritten on A and 17335 no right singular vectors to be computed 17336 17337 Computing MAX */ 17338 i__2 = *n << 2; 17339 if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) { 17340 17341 /* Sufficient workspace for a fast algorithm */ 17342 17343 ir = 1; 17344 /* Computing MAX */ 17345 i__2 = wrkbl, i__3 = *lda * *n + *n; 17346 if (*lwork >= f2cmax(i__2,i__3) + *lda * *n) { 17347 17348 /* WORK(IU) is LDA by N, WORK(IR) is LDA by N */ 17349 17350 ldwrku = *lda; 17351 ldwrkr = *lda; 17352 } else /* if(complicated condition) */ { 17353 /* Computing MAX */ 17354 i__2 = wrkbl, i__3 = *lda * *n + *n; 17355 if (*lwork >= f2cmax(i__2,i__3) + *n * *n) { 17356 17357 /* WORK(IU) is LDA by N, WORK(IR) is N by N */ 17358 17359 ldwrku = *lda; 17360 ldwrkr = *n; 17361 } else { 17362 17363 /* WORK(IU) is LDWRKU by N, WORK(IR) is N by N */ 17364 17365 ldwrku = (*lwork - *n * *n - *n) / *n; 17366 ldwrkr = *n; 17367 } 17368 } 17369 itau = ir + ldwrkr * *n; 17370 iwork = itau + *n; 17371 17372 /* Compute A=Q*R 17373 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ 17374 17375 i__2 = *lwork - iwork + 1; 17376 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] 17377 , &i__2, &ierr); 17378 17379 /* Copy R to WORK(IR) and zero out below it */ 17380 17381 slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); 17382 i__2 = *n - 1; 17383 i__3 = *n - 1; 17384 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[ir + 1] 17385 , &ldwrkr); 17386 17387 /* Generate Q in A 17388 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ 17389 17390 i__2 = *lwork - iwork + 1; 17391 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ 17392 iwork], &i__2, &ierr); 17393 ie = itau; 17394 itauq = ie + *n; 17395 itaup = itauq + *n; 17396 iwork = itaup + *n; 17397 17398 /* Bidiagonalize R in WORK(IR) 17399 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ 17400 17401 i__2 = *lwork - iwork + 1; 17402 sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ 17403 itauq], &work[itaup], &work[iwork], &i__2, &ierr); 17404 17405 /* Generate left vectors bidiagonalizing R 17406 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ 17407 17408 i__2 = *lwork - iwork + 1; 17409 sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], & 17410 work[iwork], &i__2, &ierr); 17411 iwork = ie + *n; 17412 17413 /* Perform bidiagonal QR iteration, computing left 17414 singular vectors of R in WORK(IR) 17415 (Workspace: need N*N+BDSPAC) */ 17416 17417 sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], dum, & 17418 c__1, &work[ir], &ldwrkr, dum, &c__1, &work[iwork] 17419 , info); 17420 iu = ie + *n; 17421 17422 /* Multiply Q in A by left singular vectors of R in 17423 WORK(IR), storing result in WORK(IU) and copying to A 
17424 (Workspace: need N*N+2*N, prefer N*N+M*N+N) */ 17425 17426 i__2 = *m; 17427 i__3 = ldwrku; 17428 for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += 17429 i__3) { 17430 /* Computing MIN */ 17431 i__4 = *m - i__ + 1; 17432 chunk = f2cmin(i__4,ldwrku); 17433 sgemm_("N", "N", &chunk, n, n, &c_b438, &a_ref(i__, 1) 17434 , lda, &work[ir], &ldwrkr, &c_b416, &work[iu], 17435 &ldwrku); 17436 slacpy_("F", &chunk, n, &work[iu], &ldwrku, &a_ref( 17437 i__, 1), lda); 17438 /* L10: */ 17439 } 17440 17441 } else { 17442 17443 /* Insufficient workspace for a fast algorithm */ 17444 17445 ie = 1; 17446 itauq = ie + *n; 17447 itaup = itauq + *n; 17448 iwork = itaup + *n; 17449 17450 /* Bidiagonalize A 17451 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */ 17452 17453 i__3 = *lwork - iwork + 1; 17454 sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[ 17455 itauq], &work[itaup], &work[iwork], &i__3, &ierr); 17456 17457 /* Generate left vectors bidiagonalizing A 17458 (Workspace: need 4*N, prefer 3*N+N*NB) */ 17459 17460 i__3 = *lwork - iwork + 1; 17461 sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & 17462 work[iwork], &i__3, &ierr); 17463 iwork = ie + *n; 17464 17465 /* Perform bidiagonal QR iteration, computing left 17466 singular vectors of A in A 17467 (Workspace: need BDSPAC) */ 17468 17469 sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], dum, & 17470 c__1, &a[a_offset], lda, dum, &c__1, &work[iwork], 17471 info); 17472 17473 } 17474 17475 } else if (wntuo && wntvas) { 17476 17477 /* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') 17478 N left singular vectors to be overwritten on A and 17479 N right singular vectors to be computed in VT 17480 17481 Computing MAX */ 17482 i__3 = *n << 2; 17483 if (*lwork >= *n * *n + f2cmax(i__3,bdspac)) { 17484 17485 /* Sufficient workspace for a fast algorithm */ 17486 17487 ir = 1; 17488 /* Computing MAX */ 17489 i__3 = wrkbl, i__2 = *lda * *n + *n; 17490 if (*lwork >= f2cmax(i__3,i__2) + *lda * *n) { 17491 17492 /* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ 17493 17494 ldwrku = *lda; 17495 ldwrkr = *lda; 17496 } else /* if(complicated condition) */ { 17497 /* Computing MAX */ 17498 i__3 = wrkbl, i__2 = *lda * *n + *n; 17499 if (*lwork >= f2cmax(i__3,i__2) + *n * *n) { 17500 17501 /* WORK(IU) is LDA by N and WORK(IR) is N by N */ 17502 17503 ldwrku = *lda; 17504 ldwrkr = *n; 17505 } else { 17506 17507 /* WORK(IU) is LDWRKU by N and WORK(IR) is N by N */ 17508 17509 ldwrku = (*lwork - *n * *n - *n) / *n; 17510 ldwrkr = *n; 17511 } 17512 } 17513 itau = ir + ldwrkr * *n; 17514 iwork = itau + *n; 17515 17516 /* Compute A=Q*R 17517 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ 17518 17519 i__3 = *lwork - iwork + 1; 17520 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] 17521 , &i__3, &ierr); 17522 17523 /* Copy R to VT, zeroing out below it */ 17524 17525 slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 17526 ldvt); 17527 i__3 = *n - 1; 17528 i__2 = *n - 1; 17529 slaset_("L", &i__3, &i__2, &c_b416, &c_b416, &vt_ref(2, 1) 17530 , ldvt); 17531 17532 /* Generate Q in A 17533 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ 17534 17535 i__3 = *lwork - iwork + 1; 17536 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ 17537 iwork], &i__3, &ierr); 17538 ie = itau; 17539 itauq = ie + *n; 17540 itaup = itauq + *n; 17541 iwork = itaup + *n; 17542 17543 /* Bidiagonalize R in VT, copying result to WORK(IR) 17544 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ 17545 17546 i__3 = *lwork - iwork + 1; 17547 
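/* ------------------------------------------------------------------
   Illustrative sketch, not part of the translated LAPACK source: the
   workspace bookkeeping that every path in this routine repeats.
   WORK is carved up by advancing 1-based cursors (ITAU, IE, ITAUQ,
   ITAUP, IWORK), and each blocked routine receives the remaining
   length LWORK - IWORK + 1 so it can use all leftover space for
   blocking; in several paths the tau space is reused for the
   bidiagonal off-diagonal E once Q has been generated (ie = itau).
   Callers usually size WORK with a workspace query (LWORK = -1),
   after which WORK(1) holds the MAXWRK value computed above.  The
   function and parameter names below are hypothetical, and long
   stands in for the f2c integer type.                               */
static void sgesvd_work_layout_sketch(long n, long lwork)
{
    long itau  = 1;             /* N Householder scalars from SGEQRF      */
    long ie    = itau;          /* tau space reused for E after SORGQR    */
    long itauq = ie + n;        /* N scalars for the left (Q) reflectors  */
    long itaup = itauq + n;     /* N scalars for the right (P) reflectors */
    long iwork = itaup + n;     /* first free element of WORK             */
    long remaining = lwork - iwork + 1;  /* passed as LWORK to SGEBRD,
                                            SORGBR, SORMBR, ...           */
    (void)remaining;
}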
sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], & 17548 work[itauq], &work[itaup], &work[iwork], &i__3, & 17549 ierr); 17550 slacpy_("L", n, n, &vt[vt_offset], ldvt, &work[ir], & 17551 ldwrkr); 17552 17553 /* Generate left vectors bidiagonalizing R in WORK(IR) 17554 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ 17555 17556 i__3 = *lwork - iwork + 1; 17557 sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], & 17558 work[iwork], &i__3, &ierr); 17559 17560 /* Generate right vectors bidiagonalizing R in VT 17561 (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB) */ 17562 17563 i__3 = *lwork - iwork + 1; 17564 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], 17565 &work[iwork], &i__3, &ierr); 17566 iwork = ie + *n; 17567 17568 /* Perform bidiagonal QR iteration, computing left 17569 singular vectors of R in WORK(IR) and computing right 17570 singular vectors of R in VT 17571 (Workspace: need N*N+BDSPAC) */ 17572 17573 sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[ 17574 vt_offset], ldvt, &work[ir], &ldwrkr, dum, &c__1, 17575 &work[iwork], info); 17576 iu = ie + *n; 17577 17578 /* Multiply Q in A by left singular vectors of R in 17579 WORK(IR), storing result in WORK(IU) and copying to A 17580 (Workspace: need N*N+2*N, prefer N*N+M*N+N) */ 17581 17582 i__3 = *m; 17583 i__2 = ldwrku; 17584 for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ += 17585 i__2) { 17586 /* Computing MIN */ 17587 i__4 = *m - i__ + 1; 17588 chunk = f2cmin(i__4,ldwrku); 17589 sgemm_("N", "N", &chunk, n, n, &c_b438, &a_ref(i__, 1) 17590 , lda, &work[ir], &ldwrkr, &c_b416, &work[iu], 17591 &ldwrku); 17592 slacpy_("F", &chunk, n, &work[iu], &ldwrku, &a_ref( 17593 i__, 1), lda); 17594 /* L20: */ 17595 } 17596 17597 } else { 17598 17599 /* Insufficient workspace for a fast algorithm */ 17600 17601 itau = 1; 17602 iwork = itau + *n; 17603 17604 /* Compute A=Q*R 17605 (Workspace: need 2*N, prefer N+N*NB) */ 17606 17607 i__2 = *lwork - iwork + 1; 17608 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] 17609 , &i__2, &ierr); 17610 17611 /* Copy R to VT, zeroing out below it */ 17612 17613 slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 17614 ldvt); 17615 i__2 = *n - 1; 17616 i__3 = *n - 1; 17617 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &vt_ref(2, 1) 17618 , ldvt); 17619 17620 /* Generate Q in A 17621 (Workspace: need 2*N, prefer N+N*NB) */ 17622 17623 i__2 = *lwork - iwork + 1; 17624 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ 17625 iwork], &i__2, &ierr); 17626 ie = itau; 17627 itauq = ie + *n; 17628 itaup = itauq + *n; 17629 iwork = itaup + *n; 17630 17631 /* Bidiagonalize R in VT 17632 (Workspace: need 4*N, prefer 3*N+2*N*NB) */ 17633 17634 i__2 = *lwork - iwork + 1; 17635 sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], & 17636 work[itauq], &work[itaup], &work[iwork], &i__2, & 17637 ierr); 17638 17639 /* Multiply Q in A by left vectors bidiagonalizing R 17640 (Workspace: need 3*N+M, prefer 3*N+M*NB) */ 17641 17642 i__2 = *lwork - iwork + 1; 17643 sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, & 17644 work[itauq], &a[a_offset], lda, &work[iwork], & 17645 i__2, &ierr); 17646 17647 /* Generate right vectors bidiagonalizing R in VT 17648 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ 17649 17650 i__2 = *lwork - iwork + 1; 17651 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], 17652 &work[iwork], &i__2, &ierr); 17653 iwork = ie + *n; 17654 17655 /* Perform bidiagonal QR iteration, computing left 17656 singular vectors of A in A and computing right 
17657 singular vectors of A in VT 17658 (Workspace: need BDSPAC) */ 17659 17660 sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[ 17661 vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & 17662 work[iwork], info); 17663 17664 } 17665 17666 } else if (wntus) { 17667 17668 if (wntvn) { 17669 17670 /* Path 4 (M much larger than N, JOBU='S', JOBVT='N') 17671 N left singular vectors to be computed in U and 17672 no right singular vectors to be computed 17673 17674 Computing MAX */ 17675 i__2 = *n << 2; 17676 if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) { 17677 17678 /* Sufficient workspace for a fast algorithm */ 17679 17680 ir = 1; 17681 if (*lwork >= wrkbl + *lda * *n) { 17682 17683 /* WORK(IR) is LDA by N */ 17684 17685 ldwrkr = *lda; 17686 } else { 17687 17688 /* WORK(IR) is N by N */ 17689 17690 ldwrkr = *n; 17691 } 17692 itau = ir + ldwrkr * *n; 17693 iwork = itau + *n; 17694 17695 /* Compute A=Q*R 17696 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ 17697 17698 i__2 = *lwork - iwork + 1; 17699 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ 17700 iwork], &i__2, &ierr); 17701 17702 /* Copy R to WORK(IR), zeroing out below it */ 17703 17704 slacpy_("U", n, n, &a[a_offset], lda, &work[ir], & 17705 ldwrkr); 17706 i__2 = *n - 1; 17707 i__3 = *n - 1; 17708 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[ir 17709 + 1], &ldwrkr); 17710 17711 /* Generate Q in A 17712 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ 17713 17714 i__2 = *lwork - iwork + 1; 17715 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & 17716 work[iwork], &i__2, &ierr); 17717 ie = itau; 17718 itauq = ie + *n; 17719 itaup = itauq + *n; 17720 iwork = itaup + *n; 17721 17722 /* Bidiagonalize R in WORK(IR) 17723 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ 17724 17725 i__2 = *lwork - iwork + 1; 17726 sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], & 17727 work[itauq], &work[itaup], &work[iwork], & 17728 i__2, &ierr); 17729 17730 /* Generate left vectors bidiagonalizing R in WORK(IR) 17731 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ 17732 17733 i__2 = *lwork - iwork + 1; 17734 sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq] 17735 , &work[iwork], &i__2, &ierr); 17736 iwork = ie + *n; 17737 17738 /* Perform bidiagonal QR iteration, computing left 17739 singular vectors of R in WORK(IR) 17740 (Workspace: need N*N+BDSPAC) */ 17741 17742 sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], 17743 dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, & 17744 work[iwork], info); 17745 17746 /* Multiply Q in A by left singular vectors of R in 17747 WORK(IR), storing result in U 17748 (Workspace: need N*N) */ 17749 17750 sgemm_("N", "N", m, n, n, &c_b438, &a[a_offset], lda, 17751 &work[ir], &ldwrkr, &c_b416, &u[u_offset], 17752 ldu); 17753 17754 } else { 17755 17756 /* Insufficient workspace for a fast algorithm */ 17757 17758 itau = 1; 17759 iwork = itau + *n; 17760 17761 /* Compute A=Q*R, copying result to U 17762 (Workspace: need 2*N, prefer N+N*NB) */ 17763 17764 i__2 = *lwork - iwork + 1; 17765 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ 17766 iwork], &i__2, &ierr); 17767 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 17768 ldu); 17769 17770 /* Generate Q in U 17771 (Workspace: need 2*N, prefer N+N*NB) */ 17772 17773 i__2 = *lwork - iwork + 1; 17774 sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & 17775 work[iwork], &i__2, &ierr); 17776 ie = itau; 17777 itauq = ie + *n; 17778 itaup = itauq + *n; 17779 iwork = itaup + *n; 17780 17781 /* Zero out below R in A */ 17782 17783 i__2 = *n - 1; 17784 i__3 
= *n - 1; 17785 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &a_ref(2, 17786 1), lda); 17787 17788 /* Bidiagonalize R in A 17789 (Workspace: need 4*N, prefer 3*N+2*N*NB) */ 17790 17791 i__2 = *lwork - iwork + 1; 17792 sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & 17793 work[itauq], &work[itaup], &work[iwork], & 17794 i__2, &ierr); 17795 17796 /* Multiply Q in U by left vectors bidiagonalizing R 17797 (Workspace: need 3*N+M, prefer 3*N+M*NB) */ 17798 17799 i__2 = *lwork - iwork + 1; 17800 sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & 17801 work[itauq], &u[u_offset], ldu, &work[iwork], 17802 &i__2, &ierr) 17803 ; 17804 iwork = ie + *n; 17805 17806 /* Perform bidiagonal QR iteration, computing left 17807 singular vectors of A in U 17808 (Workspace: need BDSPAC) */ 17809 17810 sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], 17811 dum, &c__1, &u[u_offset], ldu, dum, &c__1, & 17812 work[iwork], info); 17813 17814 } 17815 17816 } else if (wntvo) { 17817 17818 /* Path 5 (M much larger than N, JOBU='S', JOBVT='O') 17819 N left singular vectors to be computed in U and 17820 N right singular vectors to be overwritten on A 17821 17822 Computing MAX */ 17823 i__2 = *n << 2; 17824 if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) { 17825 17826 /* Sufficient workspace for a fast algorithm */ 17827 17828 iu = 1; 17829 if (*lwork >= wrkbl + (*lda << 1) * *n) { 17830 17831 /* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ 17832 17833 ldwrku = *lda; 17834 ir = iu + ldwrku * *n; 17835 ldwrkr = *lda; 17836 } else if (*lwork >= wrkbl + (*lda + *n) * *n) { 17837 17838 /* WORK(IU) is LDA by N and WORK(IR) is N by N */ 17839 17840 ldwrku = *lda; 17841 ir = iu + ldwrku * *n; 17842 ldwrkr = *n; 17843 } else { 17844 17845 /* WORK(IU) is N by N and WORK(IR) is N by N */ 17846 17847 ldwrku = *n; 17848 ir = iu + ldwrku * *n; 17849 ldwrkr = *n; 17850 } 17851 itau = ir + ldwrkr * *n; 17852 iwork = itau + *n; 17853 17854 /* Compute A=Q*R 17855 (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */ 17856 17857 i__2 = *lwork - iwork + 1; 17858 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ 17859 iwork], &i__2, &ierr); 17860 17861 /* Copy R to WORK(IU), zeroing out below it */ 17862 17863 slacpy_("U", n, n, &a[a_offset], lda, &work[iu], & 17864 ldwrku); 17865 i__2 = *n - 1; 17866 i__3 = *n - 1; 17867 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[iu 17868 + 1], &ldwrku); 17869 17870 /* Generate Q in A 17871 (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */ 17872 17873 i__2 = *lwork - iwork + 1; 17874 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & 17875 work[iwork], &i__2, &ierr); 17876 ie = itau; 17877 itauq = ie + *n; 17878 itaup = itauq + *n; 17879 iwork = itaup + *n; 17880 17881 /* Bidiagonalize R in WORK(IU), copying result to 17882 WORK(IR) 17883 (Workspace: need 2*N*N+4*N, 17884 prefer 2*N*N+3*N+2*N*NB) */ 17885 17886 i__2 = *lwork - iwork + 1; 17887 sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & 17888 work[itauq], &work[itaup], &work[iwork], & 17889 i__2, &ierr); 17890 slacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], & 17891 ldwrkr); 17892 17893 /* Generate left bidiagonalizing vectors in WORK(IU) 17894 (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */ 17895 17896 i__2 = *lwork - iwork + 1; 17897 sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] 17898 , &work[iwork], &i__2, &ierr); 17899 17900 /* Generate right bidiagonalizing vectors in WORK(IR) 17901 (Workspace: need 2*N*N+4*N-1, 17902 prefer 2*N*N+3*N+(N-1)*NB) */ 17903 17904 i__2 = *lwork - iwork + 1; 17905 
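/* ------------------------------------------------------------------
   Illustrative sketch, not part of the translated LAPACK source: what
   the SGEMM call at the end of these "fast" tall paths computes.
   After SBDSQR, WORK(IU) holds U1, the N-by-N left singular vectors
   of R, and A still holds the M-by-N orthonormal factor Q of A = Q*R,
   so the left singular vectors of A are U = Q*U1 (since
   A = Q*U1*S*V1').  The naive column-major triple loop below, with
   explicit leading dimensions, only spells out that product; the real
   code calls the blocked SGEMM with c_b438 and c_b416 serving as
   alpha = 1 and beta = 0.  Names are hypothetical and long stands in
   for the f2c integer type.                                          */
static void compose_u_sketch(long m, long n,
                             const float *q,  long ldq,   /* M x N: Q of A = Q*R    */
                             const float *u1, long ldu1,  /* N x N: left vecs of R  */
                             float *u,        long ldu)   /* M x N: result U        */
{
    for (long j = 0; j < n; ++j) {
        for (long i = 0; i < m; ++i) {
            float sum = 0.f;
            for (long k = 0; k < n; ++k)
                sum += q[i + k * ldq] * u1[k + j * ldu1];
            u[i + j * ldu] = sum;       /* U(i,j) = sum_k Q(i,k)*U1(k,j) */
        }
    }
}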
sorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup] 17906 , &work[iwork], &i__2, &ierr); 17907 iwork = ie + *n; 17908 17909 /* Perform bidiagonal QR iteration, computing left 17910 singular vectors of R in WORK(IU) and computing 17911 right singular vectors of R in WORK(IR) 17912 (Workspace: need 2*N*N+BDSPAC) */ 17913 17914 sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[ 17915 ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, 17916 &work[iwork], info); 17917 17918 /* Multiply Q in A by left singular vectors of R in 17919 WORK(IU), storing result in U 17920 (Workspace: need N*N) */ 17921 17922 sgemm_("N", "N", m, n, n, &c_b438, &a[a_offset], lda, 17923 &work[iu], &ldwrku, &c_b416, &u[u_offset], 17924 ldu); 17925 17926 /* Copy right singular vectors of R to A 17927 (Workspace: need N*N) */ 17928 17929 slacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], 17930 lda); 17931 17932 } else { 17933 17934 /* Insufficient workspace for a fast algorithm */ 17935 17936 itau = 1; 17937 iwork = itau + *n; 17938 17939 /* Compute A=Q*R, copying result to U 17940 (Workspace: need 2*N, prefer N+N*NB) */ 17941 17942 i__2 = *lwork - iwork + 1; 17943 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ 17944 iwork], &i__2, &ierr); 17945 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 17946 ldu); 17947 17948 /* Generate Q in U 17949 (Workspace: need 2*N, prefer N+N*NB) */ 17950 17951 i__2 = *lwork - iwork + 1; 17952 sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & 17953 work[iwork], &i__2, &ierr); 17954 ie = itau; 17955 itauq = ie + *n; 17956 itaup = itauq + *n; 17957 iwork = itaup + *n; 17958 17959 /* Zero out below R in A */ 17960 17961 i__2 = *n - 1; 17962 i__3 = *n - 1; 17963 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &a_ref(2, 17964 1), lda); 17965 17966 /* Bidiagonalize R in A 17967 (Workspace: need 4*N, prefer 3*N+2*N*NB) */ 17968 17969 i__2 = *lwork - iwork + 1; 17970 sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & 17971 work[itauq], &work[itaup], &work[iwork], & 17972 i__2, &ierr); 17973 17974 /* Multiply Q in U by left vectors bidiagonalizing R 17975 (Workspace: need 3*N+M, prefer 3*N+M*NB) */ 17976 17977 i__2 = *lwork - iwork + 1; 17978 sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & 17979 work[itauq], &u[u_offset], ldu, &work[iwork], 17980 &i__2, &ierr) 17981 ; 17982 17983 /* Generate right vectors bidiagonalizing R in A 17984 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ 17985 17986 i__2 = *lwork - iwork + 1; 17987 sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], 17988 &work[iwork], &i__2, &ierr); 17989 iwork = ie + *n; 17990 17991 /* Perform bidiagonal QR iteration, computing left 17992 singular vectors of A in U and computing right 17993 singular vectors of A in A 17994 (Workspace: need BDSPAC) */ 17995 17996 sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[ 17997 a_offset], lda, &u[u_offset], ldu, dum, &c__1, 17998 &work[iwork], info); 17999 18000 } 18001 18002 } else if (wntvas) { 18003 18004 /* Path 6 (M much larger than N, JOBU='S', JOBVT='S' 18005 or 'A') 18006 N left singular vectors to be computed in U and 18007 N right singular vectors to be computed in VT 18008 18009 Computing MAX */ 18010 i__2 = *n << 2; 18011 if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) { 18012 18013 /* Sufficient workspace for a fast algorithm */ 18014 18015 iu = 1; 18016 if (*lwork >= wrkbl + *lda * *n) { 18017 18018 /* WORK(IU) is LDA by N */ 18019 18020 ldwrku = *lda; 18021 } else { 18022 18023 /* WORK(IU) is N by N */ 18024 18025 ldwrku = *n; 18026 } 18027 itau = iu + ldwrku * *n; 
18028 iwork = itau + *n; 18029 18030 /* Compute A=Q*R 18031 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ 18032 18033 i__2 = *lwork - iwork + 1; 18034 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ 18035 iwork], &i__2, &ierr); 18036 18037 /* Copy R to WORK(IU), zeroing out below it */ 18038 18039 slacpy_("U", n, n, &a[a_offset], lda, &work[iu], & 18040 ldwrku); 18041 i__2 = *n - 1; 18042 i__3 = *n - 1; 18043 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[iu 18044 + 1], &ldwrku); 18045 18046 /* Generate Q in A 18047 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ 18048 18049 i__2 = *lwork - iwork + 1; 18050 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & 18051 work[iwork], &i__2, &ierr); 18052 ie = itau; 18053 itauq = ie + *n; 18054 itaup = itauq + *n; 18055 iwork = itaup + *n; 18056 18057 /* Bidiagonalize R in WORK(IU), copying result to VT 18058 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ 18059 18060 i__2 = *lwork - iwork + 1; 18061 sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & 18062 work[itauq], &work[itaup], &work[iwork], & 18063 i__2, &ierr); 18064 slacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset], 18065 ldvt); 18066 18067 /* Generate left bidiagonalizing vectors in WORK(IU) 18068 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ 18069 18070 i__2 = *lwork - iwork + 1; 18071 sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] 18072 , &work[iwork], &i__2, &ierr); 18073 18074 /* Generate right bidiagonalizing vectors in VT 18075 (Workspace: need N*N+4*N-1, 18076 prefer N*N+3*N+(N-1)*NB) */ 18077 18078 i__2 = *lwork - iwork + 1; 18079 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ 18080 itaup], &work[iwork], &i__2, &ierr) 18081 ; 18082 iwork = ie + *n; 18083 18084 /* Perform bidiagonal QR iteration, computing left 18085 singular vectors of R in WORK(IU) and computing 18086 right singular vectors of R in VT 18087 (Workspace: need N*N+BDSPAC) */ 18088 18089 sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[ 18090 vt_offset], ldvt, &work[iu], &ldwrku, dum, & 18091 c__1, &work[iwork], info); 18092 18093 /* Multiply Q in A by left singular vectors of R in 18094 WORK(IU), storing result in U 18095 (Workspace: need N*N) */ 18096 18097 sgemm_("N", "N", m, n, n, &c_b438, &a[a_offset], lda, 18098 &work[iu], &ldwrku, &c_b416, &u[u_offset], 18099 ldu); 18100 18101 } else { 18102 18103 /* Insufficient workspace for a fast algorithm */ 18104 18105 itau = 1; 18106 iwork = itau + *n; 18107 18108 /* Compute A=Q*R, copying result to U 18109 (Workspace: need 2*N, prefer N+N*NB) */ 18110 18111 i__2 = *lwork - iwork + 1; 18112 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ 18113 iwork], &i__2, &ierr); 18114 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 18115 ldu); 18116 18117 /* Generate Q in U 18118 (Workspace: need 2*N, prefer N+N*NB) */ 18119 18120 i__2 = *lwork - iwork + 1; 18121 sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & 18122 work[iwork], &i__2, &ierr); 18123 18124 /* Copy R to VT, zeroing out below it */ 18125 18126 slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 18127 ldvt); 18128 i__2 = *n - 1; 18129 i__3 = *n - 1; 18130 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &vt_ref( 18131 2, 1), ldvt); 18132 ie = itau; 18133 itauq = ie + *n; 18134 itaup = itauq + *n; 18135 iwork = itaup + *n; 18136 18137 /* Bidiagonalize R in VT 18138 (Workspace: need 4*N, prefer 3*N+2*N*NB) */ 18139 18140 i__2 = *lwork - iwork + 1; 18141 sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], 18142 &work[itauq], &work[itaup], &work[iwork], & 18143 i__2, 
&ierr); 18144 18145 /* Multiply Q in U by left bidiagonalizing vectors 18146 in VT 18147 (Workspace: need 3*N+M, prefer 3*N+M*NB) */ 18148 18149 i__2 = *lwork - iwork + 1; 18150 sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, 18151 &work[itauq], &u[u_offset], ldu, &work[iwork], 18152 &i__2, &ierr); 18153 18154 /* Generate right bidiagonalizing vectors in VT 18155 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ 18156 18157 i__2 = *lwork - iwork + 1; 18158 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ 18159 itaup], &work[iwork], &i__2, &ierr) 18160 ; 18161 iwork = ie + *n; 18162 18163 /* Perform bidiagonal QR iteration, computing left 18164 singular vectors of A in U and computing right 18165 singular vectors of A in VT 18166 (Workspace: need BDSPAC) */ 18167 18168 sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[ 18169 vt_offset], ldvt, &u[u_offset], ldu, dum, & 18170 c__1, &work[iwork], info); 18171 18172 } 18173 18174 } 18175 18176 } else if (wntua) { 18177 18178 if (wntvn) { 18179 18180 /* Path 7 (M much larger than N, JOBU='A', JOBVT='N') 18181 M left singular vectors to be computed in U and 18182 no right singular vectors to be computed 18183 18184 Computing MAX */ 18185 i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3); 18186 if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) { 18187 18188 /* Sufficient workspace for a fast algorithm */ 18189 18190 ir = 1; 18191 if (*lwork >= wrkbl + *lda * *n) { 18192 18193 /* WORK(IR) is LDA by N */ 18194 18195 ldwrkr = *lda; 18196 } else { 18197 18198 /* WORK(IR) is N by N */ 18199 18200 ldwrkr = *n; 18201 } 18202 itau = ir + ldwrkr * *n; 18203 iwork = itau + *n; 18204 18205 /* Compute A=Q*R, copying result to U 18206 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ 18207 18208 i__2 = *lwork - iwork + 1; 18209 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ 18210 iwork], &i__2, &ierr); 18211 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 18212 ldu); 18213 18214 /* Copy R to WORK(IR), zeroing out below it */ 18215 18216 slacpy_("U", n, n, &a[a_offset], lda, &work[ir], & 18217 ldwrkr); 18218 i__2 = *n - 1; 18219 i__3 = *n - 1; 18220 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[ir 18221 + 1], &ldwrkr); 18222 18223 /* Generate Q in U 18224 (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */ 18225 18226 i__2 = *lwork - iwork + 1; 18227 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & 18228 work[iwork], &i__2, &ierr); 18229 ie = itau; 18230 itauq = ie + *n; 18231 itaup = itauq + *n; 18232 iwork = itaup + *n; 18233 18234 /* Bidiagonalize R in WORK(IR) 18235 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ 18236 18237 i__2 = *lwork - iwork + 1; 18238 sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], & 18239 work[itauq], &work[itaup], &work[iwork], & 18240 i__2, &ierr); 18241 18242 /* Generate left bidiagonalizing vectors in WORK(IR) 18243 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ 18244 18245 i__2 = *lwork - iwork + 1; 18246 sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq] 18247 , &work[iwork], &i__2, &ierr); 18248 iwork = ie + *n; 18249 18250 /* Perform bidiagonal QR iteration, computing left 18251 singular vectors of R in WORK(IR) 18252 (Workspace: need N*N+BDSPAC) */ 18253 18254 sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], 18255 dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, & 18256 work[iwork], info); 18257 18258 /* Multiply Q in U by left singular vectors of R in 18259 WORK(IR), storing result in A 18260 (Workspace: need N*N) */ 18261 18262 sgemm_("N", "N", m, n, n, &c_b438, &u[u_offset], ldu, 18263 
&work[ir], &ldwrkr, &c_b416, &a[a_offset], 18264 lda); 18265 18266 /* Copy left singular vectors of A from A to U */ 18267 18268 slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 18269 ldu); 18270 18271 } else { 18272 18273 /* Insufficient workspace for a fast algorithm */ 18274 18275 itau = 1; 18276 iwork = itau + *n; 18277 18278 /* Compute A=Q*R, copying result to U 18279 (Workspace: need 2*N, prefer N+N*NB) */ 18280 18281 i__2 = *lwork - iwork + 1; 18282 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ 18283 iwork], &i__2, &ierr); 18284 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 18285 ldu); 18286 18287 /* Generate Q in U 18288 (Workspace: need N+M, prefer N+M*NB) */ 18289 18290 i__2 = *lwork - iwork + 1; 18291 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & 18292 work[iwork], &i__2, &ierr); 18293 ie = itau; 18294 itauq = ie + *n; 18295 itaup = itauq + *n; 18296 iwork = itaup + *n; 18297 18298 /* Zero out below R in A */ 18299 18300 i__2 = *n - 1; 18301 i__3 = *n - 1; 18302 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &a_ref(2, 18303 1), lda); 18304 18305 /* Bidiagonalize R in A 18306 (Workspace: need 4*N, prefer 3*N+2*N*NB) */ 18307 18308 i__2 = *lwork - iwork + 1; 18309 sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & 18310 work[itauq], &work[itaup], &work[iwork], & 18311 i__2, &ierr); 18312 18313 /* Multiply Q in U by left bidiagonalizing vectors 18314 in A 18315 (Workspace: need 3*N+M, prefer 3*N+M*NB) */ 18316 18317 i__2 = *lwork - iwork + 1; 18318 sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & 18319 work[itauq], &u[u_offset], ldu, &work[iwork], 18320 &i__2, &ierr) 18321 ; 18322 iwork = ie + *n; 18323 18324 /* Perform bidiagonal QR iteration, computing left 18325 singular vectors of A in U 18326 (Workspace: need BDSPAC) */ 18327 18328 sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], 18329 dum, &c__1, &u[u_offset], ldu, dum, &c__1, & 18330 work[iwork], info); 18331 18332 } 18333 18334 } else if (wntvo) { 18335 18336 /* Path 8 (M much larger than N, JOBU='A', JOBVT='O') 18337 M left singular vectors to be computed in U and 18338 N right singular vectors to be overwritten on A 18339 18340 Computing MAX */ 18341 i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3); 18342 if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) { 18343 18344 /* Sufficient workspace for a fast algorithm */ 18345 18346 iu = 1; 18347 if (*lwork >= wrkbl + (*lda << 1) * *n) { 18348 18349 /* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ 18350 18351 ldwrku = *lda; 18352 ir = iu + ldwrku * *n; 18353 ldwrkr = *lda; 18354 } else if (*lwork >= wrkbl + (*lda + *n) * *n) { 18355 18356 /* WORK(IU) is LDA by N and WORK(IR) is N by N */ 18357 18358 ldwrku = *lda; 18359 ir = iu + ldwrku * *n; 18360 ldwrkr = *n; 18361 } else { 18362 18363 /* WORK(IU) is N by N and WORK(IR) is N by N */ 18364 18365 ldwrku = *n; 18366 ir = iu + ldwrku * *n; 18367 ldwrkr = *n; 18368 } 18369 itau = ir + ldwrkr * *n; 18370 iwork = itau + *n; 18371 18372 /* Compute A=Q*R, copying result to U 18373 (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */ 18374 18375 i__2 = *lwork - iwork + 1; 18376 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ 18377 iwork], &i__2, &ierr); 18378 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 18379 ldu); 18380 18381 /* Generate Q in U 18382 (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) */ 18383 18384 i__2 = *lwork - iwork + 1; 18385 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & 18386 work[iwork], &i__2, &ierr); 18387 18388 /* Copy R to WORK(IU), zeroing out below it 
*/ 18389 18390 slacpy_("U", n, n, &a[a_offset], lda, &work[iu], & 18391 ldwrku); 18392 i__2 = *n - 1; 18393 i__3 = *n - 1; 18394 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[iu 18395 + 1], &ldwrku); 18396 ie = itau; 18397 itauq = ie + *n; 18398 itaup = itauq + *n; 18399 iwork = itaup + *n; 18400 18401 /* Bidiagonalize R in WORK(IU), copying result to 18402 WORK(IR) 18403 (Workspace: need 2*N*N+4*N, 18404 prefer 2*N*N+3*N+2*N*NB) */ 18405 18406 i__2 = *lwork - iwork + 1; 18407 sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & 18408 work[itauq], &work[itaup], &work[iwork], & 18409 i__2, &ierr); 18410 slacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], & 18411 ldwrkr); 18412 18413 /* Generate left bidiagonalizing vectors in WORK(IU) 18414 (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */ 18415 18416 i__2 = *lwork - iwork + 1; 18417 sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] 18418 , &work[iwork], &i__2, &ierr); 18419 18420 /* Generate right bidiagonalizing vectors in WORK(IR) 18421 (Workspace: need 2*N*N+4*N-1, 18422 prefer 2*N*N+3*N+(N-1)*NB) */ 18423 18424 i__2 = *lwork - iwork + 1; 18425 sorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup] 18426 , &work[iwork], &i__2, &ierr); 18427 iwork = ie + *n; 18428 18429 /* Perform bidiagonal QR iteration, computing left 18430 singular vectors of R in WORK(IU) and computing 18431 right singular vectors of R in WORK(IR) 18432 (Workspace: need 2*N*N+BDSPAC) */ 18433 18434 sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[ 18435 ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, 18436 &work[iwork], info); 18437 18438 /* Multiply Q in U by left singular vectors of R in 18439 WORK(IU), storing result in A 18440 (Workspace: need N*N) */ 18441 18442 sgemm_("N", "N", m, n, n, &c_b438, &u[u_offset], ldu, 18443 &work[iu], &ldwrku, &c_b416, &a[a_offset], 18444 lda); 18445 18446 /* Copy left singular vectors of A from A to U */ 18447 18448 slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 18449 ldu); 18450 18451 /* Copy right singular vectors of R from WORK(IR) to A */ 18452 18453 slacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], 18454 lda); 18455 18456 } else { 18457 18458 /* Insufficient workspace for a fast algorithm */ 18459 18460 itau = 1; 18461 iwork = itau + *n; 18462 18463 /* Compute A=Q*R, copying result to U 18464 (Workspace: need 2*N, prefer N+N*NB) */ 18465 18466 i__2 = *lwork - iwork + 1; 18467 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ 18468 iwork], &i__2, &ierr); 18469 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 18470 ldu); 18471 18472 /* Generate Q in U 18473 (Workspace: need N+M, prefer N+M*NB) */ 18474 18475 i__2 = *lwork - iwork + 1; 18476 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & 18477 work[iwork], &i__2, &ierr); 18478 ie = itau; 18479 itauq = ie + *n; 18480 itaup = itauq + *n; 18481 iwork = itaup + *n; 18482 18483 /* Zero out below R in A */ 18484 18485 i__2 = *n - 1; 18486 i__3 = *n - 1; 18487 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &a_ref(2, 18488 1), lda); 18489 18490 /* Bidiagonalize R in A 18491 (Workspace: need 4*N, prefer 3*N+2*N*NB) */ 18492 18493 i__2 = *lwork - iwork + 1; 18494 sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & 18495 work[itauq], &work[itaup], &work[iwork], & 18496 i__2, &ierr); 18497 18498 /* Multiply Q in U by left bidiagonalizing vectors 18499 in A 18500 (Workspace: need 3*N+M, prefer 3*N+M*NB) */ 18501 18502 i__2 = *lwork - iwork + 1; 18503 sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & 18504 work[itauq], &u[u_offset], ldu, &work[iwork], 18505 
&i__2, &ierr) 18506 ; 18507 18508 /* Generate right bidiagonalizing vectors in A 18509 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ 18510 18511 i__2 = *lwork - iwork + 1; 18512 sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], 18513 &work[iwork], &i__2, &ierr); 18514 iwork = ie + *n; 18515 18516 /* Perform bidiagonal QR iteration, computing left 18517 singular vectors of A in U and computing right 18518 singular vectors of A in A 18519 (Workspace: need BDSPAC) */ 18520 18521 sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[ 18522 a_offset], lda, &u[u_offset], ldu, dum, &c__1, 18523 &work[iwork], info); 18524 18525 } 18526 18527 } else if (wntvas) { 18528 18529 /* Path 9 (M much larger than N, JOBU='A', JOBVT='S' 18530 or 'A') 18531 M left singular vectors to be computed in U and 18532 N right singular vectors to be computed in VT 18533 18534 Computing MAX */ 18535 i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3); 18536 if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) { 18537 18538 /* Sufficient workspace for a fast algorithm */ 18539 18540 iu = 1; 18541 if (*lwork >= wrkbl + *lda * *n) { 18542 18543 /* WORK(IU) is LDA by N */ 18544 18545 ldwrku = *lda; 18546 } else { 18547 18548 /* WORK(IU) is N by N */ 18549 18550 ldwrku = *n; 18551 } 18552 itau = iu + ldwrku * *n; 18553 iwork = itau + *n; 18554 18555 /* Compute A=Q*R, copying result to U 18556 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ 18557 18558 i__2 = *lwork - iwork + 1; 18559 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ 18560 iwork], &i__2, &ierr); 18561 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 18562 ldu); 18563 18564 /* Generate Q in U 18565 (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */ 18566 18567 i__2 = *lwork - iwork + 1; 18568 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & 18569 work[iwork], &i__2, &ierr); 18570 18571 /* Copy R to WORK(IU), zeroing out below it */ 18572 18573 slacpy_("U", n, n, &a[a_offset], lda, &work[iu], & 18574 ldwrku); 18575 i__2 = *n - 1; 18576 i__3 = *n - 1; 18577 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[iu 18578 + 1], &ldwrku); 18579 ie = itau; 18580 itauq = ie + *n; 18581 itaup = itauq + *n; 18582 iwork = itaup + *n; 18583 18584 /* Bidiagonalize R in WORK(IU), copying result to VT 18585 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ 18586 18587 i__2 = *lwork - iwork + 1; 18588 sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & 18589 work[itauq], &work[itaup], &work[iwork], & 18590 i__2, &ierr); 18591 slacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset], 18592 ldvt); 18593 18594 /* Generate left bidiagonalizing vectors in WORK(IU) 18595 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ 18596 18597 i__2 = *lwork - iwork + 1; 18598 sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] 18599 , &work[iwork], &i__2, &ierr); 18600 18601 /* Generate right bidiagonalizing vectors in VT 18602 (Workspace: need N*N+4*N-1, 18603 prefer N*N+3*N+(N-1)*NB) */ 18604 18605 i__2 = *lwork - iwork + 1; 18606 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ 18607 itaup], &work[iwork], &i__2, &ierr) 18608 ; 18609 iwork = ie + *n; 18610 18611 /* Perform bidiagonal QR iteration, computing left 18612 singular vectors of R in WORK(IU) and computing 18613 right singular vectors of R in VT 18614 (Workspace: need N*N+BDSPAC) */ 18615 18616 sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[ 18617 vt_offset], ldvt, &work[iu], &ldwrku, dum, & 18618 c__1, &work[iwork], info); 18619 18620 /* Multiply Q in U by left singular vectors of R in 18621 WORK(IU), storing result in 
A 18622 (Workspace: need N*N) */ 18623 18624 sgemm_("N", "N", m, n, n, &c_b438, &u[u_offset], ldu, 18625 &work[iu], &ldwrku, &c_b416, &a[a_offset], 18626 lda); 18627 18628 /* Copy left singular vectors of A from A to U */ 18629 18630 slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 18631 ldu); 18632 18633 } else { 18634 18635 /* Insufficient workspace for a fast algorithm */ 18636 18637 itau = 1; 18638 iwork = itau + *n; 18639 18640 /* Compute A=Q*R, copying result to U 18641 (Workspace: need 2*N, prefer N+N*NB) */ 18642 18643 i__2 = *lwork - iwork + 1; 18644 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ 18645 iwork], &i__2, &ierr); 18646 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 18647 ldu); 18648 18649 /* Generate Q in U 18650 (Workspace: need N+M, prefer N+M*NB) */ 18651 18652 i__2 = *lwork - iwork + 1; 18653 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & 18654 work[iwork], &i__2, &ierr); 18655 18656 /* Copy R from A to VT, zeroing out below it */ 18657 18658 slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 18659 ldvt); 18660 i__2 = *n - 1; 18661 i__3 = *n - 1; 18662 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &vt_ref( 18663 2, 1), ldvt); 18664 ie = itau; 18665 itauq = ie + *n; 18666 itaup = itauq + *n; 18667 iwork = itaup + *n; 18668 18669 /* Bidiagonalize R in VT 18670 (Workspace: need 4*N, prefer 3*N+2*N*NB) */ 18671 18672 i__2 = *lwork - iwork + 1; 18673 sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], 18674 &work[itauq], &work[itaup], &work[iwork], & 18675 i__2, &ierr); 18676 18677 /* Multiply Q in U by left bidiagonalizing vectors 18678 in VT 18679 (Workspace: need 3*N+M, prefer 3*N+M*NB) */ 18680 18681 i__2 = *lwork - iwork + 1; 18682 sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, 18683 &work[itauq], &u[u_offset], ldu, &work[iwork], 18684 &i__2, &ierr); 18685 18686 /* Generate right bidiagonalizing vectors in VT 18687 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ 18688 18689 i__2 = *lwork - iwork + 1; 18690 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ 18691 itaup], &work[iwork], &i__2, &ierr) 18692 ; 18693 iwork = ie + *n; 18694 18695 /* Perform bidiagonal QR iteration, computing left 18696 singular vectors of A in U and computing right 18697 singular vectors of A in VT 18698 (Workspace: need BDSPAC) */ 18699 18700 sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[ 18701 vt_offset], ldvt, &u[u_offset], ldu, dum, & 18702 c__1, &work[iwork], info); 18703 18704 } 18705 18706 } 18707 18708 } 18709 18710 } else { 18711 18712 /* M .LT. 
MNTHR 18713 18714 Path 10 (M at least N, but not much larger) 18715 Reduce to bidiagonal form without QR decomposition */ 18716 18717 ie = 1; 18718 itauq = ie + *n; 18719 itaup = itauq + *n; 18720 iwork = itaup + *n; 18721 18722 /* Bidiagonalize A 18723 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */ 18724 18725 i__2 = *lwork - iwork + 1; 18726 sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & 18727 work[itaup], &work[iwork], &i__2, &ierr); 18728 if (wntuas) { 18729 18730 /* If left singular vectors desired in U, copy result to U 18731 and generate left bidiagonalizing vectors in U 18732 (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB) */ 18733 18734 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); 18735 if (wntus) { 18736 ncu = *n; 18737 } 18738 if (wntua) { 18739 ncu = *m; 18740 } 18741 i__2 = *lwork - iwork + 1; 18742 sorgbr_("Q", m, &ncu, n, &u[u_offset], ldu, &work[itauq], & 18743 work[iwork], &i__2, &ierr); 18744 } 18745 if (wntvas) { 18746 18747 /* If right singular vectors desired in VT, copy result to 18748 VT and generate right bidiagonalizing vectors in VT 18749 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ 18750 18751 slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); 18752 i__2 = *lwork - iwork + 1; 18753 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & 18754 work[iwork], &i__2, &ierr); 18755 } 18756 if (wntuo) { 18757 18758 /* If left singular vectors desired in A, generate left 18759 bidiagonalizing vectors in A 18760 (Workspace: need 4*N, prefer 3*N+N*NB) */ 18761 18762 i__2 = *lwork - iwork + 1; 18763 sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[ 18764 iwork], &i__2, &ierr); 18765 } 18766 if (wntvo) { 18767 18768 /* If right singular vectors desired in A, generate right 18769 bidiagonalizing vectors in A 18770 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ 18771 18772 i__2 = *lwork - iwork + 1; 18773 sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[ 18774 iwork], &i__2, &ierr); 18775 } 18776 iwork = ie + *n; 18777 if (wntuas || wntuo) { 18778 nru = *m; 18779 } 18780 if (wntun) { 18781 nru = 0; 18782 } 18783 if (wntvas || wntvo) { 18784 ncvt = *n; 18785 } 18786 if (wntvn) { 18787 ncvt = 0; 18788 } 18789 if (! wntuo && ! wntvo) { 18790 18791 /* Perform bidiagonal QR iteration, if desired, computing 18792 left singular vectors in U and computing right singular 18793 vectors in VT 18794 (Workspace: need BDSPAC) */ 18795 18796 sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ 18797 vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, & 18798 work[iwork], info); 18799 } else if (! wntuo && wntvo) { 18800 18801 /* Perform bidiagonal QR iteration, if desired, computing 18802 left singular vectors in U and computing right singular 18803 vectors in A 18804 (Workspace: need BDSPAC) */ 18805 18806 sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[ 18807 a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[ 18808 iwork], info); 18809 } else { 18810 18811 /* Perform bidiagonal QR iteration, if desired, computing 18812 left singular vectors in A and computing right singular 18813 vectors in VT 18814 (Workspace: need BDSPAC) */ 18815 18816 sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ 18817 vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & 18818 work[iwork], info); 18819 } 18820 18821 } 18822 18823 } else { 18824 18825 /* A has more columns than rows. 
If A has sufficiently more 18826 columns than rows, first reduce using the LQ decomposition (if 18827 sufficient workspace available) */ 18828 18829 if (*n >= mnthr) { 18830 18831 if (wntvn) { 18832 18833 /* Path 1t(N much larger than M, JOBVT='N') 18834 No right singular vectors to be computed */ 18835 18836 itau = 1; 18837 iwork = itau + *m; 18838 18839 /* Compute A=L*Q 18840 (Workspace: need 2*M, prefer M+M*NB) */ 18841 18842 i__2 = *lwork - iwork + 1; 18843 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], & 18844 i__2, &ierr); 18845 18846 /* Zero out above L */ 18847 18848 i__2 = *m - 1; 18849 i__3 = *m - 1; 18850 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &a_ref(1, 2), 18851 lda); 18852 ie = 1; 18853 itauq = ie + *m; 18854 itaup = itauq + *m; 18855 iwork = itaup + *m; 18856 18857 /* Bidiagonalize L in A 18858 (Workspace: need 4*M, prefer 3*M+2*M*NB) */ 18859 18860 i__2 = *lwork - iwork + 1; 18861 sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ 18862 itauq], &work[itaup], &work[iwork], &i__2, &ierr); 18863 if (wntuo || wntuas) { 18864 18865 /* If left singular vectors desired, generate Q 18866 (Workspace: need 4*M, prefer 3*M+M*NB) */ 18867 18868 i__2 = *lwork - iwork + 1; 18869 sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], & 18870 work[iwork], &i__2, &ierr); 18871 } 18872 iwork = ie + *m; 18873 nru = 0; 18874 if (wntuo || wntuas) { 18875 nru = *m; 18876 } 18877 18878 /* Perform bidiagonal QR iteration, computing left singular 18879 vectors of A in A if desired 18880 (Workspace: need BDSPAC) */ 18881 18882 sbdsqr_("U", m, &c__0, &nru, &c__0, &s[1], &work[ie], dum, & 18883 c__1, &a[a_offset], lda, dum, &c__1, &work[iwork], 18884 info); 18885 18886 /* If left singular vectors desired in U, copy them there */ 18887 18888 if (wntuas) { 18889 slacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); 18890 } 18891 18892 } else if (wntvo && wntun) { 18893 18894 /* Path 2t(N much larger than M, JOBU='N', JOBVT='O') 18895 M right singular vectors to be overwritten on A and 18896 no left singular vectors to be computed 18897 18898 Computing MAX */ 18899 i__2 = *m << 2; 18900 if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) { 18901 18902 /* Sufficient workspace for a fast algorithm */ 18903 18904 ir = 1; 18905 /* Computing MAX */ 18906 i__2 = wrkbl, i__3 = *lda * *n + *m; 18907 if (*lwork >= f2cmax(i__2,i__3) + *lda * *m) { 18908 18909 /* WORK(IU) is LDA by N and WORK(IR) is LDA by M */ 18910 18911 ldwrku = *lda; 18912 chunk = *n; 18913 ldwrkr = *lda; 18914 } else /* if(complicated condition) */ { 18915 /* Computing MAX */ 18916 i__2 = wrkbl, i__3 = *lda * *n + *m; 18917 if (*lwork >= f2cmax(i__2,i__3) + *m * *m) { 18918 18919 /* WORK(IU) is LDA by N and WORK(IR) is M by M */ 18920 18921 ldwrku = *lda; 18922 chunk = *n; 18923 ldwrkr = *m; 18924 } else { 18925 18926 /* WORK(IU) is M by CHUNK and WORK(IR) is M by M */ 18927 18928 ldwrku = *m; 18929 chunk = (*lwork - *m * *m - *m) / *m; 18930 ldwrkr = *m; 18931 } 18932 } 18933 itau = ir + ldwrkr * *m; 18934 iwork = itau + *m; 18935 18936 /* Compute A=L*Q 18937 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ 18938 18939 i__2 = *lwork - iwork + 1; 18940 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] 18941 , &i__2, &ierr); 18942 18943 /* Copy L to WORK(IR) and zero out above it */ 18944 18945 slacpy_("L", m, m, &a[a_offset], lda, &work[ir], &ldwrkr); 18946 i__2 = *m - 1; 18947 i__3 = *m - 1; 18948 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[ir + 18949 ldwrkr], &ldwrkr); 18950 18951 /* Generate Q in A 18952 
(Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ 18953 18954 i__2 = *lwork - iwork + 1; 18955 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ 18956 iwork], &i__2, &ierr); 18957 ie = itau; 18958 itauq = ie + *m; 18959 itaup = itauq + *m; 18960 iwork = itaup + *m; 18961 18962 /* Bidiagonalize L in WORK(IR) 18963 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ 18964 18965 i__2 = *lwork - iwork + 1; 18966 sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ 18967 itauq], &work[itaup], &work[iwork], &i__2, &ierr); 18968 18969 /* Generate right vectors bidiagonalizing L 18970 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */ 18971 18972 i__2 = *lwork - iwork + 1; 18973 sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], & 18974 work[iwork], &i__2, &ierr); 18975 iwork = ie + *m; 18976 18977 /* Perform bidiagonal QR iteration, computing right 18978 singular vectors of L in WORK(IR) 18979 (Workspace: need M*M+BDSPAC) */ 18980 18981 sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &work[ 18982 ir], &ldwrkr, dum, &c__1, dum, &c__1, &work[iwork] 18983 , info); 18984 iu = ie + *m; 18985 18986 /* Multiply right singular vectors of L in WORK(IR) by Q 18987 in A, storing result in WORK(IU) and copying to A 18988 (Workspace: need M*M+2*M, prefer M*M+M*N+M) */ 18989 18990 i__2 = *n; 18991 i__3 = chunk; 18992 for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += 18993 i__3) { 18994 /* Computing MIN */ 18995 i__4 = *n - i__ + 1; 18996 blk = f2cmin(i__4,chunk); 18997 sgemm_("N", "N", m, &blk, m, &c_b438, &work[ir], & 18998 ldwrkr, &a_ref(1, i__), lda, &c_b416, &work[ 18999 iu], &ldwrku); 19000 slacpy_("F", m, &blk, &work[iu], &ldwrku, &a_ref(1, 19001 i__), lda); 19002 /* L30: */ 19003 } 19004 19005 } else { 19006 19007 /* Insufficient workspace for a fast algorithm */ 19008 19009 ie = 1; 19010 itauq = ie + *m; 19011 itaup = itauq + *m; 19012 iwork = itaup + *m; 19013 19014 /* Bidiagonalize A 19015 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ 19016 19017 i__3 = *lwork - iwork + 1; 19018 sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[ 19019 itauq], &work[itaup], &work[iwork], &i__3, &ierr); 19020 19021 /* Generate right vectors bidiagonalizing A 19022 (Workspace: need 4*M, prefer 3*M+M*NB) */ 19023 19024 i__3 = *lwork - iwork + 1; 19025 sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & 19026 work[iwork], &i__3, &ierr); 19027 iwork = ie + *m; 19028 19029 /* Perform bidiagonal QR iteration, computing right 19030 singular vectors of A in A 19031 (Workspace: need BDSPAC) */ 19032 19033 sbdsqr_("L", m, n, &c__0, &c__0, &s[1], &work[ie], &a[ 19034 a_offset], lda, dum, &c__1, dum, &c__1, &work[ 19035 iwork], info); 19036 19037 } 19038 19039 } else if (wntvo && wntuas) { 19040 19041 /* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') 19042 M right singular vectors to be overwritten on A and 19043 M left singular vectors to be computed in U 19044 19045 Computing MAX */ 19046 i__3 = *m << 2; 19047 if (*lwork >= *m * *m + f2cmax(i__3,bdspac)) { 19048 19049 /* Sufficient workspace for a fast algorithm */ 19050 19051 ir = 1; 19052 /* Computing MAX */ 19053 i__3 = wrkbl, i__2 = *lda * *n + *m; 19054 if (*lwork >= f2cmax(i__3,i__2) + *lda * *m) { 19055 19056 /* WORK(IU) is LDA by N and WORK(IR) is LDA by M */ 19057 19058 ldwrku = *lda; 19059 chunk = *n; 19060 ldwrkr = *lda; 19061 } else /* if(complicated condition) */ { 19062 /* Computing MAX */ 19063 i__3 = wrkbl, i__2 = *lda * *n + *m; 19064 if (*lwork >= f2cmax(i__3,i__2) + *m * *m) { 19065 19066 /* WORK(IU) is 
LDA by N and WORK(IR) is M by M */ 19067 19068 ldwrku = *lda; 19069 chunk = *n; 19070 ldwrkr = *m; 19071 } else { 19072 19073 /* WORK(IU) is M by CHUNK and WORK(IR) is M by M */ 19074 19075 ldwrku = *m; 19076 chunk = (*lwork - *m * *m - *m) / *m; 19077 ldwrkr = *m; 19078 } 19079 } 19080 itau = ir + ldwrkr * *m; 19081 iwork = itau + *m; 19082 19083 /* Compute A=L*Q 19084 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ 19085 19086 i__3 = *lwork - iwork + 1; 19087 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] 19088 , &i__3, &ierr); 19089 19090 /* Copy L to U, zeroing out above it */ 19091 19092 slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); 19093 i__3 = *m - 1; 19094 i__2 = *m - 1; 19095 slaset_("U", &i__3, &i__2, &c_b416, &c_b416, &u_ref(1, 2), 19096 ldu); 19097 19098 /* Generate Q in A 19099 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ 19100 19101 i__3 = *lwork - iwork + 1; 19102 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ 19103 iwork], &i__3, &ierr); 19104 ie = itau; 19105 itauq = ie + *m; 19106 itaup = itauq + *m; 19107 iwork = itaup + *m; 19108 19109 /* Bidiagonalize L in U, copying result to WORK(IR) 19110 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ 19111 19112 i__3 = *lwork - iwork + 1; 19113 sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[ 19114 itauq], &work[itaup], &work[iwork], &i__3, &ierr); 19115 slacpy_("U", m, m, &u[u_offset], ldu, &work[ir], &ldwrkr); 19116 19117 /* Generate right vectors bidiagonalizing L in WORK(IR) 19118 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */ 19119 19120 i__3 = *lwork - iwork + 1; 19121 sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], & 19122 work[iwork], &i__3, &ierr); 19123 19124 /* Generate left vectors bidiagonalizing L in U 19125 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */ 19126 19127 i__3 = *lwork - iwork + 1; 19128 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], & 19129 work[iwork], &i__3, &ierr); 19130 iwork = ie + *m; 19131 19132 /* Perform bidiagonal QR iteration, computing left 19133 singular vectors of L in U, and computing right 19134 singular vectors of L in WORK(IR) 19135 (Workspace: need M*M+BDSPAC) */ 19136 19137 sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ir], 19138 &ldwrkr, &u[u_offset], ldu, dum, &c__1, &work[ 19139 iwork], info); 19140 iu = ie + *m; 19141 19142 /* Multiply right singular vectors of L in WORK(IR) by Q 19143 in A, storing result in WORK(IU) and copying to A 19144 (Workspace: need M*M+2*M, prefer M*M+M*N+M) */ 19145 19146 i__3 = *n; 19147 i__2 = chunk; 19148 for (i__ = 1; i__2 < 0 ?
i__ >= i__3 : i__ <= i__3; i__ += 19149 i__2) { 19150 /* Computing MIN */ 19151 i__4 = *n - i__ + 1; 19152 blk = f2cmin(i__4,chunk); 19153 sgemm_("N", "N", m, &blk, m, &c_b438, &work[ir], & 19154 ldwrkr, &a_ref(1, i__), lda, &c_b416, &work[ 19155 iu], &ldwrku); 19156 slacpy_("F", m, &blk, &work[iu], &ldwrku, &a_ref(1, 19157 i__), lda); 19158 /* L40: */ 19159 } 19160 19161 } else { 19162 19163 /* Insufficient workspace for a fast algorithm */ 19164 19165 itau = 1; 19166 iwork = itau + *m; 19167 19168 /* Compute A=L*Q 19169 (Workspace: need 2*M, prefer M+M*NB) */ 19170 19171 i__2 = *lwork - iwork + 1; 19172 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] 19173 , &i__2, &ierr); 19174 19175 /* Copy L to U, zeroing out above it */ 19176 19177 slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); 19178 i__2 = *m - 1; 19179 i__3 = *m - 1; 19180 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &u_ref(1, 2), 19181 ldu); 19182 19183 /* Generate Q in A 19184 (Workspace: need 2*M, prefer M+M*NB) */ 19185 19186 i__2 = *lwork - iwork + 1; 19187 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ 19188 iwork], &i__2, &ierr); 19189 ie = itau; 19190 itauq = ie + *m; 19191 itaup = itauq + *m; 19192 iwork = itaup + *m; 19193 19194 /* Bidiagonalize L in U 19195 (Workspace: need 4*M, prefer 3*M+2*M*NB) */ 19196 19197 i__2 = *lwork - iwork + 1; 19198 sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[ 19199 itauq], &work[itaup], &work[iwork], &i__2, &ierr); 19200 19201 /* Multiply right vectors bidiagonalizing L by Q in A 19202 (Workspace: need 3*M+N, prefer 3*M+N*NB) */ 19203 19204 i__2 = *lwork - iwork + 1; 19205 sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &work[ 19206 itaup], &a[a_offset], lda, &work[iwork], &i__2, & 19207 ierr); 19208 19209 /* Generate left vectors bidiagonalizing L in U 19210 (Workspace: need 4*M, prefer 3*M+M*NB) */ 19211 19212 i__2 = *lwork - iwork + 1; 19213 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], & 19214 work[iwork], &i__2, &ierr); 19215 iwork = ie + *m; 19216 19217 /* Perform bidiagonal QR iteration, computing left 19218 singular vectors of A in U and computing right 19219 singular vectors of A in A 19220 (Workspace: need BDSPAC) */ 19221 19222 sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &a[ 19223 a_offset], lda, &u[u_offset], ldu, dum, &c__1, & 19224 work[iwork], info); 19225 19226 } 19227 19228 } else if (wntvs) { 19229 19230 if (wntun) { 19231 19232 /* Path 4t(N much larger than M, JOBU='N', JOBVT='S') 19233 M right singular vectors to be computed in VT and 19234 no left singular vectors to be computed 19235 19236 Computing MAX */ 19237 i__2 = *m << 2; 19238 if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) { 19239 19240 /* Sufficient workspace for a fast algorithm */ 19241 19242 ir = 1; 19243 if (*lwork >= wrkbl + *lda * *m) { 19244 19245 /* WORK(IR) is LDA by M */ 19246 19247 ldwrkr = *lda; 19248 } else { 19249 19250 /* WORK(IR) is M by M */ 19251 19252 ldwrkr = *m; 19253 } 19254 itau = ir + ldwrkr * *m; 19255 iwork = itau + *m; 19256 19257 /* Compute A=L*Q 19258 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ 19259 19260 i__2 = *lwork - iwork + 1; 19261 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ 19262 iwork], &i__2, &ierr); 19263 19264 /* Copy L to WORK(IR), zeroing out above it */ 19265 19266 slacpy_("L", m, m, &a[a_offset], lda, &work[ir], & 19267 ldwrkr); 19268 i__2 = *m - 1; 19269 i__3 = *m - 1; 19270 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[ir 19271 + ldwrkr], &ldwrkr); 19272 19273 /* Generate Q in A 19274 
(Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ 19275 19276 i__2 = *lwork - iwork + 1; 19277 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], & 19278 work[iwork], &i__2, &ierr); 19279 ie = itau; 19280 itauq = ie + *m; 19281 itaup = itauq + *m; 19282 iwork = itaup + *m; 19283 19284 /* Bidiagonalize L in WORK(IR) 19285 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ 19286 19287 i__2 = *lwork - iwork + 1; 19288 sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], & 19289 work[itauq], &work[itaup], &work[iwork], & 19290 i__2, &ierr); 19291 19292 /* Generate right vectors bidiagonalizing L in 19293 WORK(IR) 19294 (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) */ 19295 19296 i__2 = *lwork - iwork + 1; 19297 sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup] 19298 , &work[iwork], &i__2, &ierr); 19299 iwork = ie + *m; 19300 19301 /* Perform bidiagonal QR iteration, computing right 19302 singular vectors of L in WORK(IR) 19303 (Workspace: need M*M+BDSPAC) */ 19304 19305 sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], & 19306 work[ir], &ldwrkr, dum, &c__1, dum, &c__1, & 19307 work[iwork], info); 19308 19309 /* Multiply right singular vectors of L in WORK(IR) by 19310 Q in A, storing result in VT 19311 (Workspace: need M*M) */ 19312 19313 sgemm_("N", "N", m, n, m, &c_b438, &work[ir], &ldwrkr, 19314 &a[a_offset], lda, &c_b416, &vt[vt_offset], 19315 ldvt); 19316 19317 } else { 19318 19319 /* Insufficient workspace for a fast algorithm */ 19320 19321 itau = 1; 19322 iwork = itau + *m; 19323 19324 /* Compute A=L*Q 19325 (Workspace: need 2*M, prefer M+M*NB) */ 19326 19327 i__2 = *lwork - iwork + 1; 19328 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ 19329 iwork], &i__2, &ierr); 19330 19331 /* Copy result to VT */ 19332 19333 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 19334 ldvt); 19335 19336 /* Generate Q in VT 19337 (Workspace: need 2*M, prefer M+M*NB) */ 19338 19339 i__2 = *lwork - iwork + 1; 19340 sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & 19341 work[iwork], &i__2, &ierr); 19342 ie = itau; 19343 itauq = ie + *m; 19344 itaup = itauq + *m; 19345 iwork = itaup + *m; 19346 19347 /* Zero out above L in A */ 19348 19349 i__2 = *m - 1; 19350 i__3 = *m - 1; 19351 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &a_ref(1, 19352 2), lda); 19353 19354 /* Bidiagonalize L in A 19355 (Workspace: need 4*M, prefer 3*M+2*M*NB) */ 19356 19357 i__2 = *lwork - iwork + 1; 19358 sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & 19359 work[itauq], &work[itaup], &work[iwork], & 19360 i__2, &ierr); 19361 19362 /* Multiply right vectors bidiagonalizing L by Q in VT 19363 (Workspace: need 3*M+N, prefer 3*M+N*NB) */ 19364 19365 i__2 = *lwork - iwork + 1; 19366 sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & 19367 work[itaup], &vt[vt_offset], ldvt, &work[ 19368 iwork], &i__2, &ierr); 19369 iwork = ie + *m; 19370 19371 /* Perform bidiagonal QR iteration, computing right 19372 singular vectors of A in VT 19373 (Workspace: need BDSPAC) */ 19374 19375 sbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], & 19376 vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, & 19377 work[iwork], info); 19378 19379 } 19380 19381 } else if (wntuo) { 19382 19383 /* Path 5t(N much larger than M, JOBU='O', JOBVT='S') 19384 M right singular vectors to be computed in VT and 19385 M left singular vectors to be overwritten on A 19386 19387 Computing MAX */ 19388 i__2 = *m << 2; 19389 if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) { 19390 19391 /* Sufficient workspace for a fast algorithm */ 19392 19393 iu = 1; 
19394 if (*lwork >= wrkbl + (*lda << 1) * *m) { 19395 19396 /* WORK(IU) is LDA by M and WORK(IR) is LDA by M */ 19397 19398 ldwrku = *lda; 19399 ir = iu + ldwrku * *m; 19400 ldwrkr = *lda; 19401 } else if (*lwork >= wrkbl + (*lda + *m) * *m) { 19402 19403 /* WORK(IU) is LDA by M and WORK(IR) is M by M */ 19404 19405 ldwrku = *lda; 19406 ir = iu + ldwrku * *m; 19407 ldwrkr = *m; 19408 } else { 19409 19410 /* WORK(IU) is M by M and WORK(IR) is M by M */ 19411 19412 ldwrku = *m; 19413 ir = iu + ldwrku * *m; 19414 ldwrkr = *m; 19415 } 19416 itau = ir + ldwrkr * *m; 19417 iwork = itau + *m; 19418 19419 /* Compute A=L*Q 19420 (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */ 19421 19422 i__2 = *lwork - iwork + 1; 19423 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ 19424 iwork], &i__2, &ierr); 19425 19426 /* Copy L to WORK(IU), zeroing out below it */ 19427 19428 slacpy_("L", m, m, &a[a_offset], lda, &work[iu], & 19429 ldwrku); 19430 i__2 = *m - 1; 19431 i__3 = *m - 1; 19432 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[iu 19433 + ldwrku], &ldwrku); 19434 19435 /* Generate Q in A 19436 (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */ 19437 19438 i__2 = *lwork - iwork + 1; 19439 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], & 19440 work[iwork], &i__2, &ierr); 19441 ie = itau; 19442 itauq = ie + *m; 19443 itaup = itauq + *m; 19444 iwork = itaup + *m; 19445 19446 /* Bidiagonalize L in WORK(IU), copying result to 19447 WORK(IR) 19448 (Workspace: need 2*M*M+4*M, 19449 prefer 2*M*M+3*M+2*M*NB) */ 19450 19451 i__2 = *lwork - iwork + 1; 19452 sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & 19453 work[itauq], &work[itaup], &work[iwork], & 19454 i__2, &ierr); 19455 slacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], & 19456 ldwrkr); 19457 19458 /* Generate right bidiagonalizing vectors in WORK(IU) 19459 (Workspace: need 2*M*M+4*M-1, 19460 prefer 2*M*M+3*M+(M-1)*NB) */ 19461 19462 i__2 = *lwork - iwork + 1; 19463 sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] 19464 , &work[iwork], &i__2, &ierr); 19465 19466 /* Generate left bidiagonalizing vectors in WORK(IR) 19467 (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */ 19468 19469 i__2 = *lwork - iwork + 1; 19470 sorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq] 19471 , &work[iwork], &i__2, &ierr); 19472 iwork = ie + *m; 19473 19474 /* Perform bidiagonal QR iteration, computing left 19475 singular vectors of L in WORK(IR) and computing 19476 right singular vectors of L in WORK(IU) 19477 (Workspace: need 2*M*M+BDSPAC) */ 19478 19479 sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ 19480 iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, 19481 &work[iwork], info); 19482 19483 /* Multiply right singular vectors of L in WORK(IU) by 19484 Q in A, storing result in VT 19485 (Workspace: need M*M) */ 19486 19487 sgemm_("N", "N", m, n, m, &c_b438, &work[iu], &ldwrku, 19488 &a[a_offset], lda, &c_b416, &vt[vt_offset], 19489 ldvt); 19490 19491 /* Copy left singular vectors of L to A 19492 (Workspace: need M*M) */ 19493 19494 slacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], 19495 lda); 19496 19497 } else { 19498 19499 /* Insufficient workspace for a fast algorithm */ 19500 19501 itau = 1; 19502 iwork = itau + *m; 19503 19504 /* Compute A=L*Q, copying result to VT 19505 (Workspace: need 2*M, prefer M+M*NB) */ 19506 19507 i__2 = *lwork - iwork + 1; 19508 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ 19509 iwork], &i__2, &ierr); 19510 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 19511 ldvt); 19512 19513 /* Generate Q 
in VT 19514 (Workspace: need 2*M, prefer M+M*NB) */ 19515 19516 i__2 = *lwork - iwork + 1; 19517 sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & 19518 work[iwork], &i__2, &ierr); 19519 ie = itau; 19520 itauq = ie + *m; 19521 itaup = itauq + *m; 19522 iwork = itaup + *m; 19523 19524 /* Zero out above L in A */ 19525 19526 i__2 = *m - 1; 19527 i__3 = *m - 1; 19528 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &a_ref(1, 19529 2), lda); 19530 19531 /* Bidiagonalize L in A 19532 (Workspace: need 4*M, prefer 3*M+2*M*NB) */ 19533 19534 i__2 = *lwork - iwork + 1; 19535 sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & 19536 work[itauq], &work[itaup], &work[iwork], & 19537 i__2, &ierr); 19538 19539 /* Multiply right vectors bidiagonalizing L by Q in VT 19540 (Workspace: need 3*M+N, prefer 3*M+N*NB) */ 19541 19542 i__2 = *lwork - iwork + 1; 19543 sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & 19544 work[itaup], &vt[vt_offset], ldvt, &work[ 19545 iwork], &i__2, &ierr); 19546 19547 /* Generate left bidiagonalizing vectors of L in A 19548 (Workspace: need 4*M, prefer 3*M+M*NB) */ 19549 19550 i__2 = *lwork - iwork + 1; 19551 sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], 19552 &work[iwork], &i__2, &ierr); 19553 iwork = ie + *m; 19554 19555 /* Perform bidiagonal QR iteration, compute left 19556 singular vectors of A in A and compute right 19557 singular vectors of A in VT 19558 (Workspace: need BDSPAC) */ 19559 19560 sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ 19561 vt_offset], ldvt, &a[a_offset], lda, dum, & 19562 c__1, &work[iwork], info); 19563 19564 } 19565 19566 } else if (wntuas) { 19567 19568 /* Path 6t(N much larger than M, JOBU='S' or 'A', 19569 JOBVT='S') 19570 M right singular vectors to be computed in VT and 19571 M left singular vectors to be computed in U 19572 19573 Computing MAX */ 19574 i__2 = *m << 2; 19575 if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) { 19576 19577 /* Sufficient workspace for a fast algorithm */ 19578 19579 iu = 1; 19580 if (*lwork >= wrkbl + *lda * *m) { 19581 19582 /* WORK(IU) is LDA by N */ 19583 19584 ldwrku = *lda; 19585 } else { 19586 19587 /* WORK(IU) is LDA by M */ 19588 19589 ldwrku = *m; 19590 } 19591 itau = iu + ldwrku * *m; 19592 iwork = itau + *m; 19593 19594 /* Compute A=L*Q 19595 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ 19596 19597 i__2 = *lwork - iwork + 1; 19598 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ 19599 iwork], &i__2, &ierr); 19600 19601 /* Copy L to WORK(IU), zeroing out above it */ 19602 19603 slacpy_("L", m, m, &a[a_offset], lda, &work[iu], & 19604 ldwrku); 19605 i__2 = *m - 1; 19606 i__3 = *m - 1; 19607 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[iu 19608 + ldwrku], &ldwrku); 19609 19610 /* Generate Q in A 19611 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ 19612 19613 i__2 = *lwork - iwork + 1; 19614 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], & 19615 work[iwork], &i__2, &ierr); 19616 ie = itau; 19617 itauq = ie + *m; 19618 itaup = itauq + *m; 19619 iwork = itaup + *m; 19620 19621 /* Bidiagonalize L in WORK(IU), copying result to U 19622 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ 19623 19624 i__2 = *lwork - iwork + 1; 19625 sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & 19626 work[itauq], &work[itaup], &work[iwork], & 19627 i__2, &ierr); 19628 slacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], 19629 ldu); 19630 19631 /* Generate right bidiagonalizing vectors in WORK(IU) 19632 (Workspace: need M*M+4*M-1, 19633 prefer M*M+3*M+(M-1)*NB) */ 19634 19635 i__2 = *lwork - 
iwork + 1; 19636 sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] 19637 , &work[iwork], &i__2, &ierr); 19638 19639 /* Generate left bidiagonalizing vectors in U 19640 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */ 19641 19642 i__2 = *lwork - iwork + 1; 19643 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], 19644 &work[iwork], &i__2, &ierr); 19645 iwork = ie + *m; 19646 19647 /* Perform bidiagonal QR iteration, computing left 19648 singular vectors of L in U and computing right 19649 singular vectors of L in WORK(IU) 19650 (Workspace: need M*M+BDSPAC) */ 19651 19652 sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ 19653 iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, & 19654 work[iwork], info); 19655 19656 /* Multiply right singular vectors of L in WORK(IU) by 19657 Q in A, storing result in VT 19658 (Workspace: need M*M) */ 19659 19660 sgemm_("N", "N", m, n, m, &c_b438, &work[iu], &ldwrku, 19661 &a[a_offset], lda, &c_b416, &vt[vt_offset], 19662 ldvt); 19663 19664 } else { 19665 19666 /* Insufficient workspace for a fast algorithm */ 19667 19668 itau = 1; 19669 iwork = itau + *m; 19670 19671 /* Compute A=L*Q, copying result to VT 19672 (Workspace: need 2*M, prefer M+M*NB) */ 19673 19674 i__2 = *lwork - iwork + 1; 19675 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ 19676 iwork], &i__2, &ierr); 19677 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 19678 ldvt); 19679 19680 /* Generate Q in VT 19681 (Workspace: need 2*M, prefer M+M*NB) */ 19682 19683 i__2 = *lwork - iwork + 1; 19684 sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & 19685 work[iwork], &i__2, &ierr); 19686 19687 /* Copy L to U, zeroing out above it */ 19688 19689 slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], 19690 ldu); 19691 i__2 = *m - 1; 19692 i__3 = *m - 1; 19693 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &u_ref(1, 19694 2), ldu); 19695 ie = itau; 19696 itauq = ie + *m; 19697 itaup = itauq + *m; 19698 iwork = itaup + *m; 19699 19700 /* Bidiagonalize L in U 19701 (Workspace: need 4*M, prefer 3*M+2*M*NB) */ 19702 19703 i__2 = *lwork - iwork + 1; 19704 sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], & 19705 work[itauq], &work[itaup], &work[iwork], & 19706 i__2, &ierr); 19707 19708 /* Multiply right bidiagonalizing vectors in U by Q 19709 in VT 19710 (Workspace: need 3*M+N, prefer 3*M+N*NB) */ 19711 19712 i__2 = *lwork - iwork + 1; 19713 sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, & 19714 work[itaup], &vt[vt_offset], ldvt, &work[ 19715 iwork], &i__2, &ierr); 19716 19717 /* Generate left bidiagonalizing vectors in U 19718 (Workspace: need 4*M, prefer 3*M+M*NB) */ 19719 19720 i__2 = *lwork - iwork + 1; 19721 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], 19722 &work[iwork], &i__2, &ierr); 19723 iwork = ie + *m; 19724 19725 /* Perform bidiagonal QR iteration, computing left 19726 singular vectors of A in U and computing right 19727 singular vectors of A in VT 19728 (Workspace: need BDSPAC) */ 19729 19730 sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ 19731 vt_offset], ldvt, &u[u_offset], ldu, dum, & 19732 c__1, &work[iwork], info); 19733 19734 } 19735 19736 } 19737 19738 } else if (wntva) { 19739 19740 if (wntun) { 19741 19742 /* Path 7t(N much larger than M, JOBU='N', JOBVT='A') 19743 N right singular vectors to be computed in VT and 19744 no left singular vectors to be computed 19745 19746 Computing MAX */ 19747 i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3); 19748 if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) { 19749 19750 /* Sufficient workspace for a fast 
algorithm */ 19751 19752 ir = 1; 19753 if (*lwork >= wrkbl + *lda * *m) { 19754 19755 /* WORK(IR) is LDA by M */ 19756 19757 ldwrkr = *lda; 19758 } else { 19759 19760 /* WORK(IR) is M by M */ 19761 19762 ldwrkr = *m; 19763 } 19764 itau = ir + ldwrkr * *m; 19765 iwork = itau + *m; 19766 19767 /* Compute A=L*Q, copying result to VT 19768 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ 19769 19770 i__2 = *lwork - iwork + 1; 19771 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ 19772 iwork], &i__2, &ierr); 19773 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 19774 ldvt); 19775 19776 /* Copy L to WORK(IR), zeroing out above it */ 19777 19778 slacpy_("L", m, m, &a[a_offset], lda, &work[ir], & 19779 ldwrkr); 19780 i__2 = *m - 1; 19781 i__3 = *m - 1; 19782 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[ir 19783 + ldwrkr], &ldwrkr); 19784 19785 /* Generate Q in VT 19786 (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */ 19787 19788 i__2 = *lwork - iwork + 1; 19789 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & 19790 work[iwork], &i__2, &ierr); 19791 ie = itau; 19792 itauq = ie + *m; 19793 itaup = itauq + *m; 19794 iwork = itaup + *m; 19795 19796 /* Bidiagonalize L in WORK(IR) 19797 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ 19798 19799 i__2 = *lwork - iwork + 1; 19800 sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], & 19801 work[itauq], &work[itaup], &work[iwork], & 19802 i__2, &ierr); 19803 19804 /* Generate right bidiagonalizing vectors in WORK(IR) 19805 (Workspace: need M*M+4*M-1, 19806 prefer M*M+3*M+(M-1)*NB) */ 19807 19808 i__2 = *lwork - iwork + 1; 19809 sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup] 19810 , &work[iwork], &i__2, &ierr); 19811 iwork = ie + *m; 19812 19813 /* Perform bidiagonal QR iteration, computing right 19814 singular vectors of L in WORK(IR) 19815 (Workspace: need M*M+BDSPAC) */ 19816 19817 sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], & 19818 work[ir], &ldwrkr, dum, &c__1, dum, &c__1, & 19819 work[iwork], info); 19820 19821 /* Multiply right singular vectors of L in WORK(IR) by 19822 Q in VT, storing result in A 19823 (Workspace: need M*M) */ 19824 19825 sgemm_("N", "N", m, n, m, &c_b438, &work[ir], &ldwrkr, 19826 &vt[vt_offset], ldvt, &c_b416, &a[a_offset], 19827 lda); 19828 19829 /* Copy right singular vectors of A from A to VT */ 19830 19831 slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 19832 ldvt); 19833 19834 } else { 19835 19836 /* Insufficient workspace for a fast algorithm */ 19837 19838 itau = 1; 19839 iwork = itau + *m; 19840 19841 /* Compute A=L*Q, copying result to VT 19842 (Workspace: need 2*M, prefer M+M*NB) */ 19843 19844 i__2 = *lwork - iwork + 1; 19845 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ 19846 iwork], &i__2, &ierr); 19847 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 19848 ldvt); 19849 19850 /* Generate Q in VT 19851 (Workspace: need M+N, prefer M+N*NB) */ 19852 19853 i__2 = *lwork - iwork + 1; 19854 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & 19855 work[iwork], &i__2, &ierr); 19856 ie = itau; 19857 itauq = ie + *m; 19858 itaup = itauq + *m; 19859 iwork = itaup + *m; 19860 19861 /* Zero out above L in A */ 19862 19863 i__2 = *m - 1; 19864 i__3 = *m - 1; 19865 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &a_ref(1, 19866 2), lda); 19867 19868 /* Bidiagonalize L in A 19869 (Workspace: need 4*M, prefer 3*M+2*M*NB) */ 19870 19871 i__2 = *lwork - iwork + 1; 19872 sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & 19873 work[itauq], &work[itaup], &work[iwork], & 19874 
i__2, &ierr); 19875 19876 /* Multiply right bidiagonalizing vectors in A by Q 19877 in VT 19878 (Workspace: need 3*M+N, prefer 3*M+N*NB) */ 19879 19880 i__2 = *lwork - iwork + 1; 19881 sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & 19882 work[itaup], &vt[vt_offset], ldvt, &work[ 19883 iwork], &i__2, &ierr); 19884 iwork = ie + *m; 19885 19886 /* Perform bidiagonal QR iteration, computing right 19887 singular vectors of A in VT 19888 (Workspace: need BDSPAC) */ 19889 19890 sbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], & 19891 vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, & 19892 work[iwork], info); 19893 19894 } 19895 19896 } else if (wntuo) { 19897 19898 /* Path 8t(N much larger than M, JOBU='O', JOBVT='A') 19899 N right singular vectors to be computed in VT and 19900 M left singular vectors to be overwritten on A 19901 19902 Computing MAX */ 19903 i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3); 19904 if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) { 19905 19906 /* Sufficient workspace for a fast algorithm */ 19907 19908 iu = 1; 19909 if (*lwork >= wrkbl + (*lda << 1) * *m) { 19910 19911 /* WORK(IU) is LDA by M and WORK(IR) is LDA by M */ 19912 19913 ldwrku = *lda; 19914 ir = iu + ldwrku * *m; 19915 ldwrkr = *lda; 19916 } else if (*lwork >= wrkbl + (*lda + *m) * *m) { 19917 19918 /* WORK(IU) is LDA by M and WORK(IR) is M by M */ 19919 19920 ldwrku = *lda; 19921 ir = iu + ldwrku * *m; 19922 ldwrkr = *m; 19923 } else { 19924 19925 /* WORK(IU) is M by M and WORK(IR) is M by M */ 19926 19927 ldwrku = *m; 19928 ir = iu + ldwrku * *m; 19929 ldwrkr = *m; 19930 } 19931 itau = ir + ldwrkr * *m; 19932 iwork = itau + *m; 19933 19934 /* Compute A=L*Q, copying result to VT 19935 (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */ 19936 19937 i__2 = *lwork - iwork + 1; 19938 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ 19939 iwork], &i__2, &ierr); 19940 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 19941 ldvt); 19942 19943 /* Generate Q in VT 19944 (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) */ 19945 19946 i__2 = *lwork - iwork + 1; 19947 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & 19948 work[iwork], &i__2, &ierr); 19949 19950 /* Copy L to WORK(IU), zeroing out above it */ 19951 19952 slacpy_("L", m, m, &a[a_offset], lda, &work[iu], & 19953 ldwrku); 19954 i__2 = *m - 1; 19955 i__3 = *m - 1; 19956 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[iu 19957 + ldwrku], &ldwrku); 19958 ie = itau; 19959 itauq = ie + *m; 19960 itaup = itauq + *m; 19961 iwork = itaup + *m; 19962 19963 /* Bidiagonalize L in WORK(IU), copying result to 19964 WORK(IR) 19965 (Workspace: need 2*M*M+4*M, 19966 prefer 2*M*M+3*M+2*M*NB) */ 19967 19968 i__2 = *lwork - iwork + 1; 19969 sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & 19970 work[itauq], &work[itaup], &work[iwork], & 19971 i__2, &ierr); 19972 slacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], & 19973 ldwrkr); 19974 19975 /* Generate right bidiagonalizing vectors in WORK(IU) 19976 (Workspace: need 2*M*M+4*M-1, 19977 prefer 2*M*M+3*M+(M-1)*NB) */ 19978 19979 i__2 = *lwork - iwork + 1; 19980 sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] 19981 , &work[iwork], &i__2, &ierr); 19982 19983 /* Generate left bidiagonalizing vectors in WORK(IR) 19984 (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */ 19985 19986 i__2 = *lwork - iwork + 1; 19987 sorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq] 19988 , &work[iwork], &i__2, &ierr); 19989 iwork = ie + *m; 19990 19991 /* Perform bidiagonal QR iteration, computing 
left 19992 singular vectors of L in WORK(IR) and computing 19993 right singular vectors of L in WORK(IU) 19994 (Workspace: need 2*M*M+BDSPAC) */ 19995 19996 sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ 19997 iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, 19998 &work[iwork], info); 19999 20000 /* Multiply right singular vectors of L in WORK(IU) by 20001 Q in VT, storing result in A 20002 (Workspace: need M*M) */ 20003 20004 sgemm_("N", "N", m, n, m, &c_b438, &work[iu], &ldwrku, 20005 &vt[vt_offset], ldvt, &c_b416, &a[a_offset], 20006 lda); 20007 20008 /* Copy right singular vectors of A from A to VT */ 20009 20010 slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 20011 ldvt); 20012 20013 /* Copy left singular vectors of A from WORK(IR) to A */ 20014 20015 slacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], 20016 lda); 20017 20018 } else { 20019 20020 /* Insufficient workspace for a fast algorithm */ 20021 20022 itau = 1; 20023 iwork = itau + *m; 20024 20025 /* Compute A=L*Q, copying result to VT 20026 (Workspace: need 2*M, prefer M+M*NB) */ 20027 20028 i__2 = *lwork - iwork + 1; 20029 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ 20030 iwork], &i__2, &ierr); 20031 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 20032 ldvt); 20033 20034 /* Generate Q in VT 20035 (Workspace: need M+N, prefer M+N*NB) */ 20036 20037 i__2 = *lwork - iwork + 1; 20038 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & 20039 work[iwork], &i__2, &ierr); 20040 ie = itau; 20041 itauq = ie + *m; 20042 itaup = itauq + *m; 20043 iwork = itaup + *m; 20044 20045 /* Zero out above L in A */ 20046 20047 i__2 = *m - 1; 20048 i__3 = *m - 1; 20049 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &a_ref(1, 20050 2), lda); 20051 20052 /* Bidiagonalize L in A 20053 (Workspace: need 4*M, prefer 3*M+2*M*NB) */ 20054 20055 i__2 = *lwork - iwork + 1; 20056 sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & 20057 work[itauq], &work[itaup], &work[iwork], & 20058 i__2, &ierr); 20059 20060 /* Multiply right bidiagonalizing vectors in A by Q 20061 in VT 20062 (Workspace: need 3*M+N, prefer 3*M+N*NB) */ 20063 20064 i__2 = *lwork - iwork + 1; 20065 sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & 20066 work[itaup], &vt[vt_offset], ldvt, &work[ 20067 iwork], &i__2, &ierr); 20068 20069 /* Generate left bidiagonalizing vectors in A 20070 (Workspace: need 4*M, prefer 3*M+M*NB) */ 20071 20072 i__2 = *lwork - iwork + 1; 20073 sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], 20074 &work[iwork], &i__2, &ierr); 20075 iwork = ie + *m; 20076 20077 /* Perform bidiagonal QR iteration, computing left 20078 singular vectors of A in A and computing right 20079 singular vectors of A in VT 20080 (Workspace: need BDSPAC) */ 20081 20082 sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ 20083 vt_offset], ldvt, &a[a_offset], lda, dum, & 20084 c__1, &work[iwork], info); 20085 20086 } 20087 20088 } else if (wntuas) { 20089 20090 /* Path 9t(N much larger than M, JOBU='S' or 'A', 20091 JOBVT='A') 20092 N right singular vectors to be computed in VT and 20093 M left singular vectors to be computed in U 20094 20095 Computing MAX */ 20096 i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3); 20097 if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) { 20098 20099 /* Sufficient workspace for a fast algorithm */ 20100 20101 iu = 1; 20102 if (*lwork >= wrkbl + *lda * *m) { 20103 20104 /* WORK(IU) is LDA by M */ 20105 20106 ldwrku = *lda; 20107 } else { 20108 20109 /* WORK(IU) is M by M */ 20110 20111 ldwrku = *m; 20112 } 20113 itau = iu + 
ldwrku * *m; 20114 iwork = itau + *m; 20115 20116 /* Compute A=L*Q, copying result to VT 20117 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ 20118 20119 i__2 = *lwork - iwork + 1; 20120 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ 20121 iwork], &i__2, &ierr); 20122 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 20123 ldvt); 20124 20125 /* Generate Q in VT 20126 (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */ 20127 20128 i__2 = *lwork - iwork + 1; 20129 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & 20130 work[iwork], &i__2, &ierr); 20131 20132 /* Copy L to WORK(IU), zeroing out above it */ 20133 20134 slacpy_("L", m, m, &a[a_offset], lda, &work[iu], & 20135 ldwrku); 20136 i__2 = *m - 1; 20137 i__3 = *m - 1; 20138 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[iu 20139 + ldwrku], &ldwrku); 20140 ie = itau; 20141 itauq = ie + *m; 20142 itaup = itauq + *m; 20143 iwork = itaup + *m; 20144 20145 /* Bidiagonalize L in WORK(IU), copying result to U 20146 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ 20147 20148 i__2 = *lwork - iwork + 1; 20149 sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & 20150 work[itauq], &work[itaup], &work[iwork], & 20151 i__2, &ierr); 20152 slacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], 20153 ldu); 20154 20155 /* Generate right bidiagonalizing vectors in WORK(IU) 20156 (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) */ 20157 20158 i__2 = *lwork - iwork + 1; 20159 sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] 20160 , &work[iwork], &i__2, &ierr); 20161 20162 /* Generate left bidiagonalizing vectors in U 20163 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */ 20164 20165 i__2 = *lwork - iwork + 1; 20166 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], 20167 &work[iwork], &i__2, &ierr); 20168 iwork = ie + *m; 20169 20170 /* Perform bidiagonal QR iteration, computing left 20171 singular vectors of L in U and computing right 20172 singular vectors of L in WORK(IU) 20173 (Workspace: need M*M+BDSPAC) */ 20174 20175 sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ 20176 iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, & 20177 work[iwork], info); 20178 20179 /* Multiply right singular vectors of L in WORK(IU) by 20180 Q in VT, storing result in A 20181 (Workspace: need M*M) */ 20182 20183 sgemm_("N", "N", m, n, m, &c_b438, &work[iu], &ldwrku, 20184 &vt[vt_offset], ldvt, &c_b416, &a[a_offset], 20185 lda); 20186 20187 /* Copy right singular vectors of A from A to VT */ 20188 20189 slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 20190 ldvt); 20191 20192 } else { 20193 20194 /* Insufficient workspace for a fast algorithm */ 20195 20196 itau = 1; 20197 iwork = itau + *m; 20198 20199 /* Compute A=L*Q, copying result to VT 20200 (Workspace: need 2*M, prefer M+M*NB) */ 20201 20202 i__2 = *lwork - iwork + 1; 20203 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ 20204 iwork], &i__2, &ierr); 20205 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 20206 ldvt); 20207 20208 /* Generate Q in VT 20209 (Workspace: need M+N, prefer M+N*NB) */ 20210 20211 i__2 = *lwork - iwork + 1; 20212 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & 20213 work[iwork], &i__2, &ierr); 20214 20215 /* Copy L to U, zeroing out above it */ 20216 20217 slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], 20218 ldu); 20219 i__2 = *m - 1; 20220 i__3 = *m - 1; 20221 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &u_ref(1, 20222 2), ldu); 20223 ie = itau; 20224 itauq = ie + *m; 20225 itaup = itauq + *m; 20226 iwork = itaup + *m; 20227 20228 /* 
Bidiagonalize L in U 20229 (Workspace: need 4*M, prefer 3*M+2*M*NB) */ 20230 20231 i__2 = *lwork - iwork + 1; 20232 sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], & 20233 work[itauq], &work[itaup], &work[iwork], & 20234 i__2, &ierr); 20235 20236 /* Multiply right bidiagonalizing vectors in U by Q 20237 in VT 20238 (Workspace: need 3*M+N, prefer 3*M+N*NB) */ 20239 20240 i__2 = *lwork - iwork + 1; 20241 sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, & 20242 work[itaup], &vt[vt_offset], ldvt, &work[ 20243 iwork], &i__2, &ierr); 20244 20245 /* Generate left bidiagonalizing vectors in U 20246 (Workspace: need 4*M, prefer 3*M+M*NB) */ 20247 20248 i__2 = *lwork - iwork + 1; 20249 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], 20250 &work[iwork], &i__2, &ierr); 20251 iwork = ie + *m; 20252 20253 /* Perform bidiagonal QR iteration, computing left 20254 singular vectors of A in U and computing right 20255 singular vectors of A in VT 20256 (Workspace: need BDSPAC) */ 20257 20258 sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ 20259 vt_offset], ldvt, &u[u_offset], ldu, dum, & 20260 c__1, &work[iwork], info); 20261 20262 } 20263 20264 } 20265 20266 } 20267 20268 } else { 20269 20270 /* N .LT. MNTHR 20271 20272 Path 10t(N greater than M, but not much larger) 20273 Reduce to bidiagonal form without LQ decomposition */ 20274 20275 ie = 1; 20276 itauq = ie + *m; 20277 itaup = itauq + *m; 20278 iwork = itaup + *m; 20279 20280 /* Bidiagonalize A 20281 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ 20282 20283 i__2 = *lwork - iwork + 1; 20284 sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & 20285 work[itaup], &work[iwork], &i__2, &ierr); 20286 if (wntuas) { 20287 20288 /* If left singular vectors desired in U, copy result to U 20289 and generate left bidiagonalizing vectors in U 20290 (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */ 20291 20292 slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); 20293 i__2 = *lwork - iwork + 1; 20294 sorgbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ 20295 iwork], &i__2, &ierr); 20296 } 20297 if (wntvas) { 20298 20299 /* If right singular vectors desired in VT, copy result to 20300 VT and generate right bidiagonalizing vectors in VT 20301 (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB) */ 20302 20303 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); 20304 if (wntva) { 20305 nrvt = *n; 20306 } 20307 if (wntvs) { 20308 nrvt = *m; 20309 } 20310 i__2 = *lwork - iwork + 1; 20311 sorgbr_("P", &nrvt, n, m, &vt[vt_offset], ldvt, &work[itaup], 20312 &work[iwork], &i__2, &ierr); 20313 } 20314 if (wntuo) { 20315 20316 /* If left singular vectors desired in A, generate left 20317 bidiagonalizing vectors in A 20318 (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */ 20319 20320 i__2 = *lwork - iwork + 1; 20321 sorgbr_("Q", m, m, n, &a[a_offset], lda, &work[itauq], &work[ 20322 iwork], &i__2, &ierr); 20323 } 20324 if (wntvo) { 20325 20326 /* If right singular vectors desired in A, generate right 20327 bidiagonalizing vectors in A 20328 (Workspace: need 4*M, prefer 3*M+M*NB) */ 20329 20330 i__2 = *lwork - iwork + 1; 20331 sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ 20332 iwork], &i__2, &ierr); 20333 } 20334 iwork = ie + *m; 20335 if (wntuas || wntuo) { 20336 nru = *m; 20337 } 20338 if (wntun) { 20339 nru = 0; 20340 } 20341 if (wntvas || wntvo) { 20342 ncvt = *n; 20343 } 20344 if (wntvn) { 20345 ncvt = 0; 20346 } 20347 if (! wntuo && ! 
wntvo) { 20348 20349 /* Perform bidiagonal QR iteration, if desired, computing 20350 left singular vectors in U and computing right singular 20351 vectors in VT 20352 (Workspace: need BDSPAC) */ 20353 20354 sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ 20355 vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, & 20356 work[iwork], info); 20357 } else if (! wntuo && wntvo) { 20358 20359 /* Perform bidiagonal QR iteration, if desired, computing 20360 left singular vectors in U and computing right singular 20361 vectors in A 20362 (Workspace: need BDSPAC) */ 20363 20364 sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[ 20365 a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[ 20366 iwork], info); 20367 } else { 20368 20369 /* Perform bidiagonal QR iteration, if desired, computing 20370 left singular vectors in A and computing right singular 20371 vectors in VT 20372 (Workspace: need BDSPAC) */ 20373 20374 sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ 20375 vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & 20376 work[iwork], info); 20377 } 20378 20379 } 20380 20381 } 20382 20383 /* If SBDSQR failed to converge, copy unconverged superdiagonals 20384 to WORK( 2:MINMN ) */ 20385 20386 if (*info != 0) { 20387 if (ie > 2) { 20388 i__2 = minmn - 1; 20389 for (i__ = 1; i__ <= i__2; ++i__) { 20390 work[i__ + 1] = work[i__ + ie - 1]; 20391 /* L50: */ 20392 } 20393 } 20394 if (ie < 2) { 20395 for (i__ = minmn - 1; i__ >= 1; --i__) { 20396 work[i__ + 1] = work[i__ + ie - 1]; 20397 /* L60: */ 20398 } 20399 } 20400 } 20401 20402 /* Undo scaling if necessary */ 20403 20404 if (iscl == 1) { 20405 if (anrm > bignum) { 20406 slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & 20407 minmn, &ierr); 20408 } 20409 if (*info != 0 && anrm > bignum) { 20410 i__2 = minmn - 1; 20411 slascl_("G", &c__0, &c__0, &bignum, &anrm, &i__2, &c__1, &work[2], 20412 &minmn, &ierr); 20413 } 20414 if (anrm < smlnum) { 20415 slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & 20416 minmn, &ierr); 20417 } 20418 if (*info != 0 && anrm < smlnum) { 20419 i__2 = minmn - 1; 20420 slascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__2, &c__1, &work[2], 20421 &minmn, &ierr); 20422 } 20423 } 20424 20425 /* Return optimal workspace in WORK(1) */ 20426 20427 work[1] = (real) maxwrk; 20428 20429 return 0; 20430 20431 /* End of SGESVD */ 20432 20433 } /* sgesvd_ */
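All of the workspace paths above funnel into the same caller-facing entry point. The following is a minimal, hypothetical driver for this f2c-style interface, not taken from the source: it assumes the integer/real typedefs from lapackblas.h, the conventional LAPACK argument order for sgesvd_ (JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO), column-major storage, and a fixed LWORK comfortably above the documented minimum instead of a workspace query.

    // Hedged usage sketch (not part of lapackblas.cpp): full SVD of a 3x2
    // column-major matrix.  Singular values land in s, U in u, V**T in vt.
    #include <cstdio>
    #include "lapackblas.h"

    int demo_sgesvd(void)
    {
        integer m = 3, n = 2, lda = 3, ldu = 3, ldvt = 2, info = 0;
        real a[6] = { 1.f, 2.f, 3.f,     /* column 1 */
                      4.f, 5.f, 6.f };   /* column 2 */
        real s[2], u[9], vt[4];
        integer lwork = 64;              /* >= max(3*min(m,n)+max(m,n), 5*min(m,n)) = 10 here */
        real work[64];

        sgesvd_("A", "A", &m, &n, a, &lda, s, u, &ldu, vt, &ldvt,
                work, &lwork, &info);
        if (info == 0)
            printf("singular values: %g %g\n", s[0], s[1]);
        return (int) info;
    }

On success, work[0] in the caller's 0-based array should hold the optimal workspace size, corresponding to the work[1] = (real) maxwrk assignment at the end of sgesvd_ above (work is 1-based inside the routine).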
int slabrd_(integer *m,
            integer *n,
            integer *nb,
            real *a,
            integer *lda,
            real *d__,
            real *e,
            real *tauq,
            real *taup,
            real *x,
            integer *ldx,
            real *y,
            integer *ldy)
Definition at line 24278 of file lapackblas.cpp.
References a_ref, f2cmin, integer, sgemv_(), slarfg_(), sscal_(), x_ref, and y_ref.
Referenced by sgebrd_().
24281 { 24282 /* -- LAPACK auxiliary routine (version 3.0) -- 24283 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 24284 Courant Institute, Argonne National Lab, and Rice University 24285 February 29, 1992 24286 24287 24288 Purpose 24289 ======= 24290 24291 SLABRD reduces the first NB rows and columns of a real general 24292 m by n matrix A to upper or lower bidiagonal form by an orthogonal 24293 transformation Q' * A * P, and returns the matrices X and Y which 24294 are needed to apply the transformation to the unreduced part of A. 24295 24296 If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower 24297 bidiagonal form. 24298 24299 This is an auxiliary routine called by SGEBRD 24300 24301 Arguments 24302 ========= 24303 24304 M (input) INTEGER 24305 The number of rows in the matrix A. 24306 24307 N (input) INTEGER 24308 The number of columns in the matrix A. 24309 24310 NB (input) INTEGER 24311 The number of leading rows and columns of A to be reduced. 24312 24313 A (input/output) REAL array, dimension (LDA,N) 24314 On entry, the m by n general matrix to be reduced. 24315 On exit, the first NB rows and columns of the matrix are 24316 overwritten; the rest of the array is unchanged. 24317 If m >= n, elements on and below the diagonal in the first NB 24318 columns, with the array TAUQ, represent the orthogonal 24319 matrix Q as a product of elementary reflectors; and 24320 elements above the diagonal in the first NB rows, with the 24321 array TAUP, represent the orthogonal matrix P as a product 24322 of elementary reflectors. 24323 If m < n, elements below the diagonal in the first NB 24324 columns, with the array TAUQ, represent the orthogonal 24325 matrix Q as a product of elementary reflectors, and 24326 elements on and above the diagonal in the first NB rows, 24327 with the array TAUP, represent the orthogonal matrix P as 24328 a product of elementary reflectors. 24329 See Further Details. 24330 24331 LDA (input) INTEGER 24332 The leading dimension of the array A. LDA >= max(1,M). 24333 24334 D (output) REAL array, dimension (NB) 24335 The diagonal elements of the first NB rows and columns of 24336 the reduced matrix. D(i) = A(i,i). 24337 24338 E (output) REAL array, dimension (NB) 24339 The off-diagonal elements of the first NB rows and columns of 24340 the reduced matrix. 24341 24342 TAUQ (output) REAL array dimension (NB) 24343 The scalar factors of the elementary reflectors which 24344 represent the orthogonal matrix Q. See Further Details. 24345 24346 TAUP (output) REAL array, dimension (NB) 24347 The scalar factors of the elementary reflectors which 24348 represent the orthogonal matrix P. See Further Details. 24349 24350 X (output) REAL array, dimension (LDX,NB) 24351 The m-by-nb matrix X required to update the unreduced part 24352 of A. 24353 24354 LDX (input) INTEGER 24355 The leading dimension of the array X. LDX >= M. 24356 24357 Y (output) REAL array, dimension (LDY,NB) 24358 The n-by-nb matrix Y required to update the unreduced part 24359 of A. 24360 24361 LDY (output) INTEGER 24362 The leading dimension of the array Y. LDY >= N. 24363 24364 Further Details 24365 =============== 24366 24367 The matrices Q and P are represented as products of elementary 24368 reflectors: 24369 24370 Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) 24371 24372 Each H(i) and G(i) has the form: 24373 24374 H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' 24375 24376 where tauq and taup are real scalars, and v and u are real vectors. 
24377 24378 If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in 24379 A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in 24380 A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). 24381 24382 If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in 24383 A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in 24384 A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). 24385 24386 The elements of the vectors v and u together form the m-by-nb matrix 24387 V and the nb-by-n matrix U' which are needed, with X and Y, to apply 24388 the transformation to the unreduced part of the matrix, using a block 24389 update of the form: A := A - V*Y' - X*U'. 24390 24391 The contents of A on exit are illustrated by the following examples 24392 with nb = 2: 24393 24394 m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): 24395 24396 ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) 24397 ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) 24398 ( v1 v2 a a a ) ( v1 1 a a a a ) 24399 ( v1 v2 a a a ) ( v1 v2 a a a a ) 24400 ( v1 v2 a a a ) ( v1 v2 a a a a ) 24401 ( v1 v2 a a a ) 24402 24403 where a denotes an element of the original matrix which is unchanged, 24404 vi denotes an element of the vector defining H(i), and ui an element 24405 of the vector defining G(i). 24406 24407 ===================================================================== 24408 24409 24410 Quick return if possible 24411 24412 Parameter adjustments */ 24413 /* Table of constant values */ 24414 static real c_b4 = -1.f; 24415 static real c_b5 = 1.f; 24416 static integer c__1 = 1; 24417 static real c_b16 = 0.f; 24418 24419 /* System generated locals */ 24420 integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, 24421 i__3; 24422 /* Local variables */ 24423 static integer i__; 24424 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 24425 sgemv_(const char *, integer *, integer *, real *, real *, integer *, 24426 real *, integer *, real *, real *, integer *), slarfg_( 24427 integer *, real *, real *, integer *, real *); 24428 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 24429 #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] 24430 #define y_ref(a_1,a_2) y[(a_2)*y_dim1 + a_1] 24431 24432 24433 a_dim1 = *lda; 24434 a_offset = 1 + a_dim1 * 1; 24435 a -= a_offset; 24436 --d__; 24437 --e; 24438 --tauq; 24439 --taup; 24440 x_dim1 = *ldx; 24441 x_offset = 1 + x_dim1 * 1; 24442 x -= x_offset; 24443 y_dim1 = *ldy; 24444 y_offset = 1 + y_dim1 * 1; 24445 y -= y_offset; 24446 24447 /* Function Body */ 24448 if (*m <= 0 || *n <= 0) { 24449 return 0; 24450 } 24451 24452 if (*m >= *n) { 24453 24454 /* Reduce to upper bidiagonal form */ 24455 24456 i__1 = *nb; 24457 for (i__ = 1; i__ <= i__1; ++i__) { 24458 24459 /* Update A(i:m,i) */ 24460 24461 i__2 = *m - i__ + 1; 24462 i__3 = i__ - 1; 24463 sgemv_("No transpose", &i__2, &i__3, &c_b4, &a_ref(i__, 1), lda, & 24464 y_ref(i__, 1), ldy, &c_b5, &a_ref(i__, i__), &c__1); 24465 i__2 = *m - i__ + 1; 24466 i__3 = i__ - 1; 24467 sgemv_("No transpose", &i__2, &i__3, &c_b4, &x_ref(i__, 1), ldx, & 24468 a_ref(1, i__), &c__1, &c_b5, &a_ref(i__, i__), &c__1); 24469 24470 /* Generate reflection Q(i) to annihilate A(i+1:m,i) 24471 24472 Computing MIN */ 24473 i__2 = i__ + 1; 24474 i__3 = *m - i__ + 1; 24475 slarfg_(&i__3, &a_ref(i__, i__), &a_ref(f2cmin(i__2,*m), i__), &c__1, 24476 &tauq[i__]); 24477 d__[i__] = a_ref(i__, i__); 24478 if (i__ < *n) { 24479 a_ref(i__, i__) = 1.f; 24480 24481 /* Compute Y(i+1:n,i) */ 24482 24483 i__2 = *m - i__ + 
1; 24484 i__3 = *n - i__; 24485 sgemv_("Transpose", &i__2, &i__3, &c_b5, &a_ref(i__, i__ + 1), 24486 lda, &a_ref(i__, i__), &c__1, &c_b16, &y_ref(i__ + 1, 24487 i__), &c__1); 24488 i__2 = *m - i__ + 1; 24489 i__3 = i__ - 1; 24490 sgemv_("Transpose", &i__2, &i__3, &c_b5, &a_ref(i__, 1), lda, 24491 &a_ref(i__, i__), &c__1, &c_b16, &y_ref(1, i__), & 24492 c__1); 24493 i__2 = *n - i__; 24494 i__3 = i__ - 1; 24495 sgemv_("No transpose", &i__2, &i__3, &c_b4, &y_ref(i__ + 1, 1) 24496 , ldy, &y_ref(1, i__), &c__1, &c_b5, &y_ref(i__ + 1, 24497 i__), &c__1); 24498 i__2 = *m - i__ + 1; 24499 i__3 = i__ - 1; 24500 sgemv_("Transpose", &i__2, &i__3, &c_b5, &x_ref(i__, 1), ldx, 24501 &a_ref(i__, i__), &c__1, &c_b16, &y_ref(1, i__), & 24502 c__1); 24503 i__2 = i__ - 1; 24504 i__3 = *n - i__; 24505 sgemv_("Transpose", &i__2, &i__3, &c_b4, &a_ref(1, i__ + 1), 24506 lda, &y_ref(1, i__), &c__1, &c_b5, &y_ref(i__ + 1, 24507 i__), &c__1); 24508 i__2 = *n - i__; 24509 sscal_(&i__2, &tauq[i__], &y_ref(i__ + 1, i__), &c__1); 24510 24511 /* Update A(i,i+1:n) */ 24512 24513 i__2 = *n - i__; 24514 sgemv_("No transpose", &i__2, &i__, &c_b4, &y_ref(i__ + 1, 1), 24515 ldy, &a_ref(i__, 1), lda, &c_b5, &a_ref(i__, i__ + 1) 24516 , lda); 24517 i__2 = i__ - 1; 24518 i__3 = *n - i__; 24519 sgemv_("Transpose", &i__2, &i__3, &c_b4, &a_ref(1, i__ + 1), 24520 lda, &x_ref(i__, 1), ldx, &c_b5, &a_ref(i__, i__ + 1), 24521 lda); 24522 24523 /* Generate reflection P(i) to annihilate A(i,i+2:n) 24524 24525 Computing MIN */ 24526 i__2 = i__ + 2; 24527 i__3 = *n - i__; 24528 slarfg_(&i__3, &a_ref(i__, i__ + 1), &a_ref(i__, f2cmin(i__2,*n)) 24529 , lda, &taup[i__]); 24530 e[i__] = a_ref(i__, i__ + 1); 24531 a_ref(i__, i__ + 1) = 1.f; 24532 24533 /* Compute X(i+1:m,i) */ 24534 24535 i__2 = *m - i__; 24536 i__3 = *n - i__; 24537 sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1, 24538 i__ + 1), lda, &a_ref(i__, i__ + 1), lda, &c_b16, & 24539 x_ref(i__ + 1, i__), &c__1); 24540 i__2 = *n - i__; 24541 sgemv_("Transpose", &i__2, &i__, &c_b5, &y_ref(i__ + 1, 1), 24542 ldy, &a_ref(i__, i__ + 1), lda, &c_b16, &x_ref(1, i__) 24543 , &c__1); 24544 i__2 = *m - i__; 24545 sgemv_("No transpose", &i__2, &i__, &c_b4, &a_ref(i__ + 1, 1), 24546 lda, &x_ref(1, i__), &c__1, &c_b5, &x_ref(i__ + 1, 24547 i__), &c__1); 24548 i__2 = i__ - 1; 24549 i__3 = *n - i__; 24550 sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(1, i__ + 1) 24551 , lda, &a_ref(i__, i__ + 1), lda, &c_b16, &x_ref(1, 24552 i__), &c__1); 24553 i__2 = *m - i__; 24554 i__3 = i__ - 1; 24555 sgemv_("No transpose", &i__2, &i__3, &c_b4, &x_ref(i__ + 1, 1) 24556 , ldx, &x_ref(1, i__), &c__1, &c_b5, &x_ref(i__ + 1, 24557 i__), &c__1); 24558 i__2 = *m - i__; 24559 sscal_(&i__2, &taup[i__], &x_ref(i__ + 1, i__), &c__1); 24560 } 24561 /* L10: */ 24562 } 24563 } else { 24564 24565 /* Reduce to lower bidiagonal form */ 24566 24567 i__1 = *nb; 24568 for (i__ = 1; i__ <= i__1; ++i__) { 24569 24570 /* Update A(i,i:n) */ 24571 24572 i__2 = *n - i__ + 1; 24573 i__3 = i__ - 1; 24574 sgemv_("No transpose", &i__2, &i__3, &c_b4, &y_ref(i__, 1), ldy, & 24575 a_ref(i__, 1), lda, &c_b5, &a_ref(i__, i__), lda); 24576 i__2 = i__ - 1; 24577 i__3 = *n - i__ + 1; 24578 sgemv_("Transpose", &i__2, &i__3, &c_b4, &a_ref(1, i__), lda, & 24579 x_ref(i__, 1), ldx, &c_b5, &a_ref(i__, i__), lda); 24580 24581 /* Generate reflection P(i) to annihilate A(i,i+1:n) 24582 24583 Computing MIN */ 24584 i__2 = i__ + 1; 24585 i__3 = *n - i__ + 1; 24586 slarfg_(&i__3, &a_ref(i__, i__), &a_ref(i__, f2cmin(i__2,*n)), lda, & 24587 
taup[i__]); 24588 d__[i__] = a_ref(i__, i__); 24589 if (i__ < *m) { 24590 a_ref(i__, i__) = 1.f; 24591 24592 /* Compute X(i+1:m,i) */ 24593 24594 i__2 = *m - i__; 24595 i__3 = *n - i__ + 1; 24596 sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1, 24597 i__), lda, &a_ref(i__, i__), lda, &c_b16, &x_ref(i__ 24598 + 1, i__), &c__1); 24599 i__2 = *n - i__ + 1; 24600 i__3 = i__ - 1; 24601 sgemv_("Transpose", &i__2, &i__3, &c_b5, &y_ref(i__, 1), ldy, 24602 &a_ref(i__, i__), lda, &c_b16, &x_ref(1, i__), &c__1); 24603 i__2 = *m - i__; 24604 i__3 = i__ - 1; 24605 sgemv_("No transpose", &i__2, &i__3, &c_b4, &a_ref(i__ + 1, 1) 24606 , lda, &x_ref(1, i__), &c__1, &c_b5, &x_ref(i__ + 1, 24607 i__), &c__1); 24608 i__2 = i__ - 1; 24609 i__3 = *n - i__ + 1; 24610 sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(1, i__), 24611 lda, &a_ref(i__, i__), lda, &c_b16, &x_ref(1, i__), & 24612 c__1); 24613 i__2 = *m - i__; 24614 i__3 = i__ - 1; 24615 sgemv_("No transpose", &i__2, &i__3, &c_b4, &x_ref(i__ + 1, 1) 24616 , ldx, &x_ref(1, i__), &c__1, &c_b5, &x_ref(i__ + 1, 24617 i__), &c__1); 24618 i__2 = *m - i__; 24619 sscal_(&i__2, &taup[i__], &x_ref(i__ + 1, i__), &c__1); 24620 24621 /* Update A(i+1:m,i) */ 24622 24623 i__2 = *m - i__; 24624 i__3 = i__ - 1; 24625 sgemv_("No transpose", &i__2, &i__3, &c_b4, &a_ref(i__ + 1, 1) 24626 , lda, &y_ref(i__, 1), ldy, &c_b5, &a_ref(i__ + 1, 24627 i__), &c__1); 24628 i__2 = *m - i__; 24629 sgemv_("No transpose", &i__2, &i__, &c_b4, &x_ref(i__ + 1, 1), 24630 ldx, &a_ref(1, i__), &c__1, &c_b5, &a_ref(i__ + 1, 24631 i__), &c__1); 24632 24633 /* Generate reflection Q(i) to annihilate A(i+2:m,i) 24634 24635 Computing MIN */ 24636 i__2 = i__ + 2; 24637 i__3 = *m - i__; 24638 slarfg_(&i__3, &a_ref(i__ + 1, i__), &a_ref(f2cmin(i__2,*m), i__) 24639 , &c__1, &tauq[i__]); 24640 e[i__] = a_ref(i__ + 1, i__); 24641 a_ref(i__ + 1, i__) = 1.f; 24642 24643 /* Compute Y(i+1:n,i) */ 24644 24645 i__2 = *m - i__; 24646 i__3 = *n - i__; 24647 sgemv_("Transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1, i__ 24648 + 1), lda, &a_ref(i__ + 1, i__), &c__1, &c_b16, & 24649 y_ref(i__ + 1, i__), &c__1); 24650 i__2 = *m - i__; 24651 i__3 = i__ - 1; 24652 sgemv_("Transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1, 1), 24653 lda, &a_ref(i__ + 1, i__), &c__1, &c_b16, &y_ref(1, 24654 i__), &c__1); 24655 i__2 = *n - i__; 24656 i__3 = i__ - 1; 24657 sgemv_("No transpose", &i__2, &i__3, &c_b4, &y_ref(i__ + 1, 1) 24658 , ldy, &y_ref(1, i__), &c__1, &c_b5, &y_ref(i__ + 1, 24659 i__), &c__1); 24660 i__2 = *m - i__; 24661 sgemv_("Transpose", &i__2, &i__, &c_b5, &x_ref(i__ + 1, 1), 24662 ldx, &a_ref(i__ + 1, i__), &c__1, &c_b16, &y_ref(1, 24663 i__), &c__1); 24664 i__2 = *n - i__; 24665 sgemv_("Transpose", &i__, &i__2, &c_b4, &a_ref(1, i__ + 1), 24666 lda, &y_ref(1, i__), &c__1, &c_b5, &y_ref(i__ + 1, 24667 i__), &c__1); 24668 i__2 = *n - i__; 24669 sscal_(&i__2, &tauq[i__], &y_ref(i__ + 1, i__), &c__1); 24670 } 24671 /* L20: */ 24672 } 24673 } 24674 return 0; 24675 24676 /* End of SLABRD */ 24677 24678 } /* slabrd_ */
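SLABRD only builds the panel; the Further Details block above says the caller must then apply A := A - V*Y' - X*U' to the unreduced part of the matrix. The fragment below is a hedged reconstruction of that blocked update in the style of SGEBRD (whose call site is not shown in this section): it assumes sgemm_ follows the standard BLAS GEMM argument order used elsewhere in this file, and it introduces a hypothetical helper plus local A/X/Y indexing macros mirroring a_ref/x_ref/y_ref for a 0-based caller.

    // Hedged sketch (not from this file): after slabrd_ has reduced an
    // nb-column panel starting at column i, apply the documented update
    // A := A - V*Y' - X*U' to the trailing block A(i+nb:m, i+nb:n).
    // Column-major storage, 1-based (r, c) indexing via the macros below.
    #include "lapackblas.h"

    #define A(r, c) a[((c) - 1) * (*lda) + ((r) - 1)]
    #define X(r, c) x[((c) - 1) * (*ldx) + ((r) - 1)]
    #define Y(r, c) y[((c) - 1) * (*ldy) + ((r) - 1)]

    static void apply_slabrd_update(integer *m, integer *n, integer i, integer nb,
                                    real *a, integer *lda,
                                    real *x, integer *ldx,
                                    real *y, integer *ldy)
    {
        static real one = 1.f, neg_one = -1.f;
        integer rows = *m - i - nb + 1;   /* trailing block row count    */
        integer cols = *n - i - nb + 1;   /* trailing block column count */

        /* A(i+nb:m, i+nb:n) -= V * Y'; V is stored in the panel columns A(i+nb:m, i:i+nb-1) */
        sgemm_("N", "T", &rows, &cols, &nb, &neg_one,
               &A(i + nb, i), lda, &Y(i + nb, 1), ldy, &one,
               &A(i + nb, i + nb), lda);

        /* A(i+nb:m, i+nb:n) -= X * U'; U' is stored in the panel rows A(i:i+nb-1, i+nb:n) */
        sgemm_("N", "N", &rows, &cols, &nb, &neg_one,
               &X(i + nb, 1), ldx, &A(i, i + nb), lda, &one,
               &A(i + nb, i + nb), lda);
    }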
int slacpy_(const char *uplo,
            integer *m,
            integer *n,
            real *a,
            integer *lda,
            real *b,
            integer *ldb)
Definition at line 12889 of file lapackblas.cpp.
References a_ref, b_ref, f2cmin, integer, and lsame_().
Referenced by sgesvd_(), slaed0_(), slaed2_(), slaed3_(), slaed8_(), and sstedc_().
12891 { 12892 /* -- LAPACK auxiliary routine (version 3.0) -- 12893 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 12894 Courant Institute, Argonne National Lab, and Rice University 12895 February 29, 1992 12896 12897 12898 Purpose 12899 ======= 12900 12901 SLACPY copies all or part of a two-dimensional matrix A to another 12902 matrix B. 12903 12904 Arguments 12905 ========= 12906 12907 UPLO (input) CHARACTER*1 12908 Specifies the part of the matrix A to be copied to B. 12909 = 'U': Upper triangular part 12910 = 'L': Lower triangular part 12911 Otherwise: All of the matrix A 12912 12913 M (input) INTEGER 12914 The number of rows of the matrix A. M >= 0. 12915 12916 N (input) INTEGER 12917 The number of columns of the matrix A. N >= 0. 12918 12919 A (input) REAL array, dimension (LDA,N) 12920 The m by n matrix A. If UPLO = 'U', only the upper triangle 12921 or trapezoid is accessed; if UPLO = 'L', only the lower 12922 triangle or trapezoid is accessed. 12923 12924 LDA (input) INTEGER 12925 The leading dimension of the array A. LDA >= max(1,M). 12926 12927 B (output) REAL array, dimension (LDB,N) 12928 On exit, B = A in the locations specified by UPLO. 12929 12930 LDB (input) INTEGER 12931 The leading dimension of the array B. LDB >= max(1,M). 12932 12933 ===================================================================== 12934 12935 12936 Parameter adjustments */ 12937 /* System generated locals */ 12938 integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; 12939 /* Local variables */ 12940 static integer i__, j; 12941 extern logical lsame_(const char *, const char *); 12942 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 12943 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] 12944 12945 a_dim1 = *lda; 12946 a_offset = 1 + a_dim1 * 1; 12947 a -= a_offset; 12948 b_dim1 = *ldb; 12949 b_offset = 1 + b_dim1 * 1; 12950 b -= b_offset; 12951 12952 /* Function Body */ 12953 if (lsame_(uplo, "U")) { 12954 i__1 = *n; 12955 for (j = 1; j <= i__1; ++j) { 12956 i__2 = f2cmin(j,*m); 12957 for (i__ = 1; i__ <= i__2; ++i__) { 12958 b_ref(i__, j) = a_ref(i__, j); 12959 /* L10: */ 12960 } 12961 /* L20: */ 12962 } 12963 } else if (lsame_(uplo, "L")) { 12964 i__1 = *n; 12965 for (j = 1; j <= i__1; ++j) { 12966 i__2 = *m; 12967 for (i__ = j; i__ <= i__2; ++i__) { 12968 b_ref(i__, j) = a_ref(i__, j); 12969 /* L30: */ 12970 } 12971 /* L40: */ 12972 } 12973 } else { 12974 i__1 = *n; 12975 for (j = 1; j <= i__1; ++j) { 12976 i__2 = *m; 12977 for (i__ = 1; i__ <= i__2; ++i__) { 12978 b_ref(i__, j) = a_ref(i__, j); 12979 /* L50: */ 12980 } 12981 /* L60: */ 12982 } 12983 } 12984 return 0; 12985 12986 /* End of SLACPY */ 12987 12988 } /* slacpy_ */
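A recurring idiom in the SGESVD paths above pairs slacpy_ with slaset_: copy the lower triangle of L, then zero the strictly upper triangle of the destination ("Copy L to U, zeroing out above it"). The sketch below is a hedged reconstruction of that idiom for a 0-based column-major caller; it assumes slaset_(uplo, m, n, alpha, beta, a, lda) fills the selected off-diagonal part with alpha and the diagonal with beta, which matches how it is invoked throughout this file with the zero constant c_b416.

    // Hedged sketch (not from this file): copy the lower triangle of A into U
    // and clear everything strictly above the diagonal of U, the same pattern
    // the SGESVD paths use before bidiagonalizing L.
    #include "lapackblas.h"

    static void copy_l_zero_above(integer *m, real *a, integer *lda,
                                  real *u, integer *ldu)
    {
        static real zero = 0.f;    /* plays the role of c_b416 */
        integer mm1 = *m - 1;

        /* U(1:m,1:m) := lower triangle (and diagonal) of A(1:m,1:m) */
        slacpy_("L", m, m, a, lda, u, ldu);

        /* zero U(1:m-1, 2:m); &u[*ldu] is row 1, column 2 of the 0-based array */
        if (mm1 > 0)
            slaset_("U", &mm1, &mm1, &zero, &zero, &u[*ldu], ldu);
    }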
int slae2_(real *a,
           real *b,
           real *c__,
           real *rt1,
           real *rt2)
Definition at line 2012 of file lapackblas.cpp.
Referenced by ssteqr_(), and ssterf_().
02013 { 02014 /* -- LAPACK auxiliary routine (version 3.0) -- 02015 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 02016 Courant Institute, Argonne National Lab, and Rice University 02017 October 31, 1992 02018 02019 02020 Purpose 02021 ======= 02022 02023 SLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix 02024 [ A B ] 02025 [ B C ]. 02026 On return, RT1 is the eigenvalue of larger absolute value, and RT2 02027 is the eigenvalue of smaller absolute value. 02028 02029 Arguments 02030 ========= 02031 02032 A (input) REAL 02033 The (1,1) element of the 2-by-2 matrix. 02034 02035 B (input) REAL 02036 The (1,2) and (2,1) elements of the 2-by-2 matrix. 02037 02038 C (input) REAL 02039 The (2,2) element of the 2-by-2 matrix. 02040 02041 RT1 (output) REAL 02042 The eigenvalue of larger absolute value. 02043 02044 RT2 (output) REAL 02045 The eigenvalue of smaller absolute value. 02046 02047 Further Details 02048 =============== 02049 02050 RT1 is accurate to a few ulps barring over/underflow. 02051 02052 RT2 may be inaccurate if there is massive cancellation in the 02053 determinant A*C-B*B; higher precision or correctly rounded or 02054 correctly truncated arithmetic would be needed to compute RT2 02055 accurately in all cases. 02056 02057 Overflow is possible only if RT1 is within a factor of 5 of overflow. 02058 Underflow is harmless if the input data is 0 or exceeds 02059 underflow_threshold / macheps. 02060 02061 ===================================================================== 02062 02063 02064 Compute the eigenvalues */ 02065 /* System generated locals */ 02066 real r__1; 02067 /* Builtin functions */ 02068 // double sqrt(doublereal); 02069 /* Local variables */ 02070 static real acmn, acmx, ab, df, tb, sm, rt, adf; 02071 02072 02073 sm = *a + *c__; 02074 df = *a - *c__; 02075 adf = dabs(df); 02076 tb = *b + *b; 02077 ab = dabs(tb); 02078 if (dabs(*a) > dabs(*c__)) { 02079 acmx = *a; 02080 acmn = *c__; 02081 } else { 02082 acmx = *c__; 02083 acmn = *a; 02084 } 02085 if (adf > ab) { 02086 /* Computing 2nd power */ 02087 r__1 = ab / adf; 02088 rt = adf * sqrt(r__1 * r__1 + 1.f); 02089 } else if (adf < ab) { 02090 /* Computing 2nd power */ 02091 r__1 = adf / ab; 02092 rt = ab * sqrt(r__1 * r__1 + 1.f); 02093 } else { 02094 02095 /* Includes case AB=ADF=0 */ 02096 02097 rt = ab * sqrt(2.f); 02098 } 02099 if (sm < 0.f) { 02100 *rt1 = (sm - rt) * .5f; 02101 02102 /* Order of execution important. 02103 To get fully accurate smaller eigenvalue, 02104 next line needs to be executed in higher precision. */ 02105 02106 *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; 02107 } else if (sm > 0.f) { 02108 *rt1 = (sm + rt) * .5f; 02109 02110 /* Order of execution important. 02111 To get fully accurate smaller eigenvalue, 02112 next line needs to be executed in higher precision. */ 02113 02114 *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; 02115 } else { 02116 02117 /* Includes case RT1 = RT2 = 0 */ 02118 02119 *rt1 = rt * .5f; 02120 *rt2 = rt * -.5f; 02121 } 02122 return 0; 02123 02124 /* End of SLAE2 */ 02125 02126 } /* slae2_ */
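A quick sanity check, assuming only the prototype shown above: for the symmetric matrix [ 2 1 ; 1 2 ] the eigenvalues are (a+c)/2 +/- sqrt(((a-c)/2)^2 + b^2), i.e. 3 and 1, and SLAE2 orders them by absolute value, so RT1 should come back as 3 and RT2 as 1.

    // Hedged usage sketch: eigenvalues of the 2-by-2 symmetric matrix
    //   [ 2 1 ]
    //   [ 1 2 ]
    #include <cstdio>
    #include "lapackblas.h"

    int demo_slae2(void)
    {
        real a = 2.f, b = 1.f, c = 2.f, rt1 = 0.f, rt2 = 0.f;
        slae2_(&a, &b, &c, &rt1, &rt2);
        printf("rt1 = %g  rt2 = %g\n", rt1, rt2);   /* expect rt1 = 3, rt2 = 1 */
        return 0;
    }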
int slaed0_ ( integer * icompq, integer * qsiz, integer * n, real * d__, real * e, real * q, integer * ldq, real * qstore, integer * ldqs, real * work, integer * iwork, integer * info )
Definition at line 11924 of file lapackblas.cpp.
References c__0, c__1, c__2, dabs, f2cmax, ilaenv_(), integer, log(), pow_ii(), q_ref, qstore_ref, scopy_(), sgemm_(), slacpy_(), slaed1_(), slaed7_(), ssteqr_(), and xerbla_().
Referenced by sstedc_().
11927 { 11928 /* -- LAPACK routine (version 3.0) -- 11929 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 11930 Courant Institute, Argonne National Lab, and Rice University 11931 June 30, 1999 11932 11933 11934 Purpose 11935 ======= 11936 11937 SLAED0 computes all eigenvalues and corresponding eigenvectors of a 11938 symmetric tridiagonal matrix using the divide and conquer method. 11939 11940 Arguments 11941 ========= 11942 11943 ICOMPQ (input) INTEGER 11944 = 0: Compute eigenvalues only. 11945 = 1: Compute eigenvectors of original dense symmetric matrix 11946 also. On entry, Q contains the orthogonal matrix used 11947 to reduce the original matrix to tridiagonal form. 11948 = 2: Compute eigenvalues and eigenvectors of tridiagonal 11949 matrix. 11950 11951 QSIZ (input) INTEGER 11952 The dimension of the orthogonal matrix used to reduce 11953 the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. 11954 11955 N (input) INTEGER 11956 The dimension of the symmetric tridiagonal matrix. N >= 0. 11957 11958 D (input/output) REAL array, dimension (N) 11959 On entry, the main diagonal of the tridiagonal matrix. 11960 On exit, its eigenvalues. 11961 11962 E (input) REAL array, dimension (N-1) 11963 The off-diagonal elements of the tridiagonal matrix. 11964 On exit, E has been destroyed. 11965 11966 Q (input/output) REAL array, dimension (LDQ, N) 11967 On entry, Q must contain an N-by-N orthogonal matrix. 11968 If ICOMPQ = 0 Q is not referenced. 11969 If ICOMPQ = 1 On entry, Q is a subset of the columns of the 11970 orthogonal matrix used to reduce the full 11971 matrix to tridiagonal form corresponding to 11972 the subset of the full matrix which is being 11973 decomposed at this time. 11974 If ICOMPQ = 2 On entry, Q will be the identity matrix. 11975 On exit, Q contains the eigenvectors of the 11976 tridiagonal matrix. 11977 11978 LDQ (input) INTEGER 11979 The leading dimension of the array Q. If eigenvectors are 11980 desired, then LDQ >= max(1,N). In any case, LDQ >= 1. 11981 11982 QSTORE (workspace) REAL array, dimension (LDQS, N) 11983 Referenced only when ICOMPQ = 1. Used to store parts of 11984 the eigenvector matrix when the updating matrix multiplies 11985 take place. 11986 11987 LDQS (input) INTEGER 11988 The leading dimension of the array QSTORE. If ICOMPQ = 1, 11989 then LDQS >= max(1,N). In any case, LDQS >= 1. 11990 11991 WORK (workspace) REAL array, 11992 If ICOMPQ = 0 or 1, the dimension of WORK must be at least 11993 1 + 3*N + 2*N*lg N + 2*N**2 11994 ( lg( N ) = smallest integer k 11995 such that 2^k >= N ) 11996 If ICOMPQ = 2, the dimension of WORK must be at least 11997 4*N + N**2. 11998 11999 IWORK (workspace) INTEGER array, 12000 If ICOMPQ = 0 or 1, the dimension of IWORK must be at least 12001 6 + 6*N + 5*N*lg N. 12002 ( lg( N ) = smallest integer k 12003 such that 2^k >= N ) 12004 If ICOMPQ = 2, the dimension of IWORK must be at least 12005 3 + 5*N. 12006 12007 INFO (output) INTEGER 12008 = 0: successful exit. 12009 < 0: if INFO = -i, the i-th argument had an illegal value. 12010 > 0: The algorithm failed to compute an eigenvalue while 12011 working on the submatrix lying in rows and columns 12012 INFO/(N+1) through mod(INFO,N+1). 12013 12014 Further Details 12015 =============== 12016 12017 Based on contributions by 12018 Jeff Rutter, Computer Science Division, University of California 12019 at Berkeley, USA 12020 12021 ===================================================================== 12022 12023 12024 Test the input parameters. 
12025 12026 Parameter adjustments */ 12027 /* Table of constant values */ 12028 static integer c__9 = 9; 12029 static integer c__0 = 0; 12030 static integer c__2 = 2; 12031 static real c_b23 = 1.f; 12032 static real c_b24 = 0.f; 12033 static integer c__1 = 1; 12034 12035 /* System generated locals */ 12036 integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; 12037 real r__1; 12038 /* Builtin functions */ 12039 // double log(doublereal); 12040 integer pow_ii(integer *, integer *); 12041 /* Local variables */ 12042 static real temp; 12043 static integer curr, i__, j, k; 12044 extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *, 12045 integer *, real *, real *, integer *, real *, integer *, real *, 12046 real *, integer *); 12047 static integer iperm, indxq, iwrem; 12048 extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 12049 integer *); 12050 static integer iqptr, tlvls; 12051 extern /* Subroutine */ int slaed1_(integer *, real *, real *, integer *, 12052 integer *, real *, integer *, real *, integer *, integer *), 12053 slaed7_(integer *, integer *, integer *, integer *, integer *, 12054 integer *, real *, real *, integer *, integer *, real *, integer * 12055 , real *, integer *, integer *, integer *, integer *, integer *, 12056 real *, real *, integer *, integer *); 12057 static integer iq, igivcl; 12058 extern /* Subroutine */ int xerbla_(const char *, integer *); 12059 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 12060 integer *, integer *, ftnlen, ftnlen); 12061 static integer igivnm, submat; 12062 extern /* Subroutine */ int slacpy_(const char *, integer *, integer *, real *, 12063 integer *, real *, integer *); 12064 static integer curprb, subpbs, igivpt, curlvl, matsiz, iprmpt, smlsiz; 12065 extern /* Subroutine */ int ssteqr_(const char *, integer *, real *, real *, 12066 real *, integer *, real *, integer *); 12067 static integer lgn, msd2, smm1, spm1, spm2; 12068 #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] 12069 #define qstore_ref(a_1,a_2) qstore[(a_2)*qstore_dim1 + a_1] 12070 12071 12072 --d__; 12073 --e; 12074 q_dim1 = *ldq; 12075 q_offset = 1 + q_dim1 * 1; 12076 q -= q_offset; 12077 qstore_dim1 = *ldqs; 12078 qstore_offset = 1 + qstore_dim1 * 1; 12079 qstore -= qstore_offset; 12080 --work; 12081 --iwork; 12082 12083 /* Function Body */ 12084 *info = 0; 12085 12086 if (*icompq < 0 || *icompq > 2) { 12087 *info = -1; 12088 } else if (*icompq == 1 && *qsiz < f2cmax(0,*n)) { 12089 *info = -2; 12090 } else if (*n < 0) { 12091 *info = -3; 12092 } else if (*ldq < f2cmax(1,*n)) { 12093 *info = -7; 12094 } else if (*ldqs < f2cmax(1,*n)) { 12095 *info = -9; 12096 } 12097 if (*info != 0) { 12098 i__1 = -(*info); 12099 xerbla_("SLAED0", &i__1); 12100 return 0; 12101 } 12102 12103 /* Quick return if possible */ 12104 12105 if (*n == 0) { 12106 return 0; 12107 } 12108 12109 smlsiz = ilaenv_(&c__9, "SLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( 12110 ftnlen)6, (ftnlen)1); 12111 12112 /* Determine the size and placement of the submatrices, and save in 12113 the leading elements of IWORK. 
*/ 12114 12115 iwork[1] = *n; 12116 subpbs = 1; 12117 tlvls = 0; 12118 L10: 12119 if (iwork[subpbs] > smlsiz) { 12120 for (j = subpbs; j >= 1; --j) { 12121 iwork[j * 2] = (iwork[j] + 1) / 2; 12122 iwork[(j << 1) - 1] = iwork[j] / 2; 12123 /* L20: */ 12124 } 12125 ++tlvls; 12126 subpbs <<= 1; 12127 goto L10; 12128 } 12129 i__1 = subpbs; 12130 for (j = 2; j <= i__1; ++j) { 12131 iwork[j] += iwork[j - 1]; 12132 /* L30: */ 12133 } 12134 12135 /* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 12136 using rank-1 modifications (cuts). */ 12137 12138 spm1 = subpbs - 1; 12139 i__1 = spm1; 12140 for (i__ = 1; i__ <= i__1; ++i__) { 12141 submat = iwork[i__] + 1; 12142 smm1 = submat - 1; 12143 d__[smm1] -= (r__1 = e[smm1], dabs(r__1)); 12144 d__[submat] -= (r__1 = e[smm1], dabs(r__1)); 12145 /* L40: */ 12146 } 12147 12148 indxq = (*n << 2) + 3; 12149 if (*icompq != 2) { 12150 12151 /* Set up workspaces for eigenvalues only/accumulate new vectors 12152 routine */ 12153 12154 temp = log((real) (*n)) / log(2.f); 12155 lgn = (integer) temp; 12156 if (pow_ii(&c__2, &lgn) < *n) { 12157 ++lgn; 12158 } 12159 if (pow_ii(&c__2, &lgn) < *n) { 12160 ++lgn; 12161 } 12162 iprmpt = indxq + *n + 1; 12163 iperm = iprmpt + *n * lgn; 12164 iqptr = iperm + *n * lgn; 12165 igivpt = iqptr + *n + 2; 12166 igivcl = igivpt + *n * lgn; 12167 12168 igivnm = 1; 12169 iq = igivnm + (*n << 1) * lgn; 12170 /* Computing 2nd power */ 12171 i__1 = *n; 12172 iwrem = iq + i__1 * i__1 + 1; 12173 12174 /* Initialize pointers */ 12175 12176 i__1 = subpbs; 12177 for (i__ = 0; i__ <= i__1; ++i__) { 12178 iwork[iprmpt + i__] = 1; 12179 iwork[igivpt + i__] = 1; 12180 /* L50: */ 12181 } 12182 iwork[iqptr] = 1; 12183 } 12184 12185 /* Solve each submatrix eigenproblem at the bottom of the divide and 12186 conquer tree. */ 12187 12188 curr = 0; 12189 i__1 = spm1; 12190 for (i__ = 0; i__ <= i__1; ++i__) { 12191 if (i__ == 0) { 12192 submat = 1; 12193 matsiz = iwork[1]; 12194 } else { 12195 submat = iwork[i__] + 1; 12196 matsiz = iwork[i__ + 1] - iwork[i__]; 12197 } 12198 if (*icompq == 2) { 12199 ssteqr_("I", &matsiz, &d__[submat], &e[submat], &q_ref(submat, 12200 submat), ldq, &work[1], info); 12201 if (*info != 0) { 12202 goto L130; 12203 } 12204 } else { 12205 ssteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + 12206 iwork[iqptr + curr]], &matsiz, &work[1], info); 12207 if (*info != 0) { 12208 goto L130; 12209 } 12210 if (*icompq == 1) { 12211 sgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b23, &q_ref(1, 12212 submat), ldq, &work[iq - 1 + iwork[iqptr + curr]], & 12213 matsiz, &c_b24, &qstore_ref(1, submat), ldqs); 12214 } 12215 /* Computing 2nd power */ 12216 i__2 = matsiz; 12217 iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; 12218 ++curr; 12219 } 12220 k = 1; 12221 i__2 = iwork[i__ + 1]; 12222 for (j = submat; j <= i__2; ++j) { 12223 iwork[indxq + j] = k; 12224 ++k; 12225 /* L60: */ 12226 } 12227 /* L70: */ 12228 } 12229 12230 /* Successively merge eigensystems of adjacent submatrices 12231 into eigensystem for the corresponding larger matrix. 
12232 12233 while ( SUBPBS > 1 ) */ 12234 12235 curlvl = 1; 12236 L80: 12237 if (subpbs > 1) { 12238 spm2 = subpbs - 2; 12239 i__1 = spm2; 12240 for (i__ = 0; i__ <= i__1; i__ += 2) { 12241 if (i__ == 0) { 12242 submat = 1; 12243 matsiz = iwork[2]; 12244 msd2 = iwork[1]; 12245 curprb = 0; 12246 } else { 12247 submat = iwork[i__] + 1; 12248 matsiz = iwork[i__ + 2] - iwork[i__]; 12249 msd2 = matsiz / 2; 12250 ++curprb; 12251 } 12252 12253 /* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) 12254 into an eigensystem of size MATSIZ. 12255 SLAED1 is used only for the full eigensystem of a tridiagonal 12256 matrix. 12257 SLAED7 handles the cases in which eigenvalues only or eigenvalues 12258 and eigenvectors of a full symmetric matrix (which was reduced to 12259 tridiagonal form) are desired. */ 12260 12261 if (*icompq == 2) { 12262 slaed1_(&matsiz, &d__[submat], &q_ref(submat, submat), ldq, & 12263 iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, & 12264 work[1], &iwork[subpbs + 1], info); 12265 } else { 12266 slaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[ 12267 submat], &qstore_ref(1, submat), ldqs, &iwork[indxq + 12268 submat], &e[submat + msd2 - 1], &msd2, &work[iq], & 12269 iwork[iqptr], &iwork[iprmpt], &iwork[iperm], &iwork[ 12270 igivpt], &iwork[igivcl], &work[igivnm], &work[iwrem], 12271 &iwork[subpbs + 1], info); 12272 } 12273 if (*info != 0) { 12274 goto L130; 12275 } 12276 iwork[i__ / 2 + 1] = iwork[i__ + 2]; 12277 /* L90: */ 12278 } 12279 subpbs /= 2; 12280 ++curlvl; 12281 goto L80; 12282 } 12283 12284 /* end while 12285 12286 Re-merge the eigenvalues/vectors which were deflated at the final 12287 merge step. */ 12288 12289 if (*icompq == 1) { 12290 i__1 = *n; 12291 for (i__ = 1; i__ <= i__1; ++i__) { 12292 j = iwork[indxq + i__]; 12293 work[i__] = d__[j]; 12294 scopy_(qsiz, &qstore_ref(1, j), &c__1, &q_ref(1, i__), &c__1); 12295 /* L100: */ 12296 } 12297 scopy_(n, &work[1], &c__1, &d__[1], &c__1); 12298 } else if (*icompq == 2) { 12299 i__1 = *n; 12300 for (i__ = 1; i__ <= i__1; ++i__) { 12301 j = iwork[indxq + i__]; 12302 work[i__] = d__[j]; 12303 scopy_(n, &q_ref(1, j), &c__1, &work[*n * i__ + 1], &c__1); 12304 /* L110: */ 12305 } 12306 scopy_(n, &work[1], &c__1, &d__[1], &c__1); 12307 slacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq); 12308 } else { 12309 i__1 = *n; 12310 for (i__ = 1; i__ <= i__1; ++i__) { 12311 j = iwork[indxq + i__]; 12312 work[i__] = d__[j]; 12313 /* L120: */ 12314 } 12315 scopy_(n, &work[1], &c__1, &d__[1], &c__1); 12316 } 12317 goto L140; 12318 12319 L130: 12320 *info = submat * (*n + 1) + submat + matsiz - 1; 12321 12322 L140: 12323 return 0; 12324 12325 /* End of SLAED0 */ 12326 12327 } /* slaed0_ */
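The bookkeeping at the top of the routine (labels L10 through L40 in the listing) only decides where to cut the tridiagonal matrix: the problem size is halved repeatedly until every piece is at most SMLSIZ, the sizes are converted to cumulative end indices in IWORK, and the diagonal is then perturbed at each cut by |E(cut)| so the pieces decouple. A standalone sketch of that splitting step, with SMLSIZ passed as a plain argument instead of being obtained from ILAENV (the helper name is ours):

#include <algorithm>
#include <cstdio>
#include <vector>

// Split a problem of size n into pieces of size at most smlsiz by repeated
// halving (each pass doubles the number of subproblems, as in the L10 loop),
// then return the cumulative end index of each piece, in the spirit of IWORK.
static std::vector<int> slaed0_split_sketch(int n, int smlsiz) {
    std::vector<int> sizes = {n};
    while (*std::max_element(sizes.begin(), sizes.end()) > smlsiz) {
        std::vector<int> next;
        for (int s : sizes) {
            next.push_back(s / 2);        // IWORK(2*j-1) = IWORK(j) / 2
            next.push_back((s + 1) / 2);  // IWORK(2*j)   = (IWORK(j) + 1) / 2
        }
        sizes.swap(next);
    }
    for (int j = 1; j < (int)sizes.size(); ++j)   // prefix sums = submatrix end indices
        sizes[j] += sizes[j - 1];
    return sizes;
}

int main() {
    for (int end : slaed0_split_sketch(100, 25))
        std::printf("%d ", end);          // 25 50 75 100
    std::printf("\n");
    return 0;
}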
int slaed1_ ( integer * n, real * d__, real * q, integer * ldq, integer * indxq, real * rho, integer * cutpnt, real * work, integer * iwork, integer * info )
Definition at line 12663 of file lapackblas.cpp.
References c__1, c_n1, f2cmax, f2cmin, integer, q_ref, scopy_(), slaed2_(), slaed3_(), slamrg_(), and xerbla_().
Referenced by slaed0_().
12666 { 12667 /* -- LAPACK routine (version 3.0) -- 12668 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 12669 Courant Institute, Argonne National Lab, and Rice University 12670 June 30, 1999 12671 12672 12673 Purpose 12674 ======= 12675 12676 SLAED1 computes the updated eigensystem of a diagonal 12677 matrix after modification by a rank-one symmetric matrix. This 12678 routine is used only for the eigenproblem which requires all 12679 eigenvalues and eigenvectors of a tridiagonal matrix. SLAED7 handles 12680 the case in which eigenvalues only or eigenvalues and eigenvectors 12681 of a full symmetric matrix (which was reduced to tridiagonal form) 12682 are desired. 12683 12684 T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) 12685 12686 where Z = Q'u, u is a vector of length N with ones in the 12687 CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. 12688 12689 The eigenvectors of the original matrix are stored in Q, and the 12690 eigenvalues are in D. The algorithm consists of three stages: 12691 12692 The first stage consists of deflating the size of the problem 12693 when there are multiple eigenvalues or if there is a zero in 12694 the Z vector. For each such occurence the dimension of the 12695 secular equation problem is reduced by one. This stage is 12696 performed by the routine SLAED2. 12697 12698 The second stage consists of calculating the updated 12699 eigenvalues. This is done by finding the roots of the secular 12700 equation via the routine SLAED4 (as called by SLAED3). 12701 This routine also calculates the eigenvectors of the current 12702 problem. 12703 12704 The final stage consists of computing the updated eigenvectors 12705 directly using the updated eigenvalues. The eigenvectors for 12706 the current problem are multiplied with the eigenvectors from 12707 the overall problem. 12708 12709 Arguments 12710 ========= 12711 12712 N (input) INTEGER 12713 The dimension of the symmetric tridiagonal matrix. N >= 0. 12714 12715 D (input/output) REAL array, dimension (N) 12716 On entry, the eigenvalues of the rank-1-perturbed matrix. 12717 On exit, the eigenvalues of the repaired matrix. 12718 12719 Q (input/output) REAL array, dimension (LDQ,N) 12720 On entry, the eigenvectors of the rank-1-perturbed matrix. 12721 On exit, the eigenvectors of the repaired tridiagonal matrix. 12722 12723 LDQ (input) INTEGER 12724 The leading dimension of the array Q. LDQ >= max(1,N). 12725 12726 INDXQ (input/output) INTEGER array, dimension (N) 12727 On entry, the permutation which separately sorts the two 12728 subproblems in D into ascending order. 12729 On exit, the permutation which will reintegrate the 12730 subproblems back into sorted order, 12731 i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. 12732 12733 RHO (input) REAL 12734 The subdiagonal entry used to create the rank-1 modification. 12735 12736 CUTPNT (input) INTEGER 12737 The location of the last eigenvalue in the leading sub-matrix. 12738 min(1,N) <= CUTPNT <= N/2. 12739 12740 WORK (workspace) REAL array, dimension (4*N + N**2) 12741 12742 IWORK (workspace) INTEGER array, dimension (4*N) 12743 12744 INFO (output) INTEGER 12745 = 0: successful exit. 12746 < 0: if INFO = -i, the i-th argument had an illegal value. 
12747 > 0: if INFO = 1, an eigenvalue did not converge 12748 12749 Further Details 12750 =============== 12751 12752 Based on contributions by 12753 Jeff Rutter, Computer Science Division, University of California 12754 at Berkeley, USA 12755 Modified by Francoise Tisseur, University of Tennessee. 12756 12757 ===================================================================== 12758 12759 12760 Test the input parameters. 12761 12762 Parameter adjustments */ 12763 /* Table of constant values */ 12764 static integer c__1 = 1; 12765 static integer c_n1 = -1; 12766 12767 /* System generated locals */ 12768 integer q_dim1, q_offset, i__1, i__2; 12769 /* Local variables */ 12770 static integer indx, i__, k, indxc, indxp; 12771 extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 12772 integer *); 12773 static integer n1, n2; 12774 extern /* Subroutine */ int slaed2_(integer *, integer *, integer *, real 12775 *, real *, integer *, integer *, real *, real *, real *, real *, 12776 real *, integer *, integer *, integer *, integer *, integer *), 12777 slaed3_(integer *, integer *, integer *, real *, real *, integer * 12778 , real *, real *, real *, integer *, integer *, real *, real *, 12779 integer *); 12780 static integer idlmda, is, iw, iz; 12781 extern /* Subroutine */ int xerbla_(const char *, integer *), slamrg_( 12782 integer *, integer *, real *, integer *, integer *, integer *); 12783 static integer coltyp, iq2, cpp1; 12784 #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] 12785 12786 12787 --d__; 12788 q_dim1 = *ldq; 12789 q_offset = 1 + q_dim1 * 1; 12790 q -= q_offset; 12791 --indxq; 12792 --work; 12793 --iwork; 12794 12795 /* Function Body */ 12796 *info = 0; 12797 12798 if (*n < 0) { 12799 *info = -1; 12800 } else if (*ldq < f2cmax(1,*n)) { 12801 *info = -4; 12802 } else /* if(complicated condition) */ { 12803 /* Computing MIN */ 12804 i__1 = 1, i__2 = *n / 2; 12805 if (f2cmin(i__1,i__2) > *cutpnt || *n / 2 < *cutpnt) { 12806 *info = -7; 12807 } 12808 } 12809 if (*info != 0) { 12810 i__1 = -(*info); 12811 xerbla_("SLAED1", &i__1); 12812 return 0; 12813 } 12814 12815 /* Quick return if possible */ 12816 12817 if (*n == 0) { 12818 return 0; 12819 } 12820 12821 /* The following values are integer pointers which indicate 12822 the portion of the workspace 12823 used by a particular array in SLAED2 and SLAED3. */ 12824 12825 iz = 1; 12826 idlmda = iz + *n; 12827 iw = idlmda + *n; 12828 iq2 = iw + *n; 12829 12830 indx = 1; 12831 indxc = indx + *n; 12832 coltyp = indxc + *n; 12833 indxp = coltyp + *n; 12834 12835 12836 /* Form the z-vector which consists of the last row of Q_1 and the 12837 first row of Q_2. */ 12838 12839 scopy_(cutpnt, &q_ref(*cutpnt, 1), ldq, &work[iz], &c__1); 12840 cpp1 = *cutpnt + 1; 12841 i__1 = *n - *cutpnt; 12842 scopy_(&i__1, &q_ref(cpp1, cpp1), ldq, &work[iz + *cutpnt], &c__1); 12843 12844 /* Deflate eigenvalues. */ 12845 12846 slaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[ 12847 iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[ 12848 indxc], &iwork[indxp], &iwork[coltyp], info); 12849 12850 if (*info != 0) { 12851 goto L20; 12852 } 12853 12854 /* Solve Secular Equation. 
*/ 12855 12856 if (k != 0) { 12857 is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp + 12858 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2; 12859 slaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], 12860 &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[ 12861 is], info); 12862 if (*info != 0) { 12863 goto L20; 12864 } 12865 12866 /* Prepare the INDXQ sorting permutation. */ 12867 12868 n1 = k; 12869 n2 = *n - k; 12870 slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); 12871 } else { 12872 i__1 = *n; 12873 for (i__ = 1; i__ <= i__1; ++i__) { 12874 indxq[i__] = i__; 12875 /* L10: */ 12876 } 12877 } 12878 12879 L20: 12880 return 0; 12881 12882 /* End of SLAED1 */ 12883 12884 } /* slaed1_ */
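The only data SLAED1 assembles itself is the updating vector z, which the two scopy_ calls above take straight out of Q: the last row of the leading CUTPNT-by-CUTPNT block followed by the first row of the trailing block (this is Q'u for the rank-one cut vector u). A small illustration in 0-based, column-major C++ (helper name ours):

#include <cstdio>
#include <vector>

// z = [ last row of Q(0:cutpnt-1, 0:cutpnt-1) , first row of Q(cutpnt:n-1, cutpnt:n-1) ],
// i.e. the two partial rows that the scopy_ calls in SLAED1 gather into WORK(IZ).
static std::vector<float> slaed1_zvector_sketch(const std::vector<float>& q,
                                                int ldq, int n, int cutpnt) {
    std::vector<float> z(n);
    for (int j = 0; j < cutpnt; ++j)        // row cutpnt-1 of the leading block
        z[j] = q[(cutpnt - 1) + j * ldq];
    for (int j = cutpnt; j < n; ++j)        // row cutpnt of the trailing block
        z[j] = q[cutpnt + j * ldq];
    return z;
}

int main() {
    // 4-by-4 block-diagonal Q with two 2-by-2 identity blocks, column-major.
    int n = 4, ldq = 4, cutpnt = 2;
    std::vector<float> q(n * n, 0.0f);
    for (int i = 0; i < n; ++i) q[i + i * ldq] = 1.0f;
    for (float v : slaed1_zvector_sketch(q, ldq, n, cutpnt))
        std::printf("%4.1f ", v);           // 0.0 1.0 1.0 0.0
    std::printf("\n");
    return 0;
}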
int slaed2_ ( integer * k, integer * n, integer * n1, real * d__, real * q, integer * ldq, integer * indxq, real * rho, real * z__, real * dlamda, real * w, real * q2, integer * indx, integer * indxc, integer * indxp, integer * coltyp, integer * info )
Definition at line 13569 of file lapackblas.cpp.
References dabs, df2cmax, f2cmax, f2cmin, integer, isamax_(), q_ref, scopy_(), slacpy_(), slamch_(), slamrg_(), slapy2_(), sqrt(), srot_(), sscal_(), t, and xerbla_().
Referenced by slaed1_().
13573 { 13574 /* System generated locals */ 13575 integer q_dim1, q_offset, i__1, i__2; 13576 real r__1, r__2, r__3, r__4; 13577 13578 /* Builtin functions */ 13579 // double sqrt(doublereal); 13580 13581 /* Local variables */ 13582 static integer imax, jmax, ctot[4]; 13583 extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 13584 integer *, real *, real *); 13585 static real c__; 13586 static integer i__, j; 13587 static real s, t; 13588 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); 13589 static integer k2; 13590 extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 13591 integer *); 13592 static integer n2; 13593 extern doublereal slapy2_(real *, real *); 13594 static integer ct, nj, pj, js; 13595 extern doublereal slamch_(const char *); 13596 extern /* Subroutine */ int xerbla_(const char *, integer *); 13597 extern integer isamax_(integer *, real *, integer *); 13598 extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer 13599 *, integer *, integer *), slacpy_(const char *, integer *, integer *, 13600 real *, integer *, real *, integer *); 13601 static integer iq1, iq2, n1p1; 13602 static real eps, tau, tol; 13603 static integer psm[4]; 13604 13605 13606 #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] 13607 13608 13609 /* -- LAPACK routine (version 3.0) -- 13610 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 13611 Courant Institute, Argonne National Lab, and Rice University 13612 October 31, 1999 13613 13614 13615 Purpose 13616 ======= 13617 13618 SLAED2 merges the two sets of eigenvalues together into a single 13619 sorted set. Then it tries to deflate the size of the problem. 13620 There are two ways in which deflation can occur: when two or more 13621 eigenvalues are close together or if there is a tiny entry in the 13622 Z vector. For each such occurrence the order of the related secular 13623 equation problem is reduced by one. 13624 13625 Arguments 13626 ========= 13627 13628 K (output) INTEGER 13629 The number of non-deflated eigenvalues, and the order of the 13630 related secular equation. 0 <= K <=N. 13631 13632 N (input) INTEGER 13633 The dimension of the symmetric tridiagonal matrix. N >= 0. 13634 13635 N1 (input) INTEGER 13636 The location of the last eigenvalue in the leading sub-matrix. 13637 f2cmin(1,N) <= N1 <= N/2. 13638 13639 D (input/output) REAL array, dimension (N) 13640 On entry, D contains the eigenvalues of the two submatrices to 13641 be combined. 13642 On exit, D contains the trailing (N-K) updated eigenvalues 13643 (those which were deflated) sorted into increasing order. 13644 13645 Q (input/output) REAL array, dimension (LDQ, N) 13646 On entry, Q contains the eigenvectors of two submatrices in 13647 the two square blocks with corners at (1,1), (N1,N1) 13648 and (N1+1, N1+1), (N,N). 13649 On exit, Q contains the trailing (N-K) updated eigenvectors 13650 (those which were deflated) in its last N-K columns. 13651 13652 LDQ (input) INTEGER 13653 The leading dimension of the array Q. LDQ >= max(1,N). 13654 13655 INDXQ (input/output) INTEGER array, dimension (N) 13656 The permutation which separately sorts the two sub-problems 13657 in D into ascending order. Note that elements in the second 13658 half of this permutation must first have N1 added to their 13659 values. Destroyed on exit. 
13660 13661 RHO (input/output) REAL 13662 On entry, the off-diagonal element associated with the rank-1 13663 cut which originally split the two submatrices which are now 13664 being recombined. 13665 On exit, RHO has been modified to the value required by 13666 SLAED3. 13667 13668 Z (input) REAL array, dimension (N) 13669 On entry, Z contains the updating vector (the last 13670 row of the first sub-eigenvector matrix and the first row of 13671 the second sub-eigenvector matrix). 13672 On exit, the contents of Z have been destroyed by the updating 13673 process. 13674 13675 DLAMDA (output) REAL array, dimension (N) 13676 A copy of the first K eigenvalues which will be used by 13677 SLAED3 to form the secular equation. 13678 13679 W (output) REAL array, dimension (N) 13680 The first k values of the final deflation-altered z-vector 13681 which will be passed to SLAED3. 13682 13683 Q2 (output) REAL array, dimension (N1**2+(N-N1)**2) 13684 A copy of the first K eigenvectors which will be used by 13685 SLAED3 in a matrix multiply (SGEMM) to solve for the new 13686 eigenvectors. 13687 13688 INDX (workspace) INTEGER array, dimension (N) 13689 The permutation used to sort the contents of DLAMDA into 13690 ascending order. 13691 13692 INDXC (output) INTEGER array, dimension (N) 13693 The permutation used to arrange the columns of the deflated 13694 Q matrix into three groups: the first group contains non-zero 13695 elements only at and above N1, the second contains 13696 non-zero elements only below N1, and the third is dense. 13697 13698 INDXP (workspace) INTEGER array, dimension (N) 13699 The permutation used to place deflated values of D at the end 13700 of the array. INDXP(1:K) points to the nondeflated D-values 13701 and INDXP(K+1:N) points to the deflated eigenvalues. 13702 13703 COLTYP (workspace/output) INTEGER array, dimension (N) 13704 During execution, a label which will indicate which of the 13705 following types a column in the Q2 matrix is: 13706 1 : non-zero in the upper half only; 13707 2 : dense; 13708 3 : non-zero in the lower half only; 13709 4 : deflated. 13710 On exit, COLTYP(i) is the number of columns of type i, 13711 for i=1 to 4 only. 13712 13713 INFO (output) INTEGER 13714 = 0: successful exit. 13715 < 0: if INFO = -i, the i-th argument had an illegal value. 13716 13717 Further Details 13718 =============== 13719 13720 Based on contributions by 13721 Jeff Rutter, Computer Science Division, University of California 13722 at Berkeley, USA 13723 Modified by Francoise Tisseur, University of Tennessee. 13724 13725 ===================================================================== 13726 13727 13728 Test the input parameters. 
13729 13730 Parameter adjustments */ 13731 --d__; 13732 q_dim1 = *ldq; 13733 q_offset = 1 + q_dim1 * 1; 13734 q -= q_offset; 13735 --indxq; 13736 --z__; 13737 --dlamda; 13738 --w; 13739 --q2; 13740 --indx; 13741 --indxc; 13742 --indxp; 13743 --coltyp; 13744 13745 /* Function Body */ 13746 *info = 0; 13747 13748 if (*n < 0) { 13749 *info = -2; 13750 } else if (*ldq < f2cmax(1,*n)) { 13751 *info = -6; 13752 } else /* if(complicated condition) */ { 13753 /* Computing F2CMIN */ 13754 i__1 = 1, i__2 = *n / 2; 13755 if (f2cmin(i__1,i__2) > *n1 || *n / 2 < *n1) { 13756 *info = -3; 13757 } 13758 } 13759 if (*info != 0) { 13760 i__1 = -(*info); 13761 xerbla_("SLAED2", &i__1); 13762 return 0; 13763 } 13764 13765 /* Quick return if possible */ 13766 13767 if (*n == 0) { 13768 return 0; 13769 } 13770 13771 n2 = *n - *n1; 13772 n1p1 = *n1 + 1; 13773 13774 if (*rho < 0.f) { 13775 sscal_(&n2, &c_b3, &z__[n1p1], &c__1); 13776 } 13777 13778 /* Normalize z so that norm(z) = 1. Since z is the concatenation of 13779 two normalized vectors, norm2(z) = sqrt(2). */ 13780 13781 t = 1.f / sqrt(2.f); 13782 sscal_(n, &t, &z__[1], &c__1); 13783 13784 /* RHO = ABS( norm(z)**2 * RHO ) */ 13785 13786 *rho = (r__1 = *rho * 2.f, dabs(r__1)); 13787 13788 /* Sort the eigenvalues into increasing order */ 13789 13790 i__1 = *n; 13791 for (i__ = n1p1; i__ <= i__1; ++i__) { 13792 indxq[i__] += *n1; 13793 /* L10: */ 13794 } 13795 13796 /* re-integrate the deflated parts from the last pass */ 13797 13798 i__1 = *n; 13799 for (i__ = 1; i__ <= i__1; ++i__) { 13800 dlamda[i__] = d__[indxq[i__]]; 13801 /* L20: */ 13802 } 13803 slamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]); 13804 i__1 = *n; 13805 for (i__ = 1; i__ <= i__1; ++i__) { 13806 indx[i__] = indxq[indxc[i__]]; 13807 /* L30: */ 13808 } 13809 13810 /* Calculate the allowable deflation tolerance */ 13811 13812 imax = isamax_(n, &z__[1], &c__1); 13813 jmax = isamax_(n, &d__[1], &c__1); 13814 eps = slamch_("Epsilon"); 13815 /* Computing MAX */ 13816 r__3 = (r__1 = d__[jmax], dabs(r__1)), r__4 = (r__2 = z__[imax], dabs( 13817 r__2)); 13818 tol = eps * 8.f * df2cmax(r__3,r__4); 13819 13820 /* If the rank-1 modifier is small enough, no more needs to be done 13821 except to reorganize Q so that its columns correspond with the 13822 elements in D. */ 13823 13824 if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { 13825 *k = 0; 13826 iq2 = 1; 13827 i__1 = *n; 13828 for (j = 1; j <= i__1; ++j) { 13829 i__ = indx[j]; 13830 scopy_(n, &q_ref(1, i__), &c__1, &q2[iq2], &c__1); 13831 dlamda[j] = d__[i__]; 13832 iq2 += *n; 13833 /* L40: */ 13834 } 13835 slacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq); 13836 scopy_(n, &dlamda[1], &c__1, &d__[1], &c__1); 13837 goto L190; 13838 } 13839 13840 /* If there are multiple eigenvalues then the problem deflates. Here 13841 the number of equal eigenvalues are found. As each equal 13842 eigenvalue is found, an elementary reflector is computed to rotate 13843 the corresponding eigensubspace so that the corresponding 13844 components of Z are zero in this new basis. */ 13845 13846 i__1 = *n1; 13847 for (i__ = 1; i__ <= i__1; ++i__) { 13848 coltyp[i__] = 1; 13849 /* L50: */ 13850 } 13851 i__1 = *n; 13852 for (i__ = n1p1; i__ <= i__1; ++i__) { 13853 coltyp[i__] = 3; 13854 /* L60: */ 13855 } 13856 13857 13858 *k = 0; 13859 k2 = *n + 1; 13860 i__1 = *n; 13861 for (j = 1; j <= i__1; ++j) { 13862 nj = indx[j]; 13863 if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) { 13864 13865 /* Deflate due to small z component. 
*/ 13866 13867 --k2; 13868 coltyp[nj] = 4; 13869 indxp[k2] = nj; 13870 if (j == *n) { 13871 goto L100; 13872 } 13873 } else { 13874 pj = nj; 13875 goto L80; 13876 } 13877 /* L70: */ 13878 } 13879 L80: 13880 ++j; 13881 nj = indx[j]; 13882 if (j > *n) { 13883 goto L100; 13884 } 13885 if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) { 13886 13887 /* Deflate due to small z component. */ 13888 13889 --k2; 13890 coltyp[nj] = 4; 13891 indxp[k2] = nj; 13892 } else { 13893 13894 /* Check if eigenvalues are close enough to allow deflation. */ 13895 13896 s = z__[pj]; 13897 c__ = z__[nj]; 13898 13899 /* Find sqrt(a**2+b**2) without overflow or 13900 destructive underflow. */ 13901 13902 tau = slapy2_(&c__, &s); 13903 t = d__[nj] - d__[pj]; 13904 c__ /= tau; 13905 s = -s / tau; 13906 if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { 13907 13908 /* Deflation is possible. */ 13909 13910 z__[nj] = tau; 13911 z__[pj] = 0.f; 13912 if (coltyp[nj] != coltyp[pj]) { 13913 coltyp[nj] = 2; 13914 } 13915 coltyp[pj] = 4; 13916 srot_(n, &q_ref(1, pj), &c__1, &q_ref(1, nj), &c__1, &c__, &s); 13917 /* Computing 2nd power */ 13918 r__1 = c__; 13919 /* Computing 2nd power */ 13920 r__2 = s; 13921 t = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2); 13922 /* Computing 2nd power */ 13923 r__1 = s; 13924 /* Computing 2nd power */ 13925 r__2 = c__; 13926 d__[nj] = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2); 13927 d__[pj] = t; 13928 --k2; 13929 i__ = 1; 13930 L90: 13931 if (k2 + i__ <= *n) { 13932 if (d__[pj] < d__[indxp[k2 + i__]]) { 13933 indxp[k2 + i__ - 1] = indxp[k2 + i__]; 13934 indxp[k2 + i__] = pj; 13935 ++i__; 13936 goto L90; 13937 } else { 13938 indxp[k2 + i__ - 1] = pj; 13939 } 13940 } else { 13941 indxp[k2 + i__ - 1] = pj; 13942 } 13943 pj = nj; 13944 } else { 13945 ++(*k); 13946 dlamda[*k] = d__[pj]; 13947 w[*k] = z__[pj]; 13948 indxp[*k] = pj; 13949 pj = nj; 13950 } 13951 } 13952 goto L80; 13953 L100: 13954 13955 /* Record the last eigenvalue. */ 13956 13957 ++(*k); 13958 dlamda[*k] = d__[pj]; 13959 w[*k] = z__[pj]; 13960 indxp[*k] = pj; 13961 13962 /* Count up the total number of the various types of columns, then 13963 form a permutation which positions the four column types into 13964 four uniform groups (although one or more of these groups may be 13965 empty). */ 13966 13967 for (j = 1; j <= 4; ++j) { 13968 ctot[j - 1] = 0; 13969 /* L110: */ 13970 } 13971 i__1 = *n; 13972 for (j = 1; j <= i__1; ++j) { 13973 ct = coltyp[j]; 13974 ++ctot[ct - 1]; 13975 /* L120: */ 13976 } 13977 13978 /* PSM(*) = Position in SubMatrix (of types 1 through 4) */ 13979 13980 psm[0] = 1; 13981 psm[1] = ctot[0] + 1; 13982 psm[2] = psm[1] + ctot[1]; 13983 psm[3] = psm[2] + ctot[2]; 13984 *k = *n - ctot[3]; 13985 13986 /* Fill out the INDXC array so that the permutation which it induces 13987 will place all type-1 columns first, all type-2 columns next, 13988 then all type-3's, and finally all type-4's. */ 13989 13990 i__1 = *n; 13991 for (j = 1; j <= i__1; ++j) { 13992 js = indxp[j]; 13993 ct = coltyp[js]; 13994 indx[psm[ct - 1]] = js; 13995 indxc[psm[ct - 1]] = j; 13996 ++psm[ct - 1]; 13997 /* L130: */ 13998 } 13999 14000 /* Sort the eigenvalues and corresponding eigenvectors into DLAMDA 14001 and Q2 respectively. The eigenvalues/vectors which were not 14002 deflated go into the first K slots of DLAMDA and Q2 respectively, 14003 while those which were deflated go into the last N - K slots. 
*/ 14004 14005 i__ = 1; 14006 iq1 = 1; 14007 iq2 = (ctot[0] + ctot[1]) * *n1 + 1; 14008 i__1 = ctot[0]; 14009 for (j = 1; j <= i__1; ++j) { 14010 js = indx[i__]; 14011 scopy_(n1, &q_ref(1, js), &c__1, &q2[iq1], &c__1); 14012 z__[i__] = d__[js]; 14013 ++i__; 14014 iq1 += *n1; 14015 /* L140: */ 14016 } 14017 14018 i__1 = ctot[1]; 14019 for (j = 1; j <= i__1; ++j) { 14020 js = indx[i__]; 14021 scopy_(n1, &q_ref(1, js), &c__1, &q2[iq1], &c__1); 14022 scopy_(&n2, &q_ref(*n1 + 1, js), &c__1, &q2[iq2], &c__1); 14023 z__[i__] = d__[js]; 14024 ++i__; 14025 iq1 += *n1; 14026 iq2 += n2; 14027 /* L150: */ 14028 } 14029 14030 i__1 = ctot[2]; 14031 for (j = 1; j <= i__1; ++j) { 14032 js = indx[i__]; 14033 scopy_(&n2, &q_ref(*n1 + 1, js), &c__1, &q2[iq2], &c__1); 14034 z__[i__] = d__[js]; 14035 ++i__; 14036 iq2 += n2; 14037 /* L160: */ 14038 } 14039 14040 iq1 = iq2; 14041 i__1 = ctot[3]; 14042 for (j = 1; j <= i__1; ++j) { 14043 js = indx[i__]; 14044 scopy_(n, &q_ref(1, js), &c__1, &q2[iq2], &c__1); 14045 iq2 += *n; 14046 z__[i__] = d__[js]; 14047 ++i__; 14048 /* L170: */ 14049 } 14050 14051 /* The deflated eigenvalues and their corresponding vectors go back 14052 into the last N - K slots of D and Q respectively. */ 14053 14054 slacpy_("A", n, &ctot[3], &q2[iq1], n, &q_ref(1, *k + 1), ldq); 14055 i__1 = *n - *k; 14056 scopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1); 14057 14058 /* Copy CTOT into COLTYP for referencing in SLAED3. */ 14059 14060 for (j = 1; j <= 4; ++j) { 14061 coltyp[j] = ctot[j - 1]; 14062 /* L180: */ 14063 } 14064 14065 L190: 14066 return 0; 14067 14068 /* End of SLAED2 */ 14069 14070 } /* slaed2_ */
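Both deflation tests in the listing compare against the single tolerance TOL = 8 * eps * max(|D|_max, |Z|_max): a component deflates outright when RHO*|Z(j)| <= TOL, and a pair of nearly equal eigenvalues is merged when the rotated coupling |T*C*S| falls below the same bound. The sketch below shows only the first (small-z) test, with std::numeric_limits<float>::epsilon() standing in for slamch_("Epsilon") and a helper name of our own:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <limits>
#include <vector>

// Flag the components that SLAED2 would deflate because their z entry is
// negligible. The close-eigenvalue (Givens rotation) criterion is omitted.
static std::vector<bool> slaed2_small_z_sketch(const std::vector<float>& d,
                                               const std::vector<float>& z,
                                               float rho) {
    float dmax = 0.0f, zmax = 0.0f;
    for (float v : d) dmax = std::max(dmax, std::fabs(v));
    for (float v : z) zmax = std::max(zmax, std::fabs(v));
    float eps = std::numeric_limits<float>::epsilon();
    float tol = 8.0f * eps * std::max(dmax, zmax);

    std::vector<bool> deflated(z.size());
    for (int j = 0; j < (int)z.size(); ++j)
        deflated[j] = (rho * std::fabs(z[j]) <= tol);
    return deflated;
}

int main() {
    std::vector<float> d = {1.0f, 2.0f, 3.0f};
    std::vector<float> z = {0.7f, 1e-9f, 0.7f};          // middle entry is tiny
    std::vector<bool> defl = slaed2_small_z_sketch(d, z, 1.0f);
    for (int j = 0; j < (int)defl.size(); ++j)
        std::printf("component %d: %s\n", j, defl[j] ? "deflated" : "kept");
    return 0;
}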
int slaed3_ ( integer * k, integer * n, integer * n1, real * d__, real * q, integer * ldq, real * rho, real * dlamda, real * q2, integer * indx, integer * ctot, real * w, real * s, integer * info )
Definition at line 15616 of file lapackblas.cpp.
References f2cmax, integer, q_ref, r_sign(), scopy_(), sgemm_(), slacpy_(), slaed4_(), slamc3_(), slaset_(), snrm2_(), sqrt(), and xerbla_().
Referenced by slaed1_().
15619 { 15620 /* -- LAPACK routine (version 3.0) -- 15621 Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, 15622 Courant Institute, NAG Ltd., and Rice University 15623 June 30, 1999 15624 15625 15626 Purpose 15627 ======= 15628 15629 SLAED3 finds the roots of the secular equation, as defined by the 15630 values in D, W, and RHO, between 1 and K. It makes the 15631 appropriate calls to SLAED4 and then updates the eigenvectors by 15632 multiplying the matrix of eigenvectors of the pair of eigensystems 15633 being combined by the matrix of eigenvectors of the K-by-K system 15634 which is solved here. 15635 15636 This code makes very mild assumptions about floating point 15637 arithmetic. It will work on machines with a guard digit in 15638 add/subtract, or on those binary machines without guard digits 15639 which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. 15640 It could conceivably fail on hexadecimal or decimal machines 15641 without guard digits, but we know of none. 15642 15643 Arguments 15644 ========= 15645 15646 K (input) INTEGER 15647 The number of terms in the rational function to be solved by 15648 SLAED4. K >= 0. 15649 15650 N (input) INTEGER 15651 The number of rows and columns in the Q matrix. 15652 N >= K (deflation may result in N>K). 15653 15654 N1 (input) INTEGER 15655 The location of the last eigenvalue in the leading submatrix. 15656 min(1,N) <= N1 <= N/2. 15657 15658 D (output) REAL array, dimension (N) 15659 D(I) contains the updated eigenvalues for 15660 1 <= I <= K. 15661 15662 Q (output) REAL array, dimension (LDQ,N) 15663 Initially the first K columns are used as workspace. 15664 On output the columns 1 to K contain 15665 the updated eigenvectors. 15666 15667 LDQ (input) INTEGER 15668 The leading dimension of the array Q. LDQ >= max(1,N). 15669 15670 RHO (input) REAL 15671 The value of the parameter in the rank one update equation. 15672 RHO >= 0 required. 15673 15674 DLAMDA (input/output) REAL array, dimension (K) 15675 The first K elements of this array contain the old roots 15676 of the deflated updating problem. These are the poles 15677 of the secular equation. May be changed on output by 15678 having lowest order bit set to zero on Cray X-MP, Cray Y-MP, 15679 Cray-2, or Cray C-90, as described above. 15680 15681 Q2 (input) REAL array, dimension (LDQ2, N) 15682 The first K columns of this matrix contain the non-deflated 15683 eigenvectors for the split problem. 15684 15685 INDX (input) INTEGER array, dimension (N) 15686 The permutation used to arrange the columns of the deflated 15687 Q matrix into three groups (see SLAED2). 15688 The rows of the eigenvectors found by SLAED4 must be likewise 15689 permuted before the matrix multiply can take place. 15690 15691 CTOT (input) INTEGER array, dimension (4) 15692 A count of the total number of the various types of columns 15693 in Q, as described in INDX. The fourth column type is any 15694 column which has been deflated. 15695 15696 W (input/output) REAL array, dimension (K) 15697 The first K elements of this array contain the components 15698 of the deflation-adjusted updating vector. Destroyed on 15699 output. 15700 15701 S (workspace) REAL array, dimension (N1 + 1)*K 15702 Will contain the eigenvectors of the repaired matrix which 15703 will be multiplied by the previously accumulated eigenvectors 15704 to update the system. 15705 15706 LDS (input) INTEGER 15707 The leading dimension of S. LDS >= max(1,K). 15708 15709 INFO (output) INTEGER 15710 = 0: successful exit. 
15711 < 0: if INFO = -i, the i-th argument had an illegal value. 15712 > 0: if INFO = 1, an eigenvalue did not converge 15713 15714 Further Details 15715 =============== 15716 15717 Based on contributions by 15718 Jeff Rutter, Computer Science Division, University of California 15719 at Berkeley, USA 15720 Modified by Francoise Tisseur, University of Tennessee. 15721 15722 ===================================================================== 15723 15724 15725 Test the input parameters. 15726 15727 Parameter adjustments */ 15728 /* Table of constant values */ 15729 static integer c__1 = 1; 15730 static real c_b22 = 1.f; 15731 static real c_b23 = 0.f; 15732 15733 /* System generated locals */ 15734 integer q_dim1, q_offset, i__1, i__2; 15735 real r__1; 15736 /* Builtin functions */ 15737 // double sqrt(doublereal), r_sign(real *, real *); 15738 /* Local variables */ 15739 static real temp; 15740 extern doublereal snrm2_(integer *, real *, integer *); 15741 static integer i__, j; 15742 extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *, 15743 integer *, real *, real *, integer *, real *, integer *, real *, 15744 real *, integer *), scopy_(integer *, real *, 15745 integer *, real *, integer *); 15746 static integer n2; 15747 extern /* Subroutine */ int slaed4_(integer *, integer *, real *, real *, 15748 real *, real *, real *, integer *); 15749 extern doublereal slamc3_(real *, real *); 15750 static integer n12, ii, n23; 15751 extern /* Subroutine */ int xerbla_(const char *, integer *), slacpy_( 15752 const char *, integer *, integer *, real *, integer *, real *, integer * 15753 ), slaset_(const char *, integer *, integer *, real *, real *, 15754 real *, integer *); 15755 static integer iq2; 15756 #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] 15757 15758 15759 --d__; 15760 q_dim1 = *ldq; 15761 q_offset = 1 + q_dim1 * 1; 15762 q -= q_offset; 15763 --dlamda; 15764 --q2; 15765 --indx; 15766 --ctot; 15767 --w; 15768 --s; 15769 15770 /* Function Body */ 15771 *info = 0; 15772 15773 if (*k < 0) { 15774 *info = -1; 15775 } else if (*n < *k) { 15776 *info = -2; 15777 } else if (*ldq < f2cmax(1,*n)) { 15778 *info = -6; 15779 } 15780 if (*info != 0) { 15781 i__1 = -(*info); 15782 xerbla_("SLAED3", &i__1); 15783 return 0; 15784 } 15785 15786 /* Quick return if possible */ 15787 15788 if (*k == 0) { 15789 return 0; 15790 } 15791 15792 /* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can 15793 be computed with high relative accuracy (barring over/underflow). 15794 This is a problem on machines without a guard digit in 15795 add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). 15796 The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), 15797 which on any of these machines zeros out the bottommost 15798 bit of DLAMDA(I) if it is 1; this makes the subsequent 15799 subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation 15800 occurs. On binary machines with a guard digit (almost all 15801 machines) it does not change DLAMDA(I) at all. On hexadecimal 15802 and decimal machines with a guard digit, it slightly 15803 changes the bottommost bits of DLAMDA(I). It does not account 15804 for hexadecimal or decimal machines without guard digits 15805 (we know of none). We use a subroutine call to compute 15806 2*DLAMBDA(I) to prevent optimizing compilers from eliminating 15807 this code. 
*/ 15808 15809 i__1 = *k; 15810 for (i__ = 1; i__ <= i__1; ++i__) { 15811 dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; 15812 /* L10: */ 15813 } 15814 15815 i__1 = *k; 15816 for (j = 1; j <= i__1; ++j) { 15817 slaed4_(k, &j, &dlamda[1], &w[1], &q_ref(1, j), rho, &d__[j], info); 15818 15819 /* If the zero finder fails, the computation is terminated. */ 15820 15821 if (*info != 0) { 15822 goto L120; 15823 } 15824 /* L20: */ 15825 } 15826 15827 if (*k == 1) { 15828 goto L110; 15829 } 15830 if (*k == 2) { 15831 i__1 = *k; 15832 for (j = 1; j <= i__1; ++j) { 15833 w[1] = q_ref(1, j); 15834 w[2] = q_ref(2, j); 15835 ii = indx[1]; 15836 q_ref(1, j) = w[ii]; 15837 ii = indx[2]; 15838 q_ref(2, j) = w[ii]; 15839 /* L30: */ 15840 } 15841 goto L110; 15842 } 15843 15844 /* Compute updated W. */ 15845 15846 scopy_(k, &w[1], &c__1, &s[1], &c__1); 15847 15848 /* Initialize W(I) = Q(I,I) */ 15849 15850 i__1 = *ldq + 1; 15851 scopy_(k, &q[q_offset], &i__1, &w[1], &c__1); 15852 i__1 = *k; 15853 for (j = 1; j <= i__1; ++j) { 15854 i__2 = j - 1; 15855 for (i__ = 1; i__ <= i__2; ++i__) { 15856 w[i__] *= q_ref(i__, j) / (dlamda[i__] - dlamda[j]); 15857 /* L40: */ 15858 } 15859 i__2 = *k; 15860 for (i__ = j + 1; i__ <= i__2; ++i__) { 15861 w[i__] *= q_ref(i__, j) / (dlamda[i__] - dlamda[j]); 15862 /* L50: */ 15863 } 15864 /* L60: */ 15865 } 15866 i__1 = *k; 15867 for (i__ = 1; i__ <= i__1; ++i__) { 15868 r__1 = sqrt(-w[i__]); 15869 w[i__] = r_sign(&r__1, &s[i__]); 15870 /* L70: */ 15871 } 15872 15873 /* Compute eigenvectors of the modified rank-1 modification. */ 15874 15875 i__1 = *k; 15876 for (j = 1; j <= i__1; ++j) { 15877 i__2 = *k; 15878 for (i__ = 1; i__ <= i__2; ++i__) { 15879 s[i__] = w[i__] / q_ref(i__, j); 15880 /* L80: */ 15881 } 15882 temp = snrm2_(k, &s[1], &c__1); 15883 i__2 = *k; 15884 for (i__ = 1; i__ <= i__2; ++i__) { 15885 ii = indx[i__]; 15886 q_ref(i__, j) = s[ii] / temp; 15887 /* L90: */ 15888 } 15889 /* L100: */ 15890 } 15891 15892 /* Compute the updated eigenvectors. */ 15893 15894 L110: 15895 15896 n2 = *n - *n1; 15897 n12 = ctot[1] + ctot[2]; 15898 n23 = ctot[2] + ctot[3]; 15899 15900 slacpy_("A", &n23, k, &q_ref(ctot[1] + 1, 1), ldq, &s[1], &n23) 15901 ; 15902 iq2 = *n1 * n12 + 1; 15903 if (n23 != 0) { 15904 sgemm_("N", "N", &n2, k, &n23, &c_b22, &q2[iq2], &n2, &s[1], &n23, & 15905 c_b23, &q_ref(*n1 + 1, 1), ldq); 15906 } else { 15907 slaset_("A", &n2, k, &c_b23, &c_b23, &q_ref(*n1 + 1, 1), ldq); 15908 } 15909 15910 slacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12); 15911 if (n12 != 0) { 15912 sgemm_("N", "N", n1, k, &n12, &c_b22, &q2[1], n1, &s[1], &n12, &c_b23, 15913 &q[q_offset], ldq); 15914 } else { 15915 slaset_("A", n1, k, &c_b23, &c_b23, &q_ref(1, 1), ldq); 15916 } 15917 15918 15919 L120: 15920 return 0; 15921 15922 /* End of SLAED3 */ 15923 15924 } /* slaed3_ */
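Once SLAED4 has delivered the root lambda_j, the corresponding eigenvector of the deflated rank-one problem has components proportional to z_i / (d_i - lambda_j); the L80/L90 loops above build exactly that vector (using the deflation-adjusted W in place of Z and applying the INDX permutation) and normalize it before the final SGEMM back-multiplication. A minimal restatement of that step, with rho = 1, no permutation, and a helper name of our own:

#include <cmath>
#include <cstdio>
#include <vector>

// Unit-norm eigenvector of diag(d) + z*z' (z of unit 2-norm) belonging to the
// eigenvalue lam: components z_i / (d_i - lam), then scaled to unit length.
static std::vector<float> slaed3_vector_sketch(const std::vector<float>& d,
                                               const std::vector<float>& z,
                                               float lam) {
    std::vector<float> v(d.size());
    float nrm = 0.0f;
    for (int i = 0; i < (int)d.size(); ++i) {
        v[i] = z[i] / (d[i] - lam);
        nrm += v[i] * v[i];
    }
    nrm = std::sqrt(nrm);
    for (float& x : v) x /= nrm;
    return v;
}

int main() {
    // diag(1,2) + z*z' with z = (1/sqrt 2, 1/sqrt 2) has eigenvalues 2 -+ sqrt(1/2).
    std::vector<float> d = {1.0f, 2.0f};
    std::vector<float> z = {std::sqrt(0.5f), std::sqrt(0.5f)};
    float lam = 2.0f + std::sqrt(0.5f);                 // the larger eigenvalue
    std::vector<float> v = slaed3_vector_sketch(d, z, lam);
    std::printf("v = (%f, %f)\n", v[0], v[1]);
    return 0;
}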
int slaed4_ ( integer * n, integer * i__, real * d__, real * z__, real * delta, real * rho, real * dlam, integer * info )
Definition at line 14435 of file lapackblas.cpp.
References dabs, df2cmax, df2cmin, FALSE_, integer, phi, slaed5_(), slaed6_(), slamch_(), sqrt(), and TRUE_.
Referenced by slaed3_(), and slaed9_().
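The equation solved by SLAED4 (in the listing that follows) is the secular equation f(lambda) = 1/RHO + sum_j Z(j)^2 / (D(j) - lambda) = 0, whose i-th root lies strictly between the poles D(i) and D(i+1), or between D(N) and D(N)+RHO for the last root. The routine itself uses rational interpolation with careful origin shifts; the sketch below finds the same root by plain bisection, purely to illustrate the equation and the bracketing, and is not the routine's actual iteration (helper names ours; d sorted strictly increasing, z of unit norm, rho > 0 assumed):

#include <cmath>
#include <cstdio>
#include <vector>

// Secular function f(lam) = 1/rho + sum_j z_j^2 / (d_j - lam)
// for the rank-one update diag(d) + rho * z * z'.
static double secular(const std::vector<double>& d, const std::vector<double>& z,
                      double rho, double lam) {
    double f = 1.0 / rho;
    for (int j = 0; j < (int)d.size(); ++j)
        f += z[j] * z[j] / (d[j] - lam);
    return f;
}

// i-th updated eigenvalue (0-based): root of f in (d[i], d[i+1]),
// or in (d[n-1], d[n-1] + rho] for the last one. f is increasing between
// consecutive poles, so bisection on the sign of f converges.
static double slaed4_bisection_sketch(const std::vector<double>& d,
                                      const std::vector<double>& z,
                                      double rho, int i) {
    double lo = d[i];
    double hi = (i + 1 < (int)d.size()) ? d[i + 1] : d.back() + rho;
    for (int it = 0; it < 200; ++it) {
        double mid = 0.5 * (lo + hi);
        if (mid <= lo || mid >= hi) break;            // interval exhausted in double precision
        if (secular(d, z, rho, mid) < 0.0) lo = mid; else hi = mid;
    }
    return 0.5 * (lo + hi);
}

int main() {
    std::vector<double> d = {1.0, 2.0};
    std::vector<double> z = {std::sqrt(0.5), std::sqrt(0.5)};   // unit 2-norm
    double rho = 1.0;
    // diag(1,2) + z*z' has eigenvalues 2 -+ sqrt(1/2), i.e. ~1.292893 and ~2.707107.
    std::printf("lambda_1 = %.6f\n", slaed4_bisection_sketch(d, z, rho, 0));
    std::printf("lambda_2 = %.6f\n", slaed4_bisection_sketch(d, z, rho, 1));
    return 0;
}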
14437 { 14438 /* -- LAPACK routine (version 3.0) -- 14439 Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, 14440 Courant Institute, NAG Ltd., and Rice University 14441 December 23, 1999 14442 14443 14444 Purpose 14445 ======= 14446 14447 This subroutine computes the I-th updated eigenvalue of a symmetric 14448 rank-one modification to a diagonal matrix whose elements are 14449 given in the array d, and that 14450 14451 D(i) < D(j) for i < j 14452 14453 and that RHO > 0. This is arranged by the calling routine, and is 14454 no loss in generality. The rank-one modified system is thus 14455 14456 diag( D ) + RHO * Z * Z_transpose. 14457 14458 where we assume the Euclidean norm of Z is 1. 14459 14460 The method consists of approximating the rational functions in the 14461 secular equation by simpler interpolating rational functions. 14462 14463 Arguments 14464 ========= 14465 14466 N (input) INTEGER 14467 The length of all arrays. 14468 14469 I (input) INTEGER 14470 The index of the eigenvalue to be computed. 1 <= I <= N. 14471 14472 D (input) REAL array, dimension (N) 14473 The original eigenvalues. It is assumed that they are in 14474 order, D(I) < D(J) for I < J. 14475 14476 Z (input) REAL array, dimension (N) 14477 The components of the updating vector. 14478 14479 DELTA (output) REAL array, dimension (N) 14480 If N .ne. 1, DELTA contains (D(j) - lambda_I) in its j-th 14481 component. If N = 1, then DELTA(1) = 1. The vector DELTA 14482 contains the information necessary to construct the 14483 eigenvectors. 14484 14485 RHO (input) REAL 14486 The scalar in the symmetric updating formula. 14487 14488 DLAM (output) REAL 14489 The computed lambda_I, the I-th updated eigenvalue. 14490 14491 INFO (output) INTEGER 14492 = 0: successful exit 14493 > 0: if INFO = 1, the updating process failed. 14494 14495 Internal Parameters 14496 =================== 14497 14498 Logical variable ORGATI (origin-at-i?) is used for distinguishing 14499 whether D(i) or D(i+1) is treated as the origin. 14500 14501 ORGATI = .true. origin at i 14502 ORGATI = .false. origin at i+1 14503 14504 Logical variable SWTCH3 (switch-for-3-poles?) is for noting 14505 if we are working with THREE poles! 14506 14507 MAXIT is the maximum number of iterations allowed for each 14508 eigenvalue. 14509 14510 Further Details 14511 =============== 14512 14513 Based on contributions by 14514 Ren-Cang Li, Computer Science Division, University of California 14515 at Berkeley, USA 14516 14517 ===================================================================== 14518 14519 14520 Since this routine is called in an inner loop, we do no argument 14521 checking. 14522 14523 Quick return for N=1 and 2. 
14524 14525 Parameter adjustments */ 14526 /* System generated locals */ 14527 integer i__1; 14528 real r__1; 14529 /* Builtin functions */ 14530 // double sqrt(doublereal); 14531 /* Local variables */ 14532 static real dphi, dpsi; 14533 static integer iter; 14534 static real temp, prew, temp1, a, b, c__; 14535 static integer j; 14536 static real w, dltlb, dltub, midpt; 14537 static integer niter; 14538 static logical swtch; 14539 extern /* Subroutine */ int slaed5_(integer *, real *, real *, real *, 14540 real *, real *), slaed6_(integer *, logical *, real *, real *, 14541 real *, real *, real *, integer *); 14542 static logical swtch3; 14543 static integer ii; 14544 static real dw; 14545 extern doublereal slamch_(const char *); 14546 static real zz[3]; 14547 static logical orgati; 14548 static real erretm, rhoinv; 14549 static integer ip1; 14550 static real del, eta, phi, eps, tau, psi; 14551 static integer iim1, iip1; 14552 14553 --delta; 14554 --z__; 14555 --d__; 14556 14557 /* Function Body */ 14558 *info = 0; 14559 if (*n == 1) { 14560 14561 /* Presumably, I=1 upon entry */ 14562 14563 *dlam = d__[1] + *rho * z__[1] * z__[1]; 14564 delta[1] = 1.f; 14565 return 0; 14566 } 14567 if (*n == 2) { 14568 slaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam); 14569 return 0; 14570 } 14571 14572 /* Compute machine epsilon */ 14573 14574 eps = slamch_("Epsilon"); 14575 rhoinv = 1.f / *rho; 14576 14577 /* The case I = N */ 14578 14579 if (*i__ == *n) { 14580 14581 /* Initialize some basic variables */ 14582 14583 ii = *n - 1; 14584 niter = 1; 14585 14586 /* Calculate initial guess */ 14587 14588 midpt = *rho / 2.f; 14589 14590 /* If ||Z||_2 is not one, then TEMP should be set to 14591 RHO * ||Z||_2^2 / TWO */ 14592 14593 i__1 = *n; 14594 for (j = 1; j <= i__1; ++j) { 14595 delta[j] = d__[j] - d__[*i__] - midpt; 14596 /* L10: */ 14597 } 14598 14599 psi = 0.f; 14600 i__1 = *n - 2; 14601 for (j = 1; j <= i__1; ++j) { 14602 psi += z__[j] * z__[j] / delta[j]; 14603 /* L20: */ 14604 } 14605 14606 c__ = rhoinv + psi; 14607 w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[* 14608 n]; 14609 14610 if (w <= 0.f) { 14611 temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) 14612 + z__[*n] * z__[*n] / *rho; 14613 if (c__ <= temp) { 14614 tau = *rho; 14615 } else { 14616 del = d__[*n] - d__[*n - 1]; 14617 a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n] 14618 ; 14619 b = z__[*n] * z__[*n] * del; 14620 if (a < 0.f) { 14621 tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); 14622 } else { 14623 tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); 14624 } 14625 } 14626 14627 /* It can be proved that 14628 D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO */ 14629 14630 dltlb = midpt; 14631 dltub = *rho; 14632 } else { 14633 del = d__[*n] - d__[*n - 1]; 14634 a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; 14635 b = z__[*n] * z__[*n] * del; 14636 if (a < 0.f) { 14637 tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); 14638 } else { 14639 tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); 14640 } 14641 14642 /* It can be proved that 14643 D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 */ 14644 14645 dltlb = 0.f; 14646 dltub = midpt; 14647 } 14648 14649 i__1 = *n; 14650 for (j = 1; j <= i__1; ++j) { 14651 delta[j] = d__[j] - d__[*i__] - tau; 14652 /* L30: */ 14653 } 14654 14655 /* Evaluate PSI and the derivative DPSI */ 14656 14657 dpsi = 0.f; 14658 psi = 0.f; 14659 erretm = 0.f; 14660 i__1 = ii; 14661 for (j = 1; j <= i__1; ++j) { 14662 temp = z__[j] / 
delta[j]; 14663 psi += z__[j] * temp; 14664 dpsi += temp * temp; 14665 erretm += psi; 14666 /* L40: */ 14667 } 14668 erretm = dabs(erretm); 14669 14670 /* Evaluate PHI and the derivative DPHI */ 14671 14672 temp = z__[*n] / delta[*n]; 14673 phi = z__[*n] * temp; 14674 dphi = temp * temp; 14675 erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( 14676 dpsi + dphi); 14677 14678 w = rhoinv + phi + psi; 14679 14680 /* Test for convergence */ 14681 14682 if (dabs(w) <= eps * erretm) { 14683 *dlam = d__[*i__] + tau; 14684 goto L250; 14685 } 14686 14687 if (w <= 0.f) { 14688 dltlb = df2cmax(dltlb,tau); 14689 } else { 14690 dltub = df2cmin(dltub,tau); 14691 } 14692 14693 /* Calculate the new step */ 14694 14695 ++niter; 14696 c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; 14697 a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * ( 14698 dpsi + dphi); 14699 b = delta[*n - 1] * delta[*n] * w; 14700 if (c__ < 0.f) { 14701 c__ = dabs(c__); 14702 } 14703 if (c__ == 0.f) { 14704 /* ETA = B/A 14705 ETA = RHO - TAU */ 14706 eta = dltub - tau; 14707 } else if (a >= 0.f) { 14708 eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( 14709 c__ * 2.f); 14710 } else { 14711 eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( 14712 r__1)))); 14713 } 14714 14715 /* Note, eta should be positive if w is negative, and 14716 eta should be negative otherwise. However, 14717 if for some reason caused by roundoff, eta*w > 0, 14718 we simply use one Newton step instead. This way 14719 will guarantee eta*w < 0. */ 14720 14721 if (w * eta > 0.f) { 14722 eta = -w / (dpsi + dphi); 14723 } 14724 temp = tau + eta; 14725 if (temp > dltub || temp < dltlb) { 14726 if (w < 0.f) { 14727 eta = (dltub - tau) / 2.f; 14728 } else { 14729 eta = (dltlb - tau) / 2.f; 14730 } 14731 } 14732 i__1 = *n; 14733 for (j = 1; j <= i__1; ++j) { 14734 delta[j] -= eta; 14735 /* L50: */ 14736 } 14737 14738 tau += eta; 14739 14740 /* Evaluate PSI and the derivative DPSI */ 14741 14742 dpsi = 0.f; 14743 psi = 0.f; 14744 erretm = 0.f; 14745 i__1 = ii; 14746 for (j = 1; j <= i__1; ++j) { 14747 temp = z__[j] / delta[j]; 14748 psi += z__[j] * temp; 14749 dpsi += temp * temp; 14750 erretm += psi; 14751 /* L60: */ 14752 } 14753 erretm = dabs(erretm); 14754 14755 /* Evaluate PHI and the derivative DPHI */ 14756 14757 temp = z__[*n] / delta[*n]; 14758 phi = z__[*n] * temp; 14759 dphi = temp * temp; 14760 erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( 14761 dpsi + dphi); 14762 14763 w = rhoinv + phi + psi; 14764 14765 /* Main loop to update the values of the array DELTA */ 14766 14767 iter = niter + 1; 14768 14769 for (niter = iter; niter <= 30; ++niter) { 14770 14771 /* Test for convergence */ 14772 14773 if (dabs(w) <= eps * erretm) { 14774 *dlam = d__[*i__] + tau; 14775 goto L250; 14776 } 14777 14778 if (w <= 0.f) { 14779 dltlb = df2cmax(dltlb,tau); 14780 } else { 14781 dltub = df2cmin(dltub,tau); 14782 } 14783 14784 /* Calculate the new step */ 14785 14786 c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; 14787 a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * 14788 (dpsi + dphi); 14789 b = delta[*n - 1] * delta[*n] * w; 14790 if (a >= 0.f) { 14791 eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / 14792 (c__ * 2.f); 14793 } else { 14794 eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( 14795 r__1)))); 14796 } 14797 14798 /* Note, eta should be positive if w is negative, and 14799 eta should be negative otherwise. 
However, 14800 if for some reason caused by roundoff, eta*w > 0, 14801 we simply use one Newton step instead. This way 14802 will guarantee eta*w < 0. */ 14803 14804 if (w * eta > 0.f) { 14805 eta = -w / (dpsi + dphi); 14806 } 14807 temp = tau + eta; 14808 if (temp > dltub || temp < dltlb) { 14809 if (w < 0.f) { 14810 eta = (dltub - tau) / 2.f; 14811 } else { 14812 eta = (dltlb - tau) / 2.f; 14813 } 14814 } 14815 i__1 = *n; 14816 for (j = 1; j <= i__1; ++j) { 14817 delta[j] -= eta; 14818 /* L70: */ 14819 } 14820 14821 tau += eta; 14822 14823 /* Evaluate PSI and the derivative DPSI */ 14824 14825 dpsi = 0.f; 14826 psi = 0.f; 14827 erretm = 0.f; 14828 i__1 = ii; 14829 for (j = 1; j <= i__1; ++j) { 14830 temp = z__[j] / delta[j]; 14831 psi += z__[j] * temp; 14832 dpsi += temp * temp; 14833 erretm += psi; 14834 /* L80: */ 14835 } 14836 erretm = dabs(erretm); 14837 14838 /* Evaluate PHI and the derivative DPHI */ 14839 14840 temp = z__[*n] / delta[*n]; 14841 phi = z__[*n] * temp; 14842 dphi = temp * temp; 14843 erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * 14844 (dpsi + dphi); 14845 14846 w = rhoinv + phi + psi; 14847 /* L90: */ 14848 } 14849 14850 /* Return with INFO = 1, NITER = MAXIT and not converged */ 14851 14852 *info = 1; 14853 *dlam = d__[*i__] + tau; 14854 goto L250; 14855 14856 /* End for the case I = N */ 14857 14858 } else { 14859 14860 /* The case for I < N */ 14861 14862 niter = 1; 14863 ip1 = *i__ + 1; 14864 14865 /* Calculate initial guess */ 14866 14867 del = d__[ip1] - d__[*i__]; 14868 midpt = del / 2.f; 14869 i__1 = *n; 14870 for (j = 1; j <= i__1; ++j) { 14871 delta[j] = d__[j] - d__[*i__] - midpt; 14872 /* L100: */ 14873 } 14874 14875 psi = 0.f; 14876 i__1 = *i__ - 1; 14877 for (j = 1; j <= i__1; ++j) { 14878 psi += z__[j] * z__[j] / delta[j]; 14879 /* L110: */ 14880 } 14881 14882 phi = 0.f; 14883 i__1 = *i__ + 2; 14884 for (j = *n; j >= i__1; --j) { 14885 phi += z__[j] * z__[j] / delta[j]; 14886 /* L120: */ 14887 } 14888 c__ = rhoinv + psi + phi; 14889 w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / 14890 delta[ip1]; 14891 14892 if (w > 0.f) { 14893 14894 /* d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 14895 14896 We choose d(i) as origin. */ 14897 14898 orgati = TRUE_; 14899 a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; 14900 b = z__[*i__] * z__[*i__] * del; 14901 if (a > 0.f) { 14902 tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( 14903 r__1)))); 14904 } else { 14905 tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / 14906 (c__ * 2.f); 14907 } 14908 dltlb = 0.f; 14909 dltub = midpt; 14910 } else { 14911 14912 /* (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) 14913 14914 We choose d(i+1) as origin. 
*/ 14915 14916 orgati = FALSE_; 14917 a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; 14918 b = z__[ip1] * z__[ip1] * del; 14919 if (a < 0.f) { 14920 tau = b * 2.f / (a - sqrt((r__1 = a * a + b * 4.f * c__, dabs( 14921 r__1)))); 14922 } else { 14923 tau = -(a + sqrt((r__1 = a * a + b * 4.f * c__, dabs(r__1)))) 14924 / (c__ * 2.f); 14925 } 14926 dltlb = -midpt; 14927 dltub = 0.f; 14928 } 14929 14930 if (orgati) { 14931 i__1 = *n; 14932 for (j = 1; j <= i__1; ++j) { 14933 delta[j] = d__[j] - d__[*i__] - tau; 14934 /* L130: */ 14935 } 14936 } else { 14937 i__1 = *n; 14938 for (j = 1; j <= i__1; ++j) { 14939 delta[j] = d__[j] - d__[ip1] - tau; 14940 /* L140: */ 14941 } 14942 } 14943 if (orgati) { 14944 ii = *i__; 14945 } else { 14946 ii = *i__ + 1; 14947 } 14948 iim1 = ii - 1; 14949 iip1 = ii + 1; 14950 14951 /* Evaluate PSI and the derivative DPSI */ 14952 14953 dpsi = 0.f; 14954 psi = 0.f; 14955 erretm = 0.f; 14956 i__1 = iim1; 14957 for (j = 1; j <= i__1; ++j) { 14958 temp = z__[j] / delta[j]; 14959 psi += z__[j] * temp; 14960 dpsi += temp * temp; 14961 erretm += psi; 14962 /* L150: */ 14963 } 14964 erretm = dabs(erretm); 14965 14966 /* Evaluate PHI and the derivative DPHI */ 14967 14968 dphi = 0.f; 14969 phi = 0.f; 14970 i__1 = iip1; 14971 for (j = *n; j >= i__1; --j) { 14972 temp = z__[j] / delta[j]; 14973 phi += z__[j] * temp; 14974 dphi += temp * temp; 14975 erretm += phi; 14976 /* L160: */ 14977 } 14978 14979 w = rhoinv + phi + psi; 14980 14981 /* W is the value of the secular function with 14982 its ii-th element removed. */ 14983 14984 swtch3 = FALSE_; 14985 if (orgati) { 14986 if (w < 0.f) { 14987 swtch3 = TRUE_; 14988 } 14989 } else { 14990 if (w > 0.f) { 14991 swtch3 = TRUE_; 14992 } 14993 } 14994 if (ii == 1 || ii == *n) { 14995 swtch3 = FALSE_; 14996 } 14997 14998 temp = z__[ii] / delta[ii]; 14999 dw = dpsi + dphi + temp * temp; 15000 temp = z__[ii] * temp; 15001 w += temp; 15002 erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f 15003 + dabs(tau) * dw; 15004 15005 /* Test for convergence */ 15006 15007 if (dabs(w) <= eps * erretm) { 15008 if (orgati) { 15009 *dlam = d__[*i__] + tau; 15010 } else { 15011 *dlam = d__[ip1] + tau; 15012 } 15013 goto L250; 15014 } 15015 15016 if (w <= 0.f) { 15017 dltlb = df2cmax(dltlb,tau); 15018 } else { 15019 dltub = df2cmin(dltub,tau); 15020 } 15021 15022 /* Calculate the new step */ 15023 15024 ++niter; 15025 if (! 
swtch3) { 15026 if (orgati) { 15027 /* Computing 2nd power */ 15028 r__1 = z__[*i__] / delta[*i__]; 15029 c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (r__1 * 15030 r__1); 15031 } else { 15032 /* Computing 2nd power */ 15033 r__1 = z__[ip1] / delta[ip1]; 15034 c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (r__1 * 15035 r__1); 15036 } 15037 a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * 15038 dw; 15039 b = delta[*i__] * delta[ip1] * w; 15040 if (c__ == 0.f) { 15041 if (a == 0.f) { 15042 if (orgati) { 15043 a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * 15044 (dpsi + dphi); 15045 } else { 15046 a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * 15047 (dpsi + dphi); 15048 } 15049 } 15050 eta = b / a; 15051 } else if (a <= 0.f) { 15052 eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / 15053 (c__ * 2.f); 15054 } else { 15055 eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( 15056 r__1)))); 15057 } 15058 } else { 15059 15060 /* Interpolation using THREE most relevant poles */ 15061 15062 temp = rhoinv + psi + phi; 15063 if (orgati) { 15064 temp1 = z__[iim1] / delta[iim1]; 15065 temp1 *= temp1; 15066 c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[ 15067 iip1]) * temp1; 15068 zz[0] = z__[iim1] * z__[iim1]; 15069 zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi); 15070 } else { 15071 temp1 = z__[iip1] / delta[iip1]; 15072 temp1 *= temp1; 15073 c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[ 15074 iim1]) * temp1; 15075 zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1)); 15076 zz[2] = z__[iip1] * z__[iip1]; 15077 } 15078 zz[1] = z__[ii] * z__[ii]; 15079 slaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info); 15080 if (*info != 0) { 15081 goto L250; 15082 } 15083 } 15084 15085 /* Note, eta should be positive if w is negative, and 15086 eta should be negative otherwise. However, 15087 if for some reason caused by roundoff, eta*w > 0, 15088 we simply use one Newton step instead. This way 15089 will guarantee eta*w < 0. 
*/ 15090 15091 if (w * eta >= 0.f) { 15092 eta = -w / dw; 15093 } 15094 temp = tau + eta; 15095 if (temp > dltub || temp < dltlb) { 15096 if (w < 0.f) { 15097 eta = (dltub - tau) / 2.f; 15098 } else { 15099 eta = (dltlb - tau) / 2.f; 15100 } 15101 } 15102 15103 prew = w; 15104 15105 /* L170: */ 15106 i__1 = *n; 15107 for (j = 1; j <= i__1; ++j) { 15108 delta[j] -= eta; 15109 /* L180: */ 15110 } 15111 15112 /* Evaluate PSI and the derivative DPSI */ 15113 15114 dpsi = 0.f; 15115 psi = 0.f; 15116 erretm = 0.f; 15117 i__1 = iim1; 15118 for (j = 1; j <= i__1; ++j) { 15119 temp = z__[j] / delta[j]; 15120 psi += z__[j] * temp; 15121 dpsi += temp * temp; 15122 erretm += psi; 15123 /* L190: */ 15124 } 15125 erretm = dabs(erretm); 15126 15127 /* Evaluate PHI and the derivative DPHI */ 15128 15129 dphi = 0.f; 15130 phi = 0.f; 15131 i__1 = iip1; 15132 for (j = *n; j >= i__1; --j) { 15133 temp = z__[j] / delta[j]; 15134 phi += z__[j] * temp; 15135 dphi += temp * temp; 15136 erretm += phi; 15137 /* L200: */ 15138 } 15139 15140 temp = z__[ii] / delta[ii]; 15141 dw = dpsi + dphi + temp * temp; 15142 temp = z__[ii] * temp; 15143 w = rhoinv + phi + psi + temp; 15144 erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f 15145 + (r__1 = tau + eta, dabs(r__1)) * dw; 15146 15147 swtch = FALSE_; 15148 if (orgati) { 15149 if (-w > dabs(prew) / 10.f) { 15150 swtch = TRUE_; 15151 } 15152 } else { 15153 if (w > dabs(prew) / 10.f) { 15154 swtch = TRUE_; 15155 } 15156 } 15157 15158 tau += eta; 15159 15160 /* Main loop to update the values of the array DELTA */ 15161 15162 iter = niter + 1; 15163 15164 for (niter = iter; niter <= 30; ++niter) { 15165 15166 /* Test for convergence */ 15167 15168 if (dabs(w) <= eps * erretm) { 15169 if (orgati) { 15170 *dlam = d__[*i__] + tau; 15171 } else { 15172 *dlam = d__[ip1] + tau; 15173 } 15174 goto L250; 15175 } 15176 15177 if (w <= 0.f) { 15178 dltlb = df2cmax(dltlb,tau); 15179 } else { 15180 dltub = df2cmin(dltub,tau); 15181 } 15182 15183 /* Calculate the new step */ 15184 15185 if (! swtch3) { 15186 if (! swtch) { 15187 if (orgati) { 15188 /* Computing 2nd power */ 15189 r__1 = z__[*i__] / delta[*i__]; 15190 c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * ( 15191 r__1 * r__1); 15192 } else { 15193 /* Computing 2nd power */ 15194 r__1 = z__[ip1] / delta[ip1]; 15195 c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * 15196 (r__1 * r__1); 15197 } 15198 } else { 15199 temp = z__[ii] / delta[ii]; 15200 if (orgati) { 15201 dpsi += temp * temp; 15202 } else { 15203 dphi += temp * temp; 15204 } 15205 c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi; 15206 } 15207 a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] 15208 * dw; 15209 b = delta[*i__] * delta[ip1] * w; 15210 if (c__ == 0.f) { 15211 if (a == 0.f) { 15212 if (! 
swtch) { 15213 if (orgati) { 15214 a = z__[*i__] * z__[*i__] + delta[ip1] * 15215 delta[ip1] * (dpsi + dphi); 15216 } else { 15217 a = z__[ip1] * z__[ip1] + delta[*i__] * delta[ 15218 *i__] * (dpsi + dphi); 15219 } 15220 } else { 15221 a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] 15222 * delta[ip1] * dphi; 15223 } 15224 } 15225 eta = b / a; 15226 } else if (a <= 0.f) { 15227 eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)) 15228 )) / (c__ * 2.f); 15229 } else { 15230 eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, 15231 dabs(r__1)))); 15232 } 15233 } else { 15234 15235 /* Interpolation using THREE most relevant poles */ 15236 15237 temp = rhoinv + psi + phi; 15238 if (swtch) { 15239 c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi; 15240 zz[0] = delta[iim1] * delta[iim1] * dpsi; 15241 zz[2] = delta[iip1] * delta[iip1] * dphi; 15242 } else { 15243 if (orgati) { 15244 temp1 = z__[iim1] / delta[iim1]; 15245 temp1 *= temp1; 15246 c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] 15247 - d__[iip1]) * temp1; 15248 zz[0] = z__[iim1] * z__[iim1]; 15249 zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + 15250 dphi); 15251 } else { 15252 temp1 = z__[iip1] / delta[iip1]; 15253 temp1 *= temp1; 15254 c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] 15255 - d__[iim1]) * temp1; 15256 zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - 15257 temp1)); 15258 zz[2] = z__[iip1] * z__[iip1]; 15259 } 15260 } 15261 slaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, 15262 info); 15263 if (*info != 0) { 15264 goto L250; 15265 } 15266 } 15267 15268 /* Note, eta should be positive if w is negative, and 15269 eta should be negative otherwise. However, 15270 if for some reason caused by roundoff, eta*w > 0, 15271 we simply use one Newton step instead. This way 15272 will guarantee eta*w < 0. */ 15273 15274 if (w * eta >= 0.f) { 15275 eta = -w / dw; 15276 } 15277 temp = tau + eta; 15278 if (temp > dltub || temp < dltlb) { 15279 if (w < 0.f) { 15280 eta = (dltub - tau) / 2.f; 15281 } else { 15282 eta = (dltlb - tau) / 2.f; 15283 } 15284 } 15285 15286 i__1 = *n; 15287 for (j = 1; j <= i__1; ++j) { 15288 delta[j] -= eta; 15289 /* L210: */ 15290 } 15291 15292 tau += eta; 15293 prew = w; 15294 15295 /* Evaluate PSI and the derivative DPSI */ 15296 15297 dpsi = 0.f; 15298 psi = 0.f; 15299 erretm = 0.f; 15300 i__1 = iim1; 15301 for (j = 1; j <= i__1; ++j) { 15302 temp = z__[j] / delta[j]; 15303 psi += z__[j] * temp; 15304 dpsi += temp * temp; 15305 erretm += psi; 15306 /* L220: */ 15307 } 15308 erretm = dabs(erretm); 15309 15310 /* Evaluate PHI and the derivative DPHI */ 15311 15312 dphi = 0.f; 15313 phi = 0.f; 15314 i__1 = iip1; 15315 for (j = *n; j >= i__1; --j) { 15316 temp = z__[j] / delta[j]; 15317 phi += z__[j] * temp; 15318 dphi += temp * temp; 15319 erretm += phi; 15320 /* L230: */ 15321 } 15322 15323 temp = z__[ii] / delta[ii]; 15324 dw = dpsi + dphi + temp * temp; 15325 temp = z__[ii] * temp; 15326 w = rhoinv + phi + psi + temp; 15327 erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 15328 3.f + dabs(tau) * dw; 15329 if (w * prew > 0.f && dabs(w) > dabs(prew) / 10.f) { 15330 swtch = ! swtch; 15331 } 15332 15333 /* L240: */ 15334 } 15335 15336 /* Return with INFO = 1, NITER = MAXIT and not converged */ 15337 15338 *info = 1; 15339 if (orgati) { 15340 *dlam = d__[*i__] + tau; 15341 } else { 15342 *dlam = d__[ip1] + tau; 15343 } 15344 15345 } 15346 15347 L250: 15348 15349 return 0; 15350 15351 /* End of SLAED4 */ 15352 15353 } /* slaed4_ */
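The iteration above repeatedly evaluates the shifted secular function w(tau) = 1/rho + sum_j z(j)^2 / delta(j), with delta(j) = d(j) - origin - tau, split into the PSI part (poles at or below the chosen origin d(i) or d(i+1)) and the PHI part (poles above it), together with its derivative DPSI + DPHI. Below is a minimal standalone sketch of that evaluation and of the plain Newton fallback eta = -w/w' that the code uses whenever the interpolated step has the wrong sign; the data and names are hypothetical and nothing here calls slaed4_ itself.

#include <cstdio>
#include <vector>

// Evaluate w(tau) = 1/rho + sum_j z(j)^2 / (d(j) - origin - tau) and its
// derivative dw, mirroring the PSI/DPSI and PHI/DPHI accumulation above.
static void secular_eval(const std::vector<float>& d, const std::vector<float>& z,
                         float rho, float origin, float tau, float& w, float& dw) {
    w  = 1.0f / rho;
    dw = 0.0f;
    for (std::size_t j = 0; j < d.size(); ++j) {
        float delta = d[j] - origin - tau;   // DELTA(j) in the listing
        float temp  = z[j] / delta;
        w  += z[j] * temp;                   // psi + phi contributions
        dw += temp * temp;                   // dpsi + dphi contributions
    }
}

int main() {
    std::vector<float> d = {0.0f, 1.0f, 2.0f};
    std::vector<float> z = {0.5f, 0.5f, 0.7071f};
    float rho = 1.0f, origin = d[1], tau = 0.25f;  // hypothetical guess near d(2)
    float w, dw;
    secular_eval(d, z, rho, origin, tau, w, dw);
    float eta = -w / dw;                     // plain Newton fallback step
    std::printf("w = %g, dw = %g, Newton eta = %g\n", w, dw, eta);
    return 0;
}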
int slaed5_ | ( | integer * | i__, | |
real * | d__, | |||
real * | z__, | |||
real * | delta, | |||
real * | rho, | |||
real * | dlam | |||
) |
Definition at line 16243 of file lapackblas.cpp.
Referenced by slaed4_().
16245 { 16246 /* -- LAPACK routine (version 3.0) -- 16247 Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, 16248 Courant Institute, NAG Ltd., and Rice University 16249 September 30, 1994 16250 16251 16252 Purpose 16253 ======= 16254 16255 This subroutine computes the I-th eigenvalue of a symmetric rank-one 16256 modification of a 2-by-2 diagonal matrix 16257 16258 diag( D ) + RHO * Z * transpose(Z) . 16259 16260 The diagonal elements in the array D are assumed to satisfy 16261 16262 D(i) < D(j) for i < j . 16263 16264 We also assume RHO > 0 and that the Euclidean norm of the vector 16265 Z is one. 16266 16267 Arguments 16268 ========= 16269 16270 I (input) INTEGER 16271 The index of the eigenvalue to be computed. I = 1 or I = 2. 16272 16273 D (input) REAL array, dimension (2) 16274 The original eigenvalues. We assume D(1) < D(2). 16275 16276 Z (input) REAL array, dimension (2) 16277 The components of the updating vector. 16278 16279 DELTA (output) REAL array, dimension (2) 16280 The vector DELTA contains the information necessary 16281 to construct the eigenvectors. 16282 16283 RHO (input) REAL 16284 The scalar in the symmetric updating formula. 16285 16286 DLAM (output) REAL 16287 The computed lambda_I, the I-th updated eigenvalue. 16288 16289 Further Details 16290 =============== 16291 16292 Based on contributions by 16293 Ren-Cang Li, Computer Science Division, University of California 16294 at Berkeley, USA 16295 16296 ===================================================================== 16297 16298 16299 Parameter adjustments */ 16300 /* System generated locals */ 16301 real r__1; 16302 /* Builtin functions */ 16303 // double sqrt(doublereal); 16304 /* Local variables */ 16305 static real temp, b, c__, w, del, tau; 16306 16307 --delta; 16308 --z__; 16309 --d__; 16310 16311 /* Function Body */ 16312 del = d__[2] - d__[1]; 16313 if (*i__ == 1) { 16314 w = *rho * 2.f * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.f; 16315 if (w > 0.f) { 16316 b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); 16317 c__ = *rho * z__[1] * z__[1] * del; 16318 16319 /* B > ZERO, always */ 16320 16321 tau = c__ * 2.f / (b + sqrt((r__1 = b * b - c__ * 4.f, dabs(r__1)) 16322 )); 16323 *dlam = d__[1] + tau; 16324 delta[1] = -z__[1] / tau; 16325 delta[2] = z__[2] / (del - tau); 16326 } else { 16327 b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); 16328 c__ = *rho * z__[2] * z__[2] * del; 16329 if (b > 0.f) { 16330 tau = c__ * -2.f / (b + sqrt(b * b + c__ * 4.f)); 16331 } else { 16332 tau = (b - sqrt(b * b + c__ * 4.f)) / 2.f; 16333 } 16334 *dlam = d__[2] + tau; 16335 delta[1] = -z__[1] / (del + tau); 16336 delta[2] = -z__[2] / tau; 16337 } 16338 temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); 16339 delta[1] /= temp; 16340 delta[2] /= temp; 16341 } else { 16342 16343 /* Now I=2 */ 16344 16345 b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); 16346 c__ = *rho * z__[2] * z__[2] * del; 16347 if (b > 0.f) { 16348 tau = (b + sqrt(b * b + c__ * 4.f)) / 2.f; 16349 } else { 16350 tau = c__ * 2.f / (-b + sqrt(b * b + c__ * 4.f)); 16351 } 16352 *dlam = d__[2] + tau; 16353 delta[1] = -z__[1] / (del + tau); 16354 delta[2] = -z__[2] / tau; 16355 temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); 16356 delta[1] /= temp; 16357 delta[2] /= temp; 16358 } 16359 return 0; 16360 16361 /* End OF SLAED5 */ 16362 16363 } /* slaed5_ */
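For the 2-by-2 problem the secular equation collapses to a quadratic, which is what the closed forms above solve. The sketch below (an independent re-derivation with made-up data, not a call into slaed5_) reproduces the I = 2 branch and cross-checks the result against the larger eigenvalue of diag(D) + RHO*Z*Z' obtained from the trace and determinant.

#include <cstdio>
#include <cmath>

int main() {
    // Assumed toy data: d(1) < d(2), z of unit norm, rho > 0.
    float d1 = 0.0f, d2 = 1.0f;
    float z1 = 0.6f, z2 = 0.8f;
    float rho = 0.5f;

    // I = 2 branch of the listing above: tau solves tau^2 - b*tau - c = 0.
    float del = d2 - d1;
    float b = -del + rho * (z1 * z1 + z2 * z2);
    float c = rho * z2 * z2 * del;
    float tau = (b > 0.0f) ? (b + std::sqrt(b * b + 4.0f * c)) / 2.0f
                           : 2.0f * c / (-b + std::sqrt(b * b + 4.0f * c));
    float dlam = d2 + tau;

    // Cross-check: larger eigenvalue of diag(d) + rho*z*z' via trace/determinant.
    float a11 = d1 + rho * z1 * z1, a22 = d2 + rho * z2 * z2, a12 = rho * z1 * z2;
    float tr = a11 + a22, det = a11 * a22 - a12 * a12;
    float lam2 = (tr + std::sqrt(tr * tr - 4.0f * det)) / 2.0f;

    std::printf("closed form: %g   brute force: %g\n", dlam, lam2);
    return 0;
}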
int slaed6_ | ( | integer * | kniter, | |
logical * | orgati, | |||
real * | rho, | |||
real * | d__, | |||
real * | z__, | |||
real * | finit, | |||
real * | tau, | |||
integer * | info | |||
) |
Definition at line 15929 of file lapackblas.cpp.
References dabs, df2cmax, df2cmin, f2cmax, FALSE_, integer, log(), pow_ri(), slamch_(), sqrt(), and TRUE_.
Referenced by slaed4_().
15931 { 15932 /* -- LAPACK routine (version 3.0) -- 15933 Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, 15934 Courant Institute, NAG Ltd., and Rice University 15935 June 30, 1999 15936 15937 15938 Purpose 15939 ======= 15940 15941 SLAED6 computes the positive or negative root (closest to the origin) 15942 of 15943 z(1) z(2) z(3) 15944 f(x) = rho + --------- + ---------- + --------- 15945 d(1)-x d(2)-x d(3)-x 15946 15947 It is assumed that 15948 15949 if ORGATI = .true. the root is between d(2) and d(3); 15950 otherwise it is between d(1) and d(2) 15951 15952 This routine will be called by SLAED4 when necessary. In most cases, 15953 the root sought is the smallest in magnitude, though it might not be 15954 in some extremely rare situations. 15955 15956 Arguments 15957 ========= 15958 15959 KNITER (input) INTEGER 15960 Refer to SLAED4 for its significance. 15961 15962 ORGATI (input) LOGICAL 15963 If ORGATI is true, the needed root is between d(2) and 15964 d(3); otherwise it is between d(1) and d(2). See 15965 SLAED4 for further details. 15966 15967 RHO (input) REAL 15968 Refer to the equation f(x) above. 15969 15970 D (input) REAL array, dimension (3) 15971 D satisfies d(1) < d(2) < d(3). 15972 15973 Z (input) REAL array, dimension (3) 15974 Each of the elements in z must be positive. 15975 15976 FINIT (input) REAL 15977 The value of f at 0. It is more accurate than the one 15978 evaluated inside this routine (if someone wants to do 15979 so). 15980 15981 TAU (output) REAL 15982 The root of the equation f(x). 15983 15984 INFO (output) INTEGER 15985 = 0: successful exit 15986 > 0: if INFO = 1, failure to converge 15987 15988 Further Details 15989 =============== 15990 15991 Based on contributions by 15992 Ren-Cang Li, Computer Science Division, University of California 15993 at Berkeley, USA 15994 15995 ===================================================================== 15996 15997 Parameter adjustments */ 15998 /* Initialized data */ 15999 static logical first = TRUE_; 16000 /* System generated locals */ 16001 integer i__1; 16002 real r__1, r__2, r__3, r__4; 16003 /* Builtin functions */ 16004 // double sqrt(doublereal), log(doublereal), pow_ri(real *, integer *); 16005 /* Local variables */ 16006 static real base; 16007 static integer iter; 16008 static real temp, temp1, temp2, temp3, temp4, a, b, c__, f; 16009 static integer i__; 16010 static logical scale; 16011 static integer niter; 16012 static real small1, small2, fc, df, sminv1, sminv2, dscale[3], sclfac; 16013 extern doublereal slamch_(const char *); 16014 static real zscale[3], erretm, sclinv, ddf, eta, eps; 16015 16016 --z__; 16017 --d__; 16018 16019 /* Function Body */ 16020 16021 *info = 0; 16022 16023 niter = 1; 16024 *tau = 0.f; 16025 if (*kniter == 2) { 16026 if (*orgati) { 16027 temp = (d__[3] - d__[2]) / 2.f; 16028 c__ = *rho + z__[1] / (d__[1] - d__[2] - temp); 16029 a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3]; 16030 b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2]; 16031 } else { 16032 temp = (d__[1] - d__[2]) / 2.f; 16033 c__ = *rho + z__[3] / (d__[3] - d__[2] - temp); 16034 a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2]; 16035 b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1]; 16036 } 16037 /* Computing MAX */ 16038 r__1 = dabs(a), r__2 = dabs(b), r__1 = f2cmax(r__1,r__2), r__2 = dabs( 16039 c__); 16040 temp = df2cmax(r__1,r__2); 16041 a /= temp; 16042 b /= temp; 16043 c__ /= temp; 16044 if (c__ == 0.f) { 16045 *tau = b / a; 16046 } else if (a <= 0.f) { 16047 *tau = (a - 
sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( 16048 c__ * 2.f); 16049 } else { 16050 *tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( 16051 r__1)))); 16052 } 16053 temp = *rho + z__[1] / (d__[1] - *tau) + z__[2] / (d__[2] - *tau) + 16054 z__[3] / (d__[3] - *tau); 16055 if (dabs(*finit) <= dabs(temp)) { 16056 *tau = 0.f; 16057 } 16058 } 16059 16060 /* On first call to routine, get machine parameters for 16061 possible scaling to avoid overflow */ 16062 16063 if (first) { 16064 eps = slamch_("Epsilon"); 16065 base = slamch_("Base"); 16066 i__1 = (integer) (log(slamch_("SafMin")) / log(base) / 3.f) 16067 ; 16068 small1 = pow_ri(&base, &i__1); 16069 sminv1 = 1.f / small1; 16070 small2 = small1 * small1; 16071 sminv2 = sminv1 * sminv1; 16072 first = FALSE_; 16073 } 16074 16075 /* Determine if scaling of inputs necessary to avoid overflow 16076 when computing 1/TEMP**3 */ 16077 16078 if (*orgati) { 16079 /* Computing MIN */ 16080 r__3 = (r__1 = d__[2] - *tau, dabs(r__1)), r__4 = (r__2 = d__[3] - * 16081 tau, dabs(r__2)); 16082 temp = df2cmin(r__3,r__4); 16083 } else { 16084 /* Computing MIN */ 16085 r__3 = (r__1 = d__[1] - *tau, dabs(r__1)), r__4 = (r__2 = d__[2] - * 16086 tau, dabs(r__2)); 16087 temp = df2cmin(r__3,r__4); 16088 } 16089 scale = FALSE_; 16090 if (temp <= small1) { 16091 scale = TRUE_; 16092 if (temp <= small2) { 16093 16094 /* Scale up by power of radix nearest 1/SAFMIN**(2/3) */ 16095 16096 sclfac = sminv2; 16097 sclinv = small2; 16098 } else { 16099 16100 /* Scale up by power of radix nearest 1/SAFMIN**(1/3) */ 16101 16102 sclfac = sminv1; 16103 sclinv = small1; 16104 } 16105 16106 /* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */ 16107 16108 for (i__ = 1; i__ <= 3; ++i__) { 16109 dscale[i__ - 1] = d__[i__] * sclfac; 16110 zscale[i__ - 1] = z__[i__] * sclfac; 16111 /* L10: */ 16112 } 16113 *tau *= sclfac; 16114 } else { 16115 16116 /* Copy D and Z to DSCALE and ZSCALE */ 16117 16118 for (i__ = 1; i__ <= 3; ++i__) { 16119 dscale[i__ - 1] = d__[i__]; 16120 zscale[i__ - 1] = z__[i__]; 16121 /* L20: */ 16122 } 16123 } 16124 16125 fc = 0.f; 16126 df = 0.f; 16127 ddf = 0.f; 16128 for (i__ = 1; i__ <= 3; ++i__) { 16129 temp = 1.f / (dscale[i__ - 1] - *tau); 16130 temp1 = zscale[i__ - 1] * temp; 16131 temp2 = temp1 * temp; 16132 temp3 = temp2 * temp; 16133 fc += temp1 / dscale[i__ - 1]; 16134 df += temp2; 16135 ddf += temp3; 16136 /* L30: */ 16137 } 16138 f = *finit + *tau * fc; 16139 16140 if (dabs(f) <= 0.f) { 16141 goto L60; 16142 } 16143 16144 /* Iteration begins 16145 16146 It is not hard to see that 16147 16148 1) Iterations will go up monotonically 16149 if FINIT < 0; 16150 16151 2) Iterations will go down monotonically 16152 if FINIT > 0. 
*/ 16153 16154 iter = niter + 1; 16155 16156 for (niter = iter; niter <= 20; ++niter) { 16157 16158 if (*orgati) { 16159 temp1 = dscale[1] - *tau; 16160 temp2 = dscale[2] - *tau; 16161 } else { 16162 temp1 = dscale[0] - *tau; 16163 temp2 = dscale[1] - *tau; 16164 } 16165 a = (temp1 + temp2) * f - temp1 * temp2 * df; 16166 b = temp1 * temp2 * f; 16167 c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf; 16168 /* Computing MAX */ 16169 r__1 = dabs(a), r__2 = dabs(b), r__1 = f2cmax(r__1,r__2), r__2 = dabs( 16170 c__); 16171 temp = df2cmax(r__1,r__2); 16172 a /= temp; 16173 b /= temp; 16174 c__ /= temp; 16175 if (c__ == 0.f) { 16176 eta = b / a; 16177 } else if (a <= 0.f) { 16178 eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( 16179 c__ * 2.f); 16180 } else { 16181 eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( 16182 r__1)))); 16183 } 16184 if (f * eta >= 0.f) { 16185 eta = -f / df; 16186 } 16187 16188 temp = eta + *tau; 16189 if (*orgati) { 16190 if (eta > 0.f && temp >= dscale[2]) { 16191 eta = (dscale[2] - *tau) / 2.f; 16192 } 16193 if (eta < 0.f && temp <= dscale[1]) { 16194 eta = (dscale[1] - *tau) / 2.f; 16195 } 16196 } else { 16197 if (eta > 0.f && temp >= dscale[1]) { 16198 eta = (dscale[1] - *tau) / 2.f; 16199 } 16200 if (eta < 0.f && temp <= dscale[0]) { 16201 eta = (dscale[0] - *tau) / 2.f; 16202 } 16203 } 16204 *tau += eta; 16205 16206 fc = 0.f; 16207 erretm = 0.f; 16208 df = 0.f; 16209 ddf = 0.f; 16210 for (i__ = 1; i__ <= 3; ++i__) { 16211 temp = 1.f / (dscale[i__ - 1] - *tau); 16212 temp1 = zscale[i__ - 1] * temp; 16213 temp2 = temp1 * temp; 16214 temp3 = temp2 * temp; 16215 temp4 = temp1 / dscale[i__ - 1]; 16216 fc += temp4; 16217 erretm += dabs(temp4); 16218 df += temp2; 16219 ddf += temp3; 16220 /* L40: */ 16221 } 16222 f = *finit + *tau * fc; 16223 erretm = (dabs(*finit) + dabs(*tau) * erretm) * 8.f + dabs(*tau) * df; 16224 if (dabs(f) <= eps * erretm) { 16225 goto L60; 16226 } 16227 /* L50: */ 16228 } 16229 *info = 1; 16230 L60: 16231 16232 /* Undo scaling */ 16233 16234 if (scale) { 16235 *tau *= sclinv; 16236 } 16237 return 0; 16238 16239 /* End of SLAED6 */ 16240 16241 } /* slaed6_ */
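SLAED6 drives f(x) = rho + z(1)/(d(1)-x) + z(2)/(d(2)-x) + z(3)/(d(3)-x) to zero inside the bracket selected by ORGATI. The sketch below is a deliberately unscaled, guarded Newton iteration on that same three-pole function with hypothetical data, shown only to make the target function concrete; the routine above additionally rescales D, Z and TAU to avoid overflow when forming 1/(d-x)**3, which this sketch omits.

#include <cstdio>
#include <cmath>

int main() {
    // Assumed toy data: d(1) < d(2) < d(3), positive weights, orgati = true,
    // so the wanted root lies between d(2) and d(3).
    double d[3] = {0.0, 1.0, 2.0};
    double z[3] = {0.3, 0.4, 0.3};
    double rho = 1.0;
    double lo = d[1], hi = d[2];          // bracket for orgati = true
    double x = 0.5 * (lo + hi);           // crude starting guess

    for (int iter = 0; iter < 40; ++iter) {
        double f = rho, df = 0.0;
        for (int i = 0; i < 3; ++i) {
            double t = 1.0 / (d[i] - x);
            f  += z[i] * t;               // f(x)
            df += z[i] * t * t;           // f'(x), positive between the poles
        }
        if (std::fabs(f) < 1e-12) break;
        double xn = x - f / df;           // Newton step
        if (xn <= lo || xn >= hi)         // keep the iterate inside the bracket
            xn = (f > 0.0) ? 0.5 * (x + lo) : 0.5 * (x + hi);
        x = xn;
    }
    std::printf("root between d(2) and d(3): %g\n", x);
    return 0;
}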
int slaed7_ | ( | integer * | icompq, | |
integer * | n, | |||
integer * | qsiz, | |||
integer * | tlvls, | |||
integer * | curlvl, | |||
integer * | curpbm, | |||
real * | d__, | |||
real * | q, | |||
integer * | ldq, | |||
integer * | indxq, | |||
real * | rho, | |||
integer * | cutpnt, | |||
real * | qstore, | |||
integer * | qptr, | |||
integer * | prmptr, | |||
integer * | perm, | |||
integer * | givptr, | |||
integer * | givcol, | |||
real * | givnum, | |||
real * | work, | |||
integer * | iwork, | |||
integer * | info | |||
) |
Definition at line 12334 of file lapackblas.cpp.
References c__1, c__2, c_n1, f2cmax, f2cmin, givcol_ref, givnum_ref, integer, pow_ii(), sgemm_(), slaed8_(), slaed9_(), slaeda_(), slamrg_(), and xerbla_().
Referenced by slaed0_().
12340 { 12341 /* -- LAPACK routine (version 3.0) -- 12342 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 12343 Courant Institute, Argonne National Lab, and Rice University 12344 September 30, 1994 12345 12346 12347 Purpose 12348 ======= 12349 12350 SLAED7 computes the updated eigensystem of a diagonal 12351 matrix after modification by a rank-one symmetric matrix. This 12352 routine is used only for the eigenproblem which requires all 12353 eigenvalues and optionally eigenvectors of a dense symmetric matrix 12354 that has been reduced to tridiagonal form. SLAED1 handles 12355 the case in which all eigenvalues and eigenvectors of a symmetric 12356 tridiagonal matrix are desired. 12357 12358 T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) 12359 12360 where Z = Q'u, u is a vector of length N with ones in the 12361 CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. 12362 12363 The eigenvectors of the original matrix are stored in Q, and the 12364 eigenvalues are in D. The algorithm consists of three stages: 12365 12366 The first stage consists of deflating the size of the problem 12367 when there are multiple eigenvalues or if there is a zero in 12368 the Z vector. For each such occurence the dimension of the 12369 secular equation problem is reduced by one. This stage is 12370 performed by the routine SLAED8. 12371 12372 The second stage consists of calculating the updated 12373 eigenvalues. This is done by finding the roots of the secular 12374 equation via the routine SLAED4 (as called by SLAED9). 12375 This routine also calculates the eigenvectors of the current 12376 problem. 12377 12378 The final stage consists of computing the updated eigenvectors 12379 directly using the updated eigenvalues. The eigenvectors for 12380 the current problem are multiplied with the eigenvectors from 12381 the overall problem. 12382 12383 Arguments 12384 ========= 12385 12386 ICOMPQ (input) INTEGER 12387 = 0: Compute eigenvalues only. 12388 = 1: Compute eigenvectors of original dense symmetric matrix 12389 also. On entry, Q contains the orthogonal matrix used 12390 to reduce the original matrix to tridiagonal form. 12391 12392 N (input) INTEGER 12393 The dimension of the symmetric tridiagonal matrix. N >= 0. 12394 12395 QSIZ (input) INTEGER 12396 The dimension of the orthogonal matrix used to reduce 12397 the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. 12398 12399 TLVLS (input) INTEGER 12400 The total number of merging levels in the overall divide and 12401 conquer tree. 12402 12403 CURLVL (input) INTEGER 12404 The current level in the overall merge routine, 12405 0 <= CURLVL <= TLVLS. 12406 12407 CURPBM (input) INTEGER 12408 The current problem in the current level in the overall 12409 merge routine (counting from upper left to lower right). 12410 12411 D (input/output) REAL array, dimension (N) 12412 On entry, the eigenvalues of the rank-1-perturbed matrix. 12413 On exit, the eigenvalues of the repaired matrix. 12414 12415 Q (input/output) REAL array, dimension (LDQ, N) 12416 On entry, the eigenvectors of the rank-1-perturbed matrix. 12417 On exit, the eigenvectors of the repaired tridiagonal matrix. 12418 12419 LDQ (input) INTEGER 12420 The leading dimension of the array Q. LDQ >= max(1,N). 12421 12422 INDXQ (output) INTEGER array, dimension (N) 12423 The permutation which will reintegrate the subproblem just 12424 solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) 12425 will be in ascending order. 
12426 12427 RHO (input) REAL 12428 The subdiagonal element used to create the rank-1 12429 modification. 12430 12431 CUTPNT (input) INTEGER 12432 Contains the location of the last eigenvalue in the leading 12433 sub-matrix. min(1,N) <= CUTPNT <= N. 12434 12435 QSTORE (input/output) REAL array, dimension (N**2+1) 12436 Stores eigenvectors of submatrices encountered during 12437 divide and conquer, packed together. QPTR points to 12438 beginning of the submatrices. 12439 12440 QPTR (input/output) INTEGER array, dimension (N+2) 12441 List of indices pointing to beginning of submatrices stored 12442 in QSTORE. The submatrices are numbered starting at the 12443 bottom left of the divide and conquer tree, from left to 12444 right and bottom to top. 12445 12446 PRMPTR (input) INTEGER array, dimension (N lg N) 12447 Contains a list of pointers which indicate where in PERM a 12448 level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) 12449 indicates the size of the permutation and also the size of 12450 the full, non-deflated problem. 12451 12452 PERM (input) INTEGER array, dimension (N lg N) 12453 Contains the permutations (from deflation and sorting) to be 12454 applied to each eigenblock. 12455 12456 GIVPTR (input) INTEGER array, dimension (N lg N) 12457 Contains a list of pointers which indicate where in GIVCOL a 12458 level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) 12459 indicates the number of Givens rotations. 12460 12461 GIVCOL (input) INTEGER array, dimension (2, N lg N) 12462 Each pair of numbers indicates a pair of columns to take place 12463 in a Givens rotation. 12464 12465 GIVNUM (input) REAL array, dimension (2, N lg N) 12466 Each number indicates the S value to be used in the 12467 corresponding Givens rotation. 12468 12469 WORK (workspace) REAL array, dimension (3*N+QSIZ*N) 12470 12471 IWORK (workspace) INTEGER array, dimension (4*N) 12472 12473 INFO (output) INTEGER 12474 = 0: successful exit. 12475 < 0: if INFO = -i, the i-th argument had an illegal value. 12476 > 0: if INFO = 1, an eigenvalue did not converge 12477 12478 Further Details 12479 =============== 12480 12481 Based on contributions by 12482 Jeff Rutter, Computer Science Division, University of California 12483 at Berkeley, USA 12484 12485 ===================================================================== 12486 12487 12488 Test the input parameters. 
12489 12490 Parameter adjustments */ 12491 /* Table of constant values */ 12492 static integer c__2 = 2; 12493 static integer c__1 = 1; 12494 static real c_b10 = 1.f; 12495 static real c_b11 = 0.f; 12496 static integer c_n1 = -1; 12497 12498 /* System generated locals */ 12499 integer q_dim1, q_offset, i__1, i__2; 12500 /* Builtin functions */ 12501 integer pow_ii(integer *, integer *); 12502 /* Local variables */ 12503 static integer indx, curr, i__, k, indxc; 12504 extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *, 12505 integer *, real *, real *, integer *, real *, integer *, real *, 12506 real *, integer *); 12507 static integer indxp, n1, n2; 12508 extern /* Subroutine */ int slaed8_(integer *, integer *, integer *, 12509 integer *, real *, real *, integer *, integer *, real *, integer * 12510 , real *, real *, real *, integer *, real *, integer *, integer *, 12511 integer *, real *, integer *, integer *, integer *), slaed9_( 12512 integer *, integer *, integer *, integer *, real *, real *, 12513 integer *, real *, real *, real *, real *, integer *, integer *), 12514 slaeda_(integer *, integer *, integer *, integer *, integer *, 12515 integer *, integer *, integer *, real *, real *, integer *, real * 12516 , real *, integer *); 12517 static integer idlmda, is, iw, iz; 12518 extern /* Subroutine */ int xerbla_(const char *, integer *), slamrg_( 12519 integer *, integer *, real *, integer *, integer *, integer *); 12520 static integer coltyp, iq2, ptr, ldq2; 12521 #define givcol_ref(a_1,a_2) givcol[(a_2)*2 + a_1] 12522 #define givnum_ref(a_1,a_2) givnum[(a_2)*2 + a_1] 12523 12524 12525 --d__; 12526 q_dim1 = *ldq; 12527 q_offset = 1 + q_dim1 * 1; 12528 q -= q_offset; 12529 --indxq; 12530 --qstore; 12531 --qptr; 12532 --prmptr; 12533 --perm; 12534 --givptr; 12535 givcol -= 3; 12536 givnum -= 3; 12537 --work; 12538 --iwork; 12539 12540 /* Function Body */ 12541 *info = 0; 12542 12543 if (*icompq < 0 || *icompq > 1) { 12544 *info = -1; 12545 } else if (*n < 0) { 12546 *info = -2; 12547 } else if (*icompq == 1 && *qsiz < *n) { 12548 *info = -4; 12549 } else if (*ldq < f2cmax(1,*n)) { 12550 *info = -9; 12551 } else if (f2cmin(1,*n) > *cutpnt || *n < *cutpnt) { 12552 *info = -12; 12553 } 12554 if (*info != 0) { 12555 i__1 = -(*info); 12556 xerbla_("SLAED7", &i__1); 12557 return 0; 12558 } 12559 12560 /* Quick return if possible */ 12561 12562 if (*n == 0) { 12563 return 0; 12564 } 12565 12566 /* The following values are for bookkeeping purposes only. They are 12567 integer pointers which indicate the portion of the workspace 12568 used by a particular array in SLAED8 and SLAED9. */ 12569 12570 if (*icompq == 1) { 12571 ldq2 = *qsiz; 12572 } else { 12573 ldq2 = *n; 12574 } 12575 12576 iz = 1; 12577 idlmda = iz + *n; 12578 iw = idlmda + *n; 12579 iq2 = iw + *n; 12580 is = iq2 + *n * ldq2; 12581 12582 indx = 1; 12583 indxc = indx + *n; 12584 coltyp = indxc + *n; 12585 indxp = coltyp + *n; 12586 12587 /* Form the z-vector which consists of the last row of Q_1 and the 12588 first row of Q_2. 
*/ 12589 12590 ptr = pow_ii(&c__2, tlvls) + 1; 12591 i__1 = *curlvl - 1; 12592 for (i__ = 1; i__ <= i__1; ++i__) { 12593 i__2 = *tlvls - i__; 12594 ptr += pow_ii(&c__2, &i__2); 12595 /* L10: */ 12596 } 12597 curr = ptr + *curpbm; 12598 slaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & 12599 givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz 12600 + *n], info); 12601 12602 /* When solving the final problem, we no longer need the stored data, 12603 so we will overwrite the data from this level onto the previously 12604 used storage space. */ 12605 12606 if (*curlvl == *tlvls) { 12607 qptr[curr] = 1; 12608 prmptr[curr] = 1; 12609 givptr[curr] = 1; 12610 } 12611 12612 /* Sort and Deflate eigenvalues. */ 12613 12614 slaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, 12615 cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], & 12616 perm[prmptr[curr]], &givptr[curr + 1], &givcol_ref(1, givptr[curr] 12617 ), &givnum_ref(1, givptr[curr]), &iwork[indxp], &iwork[indx], 12618 info); 12619 prmptr[curr + 1] = prmptr[curr] + *n; 12620 givptr[curr + 1] += givptr[curr]; 12621 12622 /* Solve Secular Equation. */ 12623 12624 if (k != 0) { 12625 slaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], 12626 &work[iw], &qstore[qptr[curr]], &k, info); 12627 if (*info != 0) { 12628 goto L30; 12629 } 12630 if (*icompq == 1) { 12631 sgemm_("N", "N", qsiz, &k, &k, &c_b10, &work[iq2], &ldq2, &qstore[ 12632 qptr[curr]], &k, &c_b11, &q[q_offset], ldq); 12633 } 12634 /* Computing 2nd power */ 12635 i__1 = k; 12636 qptr[curr + 1] = qptr[curr] + i__1 * i__1; 12637 12638 /* Prepare the INDXQ sorting permutation. */ 12639 12640 n1 = k; 12641 n2 = *n - k; 12642 slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); 12643 } else { 12644 qptr[curr + 1] = qptr[curr]; 12645 i__1 = *n; 12646 for (i__ = 1; i__ <= i__1; ++i__) { 12647 indxq[i__] = i__; 12648 /* L20: */ 12649 } 12650 } 12651 12652 L30: 12653 return 0; 12654 12655 /* End of SLAED7 */ 12656 12657 } /* slaed7_ */
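The WORK and IWORK arrays handed to SLAED7 are carved up by the integer pointers set near the top of the listing (IZ, IDLMDA, IW, IQ2, IS and INDX, INDXC, COLTYP, INDXP). The small sketch below redoes that bookkeeping for hypothetical sizes with ICOMPQ = 1, which makes it easy to see how the offsets relate to the documented dimensions 3*N+QSIZ*N and 4*N; the names mirror the listing and nothing else is assumed.

#include <cstdio>

int main() {
    // Assumed problem sizes; icompq = 1 so eigenvectors are updated.
    int n = 8, qsiz = 8, icompq = 1;
    int ldq2 = (icompq == 1) ? qsiz : n;

    // Real workspace partition (1-based offsets, as in the listing).
    int iz     = 1;
    int idlmda = iz + n;
    int iw     = idlmda + n;
    int iq2    = iw + n;
    int is     = iq2 + n * ldq2;          // first entry after the IQ2 block

    // Integer workspace partition.
    int indx   = 1;
    int indxc  = indx + n;
    int coltyp = indxc + n;
    int indxp  = coltyp + n;

    std::printf("WORK:  iz=%d idlmda=%d iw=%d iq2=%d is=%d\n",
                iz, idlmda, iw, iq2, is);
    std::printf("blocks up to IQ2 span %d entries; documented WORK size 3*N+QSIZ*N = %d\n",
                is - 1, 3 * n + qsiz * n);
    std::printf("IWORK: indx=%d indxc=%d coltyp=%d indxp=%d; documented IWORK size 4*N = %d\n",
                indx, indxc, coltyp, indxp, 4 * n);
    return 0;
}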
int slaed8_ | ( | integer * | icompq, | |
integer * | k, | |||
integer * | n, | |||
integer * | qsiz, | |||
real * | d__, | |||
real * | q, | |||
integer * | ldq, | |||
integer * | indxq, | |||
real * | rho, | |||
integer * | cutpnt, | |||
real * | z__, | |||
real * | dlamda, | |||
real * | q2, | |||
integer * | ldq2, | |||
real * | w, | |||
integer * | perm, | |||
integer * | givptr, | |||
integer * | givcol, | |||
real * | givnum, | |||
integer * | indxp, | |||
integer * | indx, | |||
integer * | info | |||
) |
Definition at line 13103 of file lapackblas.cpp.
References c__1, c_b3, dabs, f2cmax, f2cmin, givcol_ref, givnum_ref, integer, isamax_(), q2_ref, q_ref, scopy_(), slacpy_(), slamch_(), slamrg_(), slapy2_(), sqrt(), srot_(), sscal_(), t, and xerbla_().
Referenced by slaed7_().
13108 { 13109 /* -- LAPACK routine (version 3.0) -- 13110 Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, 13111 Courant Institute, NAG Ltd., and Rice University 13112 September 30, 1994 13113 13114 13115 Purpose 13116 ======= 13117 13118 SLAED8 merges the two sets of eigenvalues together into a single 13119 sorted set. Then it tries to deflate the size of the problem. 13120 There are two ways in which deflation can occur: when two or more 13121 eigenvalues are close together or if there is a tiny element in the 13122 Z vector. For each such occurrence the order of the related secular 13123 equation problem is reduced by one. 13124 13125 Arguments 13126 ========= 13127 13128 ICOMPQ (input) INTEGER 13129 = 0: Compute eigenvalues only. 13130 = 1: Compute eigenvectors of original dense symmetric matrix 13131 also. On entry, Q contains the orthogonal matrix used 13132 to reduce the original matrix to tridiagonal form. 13133 13134 K (output) INTEGER 13135 The number of non-deflated eigenvalues, and the order of the 13136 related secular equation. 13137 13138 N (input) INTEGER 13139 The dimension of the symmetric tridiagonal matrix. N >= 0. 13140 13141 QSIZ (input) INTEGER 13142 The dimension of the orthogonal matrix used to reduce 13143 the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. 13144 13145 D (input/output) REAL array, dimension (N) 13146 On entry, the eigenvalues of the two submatrices to be 13147 combined. On exit, the trailing (N-K) updated eigenvalues 13148 (those which were deflated) sorted into increasing order. 13149 13150 Q (input/output) REAL array, dimension (LDQ,N) 13151 If ICOMPQ = 0, Q is not referenced. Otherwise, 13152 on entry, Q contains the eigenvectors of the partially solved 13153 system which has been previously updated in matrix 13154 multiplies with other partially solved eigensystems. 13155 On exit, Q contains the trailing (N-K) updated eigenvectors 13156 (those which were deflated) in its last N-K columns. 13157 13158 LDQ (input) INTEGER 13159 The leading dimension of the array Q. LDQ >= max(1,N). 13160 13161 INDXQ (input) INTEGER array, dimension (N) 13162 The permutation which separately sorts the two sub-problems 13163 in D into ascending order. Note that elements in the second 13164 half of this permutation must first have CUTPNT added to 13165 their values in order to be accurate. 13166 13167 RHO (input/output) REAL 13168 On entry, the off-diagonal element associated with the rank-1 13169 cut which originally split the two submatrices which are now 13170 being recombined. 13171 On exit, RHO has been modified to the value required by 13172 SLAED3. 13173 13174 CUTPNT (input) INTEGER 13175 The location of the last eigenvalue in the leading 13176 sub-matrix. min(1,N) <= CUTPNT <= N. 13177 13178 Z (input) REAL array, dimension (N) 13179 On entry, Z contains the updating vector (the last row of 13180 the first sub-eigenvector matrix and the first row of the 13181 second sub-eigenvector matrix). 13182 On exit, the contents of Z are destroyed by the updating 13183 process. 13184 13185 DLAMDA (output) REAL array, dimension (N) 13186 A copy of the first K eigenvalues which will be used by 13187 SLAED3 to form the secular equation. 13188 13189 Q2 (output) REAL array, dimension (LDQ2,N) 13190 If ICOMPQ = 0, Q2 is not referenced. Otherwise, 13191 a copy of the first K eigenvectors which will be used by 13192 SLAED7 in a matrix multiply (SGEMM) to update the new 13193 eigenvectors. 
13194 13195 LDQ2 (input) INTEGER 13196 The leading dimension of the array Q2. LDQ2 >= max(1,N). 13197 13198 W (output) REAL array, dimension (N) 13199 The first k values of the final deflation-altered z-vector and 13200 will be passed to SLAED3. 13201 13202 PERM (output) INTEGER array, dimension (N) 13203 The permutations (from deflation and sorting) to be applied 13204 to each eigenblock. 13205 13206 GIVPTR (output) INTEGER 13207 The number of Givens rotations which took place in this 13208 subproblem. 13209 13210 GIVCOL (output) INTEGER array, dimension (2, N) 13211 Each pair of numbers indicates a pair of columns to take place 13212 in a Givens rotation. 13213 13214 GIVNUM (output) REAL array, dimension (2, N) 13215 Each number indicates the S value to be used in the 13216 corresponding Givens rotation. 13217 13218 INDXP (workspace) INTEGER array, dimension (N) 13219 The permutation used to place deflated values of D at the end 13220 of the array. INDXP(1:K) points to the nondeflated D-values 13221 and INDXP(K+1:N) points to the deflated eigenvalues. 13222 13223 INDX (workspace) INTEGER array, dimension (N) 13224 The permutation used to sort the contents of D into ascending 13225 order. 13226 13227 INFO (output) INTEGER 13228 = 0: successful exit. 13229 < 0: if INFO = -i, the i-th argument had an illegal value. 13230 13231 Further Details 13232 =============== 13233 13234 Based on contributions by 13235 Jeff Rutter, Computer Science Division, University of California 13236 at Berkeley, USA 13237 13238 ===================================================================== 13239 13240 13241 13242 Test the input parameters. 13243 13244 Parameter adjustments */ 13245 /* Table of constant values */ 13246 static real c_b3 = -1.f; 13247 static integer c__1 = 1; 13248 13249 /* System generated locals */ 13250 integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; 13251 real r__1; 13252 /* Builtin functions */ 13253 // double sqrt(doublereal); 13254 /* Local variables */ 13255 static integer jlam, imax, jmax; 13256 extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 13257 integer *, real *, real *); 13258 static real c__; 13259 static integer i__, j; 13260 static real s, t; 13261 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); 13262 static integer k2; 13263 extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 13264 integer *); 13265 static integer n1, n2; 13266 extern doublereal slapy2_(real *, real *); 13267 static integer jp; 13268 extern doublereal slamch_(const char *); 13269 extern /* Subroutine */ int xerbla_(const char *, integer *); 13270 extern integer isamax_(integer *, real *, integer *); 13271 extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer 13272 *, integer *, integer *), slacpy_(const char *, integer *, integer *, 13273 real *, integer *, real *, integer *); 13274 static integer n1p1; 13275 static real eps, tau, tol; 13276 #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] 13277 #define q2_ref(a_1,a_2) q2[(a_2)*q2_dim1 + a_1] 13278 #define givcol_ref(a_1,a_2) givcol[(a_2)*2 + a_1] 13279 #define givnum_ref(a_1,a_2) givnum[(a_2)*2 + a_1] 13280 13281 13282 --d__; 13283 q_dim1 = *ldq; 13284 q_offset = 1 + q_dim1 * 1; 13285 q -= q_offset; 13286 --indxq; 13287 --z__; 13288 --dlamda; 13289 q2_dim1 = *ldq2; 13290 q2_offset = 1 + q2_dim1 * 1; 13291 q2 -= q2_offset; 13292 --w; 13293 --perm; 13294 givcol -= 3; 13295 givnum -= 3; 13296 --indxp; 13297 --indx; 13298 13299 /* Function Body */ 13300 *info = 0; 13301 
13302 if (*icompq < 0 || *icompq > 1) { 13303 *info = -1; 13304 } else if (*n < 0) { 13305 *info = -3; 13306 } else if (*icompq == 1 && *qsiz < *n) { 13307 *info = -4; 13308 } else if (*ldq < f2cmax(1,*n)) { 13309 *info = -7; 13310 } else if (*cutpnt < f2cmin(1,*n) || *cutpnt > *n) { 13311 *info = -10; 13312 } else if (*ldq2 < f2cmax(1,*n)) { 13313 *info = -14; 13314 } 13315 if (*info != 0) { 13316 i__1 = -(*info); 13317 xerbla_("SLAED8", &i__1); 13318 return 0; 13319 } 13320 13321 /* Quick return if possible */ 13322 13323 if (*n == 0) { 13324 return 0; 13325 } 13326 13327 n1 = *cutpnt; 13328 n2 = *n - n1; 13329 n1p1 = n1 + 1; 13330 13331 if (*rho < 0.f) { 13332 sscal_(&n2, &c_b3, &z__[n1p1], &c__1); 13333 } 13334 13335 /* Normalize z so that norm(z) = 1 */ 13336 13337 t = 1.f / sqrt(2.f); 13338 i__1 = *n; 13339 for (j = 1; j <= i__1; ++j) { 13340 indx[j] = j; 13341 /* L10: */ 13342 } 13343 sscal_(n, &t, &z__[1], &c__1); 13344 *rho = (r__1 = *rho * 2.f, dabs(r__1)); 13345 13346 /* Sort the eigenvalues into increasing order */ 13347 13348 i__1 = *n; 13349 for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { 13350 indxq[i__] += *cutpnt; 13351 /* L20: */ 13352 } 13353 i__1 = *n; 13354 for (i__ = 1; i__ <= i__1; ++i__) { 13355 dlamda[i__] = d__[indxq[i__]]; 13356 w[i__] = z__[indxq[i__]]; 13357 /* L30: */ 13358 } 13359 i__ = 1; 13360 j = *cutpnt + 1; 13361 slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); 13362 i__1 = *n; 13363 for (i__ = 1; i__ <= i__1; ++i__) { 13364 d__[i__] = dlamda[indx[i__]]; 13365 z__[i__] = w[indx[i__]]; 13366 /* L40: */ 13367 } 13368 13369 /* Calculate the allowable deflation tolerence */ 13370 13371 imax = isamax_(n, &z__[1], &c__1); 13372 jmax = isamax_(n, &d__[1], &c__1); 13373 eps = slamch_("Epsilon"); 13374 tol = eps * 8.f * (r__1 = d__[jmax], dabs(r__1)); 13375 13376 /* If the rank-1 modifier is small enough, no more needs to be done 13377 except to reorganize Q so that its columns correspond with the 13378 elements in D. */ 13379 13380 if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { 13381 *k = 0; 13382 if (*icompq == 0) { 13383 i__1 = *n; 13384 for (j = 1; j <= i__1; ++j) { 13385 perm[j] = indxq[indx[j]]; 13386 /* L50: */ 13387 } 13388 } else { 13389 i__1 = *n; 13390 for (j = 1; j <= i__1; ++j) { 13391 perm[j] = indxq[indx[j]]; 13392 scopy_(qsiz, &q_ref(1, perm[j]), &c__1, &q2_ref(1, j), &c__1); 13393 /* L60: */ 13394 } 13395 slacpy_("A", qsiz, n, &q2_ref(1, 1), ldq2, &q_ref(1, 1), ldq); 13396 } 13397 return 0; 13398 } 13399 13400 /* If there are multiple eigenvalues then the problem deflates. Here 13401 the number of equal eigenvalues are found. As each equal 13402 eigenvalue is found, an elementary reflector is computed to rotate 13403 the corresponding eigensubspace so that the corresponding 13404 components of Z are zero in this new basis. */ 13405 13406 *k = 0; 13407 *givptr = 0; 13408 k2 = *n + 1; 13409 i__1 = *n; 13410 for (j = 1; j <= i__1; ++j) { 13411 if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { 13412 13413 /* Deflate due to small z component. */ 13414 13415 --k2; 13416 indxp[k2] = j; 13417 if (j == *n) { 13418 goto L110; 13419 } 13420 } else { 13421 jlam = j; 13422 goto L80; 13423 } 13424 /* L70: */ 13425 } 13426 L80: 13427 ++j; 13428 if (j > *n) { 13429 goto L100; 13430 } 13431 if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { 13432 13433 /* Deflate due to small z component. */ 13434 13435 --k2; 13436 indxp[k2] = j; 13437 } else { 13438 13439 /* Check if eigenvalues are close enough to allow deflation. 
*/ 13440 13441 s = z__[jlam]; 13442 c__ = z__[j]; 13443 13444 /* Find sqrt(a**2+b**2) without overflow or 13445 destructive underflow. */ 13446 13447 tau = slapy2_(&c__, &s); 13448 t = d__[j] - d__[jlam]; 13449 c__ /= tau; 13450 s = -s / tau; 13451 if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { 13452 13453 /* Deflation is possible. */ 13454 13455 z__[j] = tau; 13456 z__[jlam] = 0.f; 13457 13458 /* Record the appropriate Givens rotation */ 13459 13460 ++(*givptr); 13461 givcol_ref(1, *givptr) = indxq[indx[jlam]]; 13462 givcol_ref(2, *givptr) = indxq[indx[j]]; 13463 givnum_ref(1, *givptr) = c__; 13464 givnum_ref(2, *givptr) = s; 13465 if (*icompq == 1) { 13466 srot_(qsiz, &q_ref(1, indxq[indx[jlam]]), &c__1, &q_ref(1, 13467 indxq[indx[j]]), &c__1, &c__, &s); 13468 } 13469 t = d__[jlam] * c__ * c__ + d__[j] * s * s; 13470 d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; 13471 d__[jlam] = t; 13472 --k2; 13473 i__ = 1; 13474 L90: 13475 if (k2 + i__ <= *n) { 13476 if (d__[jlam] < d__[indxp[k2 + i__]]) { 13477 indxp[k2 + i__ - 1] = indxp[k2 + i__]; 13478 indxp[k2 + i__] = jlam; 13479 ++i__; 13480 goto L90; 13481 } else { 13482 indxp[k2 + i__ - 1] = jlam; 13483 } 13484 } else { 13485 indxp[k2 + i__ - 1] = jlam; 13486 } 13487 jlam = j; 13488 } else { 13489 ++(*k); 13490 w[*k] = z__[jlam]; 13491 dlamda[*k] = d__[jlam]; 13492 indxp[*k] = jlam; 13493 jlam = j; 13494 } 13495 } 13496 goto L80; 13497 L100: 13498 13499 /* Record the last eigenvalue. */ 13500 13501 ++(*k); 13502 w[*k] = z__[jlam]; 13503 dlamda[*k] = d__[jlam]; 13504 indxp[*k] = jlam; 13505 13506 L110: 13507 13508 /* Sort the eigenvalues and corresponding eigenvectors into DLAMDA 13509 and Q2 respectively. The eigenvalues/vectors which were not 13510 deflated go into the first K slots of DLAMDA and Q2 respectively, 13511 while those which were deflated go into the last N - K slots. */ 13512 13513 if (*icompq == 0) { 13514 i__1 = *n; 13515 for (j = 1; j <= i__1; ++j) { 13516 jp = indxp[j]; 13517 dlamda[j] = d__[jp]; 13518 perm[j] = indxq[indx[jp]]; 13519 /* L120: */ 13520 } 13521 } else { 13522 i__1 = *n; 13523 for (j = 1; j <= i__1; ++j) { 13524 jp = indxp[j]; 13525 dlamda[j] = d__[jp]; 13526 perm[j] = indxq[indx[jp]]; 13527 scopy_(qsiz, &q_ref(1, perm[j]), &c__1, &q2_ref(1, j), &c__1); 13528 /* L130: */ 13529 } 13530 } 13531 13532 /* The deflated eigenvalues and their corresponding vectors go back 13533 into the last N - K slots of D and Q respectively. */ 13534 13535 if (*k < *n) { 13536 if (*icompq == 0) { 13537 i__1 = *n - *k; 13538 scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); 13539 } else { 13540 i__1 = *n - *k; 13541 scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); 13542 i__1 = *n - *k; 13543 slacpy_("A", qsiz, &i__1, &q2_ref(1, *k + 1), ldq2, &q_ref(1, *k 13544 + 1), ldq); 13545 } 13546 } 13547 13548 return 0; 13549 13550 /* End of SLAED8 */ 13551 13552 } /* slaed8_ */
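Two tests drive the deflation in SLAED8: a component is dropped outright when RHO*|Z(j)| <= TOL with TOL = 8*EPS*|D(jmax)|, and two nearly equal eigenvalues are merged when the rotated cross term |T*C*S| stays below the same tolerance. The sketch below replays both checks on hypothetical data; std::hypot stands in for slapy2_ and FLT_EPSILON for slamch_("Epsilon"), so this is an illustration of the criteria, not of the routine itself.

#include <cstdio>
#include <cmath>
#include <cfloat>

int main() {
    // Hypothetical merged data: d sorted ascending, z the updating vector.
    float d[4] = {0.10f, 0.10000001f, 0.75f, 2.0f};
    float z[4] = {0.5f, 0.5f, 1e-9f, 0.70710678f};
    float rho = 2.0f;

    // Deflation tolerance, as in the listing: 8 * eps * max|d(j)|.
    float eps  = FLT_EPSILON;            // stand-in for slamch_("Epsilon")
    float dmax = 2.0f;                   // |d(jmax)| for this data
    float tol  = 8.0f * eps * dmax;

    // Test 1: a tiny z component (here z[2]) deflates outright.
    std::printf("z(3) deflates: %s\n",
                (rho * std::fabs(z[2]) <= tol) ? "yes" : "no");

    // Test 2: close eigenvalues d[0], d[1] -- try the Givens rotation.
    float s = z[0], c = z[1];
    float tau = std::hypot(c, s);        // stand-in for slapy2_(c, s)
    float t = d[1] - d[0];
    c /= tau;
    s = -s / tau;
    std::printf("pair deflates: %s (|t*c*s| = %g, tol = %g)\n",
                (std::fabs(t * c * s) <= tol) ? "yes" : "no",
                std::fabs(t * c * s), tol);
    return 0;
}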
int slaed9_ | ( | integer * | k, | |
integer * | kstart, | |||
integer * | kstop, | |||
integer * | n, | |||
real * | d__, | |||
real * | q, | |||
integer * | ldq, | |||
real * | rho, | |||
real * | dlamda, | |||
real * | w, | |||
real * | s, | |||
integer * | lds, | |||
integer * | info | |||
) |
Definition at line 14075 of file lapackblas.cpp.
References f2cmax, integer, q_ref, r_sign(), s_ref, scopy_(), slaed4_(), slamc3_(), snrm2_(), sqrt(), and xerbla_().
Referenced by slaed7_().
14078 { 14079 /* -- LAPACK routine (version 3.0) -- 14080 Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, 14081 Courant Institute, NAG Ltd., and Rice University 14082 September 30, 1994 14083 14084 14085 Purpose 14086 ======= 14087 14088 SLAED9 finds the roots of the secular equation, as defined by the 14089 values in D, Z, and RHO, between KSTART and KSTOP. It makes the 14090 appropriate calls to SLAED4 and then stores the new matrix of 14091 eigenvectors for use in calculating the next level of Z vectors. 14092 14093 Arguments 14094 ========= 14095 14096 K (input) INTEGER 14097 The number of terms in the rational function to be solved by 14098 SLAED4. K >= 0. 14099 14100 KSTART (input) INTEGER 14101 KSTOP (input) INTEGER 14102 The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP 14103 are to be computed. 1 <= KSTART <= KSTOP <= K. 14104 14105 N (input) INTEGER 14106 The number of rows and columns in the Q matrix. 14107 N >= K (delation may result in N > K). 14108 14109 D (output) REAL array, dimension (N) 14110 D(I) contains the updated eigenvalues 14111 for KSTART <= I <= KSTOP. 14112 14113 Q (workspace) REAL array, dimension (LDQ,N) 14114 14115 LDQ (input) INTEGER 14116 The leading dimension of the array Q. LDQ >= max( 1, N ). 14117 14118 RHO (input) REAL 14119 The value of the parameter in the rank one update equation. 14120 RHO >= 0 required. 14121 14122 DLAMDA (input) REAL array, dimension (K) 14123 The first K elements of this array contain the old roots 14124 of the deflated updating problem. These are the poles 14125 of the secular equation. 14126 14127 W (input) REAL array, dimension (K) 14128 The first K elements of this array contain the components 14129 of the deflation-adjusted updating vector. 14130 14131 S (output) REAL array, dimension (LDS, K) 14132 Will contain the eigenvectors of the repaired matrix which 14133 will be stored for subsequent Z vector calculation and 14134 multiplied by the previously accumulated eigenvectors 14135 to update the system. 14136 14137 LDS (input) INTEGER 14138 The leading dimension of S. LDS >= max( 1, K ). 14139 14140 INFO (output) INTEGER 14141 = 0: successful exit. 14142 < 0: if INFO = -i, the i-th argument had an illegal value. 14143 > 0: if INFO = 1, an eigenvalue did not converge 14144 14145 Further Details 14146 =============== 14147 14148 Based on contributions by 14149 Jeff Rutter, Computer Science Division, University of California 14150 at Berkeley, USA 14151 14152 ===================================================================== 14153 14154 14155 Test the input parameters. 
14156 14157 Parameter adjustments */ 14158 /* Table of constant values */ 14159 static integer c__1 = 1; 14160 14161 /* System generated locals */ 14162 integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; 14163 real r__1; 14164 /* Builtin functions */ 14165 // double sqrt(doublereal), r_sign(real *, real *); 14166 /* Local variables */ 14167 static real temp; 14168 extern doublereal snrm2_(integer *, real *, integer *); 14169 static integer i__, j; 14170 extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 14171 integer *), slaed4_(integer *, integer *, real *, real *, real *, 14172 real *, real *, integer *); 14173 extern doublereal slamc3_(real *, real *); 14174 extern /* Subroutine */ int xerbla_(const char *, integer *); 14175 #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] 14176 #define s_ref(a_1,a_2) s[(a_2)*s_dim1 + a_1] 14177 14178 14179 --d__; 14180 q_dim1 = *ldq; 14181 q_offset = 1 + q_dim1 * 1; 14182 q -= q_offset; 14183 --dlamda; 14184 --w; 14185 s_dim1 = *lds; 14186 s_offset = 1 + s_dim1 * 1; 14187 s -= s_offset; 14188 14189 /* Function Body */ 14190 *info = 0; 14191 14192 if (*k < 0) { 14193 *info = -1; 14194 } else if (*kstart < 1 || *kstart > f2cmax(1,*k)) { 14195 *info = -2; 14196 } else if (f2cmax(1,*kstop) < *kstart || *kstop > f2cmax(1,*k)) { 14197 *info = -3; 14198 } else if (*n < *k) { 14199 *info = -4; 14200 } else if (*ldq < f2cmax(1,*k)) { 14201 *info = -7; 14202 } else if (*lds < f2cmax(1,*k)) { 14203 *info = -12; 14204 } 14205 if (*info != 0) { 14206 i__1 = -(*info); 14207 xerbla_("SLAED9", &i__1); 14208 return 0; 14209 } 14210 14211 /* Quick return if possible */ 14212 14213 if (*k == 0) { 14214 return 0; 14215 } 14216 14217 /* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can 14218 be computed with high relative accuracy (barring over/underflow). 14219 This is a problem on machines without a guard digit in 14220 add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). 14221 The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), 14222 which on any of these machines zeros out the bottommost 14223 bit of DLAMDA(I) if it is 1; this makes the subsequent 14224 subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation 14225 occurs. On binary machines with a guard digit (almost all 14226 machines) it does not change DLAMDA(I) at all. On hexadecimal 14227 and decimal machines with a guard digit, it slightly 14228 changes the bottommost bits of DLAMDA(I). It does not account 14229 for hexadecimal or decimal machines without guard digits 14230 (we know of none). We use a subroutine call to compute 14231 2*DLAMBDA(I) to prevent optimizing compilers from eliminating 14232 this code. */ 14233 14234 i__1 = *n; 14235 for (i__ = 1; i__ <= i__1; ++i__) { 14236 dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; 14237 /* L10: */ 14238 } 14239 14240 i__1 = *kstop; 14241 for (j = *kstart; j <= i__1; ++j) { 14242 slaed4_(k, &j, &dlamda[1], &w[1], &q_ref(1, j), rho, &d__[j], info); 14243 14244 /* If the zero finder fails, the computation is terminated. */ 14245 14246 if (*info != 0) { 14247 goto L120; 14248 } 14249 /* L20: */ 14250 } 14251 14252 if (*k == 1 || *k == 2) { 14253 i__1 = *k; 14254 for (i__ = 1; i__ <= i__1; ++i__) { 14255 i__2 = *k; 14256 for (j = 1; j <= i__2; ++j) { 14257 s_ref(j, i__) = q_ref(j, i__); 14258 /* L30: */ 14259 } 14260 /* L40: */ 14261 } 14262 goto L120; 14263 } 14264 14265 /* Compute updated W. 
*/ 14266 14267 scopy_(k, &w[1], &c__1, &s[s_offset], &c__1); 14268 14269 /* Initialize W(I) = Q(I,I) */ 14270 14271 i__1 = *ldq + 1; 14272 scopy_(k, &q[q_offset], &i__1, &w[1], &c__1); 14273 i__1 = *k; 14274 for (j = 1; j <= i__1; ++j) { 14275 i__2 = j - 1; 14276 for (i__ = 1; i__ <= i__2; ++i__) { 14277 w[i__] *= q_ref(i__, j) / (dlamda[i__] - dlamda[j]); 14278 /* L50: */ 14279 } 14280 i__2 = *k; 14281 for (i__ = j + 1; i__ <= i__2; ++i__) { 14282 w[i__] *= q_ref(i__, j) / (dlamda[i__] - dlamda[j]); 14283 /* L60: */ 14284 } 14285 /* L70: */ 14286 } 14287 i__1 = *k; 14288 for (i__ = 1; i__ <= i__1; ++i__) { 14289 r__1 = sqrt(-w[i__]); 14290 w[i__] = r_sign(&r__1, &s_ref(i__, 1)); 14291 /* L80: */ 14292 } 14293 14294 /* Compute eigenvectors of the modified rank-1 modification. */ 14295 14296 i__1 = *k; 14297 for (j = 1; j <= i__1; ++j) { 14298 i__2 = *k; 14299 for (i__ = 1; i__ <= i__2; ++i__) { 14300 q_ref(i__, j) = w[i__] / q_ref(i__, j); 14301 /* L90: */ 14302 } 14303 temp = snrm2_(k, &q_ref(1, j), &c__1); 14304 i__2 = *k; 14305 for (i__ = 1; i__ <= i__2; ++i__) { 14306 s_ref(i__, j) = q_ref(i__, j) / temp; 14307 /* L100: */ 14308 } 14309 /* L110: */ 14310 } 14311 14312 L120: 14313 return 0; 14314 14315 /* End of SLAED9 */ 14316 14317 } /* slaed9_ */
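After SLAED4 returns a root, the last two loop nests above assemble the matching eigenvector by dividing the re-adjusted weights by the pole-minus-root differences held in Q and normalizing the column. The sketch below performs only that final step for a single root, with hypothetical poles, weights and root value; the explicit norm replaces snrm2_, and it illustrates the formula v(i) = w(i)/(dlamda(i) - lambda) rather than the full recomputation of W.

#include <cstdio>
#include <cmath>

int main() {
    // Hypothetical secular-equation data: poles dlamda, adjusted weights w,
    // and a stand-in value lam for a root returned by SLAED4.
    const int k = 3;
    float dlamda[k] = {0.0f, 1.0f, 2.0f};
    float w[k]      = {0.6f, 0.6f, 0.52915f};
    float lam       = 0.4f;

    // Eigenvector of diag(dlamda) + rho*w*w' belonging to lam:
    // v(i) = w(i) / (dlamda(i) - lam), then normalize to unit length.
    float v[k], nrm = 0.0f;
    for (int i = 0; i < k; ++i) {
        v[i] = w[i] / (dlamda[i] - lam);   // Q(i,j) = W(i) / DELTA(i,j)
        nrm += v[i] * v[i];
    }
    nrm = std::sqrt(nrm);                  // stand-in for snrm2_
    for (int i = 0; i < k; ++i)
        v[i] /= nrm;                       // S(i,j) = Q(i,j) / ||Q(:,j)||

    std::printf("normalized eigenvector: %g %g %g\n", v[0], v[1], v[2]);
    return 0;
}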
int slaeda_(integer *n, integer *tlvls, integer *curlvl, integer *curpbm, integer *prmptr, integer *perm, integer *givptr, integer *givcol, real *givnum, real *q, integer *qptr, real *z__, real *ztemp, integer *info)
Definition at line 15355 of file lapackblas.cpp.
References c__2, givcol_ref, givnum_ref, integer, pow_ii(), scopy_(), sgemv_(), sqrt(), srot_(), and xerbla_().
Referenced by slaed7_().
15359 { 15360 /* -- LAPACK routine (version 3.0) -- 15361 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 15362 Courant Institute, Argonne National Lab, and Rice University 15363 September 30, 1994 15364 15365 15366 Purpose 15367 ======= 15368 15369 SLAEDA computes the Z vector corresponding to the merge step in the 15370 CURLVLth step of the merge process with TLVLS steps for the CURPBMth 15371 problem. 15372 15373 Arguments 15374 ========= 15375 15376 N (input) INTEGER 15377 The dimension of the symmetric tridiagonal matrix. N >= 0. 15378 15379 TLVLS (input) INTEGER 15380 The total number of merging levels in the overall divide and 15381 conquer tree. 15382 15383 CURLVL (input) INTEGER 15384 The current level in the overall merge routine, 15385 0 <= curlvl <= tlvls. 15386 15387 CURPBM (input) INTEGER 15388 The current problem in the current level in the overall 15389 merge routine (counting from upper left to lower right). 15390 15391 PRMPTR (input) INTEGER array, dimension (N lg N) 15392 Contains a list of pointers which indicate where in PERM a 15393 level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) 15394 indicates the size of the permutation and incidentally the 15395 size of the full, non-deflated problem. 15396 15397 PERM (input) INTEGER array, dimension (N lg N) 15398 Contains the permutations (from deflation and sorting) to be 15399 applied to each eigenblock. 15400 15401 GIVPTR (input) INTEGER array, dimension (N lg N) 15402 Contains a list of pointers which indicate where in GIVCOL a 15403 level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) 15404 indicates the number of Givens rotations. 15405 15406 GIVCOL (input) INTEGER array, dimension (2, N lg N) 15407 Each pair of numbers indicates a pair of columns to take place 15408 in a Givens rotation. 15409 15410 GIVNUM (input) REAL array, dimension (2, N lg N) 15411 Each number indicates the S value to be used in the 15412 corresponding Givens rotation. 15413 15414 Q (input) REAL array, dimension (N**2) 15415 Contains the square eigenblocks from previous levels, the 15416 starting positions for blocks are given by QPTR. 15417 15418 QPTR (input) INTEGER array, dimension (N+2) 15419 Contains a list of pointers which indicate where in Q an 15420 eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates 15421 the size of the block. 15422 15423 Z (output) REAL array, dimension (N) 15424 On output this vector contains the updating vector (the last 15425 row of the first sub-eigenvector matrix and the first row of 15426 the second sub-eigenvector matrix). 15427 15428 ZTEMP (workspace) REAL array, dimension (N) 15429 15430 INFO (output) INTEGER 15431 = 0: successful exit. 15432 < 0: if INFO = -i, the i-th argument had an illegal value. 15433 15434 Further Details 15435 =============== 15436 15437 Based on contributions by 15438 Jeff Rutter, Computer Science Division, University of California 15439 at Berkeley, USA 15440 15441 ===================================================================== 15442 15443 15444 Test the input parameters. 
15445 15446 Parameter adjustments */ 15447 /* Table of constant values */ 15448 static integer c__2 = 2; 15449 static integer c__1 = 1; 15450 static real c_b24 = 1.f; 15451 static real c_b26 = 0.f; 15452 15453 /* System generated locals */ 15454 integer i__1, i__2, i__3; 15455 /* Builtin functions */ 15456 integer pow_ii(integer *, integer *); 15457 // double sqrt(doublereal); 15458 /* Local variables */ 15459 static integer curr; 15460 extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 15461 integer *, real *, real *); 15462 static integer bsiz1, bsiz2, psiz1, psiz2, i__, k, zptr1; 15463 extern /* Subroutine */ int sgemv_(const char *, integer *, integer *, real *, 15464 real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), 15465 xerbla_(const char *, integer *); 15466 static integer mid, ptr; 15467 #define givcol_ref(a_1,a_2) givcol[(a_2)*2 + a_1] 15468 #define givnum_ref(a_1,a_2) givnum[(a_2)*2 + a_1] 15469 15470 15471 --ztemp; 15472 --z__; 15473 --qptr; 15474 --q; 15475 givnum -= 3; 15476 givcol -= 3; 15477 --givptr; 15478 --perm; 15479 --prmptr; 15480 15481 /* Function Body */ 15482 *info = 0; 15483 15484 if (*n < 0) { 15485 *info = -1; 15486 } 15487 if (*info != 0) { 15488 i__1 = -(*info); 15489 xerbla_("SLAEDA", &i__1); 15490 return 0; 15491 } 15492 15493 /* Quick return if possible */ 15494 15495 if (*n == 0) { 15496 return 0; 15497 } 15498 15499 /* Determine location of first number in second half. */ 15500 15501 mid = *n / 2 + 1; 15502 15503 /* Gather last/first rows of appropriate eigenblocks into center of Z */ 15504 15505 ptr = 1; 15506 15507 /* Determine location of lowest level subproblem in the full storage 15508 scheme */ 15509 15510 i__1 = *curlvl - 1; 15511 curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; 15512 15513 /* Determine size of these matrices. We add HALF to the value of 15514 the SQRT in case the machine underestimates one of these square 15515 roots. */ 15516 15517 bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f); 15518 bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + .5f); 15519 i__1 = mid - bsiz1 - 1; 15520 for (k = 1; k <= i__1; ++k) { 15521 z__[k] = 0.f; 15522 /* L10: */ 15523 } 15524 scopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & 15525 c__1); 15526 scopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); 15527 i__1 = *n; 15528 for (k = mid + bsiz2; k <= i__1; ++k) { 15529 z__[k] = 0.f; 15530 /* L20: */ 15531 } 15532 15533 /* Loop thru remaining levels 1 -> CURLVL applying the Givens 15534 rotations and permutation and then multiplying the center matrices 15535 against the current Z. 
*/ 15536 15537 ptr = pow_ii(&c__2, tlvls) + 1; 15538 i__1 = *curlvl - 1; 15539 for (k = 1; k <= i__1; ++k) { 15540 i__2 = *curlvl - k; 15541 i__3 = *curlvl - k - 1; 15542 curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - 15543 1; 15544 psiz1 = prmptr[curr + 1] - prmptr[curr]; 15545 psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; 15546 zptr1 = mid - psiz1; 15547 15548 /* Apply Givens at CURR and CURR+1 */ 15549 15550 i__2 = givptr[curr + 1] - 1; 15551 for (i__ = givptr[curr]; i__ <= i__2; ++i__) { 15552 srot_(&c__1, &z__[zptr1 + givcol_ref(1, i__) - 1], &c__1, &z__[ 15553 zptr1 + givcol_ref(2, i__) - 1], &c__1, &givnum_ref(1, 15554 i__), &givnum_ref(2, i__)); 15555 /* L30: */ 15556 } 15557 i__2 = givptr[curr + 2] - 1; 15558 for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { 15559 srot_(&c__1, &z__[mid - 1 + givcol_ref(1, i__)], &c__1, &z__[mid 15560 - 1 + givcol_ref(2, i__)], &c__1, &givnum_ref(1, i__), & 15561 givnum_ref(2, i__)); 15562 /* L40: */ 15563 } 15564 psiz1 = prmptr[curr + 1] - prmptr[curr]; 15565 psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; 15566 i__2 = psiz1 - 1; 15567 for (i__ = 0; i__ <= i__2; ++i__) { 15568 ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; 15569 /* L50: */ 15570 } 15571 i__2 = psiz2 - 1; 15572 for (i__ = 0; i__ <= i__2; ++i__) { 15573 ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - 15574 1]; 15575 /* L60: */ 15576 } 15577 15578 /* Multiply Blocks at CURR and CURR+1 15579 15580 Determine size of these matrices. We add HALF to the value of 15581 the SQRT in case the machine underestimates one of these 15582 square roots. */ 15583 15584 bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f); 15585 bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + 15586 .5f); 15587 if (bsiz1 > 0) { 15588 sgemv_("T", &bsiz1, &bsiz1, &c_b24, &q[qptr[curr]], &bsiz1, & 15589 ztemp[1], &c__1, &c_b26, &z__[zptr1], &c__1); 15590 } 15591 i__2 = psiz1 - bsiz1; 15592 scopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); 15593 if (bsiz2 > 0) { 15594 sgemv_("T", &bsiz2, &bsiz2, &c_b24, &q[qptr[curr + 1]], &bsiz2, & 15595 ztemp[psiz1 + 1], &c__1, &c_b26, &z__[mid], &c__1); 15596 } 15597 i__2 = psiz2 - bsiz2; 15598 scopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & 15599 c__1); 15600 15601 i__2 = *tlvls - k; 15602 ptr += pow_ii(&c__2, &i__2); 15603 /* L70: */ 15604 } 15605 15606 return 0; 15607 15608 /* End of SLAEDA */ 15609 15610 } /* slaeda_ */
int slaev2_(real *a, real *b, real *c__, real *rt1, real *rt2, real *cs1, real *sn1)
Definition at line 2131 of file lapackblas.cpp.
References dabs, integer, and sqrt().
Referenced by ssteqr_().
02133 { 02134 /* -- LAPACK auxiliary routine (version 3.0) -- 02135 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 02136 Courant Institute, Argonne National Lab, and Rice University 02137 October 31, 1992 02138 02139 02140 Purpose 02141 ======= 02142 02143 SLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix 02144 [ A B ] 02145 [ B C ]. 02146 On return, RT1 is the eigenvalue of larger absolute value, RT2 is the 02147 eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right 02148 eigenvector for RT1, giving the decomposition 02149 02150 [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] 02151 [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. 02152 02153 Arguments 02154 ========= 02155 02156 A (input) REAL 02157 The (1,1) element of the 2-by-2 matrix. 02158 02159 B (input) REAL 02160 The (1,2) element and the conjugate of the (2,1) element of 02161 the 2-by-2 matrix. 02162 02163 C (input) REAL 02164 The (2,2) element of the 2-by-2 matrix. 02165 02166 RT1 (output) REAL 02167 The eigenvalue of larger absolute value. 02168 02169 RT2 (output) REAL 02170 The eigenvalue of smaller absolute value. 02171 02172 CS1 (output) REAL 02173 SN1 (output) REAL 02174 The vector (CS1, SN1) is a unit right eigenvector for RT1. 02175 02176 Further Details 02177 =============== 02178 02179 RT1 is accurate to a few ulps barring over/underflow. 02180 02181 RT2 may be inaccurate if there is massive cancellation in the 02182 determinant A*C-B*B; higher precision or correctly rounded or 02183 correctly truncated arithmetic would be needed to compute RT2 02184 accurately in all cases. 02185 02186 CS1 and SN1 are accurate to a few ulps barring over/underflow. 02187 02188 Overflow is possible only if RT1 is within a factor of 5 of overflow. 02189 Underflow is harmless if the input data is 0 or exceeds 02190 underflow_threshold / macheps. 02191 02192 ===================================================================== 02193 02194 02195 Compute the eigenvalues */ 02196 /* System generated locals */ 02197 real r__1; 02198 /* Builtin functions */ 02199 // double sqrt(doublereal); 02200 /* Local variables */ 02201 static real acmn, acmx, ab, df, cs, ct, tb, sm, tn, rt, adf, acs; 02202 static integer sgn1, sgn2; 02203 02204 02205 sm = *a + *c__; 02206 df = *a - *c__; 02207 adf = dabs(df); 02208 tb = *b + *b; 02209 ab = dabs(tb); 02210 if (dabs(*a) > dabs(*c__)) { 02211 acmx = *a; 02212 acmn = *c__; 02213 } else { 02214 acmx = *c__; 02215 acmn = *a; 02216 } 02217 if (adf > ab) { 02218 /* Computing 2nd power */ 02219 r__1 = ab / adf; 02220 rt = adf * sqrt(r__1 * r__1 + 1.f); 02221 } else if (adf < ab) { 02222 /* Computing 2nd power */ 02223 r__1 = adf / ab; 02224 rt = ab * sqrt(r__1 * r__1 + 1.f); 02225 } else { 02226 02227 /* Includes case AB=ADF=0 */ 02228 02229 rt = ab * sqrt(2.f); 02230 } 02231 if (sm < 0.f) { 02232 *rt1 = (sm - rt) * .5f; 02233 sgn1 = -1; 02234 02235 /* Order of execution important. 02236 To get fully accurate smaller eigenvalue, 02237 next line needs to be executed in higher precision. */ 02238 02239 *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; 02240 } else if (sm > 0.f) { 02241 *rt1 = (sm + rt) * .5f; 02242 sgn1 = 1; 02243 02244 /* Order of execution important. 02245 To get fully accurate smaller eigenvalue, 02246 next line needs to be executed in higher precision. 
*/ 02247 02248 *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; 02249 } else { 02250 02251 /* Includes case RT1 = RT2 = 0 */ 02252 02253 *rt1 = rt * .5f; 02254 *rt2 = rt * -.5f; 02255 sgn1 = 1; 02256 } 02257 02258 /* Compute the eigenvector */ 02259 02260 if (df >= 0.f) { 02261 cs = df + rt; 02262 sgn2 = 1; 02263 } else { 02264 cs = df - rt; 02265 sgn2 = -1; 02266 } 02267 acs = dabs(cs); 02268 if (acs > ab) { 02269 ct = -tb / cs; 02270 *sn1 = 1.f / sqrt(ct * ct + 1.f); 02271 *cs1 = ct * *sn1; 02272 } else { 02273 if (ab == 0.f) { 02274 *cs1 = 1.f; 02275 *sn1 = 0.f; 02276 } else { 02277 tn = -cs / tb; 02278 *cs1 = 1.f / sqrt(tn * tn + 1.f); 02279 *sn1 = tn * *cs1; 02280 } 02281 } 02282 if (sgn1 == sgn2) { 02283 tn = *cs1; 02284 *cs1 = -(*sn1); 02285 *sn1 = tn; 02286 } 02287 return 0; 02288 02289 /* End of SLAEV2 */ 02290 02291 } /* slaev2_ */
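A minimal driver, not part of the library, illustrating SLAEV2 on the matrix [ 2 1 ; 1 2 ]; it assumes the real typedef from lapackblas.h is the usual f2c single precision type. The expected output is RT1 = 3, RT2 = 1 and (CS1, SN1) close to (0.7071, 0.7071).

#include <cstdio>
#include "lapackblas.h"

int main() {
    /* Eigendecomposition of the 2-by-2 symmetric matrix [ 2 1 ; 1 2 ]. */
    real a = 2.f, b = 1.f, c = 2.f;
    real rt1, rt2, cs1, sn1;
    slaev2_(&a, &b, &c, &rt1, &rt2, &cs1, &sn1);
    /* Expect RT1 = 3 with eigenvector (CS1, SN1) ~ (0.7071, 0.7071), RT2 = 1. */
    printf("RT1 = %g  RT2 = %g  CS1 = %g  SN1 = %g\n", rt1, rt2, cs1, sn1);
    return 0;
}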
int slamc1_(integer *beta, integer *t, logical *rnd, logical *ieee1)
Definition at line 2423 of file lapackblas.cpp.
References FALSE_, integer, slamc3_(), and TRUE_.
Referenced by slamc2_().
02425 { 02426 /* -- LAPACK auxiliary routine (version 3.0) -- 02427 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 02428 Courant Institute, Argonne National Lab, and Rice University 02429 October 31, 1992 02430 02431 02432 Purpose 02433 ======= 02434 02435 SLAMC1 determines the machine parameters given by BETA, T, RND, and 02436 IEEE1. 02437 02438 Arguments 02439 ========= 02440 02441 BETA (output) INTEGER 02442 The base of the machine. 02443 02444 T (output) INTEGER 02445 The number of ( BETA ) digits in the mantissa. 02446 02447 RND (output) LOGICAL 02448 Specifies whether proper rounding ( RND = .TRUE. ) or 02449 chopping ( RND = .FALSE. ) occurs in addition. This may not 02450 02451 be a reliable guide to the way in which the machine performs 02452 02453 its arithmetic. 02454 02455 IEEE1 (output) LOGICAL 02456 Specifies whether rounding appears to be done in the IEEE 02457 'round to nearest' style. 02458 02459 Further Details 02460 =============== 02461 02462 The routine is based on the routine ENVRON by Malcolm and 02463 incorporates suggestions by Gentleman and Marovich. See 02464 02465 Malcolm M. A. (1972) Algorithms to reveal properties of 02466 floating-point arithmetic. Comms. of the ACM, 15, 949-951. 02467 02468 Gentleman W. M. and Marovich S. B. (1974) More on algorithms 02469 that reveal properties of floating point arithmetic units. 02470 Comms. of the ACM, 17, 276-277. 02471 02472 ===================================================================== 02473 */ 02474 /* Initialized data */ 02475 static logical first = TRUE_; 02476 /* System generated locals */ 02477 real r__1, r__2; 02478 /* Local variables */ 02479 static logical lrnd; 02480 static real a, b, c, f; 02481 static integer lbeta; 02482 static real savec; 02483 static logical lieee1; 02484 static real t1, t2; 02485 extern doublereal slamc3_(real *, real *); 02486 static integer lt; 02487 static real one, qtr; 02488 02489 02490 02491 if (first) { 02492 first = FALSE_; 02493 one = 1.f; 02494 02495 /* LBETA, LIEEE1, LT and LRND are the local values of BE 02496 TA, 02497 IEEE1, T and RND. 02498 02499 Throughout this routine we use the function SLAMC3 to ens 02500 ure 02501 that relevant values are stored and not held in registers, 02502 or 02503 are not affected by optimizers. 02504 02505 Compute a = 2.0**m with the smallest positive integer m s 02506 uch 02507 that 02508 02509 fl( a + 1.0 ) = a. */ 02510 02511 a = 1.f; 02512 c = 1.f; 02513 02514 /* + WHILE( C.EQ.ONE )LOOP */ 02515 L10: 02516 if (c == one) { 02517 a *= 2; 02518 c = slamc3_(&a, &one); 02519 r__1 = -(doublereal)a; 02520 c = slamc3_(&c, &r__1); 02521 goto L10; 02522 } 02523 /* + END WHILE 02524 02525 Now compute b = 2.0**m with the smallest positive integer 02526 m 02527 such that 02528 02529 fl( a + b ) .gt. a. */ 02530 02531 b = 1.f; 02532 c = slamc3_(&a, &b); 02533 02537 printf("\n"); 02538 02539 /* + WHILE( C.EQ.A )LOOP */ 02540 L20: 02541 if (c == a) { 02542 b *= 2; 02543 c = slamc3_(&a, &b); 02544 goto L20; 02545 } 02546 /* + END WHILE 02547 02548 Now compute the base. a and c are neighbouring floating po 02549 int 02550 numbers in the interval ( beta**t, beta**( t + 1 ) ) and 02551 so 02552 their difference is beta. Adding 0.25 to c is to ensure that 02553 it 02554 is truncated to beta and not ( beta - 1 ). 
*/ 02555 02556 qtr = one / 4; 02557 savec = c; 02558 r__1 = -(doublereal)a; 02559 c = slamc3_(&c, &r__1); 02560 lbeta = static_cast<integer>(c + qtr); 02561 02562 /* Now determine whether rounding or chopping occurs, by addin 02563 g a 02564 bit less than beta/2 and a bit more than beta/2 to 02565 a. */ 02566 02567 b = (real) lbeta; 02568 r__1 = b / 2; 02569 r__2 = -(doublereal)b / 100; 02570 f = slamc3_(&r__1, &r__2); 02571 c = slamc3_(&f, &a); 02572 if (c == a) { 02573 lrnd = TRUE_; 02574 } else { 02575 lrnd = FALSE_; 02576 } 02577 r__1 = b / 2; 02578 r__2 = b / 100; 02579 f = slamc3_(&r__1, &r__2); 02580 c = slamc3_(&f, &a); 02581 if (lrnd && c == a) { 02582 lrnd = FALSE_; 02583 } 02584 02585 /* Try and decide whether rounding is done in the IEEE 'round 02586 to 02587 nearest' style. B/2 is half a unit in the last place of the 02588 two 02589 numbers A and SAVEC. Furthermore, A is even, i.e. has last 02590 bit 02591 zero, and SAVEC is odd. Thus adding B/2 to A should not cha 02592 nge 02593 A, but adding B/2 to SAVEC should change SAVEC. */ 02594 02595 r__1 = b / 2; 02596 t1 = slamc3_(&r__1, &a); 02597 r__1 = b / 2; 02598 t2 = slamc3_(&r__1, &savec); 02599 lieee1 = t1 == a && t2 > savec && lrnd; 02600 02601 /* Now find the mantissa, t. It should be the integer part 02602 of 02603 log to the base beta of a, however it is safer to determine 02604 t 02605 by powering. So we find t as the smallest positive integer 02606 for 02607 which 02608 02609 fl( beta**t + 1.0 ) = 1.0. */ 02610 02611 lt = 0; 02612 a = 1.f; 02613 c = 1.f; 02614 02615 /* + WHILE( C.EQ.ONE )LOOP */ 02616 L30: 02617 if (c == one) { 02618 ++lt; 02619 a *= lbeta; 02620 c = slamc3_(&a, &one); 02621 r__1 = -(doublereal)a; 02622 c = slamc3_(&c, &r__1); 02623 goto L30; 02624 } 02625 /* + END WHILE */ 02626 02627 } 02628 02629 *beta = lbeta; 02630 *t = lt; 02631 *rnd = lrnd; 02632 *ieee1 = lieee1; 02633 return 0; 02634 02635 /* End of SLAMC1 */ 02636 02637 } /* slamc1_ */
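The following standalone sketch, an illustration rather than library code, reproduces the Malcolm-style probe that SLAMC1 performs; volatile stands in for the SLAMC3 calls to keep intermediate results out of registers and extended precision. On an IEEE single precision machine it prints a detected base of 2.

#include <cstdio>

int main() {
    volatile float one = 1.0f, a = 1.0f, c = 1.0f;

    /* Find the smallest a = 2**m with fl(a + 1) = a. */
    while (c == one) {
        a *= 2.0f;
        c = a + one;
        c = c - a;              /* becomes 0 once a + 1 rounds back to a */
    }

    /* Find the smallest power of two b with fl(a + b) > a. */
    volatile float b = 1.0f;
    c = a + b;
    while (c == a) {
        b *= 2.0f;
        c = a + b;
    }

    /* a and c = fl(a + b) are neighbouring floats, so c - a is the base. */
    c = c - a;
    printf("detected base = %d\n", (int) (c + 0.25f));
    return 0;
}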
int slamc2_(integer *beta, integer *t, logical *rnd, real *eps, integer *emin, real *rmin, integer *emax, real *rmax)
Definition at line 2641 of file lapackblas.cpp.
References abs, dabs, f2cmax, f2cmin, FALSE_, integer, pow_ri(), slamc1_(), slamc3_(), slamc4_(), slamc5_(), and TRUE_.
Referenced by slamch_().
02643 { 02644 /* -- LAPACK auxiliary routine (version 3.0) -- 02645 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 02646 Courant Institute, Argonne National Lab, and Rice University 02647 October 31, 1992 02648 02649 02650 Purpose 02651 ======= 02652 02653 SLAMC2 determines the machine parameters specified in its argument 02654 list. 02655 02656 Arguments 02657 ========= 02658 02659 BETA (output) INTEGER 02660 The base of the machine. 02661 02662 T (output) INTEGER 02663 The number of ( BETA ) digits in the mantissa. 02664 02665 RND (output) LOGICAL 02666 Specifies whether proper rounding ( RND = .TRUE. ) or 02667 chopping ( RND = .FALSE. ) occurs in addition. This may not 02668 02669 be a reliable guide to the way in which the machine performs 02670 02671 its arithmetic. 02672 02673 EPS (output) REAL 02674 The smallest positive number such that 02675 02676 fl( 1.0 - EPS ) .LT. 1.0, 02677 02678 where fl denotes the computed value. 02679 02680 EMIN (output) INTEGER 02681 The minimum exponent before (gradual) underflow occurs. 02682 02683 RMIN (output) REAL 02684 The smallest normalized number for the machine, given by 02685 BASE**( EMIN - 1 ), where BASE is the floating point value 02686 02687 of BETA. 02688 02689 EMAX (output) INTEGER 02690 The maximum exponent before overflow occurs. 02691 02692 RMAX (output) REAL 02693 The largest positive number for the machine, given by 02694 BASE**EMAX * ( 1 - EPS ), where BASE is the floating point 02695 02696 value of BETA. 02697 02698 Further Details 02699 =============== 02700 02701 The computation of EPS is based on a routine PARANOIA by 02702 W. Kahan of the University of California at Berkeley. 02703 02704 ===================================================================== 02705 */ 02706 /* Table of constant values */ 02707 // static integer c__1 = 1; //not used in this function 02708 02709 /* Initialized data */ 02710 static logical first = TRUE_; 02711 static logical iwarn = FALSE_; 02712 /* System generated locals */ 02713 integer i__1; 02714 real r__1, r__2, r__3, r__4, r__5; 02715 /* Builtin functions */ 02716 double pow_ri(real *, integer *); 02717 /* Local variables */ 02718 static logical ieee; 02719 static real half; 02720 static logical lrnd; 02721 static real leps, zero, a, b, c; 02722 static integer i, lbeta; 02723 static real rbase; 02724 static integer lemin, lemax, gnmin; 02725 static real small; 02726 static integer gpmin; 02727 static real third, lrmin, lrmax, sixth; 02728 static logical lieee1; 02729 extern /* Subroutine */ int slamc1_(integer *, integer *, logical *, 02730 logical *); 02731 extern doublereal slamc3_(real *, real *); 02732 extern /* Subroutine */ int slamc4_(integer *, real *, integer *), 02733 slamc5_(integer *, integer *, integer *, logical *, integer *, 02734 real *); 02735 static integer lt, ngnmin, ngpmin; 02736 static real one, two; 02737 02738 02739 02740 if (first) { 02741 first = FALSE_; 02742 zero = 0.f; 02743 one = 1.f; 02744 two = 2.f; 02745 02746 /* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values 02747 of 02748 BETA, T, RND, EPS, EMIN and RMIN. 02749 02750 Throughout this routine we use the function SLAMC3 to ens 02751 ure 02752 that relevant values are stored and not held in registers, 02753 or 02754 are not affected by optimizers. 02755 02756 SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. 02757 */ 02758 02759 slamc1_(&lbeta, <, &lrnd, &lieee1); 02760 02761 /* Start to find EPS. 
*/ 02762 02763 b = (real) lbeta; 02764 i__1 = -lt; 02765 a = pow_ri(&b, &i__1); 02766 leps = a; 02767 02768 /* Try some tricks to see whether or not this is the correct E 02769 PS. */ 02770 02771 b = two / 3; 02772 half = one / 2; 02773 r__1 = -(doublereal)half; 02774 sixth = slamc3_(&b, &r__1); 02775 third = slamc3_(&sixth, &sixth); 02776 r__1 = -(doublereal)half; 02777 b = slamc3_(&third, &r__1); 02778 b = slamc3_(&b, &sixth); 02779 b = dabs(b); 02780 if (b < leps) { 02781 b = leps; 02782 } 02783 02784 leps = 1.f; 02785 02786 /* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ 02787 L10: 02788 if (leps > b && b > zero) { 02789 leps = b; 02790 r__1 = half * leps; 02791 /* Computing 5th power */ 02792 r__3 = two, r__4 = r__3, r__3 *= r__3; 02793 /* Computing 2nd power */ 02794 r__5 = leps; 02795 r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5); 02796 c = slamc3_(&r__1, &r__2); 02797 r__1 = -(doublereal)c; 02798 c = slamc3_(&half, &r__1); 02799 b = slamc3_(&half, &c); 02800 r__1 = -(doublereal)b; 02801 c = slamc3_(&half, &r__1); 02802 b = slamc3_(&half, &c); 02803 goto L10; 02804 } 02805 /* + END WHILE */ 02806 02807 if (a < leps) { 02808 leps = a; 02809 } 02810 02811 /* Computation of EPS complete. 02812 02813 Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3 02814 )). 02815 Keep dividing A by BETA until (gradual) underflow occurs. T 02816 his 02817 is detected when we cannot recover the previous A. */ 02818 02819 rbase = one / lbeta; 02820 small = one; 02821 for (i = 1; i <= 3; ++i) { 02822 r__1 = small * rbase; 02823 small = slamc3_(&r__1, &zero); 02824 /* L20: */ 02825 } 02826 a = slamc3_(&one, &small); 02827 slamc4_(&ngpmin, &one, &lbeta); 02828 r__1 = -(doublereal)one; 02829 slamc4_(&ngnmin, &r__1, &lbeta); 02830 slamc4_(&gpmin, &a, &lbeta); 02831 r__1 = -(doublereal)a; 02832 slamc4_(&gnmin, &r__1, &lbeta); 02833 ieee = FALSE_; 02834 02835 if (ngpmin == ngnmin && gpmin == gnmin) { 02836 if (ngpmin == gpmin) { 02837 lemin = ngpmin; 02838 /* ( Non twos-complement machines, no gradual under 02839 flow; 02840 e.g., VAX ) */ 02841 } else if (gpmin - ngpmin == 3) { 02842 lemin = ngpmin - 1 + lt; 02843 ieee = TRUE_; 02844 /* ( Non twos-complement machines, with gradual und 02845 erflow; 02846 e.g., IEEE standard followers ) */ 02847 } else { 02848 lemin = f2cmin(ngpmin,gpmin); 02849 /* ( A guess; no known machine ) */ 02850 iwarn = TRUE_; 02851 } 02852 02853 } else if (ngpmin == gpmin && ngnmin == gnmin) { 02854 if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) { 02855 lemin = f2cmax(ngpmin,ngnmin); 02856 /* ( Twos-complement machines, no gradual underflow 02857 ; 02858 e.g., CYBER 205 ) */ 02859 } else { 02860 lemin = f2cmin(ngpmin,ngnmin); 02861 /* ( A guess; no known machine ) */ 02862 iwarn = TRUE_; 02863 } 02864 02865 } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin) 02866 { 02867 if (gpmin - f2cmin(ngpmin,ngnmin) == 3) { 02868 lemin = f2cmax(ngpmin,ngnmin) - 1 + lt; 02869 /* ( Twos-complement machines with gradual underflo 02870 w; 02871 no known machine ) */ 02872 } else { 02873 lemin = f2cmin(ngpmin,ngnmin); 02874 /* ( A guess; no known machine ) */ 02875 iwarn = TRUE_; 02876 } 02877 02878 } else { 02879 /* Computing MIN */ 02880 i__1 = f2cmin(ngpmin,ngnmin), i__1 = f2cmin(i__1,gpmin); 02881 lemin = f2cmin(i__1,gnmin); 02882 /* ( A guess; no known machine ) */ 02883 iwarn = TRUE_; 02884 } 02885 /* ** 02886 Comment out this if block if EMIN is ok */ 02887 if (iwarn) { 02888 first = TRUE_; 02889 printf("\n\n WARNING. 
The value EMIN may be incorrect:- "); 02890 printf("EMIN = %8i\n",lemin); 02891 printf("If, after inspection, the value EMIN looks acceptable"); 02892 printf("please comment out \n the IF block as marked within the"); 02893 printf("code of routine SLAMC2, \n otherwise supply EMIN"); 02894 printf("explicitly.\n"); 02895 } 02896 /* ** 02897 02898 Assume IEEE arithmetic if we found denormalised numbers abo 02899 ve, 02900 or if arithmetic seems to round in the IEEE style, determi 02901 ned 02902 in routine SLAMC1. A true IEEE machine should have both thi 02903 ngs 02904 true; however, faulty machines may have one or the other. */ 02905 02906 ieee = ieee || lieee1; 02907 02908 /* Compute RMIN by successive division by BETA. We could comp 02909 ute 02910 RMIN as BASE**( EMIN - 1 ), but some machines underflow dur 02911 ing 02912 this computation. */ 02913 02914 lrmin = 1.f; 02915 i__1 = 1 - lemin; 02916 for (i = 1; i <= 1-lemin; ++i) { 02917 r__1 = lrmin * rbase; 02918 lrmin = slamc3_(&r__1, &zero); 02919 /* L30: */ 02920 } 02921 02922 /* Finally, call SLAMC5 to compute EMAX and RMAX. */ 02923 02924 slamc5_(&lbeta, <, &lemin, &ieee, &lemax, &lrmax); 02925 } 02926 02927 *beta = lbeta; 02928 *t = lt; 02929 *rnd = lrnd; 02930 *eps = leps; 02931 *emin = lemin; 02932 *rmin = lrmin; 02933 *emax = lemax; 02934 *rmax = lrmax; 02935 02936 return 0; 02937 02938 02939 /* End of SLAMC2 */ 02940 02941 } /* slamc2_ */
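SLAMC2 is normally reached only through SLAMCH below, but it can be called directly; a hedged sketch, assuming the integer, logical and real typedefs from lapackblas.h. On IEEE single precision hardware one expects beta = 2, t = 24, emin = -125 and emax = 128.

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer beta, t, emin, emax;
    logical rnd;
    real eps, rmin, rmax;

    slamc2_(&beta, &t, &rnd, &eps, &emin, &rmin, &emax, &rmax);
    printf("beta = %ld  t = %ld  eps = %g  emin = %ld  emax = %ld\n",
           (long) beta, (long) t, (double) eps, (long) emin, (long) emax);
    return 0;
}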
doublereal slamc3_(real *a, real *b)
Definition at line 2945 of file lapackblas.cpp.
Referenced by slaed3_(), slaed9_(), slamc1_(), slamc2_(), slamc4_(), and slamc5_().
02946 { 02947 /* -- LAPACK auxiliary routine (version 3.0) -- 02948 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 02949 Courant Institute, Argonne National Lab, and Rice University 02950 October 31, 1992 02951 02952 02953 Purpose 02954 ======= 02955 02956 SLAMC3 is intended to force A and B to be stored prior to doing 02957 02958 the addition of A and B , for use in situations where optimizers 02959 02960 might hold one of these in a register. 02961 02962 Arguments 02963 ========= 02964 02965 A, B (input) REAL 02966 The values A and B. 02967 02968 ===================================================================== 02969 */ 02970 /* >>Start of File<< 02971 System generated locals */ 02972 real ret_val; 02973 02974 02975 02976 ret_val = *a + *b; 02977 02978 return ret_val; 02979 02980 /* End of SLAMC3 */ 02981 02982 } /* slamc3_ */
int slamc4_(integer *emin, real *start, integer *base)
Definition at line 2986 of file lapackblas.cpp.
References integer, and slamc3_().
Referenced by slamc2_().
02987 { 02988 /* -- LAPACK auxiliary routine (version 3.0) -- 02989 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 02990 Courant Institute, Argonne National Lab, and Rice University 02991 October 31, 1992 02992 02993 02994 Purpose 02995 ======= 02996 02997 SLAMC4 is a service routine for SLAMC2. 02998 02999 Arguments 03000 ========= 03001 03002 EMIN (output) EMIN 03003 The minimum exponent before (gradual) underflow, computed by 03004 03005 setting A = START and dividing by BASE until the previous A 03006 can not be recovered. 03007 03008 START (input) REAL 03009 The starting point for determining EMIN. 03010 03011 BASE (input) INTEGER 03012 The base of the machine. 03013 03014 ===================================================================== 03015 */ 03016 /* System generated locals */ 03017 integer i__1; 03018 real r__1; 03019 /* Local variables */ 03020 static real zero, a; 03021 static integer i; 03022 static real rbase, b1, b2, c1, c2, d1, d2; 03023 extern doublereal slamc3_(real *, real *); 03024 static real one; 03025 03026 03027 03028 a = *start; 03029 one = 1.f; 03030 rbase = one / *base; 03031 zero = 0.f; 03032 *emin = 1; 03033 r__1 = a * rbase; 03034 b1 = slamc3_(&r__1, &zero); 03035 c1 = a; 03036 c2 = a; 03037 d1 = a; 03038 d2 = a; 03039 /* + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. 03040 $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP */ 03041 L10: 03042 if (c1 == a && c2 == a && d1 == a && d2 == a) { 03043 --(*emin); 03044 a = b1; 03045 r__1 = a / *base; 03046 b1 = slamc3_(&r__1, &zero); 03047 r__1 = b1 * *base; 03048 c1 = slamc3_(&r__1, &zero); 03049 d1 = zero; 03050 i__1 = *base; 03051 for (i = 1; i <= *base; ++i) { 03052 d1 += b1; 03053 /* L20: */ 03054 } 03055 r__1 = a * rbase; 03056 b2 = slamc3_(&r__1, &zero); 03057 r__1 = b2 / rbase; 03058 c2 = slamc3_(&r__1, &zero); 03059 d2 = zero; 03060 i__1 = *base; 03061 for (i = 1; i <= *base; ++i) { 03062 d2 += b2; 03063 /* L30: */ 03064 } 03065 goto L10; 03066 } 03067 /* + END WHILE */ 03068 03069 return 0; 03070 03071 /* End of SLAMC4 */ 03072 03073 } /* slamc4_ */
int slamc5_(integer *beta, integer *p, integer *emin, logical *ieee, integer *emax, real *rmax)
Definition at line 3077 of file lapackblas.cpp.
References integer, slamc3_(), and y.
Referenced by slamc2_().
03079 { 03080 /* -- LAPACK auxiliary routine (version 3.0) -- 03081 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 03082 Courant Institute, Argonne National Lab, and Rice University 03083 October 31, 1992 03084 03085 03086 Purpose 03087 ======= 03088 03089 SLAMC5 attempts to compute RMAX, the largest machine floating-point 03090 number, without overflow. It assumes that EMAX + abs(EMIN) sum 03091 approximately to a power of 2. It will fail on machines where this 03092 assumption does not hold, for example, the Cyber 205 (EMIN = -28625, 03093 03094 EMAX = 28718). It will also fail if the value supplied for EMIN is 03095 too large (i.e. too close to zero), probably with overflow. 03096 03097 Arguments 03098 ========= 03099 03100 BETA (input) INTEGER 03101 The base of floating-point arithmetic. 03102 03103 P (input) INTEGER 03104 The number of base BETA digits in the mantissa of a 03105 floating-point value. 03106 03107 EMIN (input) INTEGER 03108 The minimum exponent before (gradual) underflow. 03109 03110 IEEE (input) LOGICAL 03111 A logical flag specifying whether or not the arithmetic 03112 system is thought to comply with the IEEE standard. 03113 03114 EMAX (output) INTEGER 03115 The largest exponent before overflow 03116 03117 RMAX (output) REAL 03118 The largest machine floating-point number. 03119 03120 ===================================================================== 03121 03122 03123 03124 First compute LEXP and UEXP, two powers of 2 that bound 03125 abs(EMIN). We then assume that EMAX + abs(EMIN) will sum 03126 approximately to the bound that is closest to abs(EMIN). 03127 (EMAX is the exponent of the required number RMAX). */ 03128 /* Table of constant values */ 03129 static real c_b5 = 0.f; 03130 03131 /* System generated locals */ 03132 integer i__1; 03133 real r__1; 03134 /* Local variables */ 03135 static integer lexp; 03136 static real oldy; 03137 static integer uexp, i; 03138 static real y, z; 03139 static integer nbits; 03140 extern doublereal slamc3_(real *, real *); 03141 static real recbas; 03142 static integer exbits, expsum, try__; 03143 03144 03145 03146 lexp = 1; 03147 exbits = 1; 03148 L10: 03149 try__ = lexp << 1; 03150 if (try__ <= -(*emin)) { 03151 lexp = try__; 03152 ++exbits; 03153 goto L10; 03154 } 03155 if (lexp == -(*emin)) { 03156 uexp = lexp; 03157 } else { 03158 uexp = try__; 03159 ++exbits; 03160 } 03161 03162 /* Now -LEXP is less than or equal to EMIN, and -UEXP is greater 03163 than or equal to EMIN. EXBITS is the number of bits needed to 03164 store the exponent. */ 03165 03166 if (uexp + *emin > -lexp - *emin) { 03167 expsum = lexp << 1; 03168 } else { 03169 expsum = uexp << 1; 03170 } 03171 03172 /* EXPSUM is the exponent range, approximately equal to 03173 EMAX - EMIN + 1 . */ 03174 03175 *emax = expsum + *emin - 1; 03176 nbits = exbits + 1 + *p; 03177 03178 /* NBITS is the total number of bits needed to store a 03179 floating-point number. */ 03180 03181 if (nbits % 2 == 1 && *beta == 2) { 03182 03183 /* Either there are an odd number of bits used to store a 03184 floating-point number, which is unlikely, or some bits are 03185 03186 not used in the representation of numbers, which is possible 03187 , 03188 (e.g. Cray machines) or the mantissa has an implicit bit, 03189 (e.g. IEEE machines, Dec Vax machines), which is perhaps the 03190 03191 most likely. We have to assume the last alternative. 
03192 If this is true, then we need to reduce EMAX by one because 03193 03194 there must be some way of representing zero in an implicit-b 03195 it 03196 system. On machines like Cray, we are reducing EMAX by one 03197 03198 unnecessarily. */ 03199 03200 --(*emax); 03201 } 03202 03203 if (*ieee) { 03204 03205 /* Assume we are on an IEEE machine which reserves one exponent 03206 03207 for infinity and NaN. */ 03208 03209 --(*emax); 03210 } 03211 03212 /* Now create RMAX, the largest machine number, which should 03213 be equal to (1.0 - BETA**(-P)) * BETA**EMAX . 03214 03215 First compute 1.0 - BETA**(-P), being careful that the 03216 result is less than 1.0 . */ 03217 03218 recbas = 1.f / *beta; 03219 z = *beta - 1.f; 03220 y = 0.f; 03221 i__1 = *p; 03222 for (i = 1; i <= *p; ++i) { 03223 z *= recbas; 03224 if (y < 1.f) { 03225 oldy = y; 03226 } 03227 y = slamc3_(&y, &z); 03228 /* L20: */ 03229 } 03230 if (y >= 1.f) { 03231 y = oldy; 03232 } 03233 03234 /* Now multiply by BETA**EMAX to get RMAX. */ 03235 03236 i__1 = *emax; 03237 for (i = 1; i <= *emax; ++i) { 03238 r__1 = y * *beta; 03239 y = slamc3_(&r__1, &c_b5); 03240 /* L30: */ 03241 } 03242 03243 *rmax = y; 03244 return 0; 03245 03246 /* End of SLAMC5 */ 03247 03248 } /* slamc5_ */
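As a quick sanity check of the formula quoted above, RMAX = (1.0 - BETA**(-P)) * BETA**EMAX, the following standalone snippet evaluates it for IEEE single precision (beta = 2, p = 24, emax = 128) and recovers FLT_MAX, about 3.4028235e+38.

#include <cmath>
#include <cstdio>

int main() {
    double beta = 2.0, p = 24.0, emax = 128.0;
    /* The value SLAMC5 builds by repeated multiplication to avoid overflow. */
    double rmax = (1.0 - std::pow(beta, -p)) * std::pow(beta, emax);
    printf("rmax = %.7e\n", rmax);   /* 3.4028235e+38 */
    return 0;
}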
doublereal slamch_(const char *cmach)
Definition at line 2295 of file lapackblas.cpp.
References FALSE_, integer, lsame_(), pow_ri(), slamc2_(), t, and TRUE_.
Referenced by sbdsqr_(), sgesvd_(), slaed2_(), slaed4_(), slaed6_(), slaed8_(), slarfg_(), slartg_(), slascl_(), slasq1_(), slasq2_(), slasq3_(), slasq6_(), slasv2_(), sstedc_(), ssteqr_(), ssterf_(), sstevd_(), and ssyev_().
02296 { 02297 /* -- LAPACK auxiliary routine (version 3.0) -- 02298 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 02299 Courant Institute, Argonne National Lab, and Rice University 02300 October 31, 1992 02301 02302 02303 Purpose 02304 ======= 02305 02306 SLAMCH determines single precision machine parameters. 02307 02308 Arguments 02309 ========= 02310 02311 CMACH (input) CHARACTER*1 02312 Specifies the value to be returned by SLAMCH: 02313 = 'E' or 'e', SLAMCH := eps 02314 = 'S' or 's , SLAMCH := sfmin 02315 = 'B' or 'b', SLAMCH := base 02316 = 'P' or 'p', SLAMCH := eps*base 02317 = 'N' or 'n', SLAMCH := t 02318 = 'R' or 'r', SLAMCH := rnd 02319 = 'M' or 'm', SLAMCH := emin 02320 = 'U' or 'u', SLAMCH := rmin 02321 = 'L' or 'l', SLAMCH := emax 02322 = 'O' or 'o', SLAMCH := rmax 02323 02324 where 02325 02326 eps = relative machine precision 02327 sfmin = safe minimum, such that 1/sfmin does not overflow 02328 base = base of the machine 02329 prec = eps*base 02330 t = number of (base) digits in the mantissa 02331 rnd = 1.0 when rounding occurs in addition, 0.0 otherwise 02332 emin = minimum exponent before (gradual) underflow 02333 rmin = underflow threshold - base**(emin-1) 02334 emax = largest exponent before overflow 02335 rmax = overflow threshold - (base**emax)*(1-eps) 02336 02337 ===================================================================== 02338 */ 02339 /* >>Start of File<< 02340 Initialized data */ 02341 static logical first = TRUE_; 02342 /* System generated locals */ 02343 integer i__1; 02344 real ret_val; 02345 /* Builtin functions */ 02346 double pow_ri(real *, integer *); 02347 /* Local variables */ 02348 static real base; 02349 static integer beta; 02350 static real emin, prec, emax; 02351 static integer imin, imax; 02352 static logical lrnd; 02353 static real rmin, rmax, t, rmach; 02354 extern logical lsame_(const char *, const char *); 02355 static real small, sfmin; 02356 extern /* Subroutine */ int slamc2_(integer *, integer *, logical *, real 02357 *, integer *, real *, integer *, real *); 02358 static integer it; 02359 static real rnd, eps; 02360 02361 02362 02363 if (first) { 02364 first = FALSE_; 02365 slamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax); 02366 base = (real) beta; 02367 t = (real) it; 02368 if (lrnd) { 02369 rnd = 1.f; 02370 i__1 = 1 - it; 02371 eps = pow_ri(&base, &i__1) / 2; 02372 } else { 02373 rnd = 0.f; 02374 i__1 = 1 - it; 02375 eps = pow_ri(&base, &i__1); 02376 } 02377 prec = eps * base; 02378 emin = (real) imin; 02379 emax = (real) imax; 02380 sfmin = rmin; 02381 small = 1.f / rmax; 02382 if (small >= sfmin) { 02383 02384 /* Use SMALL plus a bit, to avoid the possibility of rou 02385 nding 02386 causing overflow when computing 1/sfmin. */ 02387 02388 sfmin = small * (eps + 1.f); 02389 } 02390 } 02391 02392 if (lsame_(cmach, "E")) { 02393 rmach = eps; 02394 } else if (lsame_(cmach, "S")) { 02395 rmach = sfmin; 02396 } else if (lsame_(cmach, "B")) { 02397 rmach = base; 02398 } else if (lsame_(cmach, "P")) { 02399 rmach = prec; 02400 } else if (lsame_(cmach, "N")) { 02401 rmach = t; 02402 } else if (lsame_(cmach, "R")) { 02403 rmach = rnd; 02404 } else if (lsame_(cmach, "M")) { 02405 rmach = emin; 02406 } else if (lsame_(cmach, "U")) { 02407 rmach = rmin; 02408 } else if (lsame_(cmach, "L")) { 02409 rmach = emax; 02410 } else if (lsame_(cmach, "O")) { 02411 rmach = rmax; 02412 } 02413 02414 ret_val = rmach; 02415 return ret_val; 02416 02417 /* End of SLAMCH */ 02418 02419 } /* slamch_ */
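A typical use is to query the constants once at the start of a computation; a minimal sketch assuming the lapackblas.h header used throughout this file. On an IEEE single precision machine eps is about 5.96e-08 (half of FLT_EPSILON, since rounding is detected), sfmin about 1.18e-38 and rmax about 3.40e+38.

#include <cstdio>
#include "lapackblas.h"

int main() {
    printf("eps   = %g\n", slamch_("E"));   /* relative machine precision */
    printf("sfmin = %g\n", slamch_("S"));   /* safe minimum */
    printf("base  = %g\n", slamch_("B"));   /* base of the machine */
    printf("rmax  = %g\n", slamch_("O"));   /* overflow threshold */
    return 0;
}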
int slamrg_(integer *n1, integer *n2, real *a, integer *strd1, integer *strd2, integer *index)
Definition at line 12994 of file lapackblas.cpp.
References integer.
Referenced by slaed1_(), slaed2_(), slaed7_(), and slaed8_().
12996 { 12997 /* -- LAPACK routine (version 3.0) -- 12998 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 12999 Courant Institute, Argonne National Lab, and Rice University 13000 September 30, 1994 13001 13002 13003 Purpose 13004 ======= 13005 13006 SLAMRG will create a permutation list which will merge the elements 13007 of A (which is composed of two independently sorted sets) into a 13008 single set which is sorted in ascending order. 13009 13010 Arguments 13011 ========= 13012 13013 N1 (input) INTEGER 13014 N2 (input) INTEGER 13015 These arguements contain the respective lengths of the two 13016 sorted lists to be merged. 13017 13018 A (input) REAL array, dimension (N1+N2) 13019 The first N1 elements of A contain a list of numbers which 13020 are sorted in either ascending or descending order. Likewise 13021 for the final N2 elements. 13022 13023 STRD1 (input) INTEGER 13024 STRD2 (input) INTEGER 13025 These are the strides to be taken through the array A. 13026 Allowable strides are 1 and -1. They indicate whether a 13027 subset of A is sorted in ascending (STRDx = 1) or descending 13028 (STRDx = -1) order. 13029 13030 INDEX (output) INTEGER array, dimension (N1+N2) 13031 On exit this array will contain a permutation such that 13032 if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be 13033 sorted in ascending order. 13034 13035 ===================================================================== 13036 13037 13038 Parameter adjustments */ 13039 /* System generated locals */ 13040 integer i__1; 13041 /* Local variables */ 13042 static integer i__, ind1, ind2, n1sv, n2sv; 13043 13044 --index; 13045 --a; 13046 13047 /* Function Body */ 13048 n1sv = *n1; 13049 n2sv = *n2; 13050 if (*strd1 > 0) { 13051 ind1 = 1; 13052 } else { 13053 ind1 = *n1; 13054 } 13055 if (*strd2 > 0) { 13056 ind2 = *n1 + 1; 13057 } else { 13058 ind2 = *n1 + *n2; 13059 } 13060 i__ = 1; 13061 /* while ( (N1SV > 0) & (N2SV > 0) ) */ 13062 L10: 13063 if (n1sv > 0 && n2sv > 0) { 13064 if (a[ind1] <= a[ind2]) { 13065 index[i__] = ind1; 13066 ++i__; 13067 ind1 += *strd1; 13068 --n1sv; 13069 } else { 13070 index[i__] = ind2; 13071 ++i__; 13072 ind2 += *strd2; 13073 --n2sv; 13074 } 13075 goto L10; 13076 } 13077 /* end while */ 13078 if (n1sv == 0) { 13079 i__1 = n2sv; 13080 for (n1sv = 1; n1sv <= i__1; ++n1sv) { 13081 index[i__] = ind2; 13082 ++i__; 13083 ind2 += *strd2; 13084 /* L20: */ 13085 } 13086 } else { 13087 /* N2SV .EQ. 0 */ 13088 i__1 = n1sv; 13089 for (n2sv = 1; n2sv <= i__1; ++n2sv) { 13090 index[i__] = ind1; 13091 ++i__; 13092 ind1 += *strd1; 13093 /* L30: */ 13094 } 13095 } 13096 13097 return 0; 13098 13099 /* End of SLAMRG */ 13100 13101 } /* slamrg_ */
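A small worked example of the merge, sketched under the usual f2c typedef assumptions: the first three entries of A form one ascending list, the last two another, and the returned permutation is 1-based as in the Fortran original.

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer n1 = 3, n2 = 2, strd1 = 1, strd2 = 1;
    real a[5] = { 1.f, 4.f, 6.f, 2.f, 5.f };   /* two sorted sublists */
    integer idx[5];

    slamrg_(&n1, &n2, a, &strd1, &strd2, idx);

    /* idx is now {1, 4, 2, 5, 3}; a[idx[i] - 1] visits 1, 2, 4, 5, 6. */
    for (int i = 0; i < 5; ++i)
        printf("%g ", (double) a[idx[i] - 1]);
    printf("\n");
    return 0;
}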
doublereal slange_(const char *norm, integer *m, integer *n, real *a, integer *lda, real *work)
Definition at line 20853 of file lapackblas.cpp.
References a_ref, dabs, df2cmax, f2cmin, integer, lsame_(), slassq_(), and sqrt().
Referenced by sgesvd_().
20855 { 20856 /* -- LAPACK auxiliary routine (version 3.0) -- 20857 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 20858 Courant Institute, Argonne National Lab, and Rice University 20859 October 31, 1992 20860 20861 20862 Purpose 20863 ======= 20864 20865 SLANGE returns the value of the one norm, or the Frobenius norm, or 20866 the infinity norm, or the element of largest absolute value of a 20867 real matrix A. 20868 20869 Description 20870 =========== 20871 20872 SLANGE returns the value 20873 20874 SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' 20875 ( 20876 ( norm1(A), NORM = '1', 'O' or 'o' 20877 ( 20878 ( normI(A), NORM = 'I' or 'i' 20879 ( 20880 ( normF(A), NORM = 'F', 'f', 'E' or 'e' 20881 20882 where norm1 denotes the one norm of a matrix (maximum column sum), 20883 normI denotes the infinity norm of a matrix (maximum row sum) and 20884 normF denotes the Frobenius norm of a matrix (square root of sum of 20885 squares). Note that max(abs(A(i,j))) is not a matrix norm. 20886 20887 Arguments 20888 ========= 20889 20890 NORM (input) CHARACTER*1 20891 Specifies the value to be returned in SLANGE as described 20892 above. 20893 20894 M (input) INTEGER 20895 The number of rows of the matrix A. M >= 0. When M = 0, 20896 SLANGE is set to zero. 20897 20898 N (input) INTEGER 20899 The number of columns of the matrix A. N >= 0. When N = 0, 20900 SLANGE is set to zero. 20901 20902 A (input) REAL array, dimension (LDA,N) 20903 The m by n matrix A. 20904 20905 LDA (input) INTEGER 20906 The leading dimension of the array A. LDA >= max(M,1). 20907 20908 WORK (workspace) REAL array, dimension (LWORK), 20909 where LWORK >= M when NORM = 'I'; otherwise, WORK is not 20910 referenced. 20911 20912 ===================================================================== 20913 20914 20915 Parameter adjustments */ 20916 /* Table of constant values */ 20917 static integer c__1 = 1; 20918 20919 /* System generated locals */ 20920 integer a_dim1, a_offset, i__1, i__2; 20921 real ret_val, r__1, r__2, r__3; 20922 /* Builtin functions */ 20923 //double sqrt(doublereal); 20924 /* Local variables */ 20925 static integer i__, j; 20926 static real scale; 20927 extern logical lsame_(const char *, const char *); 20928 static real value; 20929 extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, 20930 real *); 20931 static real sum; 20932 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 20933 20934 20935 a_dim1 = *lda; 20936 a_offset = 1 + a_dim1 * 1; 20937 a -= a_offset; 20938 --work; 20939 20940 /* Function Body */ 20941 if (f2cmin(*m,*n) == 0) { 20942 value = 0.f; 20943 } else if (lsame_(norm, "M")) { 20944 20945 /* Find max(abs(A(i,j))). */ 20946 20947 value = 0.f; 20948 i__1 = *n; 20949 for (j = 1; j <= i__1; ++j) { 20950 i__2 = *m; 20951 for (i__ = 1; i__ <= i__2; ++i__) { 20952 /* Computing MAX */ 20953 r__2 = value, r__3 = (r__1 = a_ref(i__, j), dabs(r__1)); 20954 value = df2cmax(r__2,r__3); 20955 /* L10: */ 20956 } 20957 /* L20: */ 20958 } 20959 } else if (lsame_(norm, "O") || *(unsigned char *) 20960 norm == '1') { 20961 20962 /* Find norm1(A). */ 20963 20964 value = 0.f; 20965 i__1 = *n; 20966 for (j = 1; j <= i__1; ++j) { 20967 sum = 0.f; 20968 i__2 = *m; 20969 for (i__ = 1; i__ <= i__2; ++i__) { 20970 sum += (r__1 = a_ref(i__, j), dabs(r__1)); 20971 /* L30: */ 20972 } 20973 value = df2cmax(value,sum); 20974 /* L40: */ 20975 } 20976 } else if (lsame_(norm, "I")) { 20977 20978 /* Find normI(A). 
*/ 20979 20980 i__1 = *m; 20981 for (i__ = 1; i__ <= i__1; ++i__) { 20982 work[i__] = 0.f; 20983 /* L50: */ 20984 } 20985 i__1 = *n; 20986 for (j = 1; j <= i__1; ++j) { 20987 i__2 = *m; 20988 for (i__ = 1; i__ <= i__2; ++i__) { 20989 work[i__] += (r__1 = a_ref(i__, j), dabs(r__1)); 20990 /* L60: */ 20991 } 20992 /* L70: */ 20993 } 20994 value = 0.f; 20995 i__1 = *m; 20996 for (i__ = 1; i__ <= i__1; ++i__) { 20997 /* Computing MAX */ 20998 r__1 = value, r__2 = work[i__]; 20999 value = df2cmax(r__1,r__2); 21000 /* L80: */ 21001 } 21002 } else if (lsame_(norm, "F") || lsame_(norm, "E")) { 21003 21004 /* Find normF(A). */ 21005 21006 scale = 0.f; 21007 sum = 1.f; 21008 i__1 = *n; 21009 for (j = 1; j <= i__1; ++j) { 21010 slassq_(m, &a_ref(1, j), &c__1, &scale, &sum); 21011 /* L90: */ 21012 } 21013 value = scale * sqrt(sum); 21014 } 21015 21016 ret_val = value; 21017 return ret_val; 21018 21019 /* End of SLANGE */ 21020 21021 } /* slange_ */
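A short usage sketch: the matrix is stored column by column (Fortran order) and WORK is referenced only for the infinity norm. For A = [ 1 2 ; 3 4 ] the one norm is 6 (largest column sum), the infinity norm is 7 (largest row sum) and the Frobenius norm is sqrt(30), about 5.477.

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer m = 2, n = 2, lda = 2;
    real a[4] = { 1.f, 3.f, 2.f, 4.f };   /* column-major storage of [ 1 2 ; 3 4 ] */
    real work[2];                         /* used only when NORM = 'I' */

    printf("norm1 = %g\n", slange_("1", &m, &n, a, &lda, work));   /* 6 */
    printf("normI = %g\n", slange_("I", &m, &n, a, &lda, work));   /* 7 */
    printf("normF = %g\n", slange_("F", &m, &n, a, &lda, work));   /* ~5.477 */
    return 0;
}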
doublereal slanst_(const char *norm, integer *n, real *d__, real *e)
Definition at line 3253 of file lapackblas.cpp.
References c__1, dabs, df2cmax, integer, lsame_(), slassq_(), and sqrt().
Referenced by sstedc_(), ssteqr_(), ssterf_(), and sstevd_().
03254 { 03255 /* -- LAPACK auxiliary routine (version 3.0) -- 03256 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 03257 Courant Institute, Argonne National Lab, and Rice University 03258 February 29, 1992 03259 03260 03261 Purpose 03262 ======= 03263 03264 SLANST returns the value of the one norm, or the Frobenius norm, or 03265 the infinity norm, or the element of largest absolute value of a 03266 real symmetric tridiagonal matrix A. 03267 03268 Description 03269 =========== 03270 03271 SLANST returns the value 03272 03273 SLANST = ( f2cmax(abs(A(i,j))), NORM = 'M' or 'm' 03274 ( 03275 ( norm1(A), NORM = '1', 'O' or 'o' 03276 ( 03277 ( normI(A), NORM = 'I' or 'i' 03278 ( 03279 ( normF(A), NORM = 'F', 'f', 'E' or 'e' 03280 03281 where norm1 denotes the one norm of a matrix (maximum column sum), 03282 normI denotes the infinity norm of a matrix (maximum row sum) and 03283 normF denotes the Frobenius norm of a matrix (square root of sum of 03284 squares). Note that f2cmax(abs(A(i,j))) is not a matrix norm. 03285 03286 Arguments 03287 ========= 03288 03289 NORM (input) CHARACTER*1 03290 Specifies the value to be returned in SLANST as described 03291 above. 03292 03293 N (input) INTEGER 03294 The order of the matrix A. N >= 0. When N = 0, SLANST is 03295 set to zero. 03296 03297 D (input) REAL array, dimension (N) 03298 The diagonal elements of A. 03299 03300 E (input) REAL array, dimension (N-1) 03301 The (n-1) sub-diagonal or super-diagonal elements of A. 03302 03303 ===================================================================== 03304 03305 03306 Parameter adjustments */ 03307 /* Table of constant values */ 03308 static integer c__1 = 1; 03309 03310 /* System generated locals */ 03311 integer i__1; 03312 real ret_val, r__1, r__2, r__3, r__4, r__5; 03313 /* Builtin functions */ 03314 // double sqrt(doublereal); 03315 /* Local variables */ 03316 static integer i__; 03317 static real scale; 03318 extern logical lsame_(const char *, const char *); 03319 static real anorm; 03320 extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, 03321 real *); 03322 static real sum; 03323 03324 03325 --e; 03326 --d__; 03327 03328 /* Function Body */ 03329 if (*n <= 0) { 03330 anorm = 0.f; 03331 } else if (lsame_(norm, "M")) { 03332 03333 /* Find f2cmax(abs(A(i,j))). */ 03334 03335 anorm = (r__1 = d__[*n], dabs(r__1)); 03336 i__1 = *n - 1; 03337 for (i__ = 1; i__ <= i__1; ++i__) { 03338 /* Computing MAX */ 03339 r__2 = anorm, r__3 = (r__1 = d__[i__], dabs(r__1)); 03340 anorm = df2cmax(r__2,r__3); 03341 /* Computing MAX */ 03342 r__2 = anorm, r__3 = (r__1 = e[i__], dabs(r__1)); 03343 anorm = df2cmax(r__2,r__3); 03344 /* L10: */ 03345 } 03346 } else if (lsame_(norm, "O") || *(unsigned char *) 03347 norm == '1' || lsame_(norm, "I")) { 03348 03349 /* Find norm1(A). */ 03350 03351 if (*n == 1) { 03352 anorm = dabs(d__[1]); 03353 } else { 03354 /* Computing MAX */ 03355 r__3 = dabs(d__[1]) + dabs(e[1]), r__4 = (r__1 = e[*n - 1], dabs( 03356 r__1)) + (r__2 = d__[*n], dabs(r__2)); 03357 anorm = df2cmax(r__3,r__4); 03358 i__1 = *n - 1; 03359 for (i__ = 2; i__ <= i__1; ++i__) { 03360 /* Computing MAX */ 03361 r__4 = anorm, r__5 = (r__1 = d__[i__], dabs(r__1)) + (r__2 = 03362 e[i__], dabs(r__2)) + (r__3 = e[i__ - 1], dabs(r__3)); 03363 anorm = df2cmax(r__4,r__5); 03364 /* L20: */ 03365 } 03366 } 03367 } else if (lsame_(norm, "F") || lsame_(norm, "E")) { 03368 03369 /* Find normF(A). 
*/ 03370 03371 scale = 0.f; 03372 sum = 1.f; 03373 if (*n > 1) { 03374 i__1 = *n - 1; 03375 slassq_(&i__1, &e[1], &c__1, &scale, &sum); 03376 sum *= 2; 03377 } 03378 slassq_(n, &d__[1], &c__1, &scale, &sum); 03379 anorm = scale * sqrt(sum); 03380 } 03381 03382 ret_val = anorm; 03383 return ret_val; 03384 03385 /* End of SLANST */ 03386 03387 } /* slanst_ */
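For example, the 3-by-3 tridiagonal matrix with diagonal (3, 3, 3) and off-diagonal (1, 1) has one norm 5 (the middle column sum), largest absolute entry 3 and Frobenius norm sqrt(31); a minimal sketch:

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer n = 3;
    real d[3] = { 3.f, 3.f, 3.f };   /* diagonal */
    real e[2] = { 1.f, 1.f };        /* sub/super-diagonal */

    printf("norm1 = %g\n", slanst_("1", &n, d, e));   /* 5 */
    printf("normM = %g\n", slanst_("M", &n, d, e));   /* 3 */
    printf("normF = %g\n", slanst_("F", &n, d, e));   /* sqrt(31) ~ 5.568 */
    return 0;
}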
doublereal slansy_(const char *norm, char *uplo, integer *n, real *a, integer *lda, real *work)
Definition at line 3392 of file lapackblas.cpp.
References a_ref, c__1, dabs, df2cmax, integer, lsame_(), slassq_(), and sqrt().
Referenced by ssyev_().
03394 { 03395 /* -- LAPACK auxiliary routine (version 3.0) -- 03396 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 03397 Courant Institute, Argonne National Lab, and Rice University 03398 October 31, 1992 03399 03400 03401 Purpose 03402 ======= 03403 03404 SLANSY returns the value of the one norm, or the Frobenius norm, or 03405 the infinity norm, or the element of largest absolute value of a 03406 real symmetric matrix A. 03407 03408 Description 03409 =========== 03410 03411 SLANSY returns the value 03412 03413 SLANSY = ( f2cmax(abs(A(i,j))), NORM = 'M' or 'm' 03414 ( 03415 ( norm1(A), NORM = '1', 'O' or 'o' 03416 ( 03417 ( normI(A), NORM = 'I' or 'i' 03418 ( 03419 ( normF(A), NORM = 'F', 'f', 'E' or 'e' 03420 03421 where norm1 denotes the one norm of a matrix (maximum column sum), 03422 normI denotes the infinity norm of a matrix (maximum row sum) and 03423 normF denotes the Frobenius norm of a matrix (square root of sum of 03424 squares). Note that f2cmax(abs(A(i,j))) is not a matrix norm. 03425 03426 Arguments 03427 ========= 03428 03429 NORM (input) CHARACTER*1 03430 Specifies the value to be returned in SLANSY as described 03431 above. 03432 03433 UPLO (input) CHARACTER*1 03434 Specifies whether the upper or lower triangular part of the 03435 symmetric matrix A is to be referenced. 03436 = 'U': Upper triangular part of A is referenced 03437 = 'L': Lower triangular part of A is referenced 03438 03439 N (input) INTEGER 03440 The order of the matrix A. N >= 0. When N = 0, SLANSY is 03441 set to zero. 03442 03443 A (input) REAL array, dimension (LDA,N) 03444 The symmetric matrix A. If UPLO = 'U', the leading n by n 03445 upper triangular part of A contains the upper triangular part 03446 of the matrix A, and the strictly lower triangular part of A 03447 is not referenced. If UPLO = 'L', the leading n by n lower 03448 triangular part of A contains the lower triangular part of 03449 the matrix A, and the strictly upper triangular part of A is 03450 not referenced. 03451 03452 LDA (input) INTEGER 03453 The leading dimension of the array A. LDA >= f2cmax(N,1). 03454 03455 WORK (workspace) REAL array, dimension (LWORK), 03456 where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, 03457 WORK is not referenced. 03458 03459 ===================================================================== 03460 03461 03462 Parameter adjustments */ 03463 /* Table of constant values */ 03464 static integer c__1 = 1; 03465 03466 /* System generated locals */ 03467 integer a_dim1, a_offset, i__1, i__2; 03468 real ret_val, r__1, r__2, r__3; 03469 /* Builtin functions */ 03470 // double sqrt(doublereal); 03471 /* Local variables */ 03472 static real absa; 03473 static integer i__, j; 03474 static real scale; 03475 extern logical lsame_(const char *, const char *); 03476 static real value; 03477 extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, 03478 real *); 03479 static real sum; 03480 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 03481 03482 03483 a_dim1 = *lda; 03484 a_offset = 1 + a_dim1 * 1; 03485 a -= a_offset; 03486 --work; 03487 03488 /* Function Body */ 03489 if (*n == 0) { 03490 value = 0.f; 03491 } else if (lsame_(norm, "M")) { 03492 03493 /* Find f2cmax(abs(A(i,j))). 
*/ 03494 03495 value = 0.f; 03496 if (lsame_(uplo, "U")) { 03497 i__1 = *n; 03498 for (j = 1; j <= i__1; ++j) { 03499 i__2 = j; 03500 for (i__ = 1; i__ <= i__2; ++i__) { 03501 /* Computing MAX */ 03502 r__2 = value, r__3 = (r__1 = a_ref(i__, j), dabs(r__1)); 03503 value = df2cmax(r__2,r__3); 03504 /* L10: */ 03505 } 03506 /* L20: */ 03507 } 03508 } else { 03509 i__1 = *n; 03510 for (j = 1; j <= i__1; ++j) { 03511 i__2 = *n; 03512 for (i__ = j; i__ <= i__2; ++i__) { 03513 /* Computing MAX */ 03514 r__2 = value, r__3 = (r__1 = a_ref(i__, j), dabs(r__1)); 03515 value = df2cmax(r__2,r__3); 03516 /* L30: */ 03517 } 03518 /* L40: */ 03519 } 03520 } 03521 } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { 03522 03523 /* Find normI(A) ( = norm1(A), since A is symmetric). */ 03524 03525 value = 0.f; 03526 if (lsame_(uplo, "U")) { 03527 i__1 = *n; 03528 for (j = 1; j <= i__1; ++j) { 03529 sum = 0.f; 03530 i__2 = j - 1; 03531 for (i__ = 1; i__ <= i__2; ++i__) { 03532 absa = (r__1 = a_ref(i__, j), dabs(r__1)); 03533 sum += absa; 03534 work[i__] += absa; 03535 /* L50: */ 03536 } 03537 work[j] = sum + (r__1 = a_ref(j, j), dabs(r__1)); 03538 /* L60: */ 03539 } 03540 i__1 = *n; 03541 for (i__ = 1; i__ <= i__1; ++i__) { 03542 /* Computing MAX */ 03543 r__1 = value, r__2 = work[i__]; 03544 value = df2cmax(r__1,r__2); 03545 /* L70: */ 03546 } 03547 } else { 03548 i__1 = *n; 03549 for (i__ = 1; i__ <= i__1; ++i__) { 03550 work[i__] = 0.f; 03551 /* L80: */ 03552 } 03553 i__1 = *n; 03554 for (j = 1; j <= i__1; ++j) { 03555 sum = work[j] + (r__1 = a_ref(j, j), dabs(r__1)); 03556 i__2 = *n; 03557 for (i__ = j + 1; i__ <= i__2; ++i__) { 03558 absa = (r__1 = a_ref(i__, j), dabs(r__1)); 03559 sum += absa; 03560 work[i__] += absa; 03561 /* L90: */ 03562 } 03563 value = df2cmax(value,sum); 03564 /* L100: */ 03565 } 03566 } 03567 } else if (lsame_(norm, "F") || lsame_(norm, "E")) { 03568 03569 /* Find normF(A). */ 03570 03571 scale = 0.f; 03572 sum = 1.f; 03573 if (lsame_(uplo, "U")) { 03574 i__1 = *n; 03575 for (j = 2; j <= i__1; ++j) { 03576 i__2 = j - 1; 03577 slassq_(&i__2, &a_ref(1, j), &c__1, &scale, &sum); 03578 /* L110: */ 03579 } 03580 } else { 03581 i__1 = *n - 1; 03582 for (j = 1; j <= i__1; ++j) { 03583 i__2 = *n - j; 03584 slassq_(&i__2, &a_ref(j + 1, j), &c__1, &scale, &sum); 03585 /* L120: */ 03586 } 03587 } 03588 sum *= 2; 03589 i__1 = *lda + 1; 03590 slassq_(n, &a[a_offset], &i__1, &scale, &sum); 03591 value = scale * sqrt(sum); 03592 } 03593 03594 ret_val = value; 03595 return ret_val; 03596 03597 /* End of SLANSY */ 03598 03599 } /* slansy_ */
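Because only the selected triangle is referenced, the opposite strict triangle never needs to be filled in; a sketch for the upper-triangular storage of A = [ 2 1 ; 1 3 ], whose one norm is 4 and whose Frobenius norm is sqrt(15).

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer n = 2, lda = 2;
    char uplo[2] = "U";
    /* Column-major storage of [ 2 1 ; 1 3 ]; with UPLO = 'U' the strictly
       lower entry (second element) is never referenced. */
    real a[4] = { 2.f, 0.f, 1.f, 3.f };
    real work[2];   /* needed for NORM = '1', 'O' or 'I' */

    printf("norm1 = %g\n", slansy_("1", uplo, &n, a, &lda, work));   /* 4 */
    printf("normF = %g\n", slansy_("F", uplo, &n, a, &lda, work));   /* sqrt(15) */
    return 0;
}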
doublereal slapy2_ | ( | real * | x, | |
real * | y | |||
) |
Definition at line 3607 of file lapackblas.cpp.
References dabs, df2cmax, df2cmin, and sqrt().
Referenced by slaed2_(), slaed8_(), slarfg_(), ssteqr_(), and ssterf_().
03608 { 03609 /* -- LAPACK auxiliary routine (version 3.0) -- 03610 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 03611 Courant Institute, Argonne National Lab, and Rice University 03612 October 31, 1992 03613 03614 03615 Purpose 03616 ======= 03617 03618 SLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary 03619 overflow. 03620 03621 Arguments 03622 ========= 03623 03624 X (input) REAL 03625 Y (input) REAL 03626 X and Y specify the values x and y. 03627 03628 ===================================================================== */ 03629 /* System generated locals */ 03630 real ret_val, r__1; 03631 /* Builtin functions */ 03632 // double sqrt(doublereal); 03633 /* Local variables */ 03634 static real xabs, yabs, w, z__; 03635 03636 03637 03638 xabs = dabs(*x); 03639 yabs = dabs(*y); 03640 w = df2cmax(xabs,yabs); 03641 z__ = df2cmin(xabs,yabs); 03642 if (z__ == 0.f) { 03643 ret_val = w; 03644 } else { 03645 /* Computing 2nd power */ 03646 r__1 = z__ / w; 03647 ret_val = w * sqrt(r__1 * r__1 + 1.f); 03648 } 03649 return ret_val; 03650 03651 /* End of SLAPY2 */ 03652 03653 } /* slapy2_ */
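The point of the routine shows up on inputs whose squares overflow single precision; a small sketch using the signature above:

#include <cstdio>
#include "lapackblas.h"

int main() {
    /* Naively forming x*x + y*y would overflow REAL here; slapy2_ instead
       computes w*sqrt(1 + (z/w)^2) with w = max(|x|,|y|), z = min(|x|,|y|). */
    real x = 3e30f, y = 4e30f;
    doublereal r = slapy2_(&x, &y);
    std::printf("slapy2(3e30, 4e30) = %g (expected 5e30)\n", (double) r);
    return 0;
}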
int slarf_ | ( | const char * | side, | |
integer * | m, | |||
integer * | n, | |||
real * | v, | |||
integer * | incv, | |||
real * | tau, | |||
real * | c__, | |||
integer * | ldc, | |||
real * | work | |||
) |
Definition at line 4362 of file lapackblas.cpp.
References c__1, integer, lsame_(), sgemv_(), and sger_().
Referenced by sgebd2_(), sgelq2_(), sgeqr2_(), sorg2l_(), sorg2r_(), sorgl2_(), sorm2r_(), and sorml2_().
04364 { 04365 /* -- LAPACK auxiliary routine (version 3.0) -- 04366 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 04367 Courant Institute, Argonne National Lab, and Rice University 04368 February 29, 1992 04369 04370 04371 Purpose 04372 ======= 04373 04374 SLARF applies a real elementary reflector H to a real m by n matrix 04375 C, from either the left or the right. H is represented in the form 04376 04377 H = I - tau * v * v' 04378 04379 where tau is a real scalar and v is a real vector. 04380 04381 If tau = 0, then H is taken to be the unit matrix. 04382 04383 Arguments 04384 ========= 04385 04386 SIDE (input) CHARACTER*1 04387 = 'L': form H * C 04388 = 'R': form C * H 04389 04390 M (input) INTEGER 04391 The number of rows of the matrix C. 04392 04393 N (input) INTEGER 04394 The number of columns of the matrix C. 04395 04396 V (input) REAL array, dimension 04397 (1 + (M-1)*abs(INCV)) if SIDE = 'L' 04398 or (1 + (N-1)*abs(INCV)) if SIDE = 'R' 04399 The vector v in the representation of H. V is not used if 04400 TAU = 0. 04401 04402 INCV (input) INTEGER 04403 The increment between elements of v. INCV <> 0. 04404 04405 TAU (input) REAL 04406 The value tau in the representation of H. 04407 04408 C (input/output) REAL array, dimension (LDC,N) 04409 On entry, the m by n matrix C. 04410 On exit, C is overwritten by the matrix H * C if SIDE = 'L', 04411 or C * H if SIDE = 'R'. 04412 04413 LDC (input) INTEGER 04414 The leading dimension of the array C. LDC >= f2cmax(1,M). 04415 04416 WORK (workspace) REAL array, dimension 04417 (N) if SIDE = 'L' 04418 or (M) if SIDE = 'R' 04419 04420 ===================================================================== 04421 04422 04423 Parameter adjustments */ 04424 /* Table of constant values */ 04425 static real c_b4 = 1.f; 04426 static real c_b5 = 0.f; 04427 static integer c__1 = 1; 04428 04429 /* System generated locals */ 04430 integer c_dim1, c_offset; 04431 real r__1; 04432 /* Local variables */ 04433 extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, 04434 integer *, real *, integer *, real *, integer *); 04435 extern logical lsame_(const char *, const char *); 04436 extern /* Subroutine */ int sgemv_(const char *, integer *, integer *, real *, 04437 real *, integer *, real *, integer *, real *, real *, integer *); 04438 04439 04440 --v; 04441 c_dim1 = *ldc; 04442 c_offset = 1 + c_dim1 * 1; 04443 c__ -= c_offset; 04444 --work; 04445 04446 /* Function Body */ 04447 if (lsame_(side, "L")) { 04448 04449 /* Form H * C */ 04450 04451 if (*tau != 0.f) { 04452 04453 /* w := C' * v */ 04454 04455 sgemv_("Transpose", m, n, &c_b4, &c__[c_offset], ldc, &v[1], incv, 04456 &c_b5, &work[1], &c__1); 04457 04458 /* C := C - v * w' */ 04459 04460 r__1 = -(*tau); 04461 sger_(m, n, &r__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], 04462 ldc); 04463 } 04464 } else { 04465 04466 /* Form C * H */ 04467 04468 if (*tau != 0.f) { 04469 04470 /* w := C * v */ 04471 04472 sgemv_("No transpose", m, n, &c_b4, &c__[c_offset], ldc, &v[1], 04473 incv, &c_b5, &work[1], &c__1); 04474 04475 /* C := C - w * v' */ 04476 04477 r__1 = -(*tau); 04478 sger_(m, n, &r__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], 04479 ldc); 04480 } 04481 } 04482 return 0; 04483 04484 /* End of SLARF */ 04485 04486 } /* slarf_ */
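A small sketch of the signature above, applying H = I - tau*v*v' from the left. The numbers are chosen so the result is easy to check by hand: tau = 1 and v = (1,1)' give H = [0 -1; -1 0].

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer m = 2, n = 2, incv = 1, ldc = 2;
    real v[2] = { 1.f, 1.f };
    real tau  = 1.f;                 /* H = I - v*v' = [0 -1; -1 0] */
    real c[4] = { 1.f, 3.f,          /* column-major C = [1 2; 3 4] */
                  2.f, 4.f };
    real work[2];                    /* dimension N for SIDE = 'L' */
    slarf_("L", &m, &n, v, &incv, &tau, c, &ldc, work);
    std::printf("H*C = [%g %g; %g %g]  (expected [-3 -4; -1 -2])\n",
                c[0], c[2], c[1], c[3]);
    return 0;
}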
int slarfb_ | ( | const char * | side, | |
const char * | trans, | |||
const char * | direct, | |||
const char * | storev, | |||
integer * | m, | |||
integer * | n, | |||
integer * | k, | |||
real * | v, | |||
integer * | ldv, | |||
real * | t, | |||
integer * | ldt, | |||
real * | c__, | |||
integer * | ldc, | |||
real * | work, | |||
integer * | ldwork | |||
) |
Definition at line 3658 of file lapackblas.cpp.
References c__1, c___ref, integer, lsame_(), scopy_(), sgemm_(), strmm_(), v_ref, and work_ref.
Referenced by sgelqf_(), sgeqrf_(), sorglq_(), sorgql_(), sorgqr_(), sormlq_(), and sormqr_().
03662 { 03663 /* -- LAPACK auxiliary routine (version 3.0) -- 03664 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 03665 Courant Institute, Argonne National Lab, and Rice University 03666 February 29, 1992 03667 03668 03669 Purpose 03670 ======= 03671 03672 SLARFB applies a real block reflector H or its transpose H' to a 03673 real m by n matrix C, from either the left or the right. 03674 03675 Arguments 03676 ========= 03677 03678 SIDE (input) CHARACTER*1 03679 = 'L': apply H or H' from the Left 03680 = 'R': apply H or H' from the Right 03681 03682 TRANS (input) CHARACTER*1 03683 = 'N': apply H (No transpose) 03684 = 'T': apply H' (Transpose) 03685 03686 DIRECT (input) CHARACTER*1 03687 Indicates how H is formed from a product of elementary 03688 reflectors 03689 = 'F': H = H(1) H(2) . . . H(k) (Forward) 03690 = 'B': H = H(k) . . . H(2) H(1) (Backward) 03691 03692 STOREV (input) CHARACTER*1 03693 Indicates how the vectors which define the elementary 03694 reflectors are stored: 03695 = 'C': Columnwise 03696 = 'R': Rowwise 03697 03698 M (input) INTEGER 03699 The number of rows of the matrix C. 03700 03701 N (input) INTEGER 03702 The number of columns of the matrix C. 03703 03704 K (input) INTEGER 03705 The order of the matrix T (= the number of elementary 03706 reflectors whose product defines the block reflector). 03707 03708 V (input) REAL array, dimension 03709 (LDV,K) if STOREV = 'C' 03710 (LDV,M) if STOREV = 'R' and SIDE = 'L' 03711 (LDV,N) if STOREV = 'R' and SIDE = 'R' 03712 The matrix V. See further details. 03713 03714 LDV (input) INTEGER 03715 The leading dimension of the array V. 03716 If STOREV = 'C' and SIDE = 'L', LDV >= f2cmax(1,M); 03717 if STOREV = 'C' and SIDE = 'R', LDV >= f2cmax(1,N); 03718 if STOREV = 'R', LDV >= K. 03719 03720 T (input) REAL array, dimension (LDT,K) 03721 The triangular k by k matrix T in the representation of the 03722 block reflector. 03723 03724 LDT (input) INTEGER 03725 The leading dimension of the array T. LDT >= K. 03726 03727 C (input/output) REAL array, dimension (LDC,N) 03728 On entry, the m by n matrix C. 03729 On exit, C is overwritten by H*C or H'*C or C*H or C*H'. 03730 03731 LDC (input) INTEGER 03732 The leading dimension of the array C. LDA >= f2cmax(1,M). 03733 03734 WORK (workspace) REAL array, dimension (LDWORK,K) 03735 03736 LDWORK (input) INTEGER 03737 The leading dimension of the array WORK. 03738 If SIDE = 'L', LDWORK >= f2cmax(1,N); 03739 if SIDE = 'R', LDWORK >= f2cmax(1,M). 
03740 03741 ===================================================================== 03742 03743 03744 Quick return if possible 03745 03746 Parameter adjustments */ 03747 /* Table of constant values */ 03748 static integer c__1 = 1; 03749 static real c_b14 = 1.f; 03750 static real c_b25 = -1.f; 03751 03752 /* System generated locals */ 03753 integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, 03754 work_offset, i__1, i__2; 03755 /* Local variables */ 03756 static integer i__, j; 03757 extern logical lsame_(const char *, const char *); 03758 extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *, 03759 integer *, real *, real *, integer *, real *, integer *, real *, 03760 real *, integer *), scopy_(integer *, real *, 03761 integer *, real *, integer *), strmm_(const char *, const char *, const char *, 03762 const char *, integer *, integer *, real *, real *, integer *, real *, 03763 integer *); 03764 static char transt[1]; 03765 #define work_ref(a_1,a_2) work[(a_2)*work_dim1 + a_1] 03766 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] 03767 #define v_ref(a_1,a_2) v[(a_2)*v_dim1 + a_1] 03768 03769 03770 v_dim1 = *ldv; 03771 v_offset = 1 + v_dim1 * 1; 03772 v -= v_offset; 03773 t_dim1 = *ldt; 03774 t_offset = 1 + t_dim1 * 1; 03775 t -= t_offset; 03776 c_dim1 = *ldc; 03777 c_offset = 1 + c_dim1 * 1; 03778 c__ -= c_offset; 03779 work_dim1 = *ldwork; 03780 work_offset = 1 + work_dim1 * 1; 03781 work -= work_offset; 03782 03783 /* Function Body */ 03784 if (*m <= 0 || *n <= 0) { 03785 return 0; 03786 } 03787 03788 if (lsame_(trans, "N")) { 03789 *(unsigned char *)transt = 'T'; 03790 } else { 03791 *(unsigned char *)transt = 'N'; 03792 } 03793 03794 if (lsame_(storev, "C")) { 03795 03796 if (lsame_(direct, "F")) { 03797 03798 /* Let V = ( V1 ) (first K rows) 03799 ( V2 ) 03800 where V1 is unit lower triangular. 
*/ 03801 03802 if (lsame_(side, "L")) { 03803 03804 /* Form H * C or H' * C where C = ( C1 ) 03805 ( C2 ) 03806 03807 W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) 03808 03809 W := C1' */ 03810 03811 i__1 = *k; 03812 for (j = 1; j <= i__1; ++j) { 03813 scopy_(n, &c___ref(j, 1), ldc, &work_ref(1, j), &c__1); 03814 /* L10: */ 03815 } 03816 03817 /* W := W * V1 */ 03818 03819 strmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b14, 03820 &v[v_offset], ldv, &work[work_offset], ldwork); 03821 if (*m > *k) { 03822 03823 /* W := W + C2'*V2 */ 03824 03825 i__1 = *m - *k; 03826 sgemm_("Transpose", "No transpose", n, k, &i__1, &c_b14, & 03827 c___ref(*k + 1, 1), ldc, &v_ref(*k + 1, 1), ldv, & 03828 c_b14, &work[work_offset], ldwork); 03829 } 03830 03831 /* W := W * T' or W * T */ 03832 03833 strmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b14, &t[ 03834 t_offset], ldt, &work[work_offset], ldwork); 03835 03836 /* C := C - V * W' */ 03837 03838 if (*m > *k) { 03839 03840 /* C2 := C2 - V2 * W' */ 03841 03842 i__1 = *m - *k; 03843 sgemm_("No transpose", "Transpose", &i__1, n, k, &c_b25, & 03844 v_ref(*k + 1, 1), ldv, &work[work_offset], ldwork, 03845 &c_b14, &c___ref(*k + 1, 1), ldc); 03846 } 03847 03848 /* W := W * V1' */ 03849 03850 strmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b14, & 03851 v[v_offset], ldv, &work[work_offset], ldwork); 03852 03853 /* C1 := C1 - W' */ 03854 03855 i__1 = *k; 03856 for (j = 1; j <= i__1; ++j) { 03857 i__2 = *n; 03858 for (i__ = 1; i__ <= i__2; ++i__) { 03859 c___ref(j, i__) = c___ref(j, i__) - work_ref(i__, j); 03860 /* L20: */ 03861 } 03862 /* L30: */ 03863 } 03864 03865 } else if (lsame_(side, "R")) { 03866 03867 /* Form C * H or C * H' where C = ( C1 C2 ) 03868 03869 W := C * V = (C1*V1 + C2*V2) (stored in WORK) 03870 03871 W := C1 */ 03872 03873 i__1 = *k; 03874 for (j = 1; j <= i__1; ++j) { 03875 scopy_(m, &c___ref(1, j), &c__1, &work_ref(1, j), &c__1); 03876 /* L40: */ 03877 } 03878 03879 /* W := W * V1 */ 03880 03881 strmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b14, 03882 &v[v_offset], ldv, &work[work_offset], ldwork); 03883 if (*n > *k) { 03884 03885 /* W := W + C2 * V2 */ 03886 03887 i__1 = *n - *k; 03888 sgemm_("No transpose", "No transpose", m, k, &i__1, & 03889 c_b14, &c___ref(1, *k + 1), ldc, &v_ref(*k + 1, 1) 03890 , ldv, &c_b14, &work[work_offset], ldwork); 03891 } 03892 03893 /* W := W * T or W * T' */ 03894 03895 strmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b14, &t[ 03896 t_offset], ldt, &work[work_offset], ldwork); 03897 03898 /* C := C - W * V' */ 03899 03900 if (*n > *k) { 03901 03902 /* C2 := C2 - W * V2' */ 03903 03904 i__1 = *n - *k; 03905 sgemm_("No transpose", "Transpose", m, &i__1, k, &c_b25, & 03906 work[work_offset], ldwork, &v_ref(*k + 1, 1), ldv, 03907 &c_b14, &c___ref(1, *k + 1), ldc); 03908 } 03909 03910 /* W := W * V1' */ 03911 03912 strmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b14, & 03913 v[v_offset], ldv, &work[work_offset], ldwork); 03914 03915 /* C1 := C1 - W */ 03916 03917 i__1 = *k; 03918 for (j = 1; j <= i__1; ++j) { 03919 i__2 = *m; 03920 for (i__ = 1; i__ <= i__2; ++i__) { 03921 c___ref(i__, j) = c___ref(i__, j) - work_ref(i__, j); 03922 /* L50: */ 03923 } 03924 /* L60: */ 03925 } 03926 } 03927 03928 } else { 03929 03930 /* Let V = ( V1 ) 03931 ( V2 ) (last K rows) 03932 where V2 is unit upper triangular. 
*/ 03933 03934 if (lsame_(side, "L")) { 03935 03936 /* Form H * C or H' * C where C = ( C1 ) 03937 ( C2 ) 03938 03939 W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) 03940 03941 W := C2' */ 03942 03943 i__1 = *k; 03944 for (j = 1; j <= i__1; ++j) { 03945 scopy_(n, &c___ref(*m - *k + j, 1), ldc, &work_ref(1, j), 03946 &c__1); 03947 /* L70: */ 03948 } 03949 03950 /* W := W * V2 */ 03951 03952 strmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b14, 03953 &v_ref(*m - *k + 1, 1), ldv, &work[work_offset], 03954 ldwork); 03955 if (*m > *k) { 03956 03957 /* W := W + C1'*V1 */ 03958 03959 i__1 = *m - *k; 03960 sgemm_("Transpose", "No transpose", n, k, &i__1, &c_b14, & 03961 c__[c_offset], ldc, &v[v_offset], ldv, &c_b14, & 03962 work[work_offset], ldwork); 03963 } 03964 03965 /* W := W * T' or W * T */ 03966 03967 strmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b14, &t[ 03968 t_offset], ldt, &work[work_offset], ldwork); 03969 03970 /* C := C - V * W' */ 03971 03972 if (*m > *k) { 03973 03974 /* C1 := C1 - V1 * W' */ 03975 03976 i__1 = *m - *k; 03977 sgemm_("No transpose", "Transpose", &i__1, n, k, &c_b25, & 03978 v[v_offset], ldv, &work[work_offset], ldwork, & 03979 c_b14, &c__[c_offset], ldc) 03980 ; 03981 } 03982 03983 /* W := W * V2' */ 03984 03985 strmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b14, & 03986 v_ref(*m - *k + 1, 1), ldv, &work[work_offset], 03987 ldwork); 03988 03989 /* C2 := C2 - W' */ 03990 03991 i__1 = *k; 03992 for (j = 1; j <= i__1; ++j) { 03993 i__2 = *n; 03994 for (i__ = 1; i__ <= i__2; ++i__) { 03995 c___ref(*m - *k + j, i__) = c___ref(*m - *k + j, i__) 03996 - work_ref(i__, j); 03997 /* L80: */ 03998 } 03999 /* L90: */ 04000 } 04001 04002 } else if (lsame_(side, "R")) { 04003 04004 /* Form C * H or C * H' where C = ( C1 C2 ) 04005 04006 W := C * V = (C1*V1 + C2*V2) (stored in WORK) 04007 04008 W := C2 */ 04009 04010 i__1 = *k; 04011 for (j = 1; j <= i__1; ++j) { 04012 scopy_(m, &c___ref(1, *n - *k + j), &c__1, &work_ref(1, j) 04013 , &c__1); 04014 /* L100: */ 04015 } 04016 04017 /* W := W * V2 */ 04018 04019 strmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b14, 04020 &v_ref(*n - *k + 1, 1), ldv, &work[work_offset], 04021 ldwork); 04022 if (*n > *k) { 04023 04024 /* W := W + C1 * V1 */ 04025 04026 i__1 = *n - *k; 04027 sgemm_("No transpose", "No transpose", m, k, &i__1, & 04028 c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, & 04029 c_b14, &work[work_offset], ldwork); 04030 } 04031 04032 /* W := W * T or W * T' */ 04033 04034 strmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b14, &t[ 04035 t_offset], ldt, &work[work_offset], ldwork); 04036 04037 /* C := C - W * V' */ 04038 04039 if (*n > *k) { 04040 04041 /* C1 := C1 - W * V1' */ 04042 04043 i__1 = *n - *k; 04044 sgemm_("No transpose", "Transpose", m, &i__1, k, &c_b25, & 04045 work[work_offset], ldwork, &v[v_offset], ldv, & 04046 c_b14, &c__[c_offset], ldc) 04047 ; 04048 } 04049 04050 /* W := W * V2' */ 04051 04052 strmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b14, & 04053 v_ref(*n - *k + 1, 1), ldv, &work[work_offset], 04054 ldwork); 04055 04056 /* C2 := C2 - W */ 04057 04058 i__1 = *k; 04059 for (j = 1; j <= i__1; ++j) { 04060 i__2 = *m; 04061 for (i__ = 1; i__ <= i__2; ++i__) { 04062 c___ref(i__, *n - *k + j) = c___ref(i__, *n - *k + j) 04063 - work_ref(i__, j); 04064 /* L110: */ 04065 } 04066 /* L120: */ 04067 } 04068 } 04069 } 04070 04071 } else if (lsame_(storev, "R")) { 04072 04073 if (lsame_(direct, "F")) { 04074 04075 /* Let V = ( V1 V2 ) (V1: first K columns) 04076 where V1 
is unit upper triangular. */ 04077 04078 if (lsame_(side, "L")) { 04079 04080 /* Form H * C or H' * C where C = ( C1 ) 04081 ( C2 ) 04082 04083 W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) 04084 04085 W := C1' */ 04086 04087 i__1 = *k; 04088 for (j = 1; j <= i__1; ++j) { 04089 scopy_(n, &c___ref(j, 1), ldc, &work_ref(1, j), &c__1); 04090 /* L130: */ 04091 } 04092 04093 /* W := W * V1' */ 04094 04095 strmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b14, & 04096 v[v_offset], ldv, &work[work_offset], ldwork); 04097 if (*m > *k) { 04098 04099 /* W := W + C2'*V2' */ 04100 04101 i__1 = *m - *k; 04102 sgemm_("Transpose", "Transpose", n, k, &i__1, &c_b14, & 04103 c___ref(*k + 1, 1), ldc, &v_ref(1, *k + 1), ldv, & 04104 c_b14, &work[work_offset], ldwork); 04105 } 04106 04107 /* W := W * T' or W * T */ 04108 04109 strmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b14, &t[ 04110 t_offset], ldt, &work[work_offset], ldwork); 04111 04112 /* C := C - V' * W' */ 04113 04114 if (*m > *k) { 04115 04116 /* C2 := C2 - V2' * W' */ 04117 04118 i__1 = *m - *k; 04119 sgemm_("Transpose", "Transpose", &i__1, n, k, &c_b25, & 04120 v_ref(1, *k + 1), ldv, &work[work_offset], ldwork, 04121 &c_b14, &c___ref(*k + 1, 1), ldc); 04122 } 04123 04124 /* W := W * V1 */ 04125 04126 strmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b14, 04127 &v[v_offset], ldv, &work[work_offset], ldwork); 04128 04129 /* C1 := C1 - W' */ 04130 04131 i__1 = *k; 04132 for (j = 1; j <= i__1; ++j) { 04133 i__2 = *n; 04134 for (i__ = 1; i__ <= i__2; ++i__) { 04135 c___ref(j, i__) = c___ref(j, i__) - work_ref(i__, j); 04136 /* L140: */ 04137 } 04138 /* L150: */ 04139 } 04140 04141 } else if (lsame_(side, "R")) { 04142 04143 /* Form C * H or C * H' where C = ( C1 C2 ) 04144 04145 W := C * V' = (C1*V1' + C2*V2') (stored in WORK) 04146 04147 W := C1 */ 04148 04149 i__1 = *k; 04150 for (j = 1; j <= i__1; ++j) { 04151 scopy_(m, &c___ref(1, j), &c__1, &work_ref(1, j), &c__1); 04152 /* L160: */ 04153 } 04154 04155 /* W := W * V1' */ 04156 04157 strmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b14, & 04158 v[v_offset], ldv, &work[work_offset], ldwork); 04159 if (*n > *k) { 04160 04161 /* W := W + C2 * V2' */ 04162 04163 i__1 = *n - *k; 04164 sgemm_("No transpose", "Transpose", m, k, &i__1, &c_b14, & 04165 c___ref(1, *k + 1), ldc, &v_ref(1, *k + 1), ldv, & 04166 c_b14, &work[work_offset], ldwork); 04167 } 04168 04169 /* W := W * T or W * T' */ 04170 04171 strmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b14, &t[ 04172 t_offset], ldt, &work[work_offset], ldwork); 04173 04174 /* C := C - W * V */ 04175 04176 if (*n > *k) { 04177 04178 /* C2 := C2 - W * V2 */ 04179 04180 i__1 = *n - *k; 04181 sgemm_("No transpose", "No transpose", m, &i__1, k, & 04182 c_b25, &work[work_offset], ldwork, &v_ref(1, *k + 04183 1), ldv, &c_b14, &c___ref(1, *k + 1), ldc); 04184 } 04185 04186 /* W := W * V1 */ 04187 04188 strmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b14, 04189 &v[v_offset], ldv, &work[work_offset], ldwork); 04190 04191 /* C1 := C1 - W */ 04192 04193 i__1 = *k; 04194 for (j = 1; j <= i__1; ++j) { 04195 i__2 = *m; 04196 for (i__ = 1; i__ <= i__2; ++i__) { 04197 c___ref(i__, j) = c___ref(i__, j) - work_ref(i__, j); 04198 /* L170: */ 04199 } 04200 /* L180: */ 04201 } 04202 04203 } 04204 04205 } else { 04206 04207 /* Let V = ( V1 V2 ) (V2: last K columns) 04208 where V2 is unit lower triangular. 
*/ 04209 04210 if (lsame_(side, "L")) { 04211 04212 /* Form H * C or H' * C where C = ( C1 ) 04213 ( C2 ) 04214 04215 W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) 04216 04217 W := C2' */ 04218 04219 i__1 = *k; 04220 for (j = 1; j <= i__1; ++j) { 04221 scopy_(n, &c___ref(*m - *k + j, 1), ldc, &work_ref(1, j), 04222 &c__1); 04223 /* L190: */ 04224 } 04225 04226 /* W := W * V2' */ 04227 04228 strmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b14, & 04229 v_ref(1, *m - *k + 1), ldv, &work[work_offset], 04230 ldwork); 04231 if (*m > *k) { 04232 04233 /* W := W + C1'*V1' */ 04234 04235 i__1 = *m - *k; 04236 sgemm_("Transpose", "Transpose", n, k, &i__1, &c_b14, & 04237 c__[c_offset], ldc, &v[v_offset], ldv, &c_b14, & 04238 work[work_offset], ldwork); 04239 } 04240 04241 /* W := W * T' or W * T */ 04242 04243 strmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b14, &t[ 04244 t_offset], ldt, &work[work_offset], ldwork); 04245 04246 /* C := C - V' * W' */ 04247 04248 if (*m > *k) { 04249 04250 /* C1 := C1 - V1' * W' */ 04251 04252 i__1 = *m - *k; 04253 sgemm_("Transpose", "Transpose", &i__1, n, k, &c_b25, &v[ 04254 v_offset], ldv, &work[work_offset], ldwork, & 04255 c_b14, &c__[c_offset], ldc); 04256 } 04257 04258 /* W := W * V2 */ 04259 04260 strmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b14, 04261 &v_ref(1, *m - *k + 1), ldv, &work[work_offset], 04262 ldwork); 04263 04264 /* C2 := C2 - W' */ 04265 04266 i__1 = *k; 04267 for (j = 1; j <= i__1; ++j) { 04268 i__2 = *n; 04269 for (i__ = 1; i__ <= i__2; ++i__) { 04270 c___ref(*m - *k + j, i__) = c___ref(*m - *k + j, i__) 04271 - work_ref(i__, j); 04272 /* L200: */ 04273 } 04274 /* L210: */ 04275 } 04276 04277 } else if (lsame_(side, "R")) { 04278 04279 /* Form C * H or C * H' where C = ( C1 C2 ) 04280 04281 W := C * V' = (C1*V1' + C2*V2') (stored in WORK) 04282 04283 W := C2 */ 04284 04285 i__1 = *k; 04286 for (j = 1; j <= i__1; ++j) { 04287 scopy_(m, &c___ref(1, *n - *k + j), &c__1, &work_ref(1, j) 04288 , &c__1); 04289 /* L220: */ 04290 } 04291 04292 /* W := W * V2' */ 04293 04294 strmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b14, & 04295 v_ref(1, *n - *k + 1), ldv, &work[work_offset], 04296 ldwork); 04297 if (*n > *k) { 04298 04299 /* W := W + C1 * V1' */ 04300 04301 i__1 = *n - *k; 04302 sgemm_("No transpose", "Transpose", m, k, &i__1, &c_b14, & 04303 c__[c_offset], ldc, &v[v_offset], ldv, &c_b14, & 04304 work[work_offset], ldwork); 04305 } 04306 04307 /* W := W * T or W * T' */ 04308 04309 strmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b14, &t[ 04310 t_offset], ldt, &work[work_offset], ldwork); 04311 04312 /* C := C - W * V */ 04313 04314 if (*n > *k) { 04315 04316 /* C1 := C1 - W * V1 */ 04317 04318 i__1 = *n - *k; 04319 sgemm_("No transpose", "No transpose", m, &i__1, k, & 04320 c_b25, &work[work_offset], ldwork, &v[v_offset], 04321 ldv, &c_b14, &c__[c_offset], ldc); 04322 } 04323 04324 /* W := W * V2 */ 04325 04326 strmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b14, 04327 &v_ref(1, *n - *k + 1), ldv, &work[work_offset], 04328 ldwork); 04329 04330 /* C1 := C1 - W */ 04331 04332 i__1 = *k; 04333 for (j = 1; j <= i__1; ++j) { 04334 i__2 = *m; 04335 for (i__ = 1; i__ <= i__2; ++i__) { 04336 c___ref(i__, *n - *k + j) = c___ref(i__, *n - *k + j) 04337 - work_ref(i__, j); 04338 /* L230: */ 04339 } 04340 /* L240: */ 04341 } 04342 04343 } 04344 04345 } 04346 } 04347 04348 return 0; 04349 04350 /* End of SLARFB */ 04351 04352 } /* slarfb_ */
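An illustrative sketch of the usual calling pattern (the same one the QR routines follow): slarft_, documented further down this page, builds the triangular factor T of the columnwise-stored reflectors, and slarfb_ then applies the block reflector H' to C from the left. The V, TAU and C values below are arbitrary, so no particular output is claimed.

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer m = 4, n = 3, k = 2;
    integer ldv = 4, ldt = 2, ldc = 4, ldwork = 3;
    /* DIRECT = 'F', STOREV = 'C': V1 (first k rows of V) is unit lower
       triangular, so V(1,1) and V(2,2) are implicitly 1. */
    real v[8]   = { 1.f, 0.2f, 0.3f, 0.4f,
                    0.f, 1.f,  0.5f, 0.6f };
    real tau[2] = { 1.5f, 1.2f };            /* arbitrary reflector scalars */
    real t[4]   = { 0.f, 0.f, 0.f, 0.f };    /* k-by-k triangular factor */
    real c[12]  = { 1.f, 2.f, 3.f, 4.f,      /* column-major 4x3 C */
                    5.f, 6.f, 7.f, 8.f,
                    9.f, 10.f, 11.f, 12.f };
    real work[6];                            /* LDWORK-by-K, LDWORK >= N */

    slarft_("Forward", "Columnwise", &m, &k, v, &ldv, tau, t, &ldt);
    slarfb_("Left", "Transpose", "Forward", "Columnwise", &m, &n, &k,
            v, &ldv, t, &ldt, c, &ldc, work, &ldwork);

    for (integer j = 0; j < n; ++j)
        std::printf("H'*C column %ld: %g %g %g %g\n", (long) j,
                    c[j * ldc], c[j * ldc + 1], c[j * ldc + 2], c[j * ldc + 3]);
    return 0;
}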
int slarfg_ | ( | integer * | n, | |
real * | alpha, | |||
real * | x, | |||
integer * | incx, | |||
real * | tau | |||
) |
Definition at line 4491 of file lapackblas.cpp.
References dabs, integer, r_sign(), slamch_(), slapy2_(), snrm2_(), and sscal_().
Referenced by sgebd2_(), sgelq2_(), sgeqr2_(), slabrd_(), slatrd_(), and ssytd2_().
04493 { 04494 /* -- LAPACK auxiliary routine (version 3.0) -- 04495 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 04496 Courant Institute, Argonne National Lab, and Rice University 04497 September 30, 1994 04498 04499 04500 Purpose 04501 ======= 04502 04503 SLARFG generates a real elementary reflector H of order n, such 04504 that 04505 04506 H * ( alpha ) = ( beta ), H' * H = I. 04507 ( x ) ( 0 ) 04508 04509 where alpha and beta are scalars, and x is an (n-1)-element real 04510 vector. H is represented in the form 04511 04512 H = I - tau * ( 1 ) * ( 1 v' ) , 04513 ( v ) 04514 04515 where tau is a real scalar and v is a real (n-1)-element 04516 vector. 04517 04518 If the elements of x are all zero, then tau = 0 and H is taken to be 04519 the unit matrix. 04520 04521 Otherwise 1 <= tau <= 2. 04522 04523 Arguments 04524 ========= 04525 04526 N (input) INTEGER 04527 The order of the elementary reflector. 04528 04529 ALPHA (input/output) REAL 04530 On entry, the value alpha. 04531 On exit, it is overwritten with the value beta. 04532 04533 X (input/output) REAL array, dimension 04534 (1+(N-2)*abs(INCX)) 04535 On entry, the vector x. 04536 On exit, it is overwritten with the vector v. 04537 04538 INCX (input) INTEGER 04539 The increment between elements of X. INCX > 0. 04540 04541 TAU (output) REAL 04542 The value tau. 04543 04544 ===================================================================== 04545 04546 04547 Parameter adjustments */ 04548 /* System generated locals */ 04549 integer i__1; 04550 real r__1; 04551 /* Builtin functions */ 04552 double r_sign(real *, real *); 04553 /* Local variables */ 04554 static real beta; 04555 extern doublereal snrm2_(integer *, real *, integer *); 04556 static integer j; 04557 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); 04558 static real xnorm; 04559 extern doublereal slapy2_(real *, real *), slamch_(const char *); 04560 static real safmin, rsafmn; 04561 static integer knt; 04562 04563 --x; 04564 04565 /* Function Body */ 04566 if (*n <= 1) { 04567 *tau = 0.f; 04568 return 0; 04569 } 04570 04571 i__1 = *n - 1; 04572 xnorm = snrm2_(&i__1, &x[1], incx); 04573 04574 if (xnorm == 0.f) { 04575 04576 /* H = I */ 04577 04578 *tau = 0.f; 04579 } else { 04580 04581 /* general case */ 04582 04583 r__1 = slapy2_(alpha, &xnorm); 04584 beta = -r_sign(&r__1, alpha); 04585 safmin = slamch_("S") / slamch_("E"); 04586 if (dabs(beta) < safmin) { 04587 04588 /* XNORM, BETA may be inaccurate; scale X and recompute them */ 04589 04590 rsafmn = 1.f / safmin; 04591 knt = 0; 04592 L10: 04593 ++knt; 04594 i__1 = *n - 1; 04595 sscal_(&i__1, &rsafmn, &x[1], incx); 04596 beta *= rsafmn; 04597 *alpha *= rsafmn; 04598 if (dabs(beta) < safmin) { 04599 goto L10; 04600 } 04601 04602 /* New BETA is at most 1, at least SAFMIN */ 04603 04604 i__1 = *n - 1; 04605 xnorm = snrm2_(&i__1, &x[1], incx); 04606 r__1 = slapy2_(alpha, &xnorm); 04607 beta = -r_sign(&r__1, alpha); 04608 *tau = (beta - *alpha) / beta; 04609 i__1 = *n - 1; 04610 r__1 = 1.f / (*alpha - beta); 04611 sscal_(&i__1, &r__1, &x[1], incx); 04612 04613 /* If ALPHA is subnormal, it may lose relative accuracy */ 04614 04615 *alpha = beta; 04616 i__1 = knt; 04617 for (j = 1; j <= i__1; ++j) { 04618 *alpha *= safmin; 04619 /* L20: */ 04620 } 04621 } else { 04622 *tau = (beta - *alpha) / beta; 04623 i__1 = *n - 1; 04624 r__1 = 1.f / (*alpha - beta); 04625 sscal_(&i__1, &r__1, &x[1], incx); 04626 *alpha = beta; 04627 } 04628 } 04629 04630 return 0; 04631 04632 /* End of SLARFG */ 04633 
04634 } /* slarfg_ */
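A hand-checkable sketch using the signature above: annihilating the trailing entry of (3, 4)' gives beta = -5, v = 0.5 and tau = 1.6.

#include <cstdio>
#include "lapackblas.h"

int main() {
    /* On exit ALPHA holds beta, X holds v, and TAU satisfies
       (I - tau*(1 v')'*(1 v')) * (alpha x')' = (beta 0')'. */
    integer n = 2, incx = 1;
    real alpha = 3.f;
    real x[1]  = { 4.f };
    real tau;
    slarfg_(&n, &alpha, x, &incx, &tau);
    std::printf("beta = %g, v = %g, tau = %g  (expected -5, 0.5, 1.6)\n",
                alpha, x[0], tau);
    return 0;
}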
int slarft_ | ( | const char * | direct, | |
const char * | storev, | |||
integer * | n, | |||
integer * | k, | |||
real * | v, | |||
integer * | ldv, | |||
real * | tau, | |||
real * | t, | |||
integer * | ldt | |||
) |
Definition at line 4639 of file lapackblas.cpp.
References c__1, integer, lsame_(), sgemv_(), strmv_(), t_ref, and v_ref.
Referenced by sgelqf_(), sgeqrf_(), sorglq_(), sorgql_(), sorgqr_(), sormlq_(), and sormqr_().
04641 { 04642 /* -- LAPACK auxiliary routine (version 3.0) -- 04643 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 04644 Courant Institute, Argonne National Lab, and Rice University 04645 February 29, 1992 04646 04647 04648 Purpose 04649 ======= 04650 04651 SLARFT forms the triangular factor T of a real block reflector H 04652 of order n, which is defined as a product of k elementary reflectors. 04653 04654 If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; 04655 04656 If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. 04657 04658 If STOREV = 'C', the vector which defines the elementary reflector 04659 H(i) is stored in the i-th column of the array V, and 04660 04661 H = I - V * T * V' 04662 04663 If STOREV = 'R', the vector which defines the elementary reflector 04664 H(i) is stored in the i-th row of the array V, and 04665 04666 H = I - V' * T * V 04667 04668 Arguments 04669 ========= 04670 04671 DIRECT (input) CHARACTER*1 04672 Specifies the order in which the elementary reflectors are 04673 multiplied to form the block reflector: 04674 = 'F': H = H(1) H(2) . . . H(k) (Forward) 04675 = 'B': H = H(k) . . . H(2) H(1) (Backward) 04676 04677 STOREV (input) CHARACTER*1 04678 Specifies how the vectors which define the elementary 04679 reflectors are stored (see also Further Details): 04680 = 'C': columnwise 04681 = 'R': rowwise 04682 04683 N (input) INTEGER 04684 The order of the block reflector H. N >= 0. 04685 04686 K (input) INTEGER 04687 The order of the triangular factor T (= the number of 04688 elementary reflectors). K >= 1. 04689 04690 V (input/output) REAL array, dimension 04691 (LDV,K) if STOREV = 'C' 04692 (LDV,N) if STOREV = 'R' 04693 The matrix V. See further details. 04694 04695 LDV (input) INTEGER 04696 The leading dimension of the array V. 04697 If STOREV = 'C', LDV >= f2cmax(1,N); if STOREV = 'R', LDV >= K. 04698 04699 TAU (input) REAL array, dimension (K) 04700 TAU(i) must contain the scalar factor of the elementary 04701 reflector H(i). 04702 04703 T (output) REAL array, dimension (LDT,K) 04704 The k by k triangular factor T of the block reflector. 04705 If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is 04706 lower triangular. The rest of the array is not used. 04707 04708 LDT (input) INTEGER 04709 The leading dimension of the array T. LDT >= K. 04710 04711 Further Details 04712 =============== 04713 04714 The shape of the matrix V and the storage of the vectors which define 04715 the H(i) is best illustrated by the following example with n = 5 and 04716 k = 3. The elements equal to 1 are not stored; the corresponding 04717 array elements are modified but restored on exit. The rest of the 04718 array is not used. 
04719 04720 DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': 04721 04722 V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) 04723 ( v1 1 ) ( 1 v2 v2 v2 ) 04724 ( v1 v2 1 ) ( 1 v3 v3 ) 04725 ( v1 v2 v3 ) 04726 ( v1 v2 v3 ) 04727 04728 DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': 04729 04730 V = ( v1 v2 v3 ) V = ( v1 v1 1 ) 04731 ( v1 v2 v3 ) ( v2 v2 v2 1 ) 04732 ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) 04733 ( 1 v3 ) 04734 ( 1 ) 04735 04736 ===================================================================== 04737 04738 04739 Quick return if possible 04740 04741 Parameter adjustments */ 04742 /* Table of constant values */ 04743 static integer c__1 = 1; 04744 static real c_b8 = 0.f; 04745 04746 /* System generated locals */ 04747 integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; 04748 real r__1; 04749 /* Local variables */ 04750 static integer i__, j; 04751 extern logical lsame_(const char *, const char *); 04752 extern /* Subroutine */ int sgemv_(const char *, integer *, integer *, real *, 04753 real *, integer *, real *, integer *, real *, real *, integer *), strmv_(const char *, const char *, const char *, integer *, real *, 04754 integer *, real *, integer *); 04755 static real vii; 04756 #define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1] 04757 #define v_ref(a_1,a_2) v[(a_2)*v_dim1 + a_1] 04758 04759 04760 v_dim1 = *ldv; 04761 v_offset = 1 + v_dim1 * 1; 04762 v -= v_offset; 04763 --tau; 04764 t_dim1 = *ldt; 04765 t_offset = 1 + t_dim1 * 1; 04766 t -= t_offset; 04767 04768 /* Function Body */ 04769 if (*n == 0) { 04770 return 0; 04771 } 04772 04773 if (lsame_(direct, "F")) { 04774 i__1 = *k; 04775 for (i__ = 1; i__ <= i__1; ++i__) { 04776 if (tau[i__] == 0.f) { 04777 04778 /* H(i) = I */ 04779 04780 i__2 = i__; 04781 for (j = 1; j <= i__2; ++j) { 04782 t_ref(j, i__) = 0.f; 04783 /* L10: */ 04784 } 04785 } else { 04786 04787 /* general case */ 04788 04789 vii = v_ref(i__, i__); 04790 v_ref(i__, i__) = 1.f; 04791 if (lsame_(storev, "C")) { 04792 04793 /* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ 04794 04795 i__2 = *n - i__ + 1; 04796 i__3 = i__ - 1; 04797 r__1 = -tau[i__]; 04798 sgemv_("Transpose", &i__2, &i__3, &r__1, &v_ref(i__, 1), 04799 ldv, &v_ref(i__, i__), &c__1, &c_b8, &t_ref(1, 04800 i__), &c__1); 04801 } else { 04802 04803 /* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ 04804 04805 i__2 = i__ - 1; 04806 i__3 = *n - i__ + 1; 04807 r__1 = -tau[i__]; 04808 sgemv_("No transpose", &i__2, &i__3, &r__1, &v_ref(1, i__) 04809 , ldv, &v_ref(i__, i__), ldv, &c_b8, &t_ref(1, 04810 i__), &c__1); 04811 } 04812 v_ref(i__, i__) = vii; 04813 04814 /* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ 04815 04816 i__2 = i__ - 1; 04817 strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ 04818 t_offset], ldt, &t_ref(1, i__), &c__1); 04819 t_ref(i__, i__) = tau[i__]; 04820 } 04821 /* L20: */ 04822 } 04823 } else { 04824 for (i__ = *k; i__ >= 1; --i__) { 04825 if (tau[i__] == 0.f) { 04826 04827 /* H(i) = I */ 04828 04829 i__1 = *k; 04830 for (j = i__; j <= i__1; ++j) { 04831 t_ref(j, i__) = 0.f; 04832 /* L30: */ 04833 } 04834 } else { 04835 04836 /* general case */ 04837 04838 if (i__ < *k) { 04839 if (lsame_(storev, "C")) { 04840 vii = v_ref(*n - *k + i__, i__); 04841 v_ref(*n - *k + i__, i__) = 1.f; 04842 04843 /* T(i+1:k,i) := 04844 - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) */ 04845 04846 i__1 = *n - *k + i__; 04847 i__2 = *k - i__; 04848 r__1 = -tau[i__]; 04849 sgemv_("Transpose", &i__1, &i__2, &r__1, &v_ref(1, 04850 i__ + 1), ldv, &v_ref(1, i__), &c__1, &c_b8, & 04851 
t_ref(i__ + 1, i__), &c__1); 04852 v_ref(*n - *k + i__, i__) = vii; 04853 } else { 04854 vii = v_ref(i__, *n - *k + i__); 04855 v_ref(i__, *n - *k + i__) = 1.f; 04856 04857 /* T(i+1:k,i) := 04858 - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' */ 04859 04860 i__1 = *k - i__; 04861 i__2 = *n - *k + i__; 04862 r__1 = -tau[i__]; 04863 sgemv_("No transpose", &i__1, &i__2, &r__1, &v_ref( 04864 i__ + 1, 1), ldv, &v_ref(i__, 1), ldv, &c_b8, 04865 &t_ref(i__ + 1, i__), &c__1); 04866 v_ref(i__, *n - *k + i__) = vii; 04867 } 04868 04869 /* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ 04870 04871 i__1 = *k - i__; 04872 strmv_("Lower", "No transpose", "Non-unit", &i__1, &t_ref( 04873 i__ + 1, i__ + 1), ldt, &t_ref(i__ + 1, i__), & 04874 c__1); 04875 } 04876 t_ref(i__, i__) = tau[i__]; 04877 } 04878 /* L40: */ 04879 } 04880 } 04881 return 0; 04882 04883 /* End of SLARFT */ 04884 04885 } /* slarft_ */
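A small sketch of the DIRECT = 'F', STOREV = 'C' case with k = 2 reflectors of order 3. The diagonal of T carries the taus, and the single off-diagonal entry follows the recurrence T(1,2) = -tau(1)*tau(2)*v1'*v2, which is -1.75 for the values below.

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer n = 3, k = 2, ldv = 3, ldt = 2;
    real v[6]   = { 1.f, 2.f, 3.f,     /* v1 = (1, 2, 3)'             */
                    0.f, 1.f, 4.f };   /* v2 = (0, 1, 4)', V(2,2) = 1 */
    real tau[2] = { 0.5f, 0.25f };
    real t[4]   = { 0.f, 0.f, 0.f, 0.f };
    slarft_("Forward", "Columnwise", &n, &k, v, &ldv, tau, t, &ldt);
    /* T(1,1) = tau(1), T(2,2) = tau(2),
       T(1,2) = -0.5*0.25*(1*0 + 2*1 + 3*4) = -1.75,
       and T(2,1) is never referenced. */
    std::printf("T = [%g %g; (unused) %g]\n", t[0], t[2], t[3]);
    return 0;
}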
int slartg_ | ( | real * | f, | |
real * | g, | |||
real * | cs, | |||
real * | sn, | |||
real * | r__ | |||
) |
Definition at line 4894 of file lapackblas.cpp.
References dabs, df2cmax, FALSE_, integer, log(), pow_ri(), slamch_(), sqrt(), and TRUE_.
Referenced by sbdsqr_(), and ssteqr_().
04895 { 04896 /* -- LAPACK auxiliary routine (version 3.0) -- 04897 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 04898 Courant Institute, Argonne National Lab, and Rice University 04899 September 30, 1994 04900 04901 04902 Purpose 04903 ======= 04904 04905 SLARTG generate a plane rotation so that 04906 04907 [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. 04908 [ -SN CS ] [ G ] [ 0 ] 04909 04910 This is a slower, more accurate version of the BLAS1 routine SROTG, 04911 with the following other differences: 04912 F and G are unchanged on return. 04913 If G=0, then CS=1 and SN=0. 04914 If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any 04915 floating point operations (saves work in SBDSQR when 04916 there are zeros on the diagonal). 04917 04918 If F exceeds G in magnitude, CS will be positive. 04919 04920 Arguments 04921 ========= 04922 04923 F (input) REAL 04924 The first component of vector to be rotated. 04925 04926 G (input) REAL 04927 The second component of vector to be rotated. 04928 04929 CS (output) REAL 04930 The cosine of the rotation. 04931 04932 SN (output) REAL 04933 The sine of the rotation. 04934 04935 R (output) REAL 04936 The nonzero component of the rotated vector. 04937 04938 ===================================================================== */ 04939 /* Initialized data */ 04940 static logical first = TRUE_; 04941 /* System generated locals */ 04942 integer i__1; 04943 real r__1, r__2; 04944 /* Builtin functions */ 04945 // double log(doublereal), pow_ri(real *, integer *), sqrt(doublereal); 04946 double pow_ri(real *, integer *); 04947 /* Local variables */ 04948 static integer i__; 04949 static real scale; 04950 static integer count; 04951 static real f1, g1, safmn2, safmx2; 04952 extern doublereal slamch_(const char *); 04953 static real safmin, eps; 04954 04955 04956 04957 if (first) { 04958 first = FALSE_; 04959 safmin = slamch_("S"); 04960 eps = slamch_("E"); 04961 r__1 = slamch_("B"); 04962 i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) / 04963 2.f); 04964 safmn2 = pow_ri(&r__1, &i__1); 04965 safmx2 = 1.f / safmn2; 04966 } 04967 if (*g == 0.f) { 04968 *cs = 1.f; 04969 *sn = 0.f; 04970 *r__ = *f; 04971 } else if (*f == 0.f) { 04972 *cs = 0.f; 04973 *sn = 1.f; 04974 *r__ = *g; 04975 } else { 04976 f1 = *f; 04977 g1 = *g; 04978 /* Computing MAX */ 04979 r__1 = dabs(f1), r__2 = dabs(g1); 04980 scale = df2cmax(r__1,r__2); 04981 if (scale >= safmx2) { 04982 count = 0; 04983 L10: 04984 ++count; 04985 f1 *= safmn2; 04986 g1 *= safmn2; 04987 /* Computing MAX */ 04988 r__1 = dabs(f1), r__2 = dabs(g1); 04989 scale = df2cmax(r__1,r__2); 04990 if (scale >= safmx2) { 04991 goto L10; 04992 } 04993 /* Computing 2nd power */ 04994 r__1 = f1; 04995 /* Computing 2nd power */ 04996 r__2 = g1; 04997 *r__ = sqrt(r__1 * r__1 + r__2 * r__2); 04998 *cs = f1 / *r__; 04999 *sn = g1 / *r__; 05000 i__1 = count; 05001 for (i__ = 1; i__ <= i__1; ++i__) { 05002 *r__ *= safmx2; 05003 /* L20: */ 05004 } 05005 } else if (scale <= safmn2) { 05006 count = 0; 05007 L30: 05008 ++count; 05009 f1 *= safmx2; 05010 g1 *= safmx2; 05011 /* Computing MAX */ 05012 r__1 = dabs(f1), r__2 = dabs(g1); 05013 scale = df2cmax(r__1,r__2); 05014 if (scale <= safmn2) { 05015 goto L30; 05016 } 05017 /* Computing 2nd power */ 05018 r__1 = f1; 05019 /* Computing 2nd power */ 05020 r__2 = g1; 05021 *r__ = sqrt(r__1 * r__1 + r__2 * r__2); 05022 *cs = f1 / *r__; 05023 *sn = g1 / *r__; 05024 i__1 = count; 05025 for (i__ = 1; i__ <= i__1; ++i__) { 05026 *r__ *= safmn2; 05027 /* L40: */ 05028 
} 05029 } else { 05030 /* Computing 2nd power */ 05031 r__1 = f1; 05032 /* Computing 2nd power */ 05033 r__2 = g1; 05034 *r__ = sqrt(r__1 * r__1 + r__2 * r__2); 05035 *cs = f1 / *r__; 05036 *sn = g1 / *r__; 05037 } 05038 if (dabs(*f) > dabs(*g) && *cs < 0.f) { 05039 *cs = -(*cs); 05040 *sn = -(*sn); 05041 *r__ = -(*r__); 05042 } 05043 } 05044 return 0; 05045 05046 /* End of SLARTG */ 05047 05048 } /* slartg_ */
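A hand-checkable sketch using the signature above: rotating (3, 4)' gives cs = 0.6, sn = 0.8 and r = 5.

#include <cstdio>
#include "lapackblas.h"

int main() {
    /* [ cs sn; -sn cs ] * (3, 4)' = (r, 0)'; F and G are left unchanged. */
    real f = 3.f, g = 4.f, cs, sn, r;
    slartg_(&f, &g, &cs, &sn, &r);
    std::printf("cs = %g, sn = %g, r = %g  (expected 0.6, 0.8, 5)\n", cs, sn, r);
    return 0;
}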
int slas2_ | ( | real * | f, | |
real * | g, | |||
real * | h__, | |||
real * | ssmin, | |||
real * | ssmax | |||
) |
Definition at line 27334 of file lapackblas.cpp.
References dabs, df2cmax, df2cmin, and sqrt().
Referenced by sbdsqr_(), and slasq1_().
27336 { 27337 /* -- LAPACK auxiliary routine (version 3.0) -- 27338 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 27339 Courant Institute, Argonne National Lab, and Rice University 27340 September 30, 1994 27341 27342 27343 Purpose 27344 ======= 27345 27346 SLAS2 computes the singular values of the 2-by-2 matrix 27347 [ F G ] 27348 [ 0 H ]. 27349 On return, SSMIN is the smaller singular value and SSMAX is the 27350 larger singular value. 27351 27352 Arguments 27353 ========= 27354 27355 F (input) REAL 27356 The (1,1) element of the 2-by-2 matrix. 27357 27358 G (input) REAL 27359 The (1,2) element of the 2-by-2 matrix. 27360 27361 H (input) REAL 27362 The (2,2) element of the 2-by-2 matrix. 27363 27364 SSMIN (output) REAL 27365 The smaller singular value. 27366 27367 SSMAX (output) REAL 27368 The larger singular value. 27369 27370 Further Details 27371 =============== 27372 27373 Barring over/underflow, all output quantities are correct to within 27374 a few units in the last place (ulps), even in the absence of a guard 27375 digit in addition/subtraction. 27376 27377 In IEEE arithmetic, the code works correctly if one matrix element is 27378 infinite. 27379 27380 Overflow will not occur unless the largest singular value itself 27381 overflows, or is within a few ulps of overflow. (On machines with 27382 partial overflow, like the Cray, overflow may occur if the largest 27383 singular value is within a factor of 2 of overflow.) 27384 27385 Underflow is harmless if underflow is gradual. Otherwise, results 27386 may correspond to a matrix modified by perturbations of size near 27387 the underflow threshold. 27388 27389 ==================================================================== */ 27390 /* System generated locals */ 27391 real r__1, r__2; 27392 /* Builtin functions */ 27393 //double sqrt(doublereal); 27394 /* Local variables */ 27395 static real fhmn, fhmx, c__, fa, ga, ha, as, at, au; 27396 27397 27398 27399 fa = dabs(*f); 27400 ga = dabs(*g); 27401 ha = dabs(*h__); 27402 fhmn = df2cmin(fa,ha); 27403 fhmx = df2cmax(fa,ha); 27404 if (fhmn == 0.f) { 27405 *ssmin = 0.f; 27406 if (fhmx == 0.f) { 27407 *ssmax = ga; 27408 } else { 27409 /* Computing 2nd power */ 27410 r__1 = df2cmin(fhmx,ga) / df2cmax(fhmx,ga); 27411 *ssmax = df2cmax(fhmx,ga) * sqrt(r__1 * r__1 + 1.f); 27412 } 27413 } else { 27414 if (ga < fhmx) { 27415 as = fhmn / fhmx + 1.f; 27416 at = (fhmx - fhmn) / fhmx; 27417 /* Computing 2nd power */ 27418 r__1 = ga / fhmx; 27419 au = r__1 * r__1; 27420 c__ = 2.f / (sqrt(as * as + au) + sqrt(at * at + au)); 27421 *ssmin = fhmn * c__; 27422 *ssmax = fhmx / c__; 27423 } else { 27424 au = fhmx / ga; 27425 if (au == 0.f) { 27426 27427 /* Avoid possible harmful underflow if exponent range 27428 asymmetric (true SSMIN may not underflow even if 27429 AU underflows) */ 27430 27431 *ssmin = fhmn * fhmx / ga; 27432 *ssmax = ga; 27433 } else { 27434 as = fhmn / fhmx + 1.f; 27435 at = (fhmx - fhmn) / fhmx; 27436 /* Computing 2nd power */ 27437 r__1 = as * au; 27438 /* Computing 2nd power */ 27439 r__2 = at * au; 27440 c__ = 1.f / (sqrt(r__1 * r__1 + 1.f) + sqrt(r__2 * r__2 + 1.f) 27441 ); 27442 *ssmin = fhmn * c__ * au; 27443 *ssmin += *ssmin; 27444 *ssmax = ga / (c__ + c__); 27445 } 27446 } 27447 } 27448 return 0; 27449 27450 /* End of SLAS2 */ 27451 27452 } /* slas2_ */
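A quick sketch with a matrix whose singular values are exact integers: [2 3; 0 2] has determinant 4 and squared Frobenius norm 17, so its singular values are 4 and 1.

#include <cstdio>
#include "lapackblas.h"

int main() {
    real f = 2.f, g = 3.f, h = 2.f;   /* the 2x2 matrix [ f g; 0 h ] */
    real ssmin, ssmax;
    slas2_(&f, &g, &h, &ssmin, &ssmax);
    std::printf("ssmin = %g, ssmax = %g  (expected 1 and 4)\n", ssmin, ssmax);
    return 0;
}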
int slascl_ | ( | const char * | type__, | |
integer * | kl, | |||
integer * | ku, | |||
real * | cfrom, | |||
real * | cto, | |||
integer * | m, | |||
integer * | n, | |||
real * | a, | |||
integer * | lda, | |||
integer * | info | |||
) |
Definition at line 5053 of file lapackblas.cpp.
References a_ref, dabs, f2cmax, f2cmin, FALSE_, integer, lsame_(), slamch_(), TRUE_, and xerbla_().
Referenced by sgesvd_(), slasq1_(), sstedc_(), ssteqr_(), ssterf_(), and ssyev_().
05056 { 05057 /* -- LAPACK auxiliary routine (version 3.0) -- 05058 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 05059 Courant Institute, Argonne National Lab, and Rice University 05060 February 29, 1992 05061 05062 05063 Purpose 05064 ======= 05065 05066 SLASCL multiplies the M by N real matrix A by the real scalar 05067 CTO/CFROM. This is done without over/underflow as long as the final 05068 result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that 05069 A may be full, upper triangular, lower triangular, upper Hessenberg, 05070 or banded. 05071 05072 Arguments 05073 ========= 05074 05075 TYPE (input) CHARACTER*1 05076 TYPE indices the storage type of the input matrix. 05077 = 'G': A is a full matrix. 05078 = 'L': A is a lower triangular matrix. 05079 = 'U': A is an upper triangular matrix. 05080 = 'H': A is an upper Hessenberg matrix. 05081 = 'B': A is a symmetric band matrix with lower bandwidth KL 05082 and upper bandwidth KU and with the only the lower 05083 half stored. 05084 = 'Q': A is a symmetric band matrix with lower bandwidth KL 05085 and upper bandwidth KU and with the only the upper 05086 half stored. 05087 = 'Z': A is a band matrix with lower bandwidth KL and upper 05088 bandwidth KU. 05089 05090 KL (input) INTEGER 05091 The lower bandwidth of A. Referenced only if TYPE = 'B', 05092 'Q' or 'Z'. 05093 05094 KU (input) INTEGER 05095 The upper bandwidth of A. Referenced only if TYPE = 'B', 05096 'Q' or 'Z'. 05097 05098 CFROM (input) REAL 05099 CTO (input) REAL 05100 The matrix A is multiplied by CTO/CFROM. A(I,J) is computed 05101 without over/underflow if the final result CTO*A(I,J)/CFROM 05102 can be represented without over/underflow. CFROM must be 05103 nonzero. 05104 05105 M (input) INTEGER 05106 The number of rows of the matrix A. M >= 0. 05107 05108 N (input) INTEGER 05109 The number of columns of the matrix A. N >= 0. 05110 05111 A (input/output) REAL array, dimension (LDA,M) 05112 The matrix to be multiplied by CTO/CFROM. See TYPE for the 05113 storage type. 05114 05115 LDA (input) INTEGER 05116 The leading dimension of the array A. LDA >= f2cmax(1,M). 05117 05118 INFO (output) INTEGER 05119 0 - successful exit 05120 <0 - if INFO = -i, the i-th argument had an illegal value. 
05121 05122 ===================================================================== 05123 05124 05125 Test the input arguments 05126 05127 Parameter adjustments */ 05128 /* System generated locals */ 05129 integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; 05130 /* Local variables */ 05131 static logical done; 05132 static real ctoc; 05133 static integer i__, j; 05134 extern logical lsame_(const char *, const char *); 05135 static integer itype, k1, k2, k3, k4; 05136 static real cfrom1; 05137 extern doublereal slamch_(const char *); 05138 static real cfromc; 05139 extern /* Subroutine */ int xerbla_(const char *, integer *); 05140 static real bignum, smlnum, mul, cto1; 05141 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 05142 05143 a_dim1 = *lda; 05144 a_offset = 1 + a_dim1 * 1; 05145 a -= a_offset; 05146 05147 /* Function Body */ 05148 *info = 0; 05149 05150 if (lsame_(type__, "G")) { 05151 itype = 0; 05152 } else if (lsame_(type__, "L")) { 05153 itype = 1; 05154 } else if (lsame_(type__, "U")) { 05155 itype = 2; 05156 } else if (lsame_(type__, "H")) { 05157 itype = 3; 05158 } else if (lsame_(type__, "B")) { 05159 itype = 4; 05160 } else if (lsame_(type__, "Q")) { 05161 itype = 5; 05162 } else if (lsame_(type__, "Z")) { 05163 itype = 6; 05164 } else { 05165 itype = -1; 05166 } 05167 05168 if (itype == -1) { 05169 *info = -1; 05170 } else if (*cfrom == 0.f) { 05171 *info = -4; 05172 } else if (*m < 0) { 05173 *info = -6; 05174 } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { 05175 *info = -7; 05176 } else if (itype <= 3 && *lda < f2cmax(1,*m)) { 05177 *info = -9; 05178 } else if (itype >= 4) { 05179 /* Computing MAX */ 05180 i__1 = *m - 1; 05181 if (*kl < 0 || *kl > f2cmax(i__1,0)) { 05182 *info = -2; 05183 } else /* if(complicated condition) */ { 05184 /* Computing MAX */ 05185 i__1 = *n - 1; 05186 if (*ku < 0 || *ku > f2cmax(i__1,0) || (itype == 4 || itype == 5) && 05187 *kl != *ku) { 05188 *info = -3; 05189 } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * 05190 ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { 05191 *info = -9; 05192 } 05193 } 05194 } 05195 05196 if (*info != 0) { 05197 i__1 = -(*info); 05198 xerbla_("SLASCL", &i__1); 05199 return 0; 05200 } 05201 05202 /* Quick return if possible */ 05203 05204 if (*n == 0 || *m == 0) { 05205 return 0; 05206 } 05207 05208 /* Get machine parameters */ 05209 05210 smlnum = slamch_("S"); 05211 bignum = 1.f / smlnum; 05212 05213 cfromc = *cfrom; 05214 ctoc = *cto; 05215 05216 L10: 05217 cfrom1 = cfromc * smlnum; 05218 cto1 = ctoc / bignum; 05219 if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) { 05220 mul = smlnum; 05221 done = FALSE_; 05222 cfromc = cfrom1; 05223 } else if (dabs(cto1) > dabs(cfromc)) { 05224 mul = bignum; 05225 done = FALSE_; 05226 ctoc = cto1; 05227 } else { 05228 mul = ctoc / cfromc; 05229 done = TRUE_; 05230 } 05231 05232 if (itype == 0) { 05233 05234 /* Full matrix */ 05235 05236 i__1 = *n; 05237 for (j = 1; j <= i__1; ++j) { 05238 i__2 = *m; 05239 for (i__ = 1; i__ <= i__2; ++i__) { 05240 a_ref(i__, j) = a_ref(i__, j) * mul; 05241 /* L20: */ 05242 } 05243 /* L30: */ 05244 } 05245 05246 } else if (itype == 1) { 05247 05248 /* Lower triangular matrix */ 05249 05250 i__1 = *n; 05251 for (j = 1; j <= i__1; ++j) { 05252 i__2 = *m; 05253 for (i__ = j; i__ <= i__2; ++i__) { 05254 a_ref(i__, j) = a_ref(i__, j) * mul; 05255 /* L40: */ 05256 } 05257 /* L50: */ 05258 } 05259 05260 } else if (itype == 2) { 05261 05262 /* Upper triangular matrix */ 05263 05264 i__1 = *n; 05265 for (j = 
1; j <= i__1; ++j) { 05266 i__2 = f2cmin(j,*m); 05267 for (i__ = 1; i__ <= i__2; ++i__) { 05268 a_ref(i__, j) = a_ref(i__, j) * mul; 05269 /* L60: */ 05270 } 05271 /* L70: */ 05272 } 05273 05274 } else if (itype == 3) { 05275 05276 /* Upper Hessenberg matrix */ 05277 05278 i__1 = *n; 05279 for (j = 1; j <= i__1; ++j) { 05280 /* Computing MIN */ 05281 i__3 = j + 1; 05282 i__2 = f2cmin(i__3,*m); 05283 for (i__ = 1; i__ <= i__2; ++i__) { 05284 a_ref(i__, j) = a_ref(i__, j) * mul; 05285 /* L80: */ 05286 } 05287 /* L90: */ 05288 } 05289 05290 } else if (itype == 4) { 05291 05292 /* Lower half of a symmetric band matrix */ 05293 05294 k3 = *kl + 1; 05295 k4 = *n + 1; 05296 i__1 = *n; 05297 for (j = 1; j <= i__1; ++j) { 05298 /* Computing MIN */ 05299 i__3 = k3, i__4 = k4 - j; 05300 i__2 = f2cmin(i__3,i__4); 05301 for (i__ = 1; i__ <= i__2; ++i__) { 05302 a_ref(i__, j) = a_ref(i__, j) * mul; 05303 /* L100: */ 05304 } 05305 /* L110: */ 05306 } 05307 05308 } else if (itype == 5) { 05309 05310 /* Upper half of a symmetric band matrix */ 05311 05312 k1 = *ku + 2; 05313 k3 = *ku + 1; 05314 i__1 = *n; 05315 for (j = 1; j <= i__1; ++j) { 05316 /* Computing MAX */ 05317 i__2 = k1 - j; 05318 i__3 = k3; 05319 for (i__ = f2cmax(i__2,1); i__ <= i__3; ++i__) { 05320 a_ref(i__, j) = a_ref(i__, j) * mul; 05321 /* L120: */ 05322 } 05323 /* L130: */ 05324 } 05325 05326 } else if (itype == 6) { 05327 05328 /* Band matrix */ 05329 05330 k1 = *kl + *ku + 2; 05331 k2 = *kl + 1; 05332 k3 = (*kl << 1) + *ku + 1; 05333 k4 = *kl + *ku + 1 + *m; 05334 i__1 = *n; 05335 for (j = 1; j <= i__1; ++j) { 05336 /* Computing MAX */ 05337 i__3 = k1 - j; 05338 /* Computing MIN */ 05339 i__4 = k3, i__5 = k4 - j; 05340 i__2 = f2cmin(i__4,i__5); 05341 for (i__ = f2cmax(i__3,k2); i__ <= i__2; ++i__) { 05342 a_ref(i__, j) = a_ref(i__, j) * mul; 05343 /* L140: */ 05344 } 05345 /* L150: */ 05346 } 05347 05348 } 05349 05350 if (! done) { 05351 goto L10; 05352 } 05353 05354 return 0; 05355 05356 /* End of SLASCL */ 05357 05358 } /* slascl_ */
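A minimal sketch of the TYPE = 'G' (full matrix) case: every entry is multiplied by CTO/CFROM, with the factor applied in safe stages so no intermediate product over- or underflows.

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer kl = 0, ku = 0;          /* not referenced for TYPE = 'G' */
    integer m = 2, n = 2, lda = 2, info = 0;
    real cfrom = 2.f, cto = 3.f;     /* scale by 3/2 */
    real a[4] = { 1.f, 3.f,          /* column-major A = [1 2; 3 4] */
                  2.f, 4.f };
    slascl_("G", &kl, &ku, &cfrom, &cto, &m, &n, a, &lda, &info);
    std::printf("info = %ld, A = [%g %g; %g %g]  (expected [1.5 3; 4.5 6])\n",
                (long) info, a[0], a[2], a[1], a[3]);
    return 0;
}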
int slaset_ | ( | const char * | uplo, | |
integer * | m, | |||
integer * | n, | |||
real * | alpha, | |||
real * | beta, | |||
real * | a, | |||
integer * | lda | |||
) |
Definition at line 5366 of file lapackblas.cpp.
References a_ref, f2cmin, integer, and lsame_().
Referenced by sgesvd_(), slaed3_(), sstedc_(), and ssteqr_().
05368 { 05369 /* -- LAPACK auxiliary routine (version 3.0) -- 05370 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 05371 Courant Institute, Argonne National Lab, and Rice University 05372 October 31, 1992 05373 05374 05375 Purpose 05376 ======= 05377 05378 SLASET initializes an m-by-n matrix A to BETA on the diagonal and 05379 ALPHA on the offdiagonals. 05380 05381 Arguments 05382 ========= 05383 05384 UPLO (input) CHARACTER*1 05385 Specifies the part of the matrix A to be set. 05386 = 'U': Upper triangular part is set; the strictly lower 05387 triangular part of A is not changed. 05388 = 'L': Lower triangular part is set; the strictly upper 05389 triangular part of A is not changed. 05390 Otherwise: All of the matrix A is set. 05391 05392 M (input) INTEGER 05393 The number of rows of the matrix A. M >= 0. 05394 05395 N (input) INTEGER 05396 The number of columns of the matrix A. N >= 0. 05397 05398 ALPHA (input) REAL 05399 The constant to which the offdiagonal elements are to be set. 05400 05401 BETA (input) REAL 05402 The constant to which the diagonal elements are to be set. 05403 05404 A (input/output) REAL array, dimension (LDA,N) 05405 On exit, the leading m-by-n submatrix of A is set as follows: 05406 05407 if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, 05408 if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, 05409 otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, 05410 05411 and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). 05412 05413 LDA (input) INTEGER 05414 The leading dimension of the array A. LDA >= f2cmax(1,M). 05415 05416 ===================================================================== 05417 05418 05419 Parameter adjustments */ 05420 /* System generated locals */ 05421 integer a_dim1, a_offset, i__1, i__2, i__3; 05422 /* Local variables */ 05423 static integer i__, j; 05424 extern logical lsame_(const char *, const char *); 05425 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 05426 05427 a_dim1 = *lda; 05428 a_offset = 1 + a_dim1 * 1; 05429 a -= a_offset; 05430 05431 /* Function Body */ 05432 if (lsame_(uplo, "U")) { 05433 05434 /* Set the strictly upper triangular or trapezoidal part of the 05435 array to ALPHA. */ 05436 05437 i__1 = *n; 05438 for (j = 2; j <= i__1; ++j) { 05439 /* Computing MIN */ 05440 i__3 = j - 1; 05441 i__2 = f2cmin(i__3,*m); 05442 for (i__ = 1; i__ <= i__2; ++i__) { 05443 a_ref(i__, j) = *alpha; 05444 /* L10: */ 05445 } 05446 /* L20: */ 05447 } 05448 05449 } else if (lsame_(uplo, "L")) { 05450 05451 /* Set the strictly lower triangular or trapezoidal part of the 05452 array to ALPHA. */ 05453 05454 i__1 = f2cmin(*m,*n); 05455 for (j = 1; j <= i__1; ++j) { 05456 i__2 = *m; 05457 for (i__ = j + 1; i__ <= i__2; ++i__) { 05458 a_ref(i__, j) = *alpha; 05459 /* L30: */ 05460 } 05461 /* L40: */ 05462 } 05463 05464 } else { 05465 05466 /* Set the leading m-by-n submatrix to ALPHA. */ 05467 05468 i__1 = *n; 05469 for (j = 1; j <= i__1; ++j) { 05470 i__2 = *m; 05471 for (i__ = 1; i__ <= i__2; ++i__) { 05472 a_ref(i__, j) = *alpha; 05473 /* L50: */ 05474 } 05475 /* L60: */ 05476 } 05477 } 05478 05479 /* Set the first f2cmin(M,N) diagonal elements to BETA. */ 05480 05481 i__1 = f2cmin(*m,*n); 05482 for (i__ = 1; i__ <= i__1; ++i__) { 05483 a_ref(i__, i__) = *beta; 05484 /* L70: */ 05485 } 05486 05487 return 0; 05488 05489 /* End of SLASET */ 05490 05491 } /* slaset_ */
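A minimal sketch: with ALPHA = 0, BETA = 1 and a UPLO value other than 'U' or 'L', slaset_ turns the array into the identity.

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer m = 3, n = 3, lda = 3;
    real alpha = 0.f, beta = 1.f;    /* off-diagonal 0, diagonal 1 */
    real a[9];
    slaset_("All", &m, &n, &alpha, &beta, a, &lda);
    for (integer i = 0; i < m; ++i)
        std::printf("%g %g %g\n", a[i], a[i + lda], a[i + 2 * lda]);
    return 0;
}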
int slasq1_ | ( | integer * | n, | |
real * | d__, | |||
real * | e, | |||
real * | work, | |||
integer * | info | |||
) |
Definition at line 25318 of file lapackblas.cpp.
References dabs, df2cmax, integer, scopy_(), slamch_(), slas2_(), slascl_(), slasq2_(), slasrt_(), sqrt(), and xerbla_().
Referenced by sbdsqr_().
25320 { 25321 /* System generated locals */ 25322 integer i__1, i__2; 25323 real r__1, r__2, r__3; 25324 25325 /* Builtin functions */ 25326 //double sqrt(doublereal); 25327 25328 /* Local variables */ 25329 extern /* Subroutine */ int slas2_(real *, real *, real *, real *, real *) 25330 ; 25331 static integer i__; 25332 static real scale; 25333 static integer iinfo; 25334 static real sigmn, sigmx; 25335 extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 25336 integer *), slasq2_(integer *, real *, integer *); 25337 extern doublereal slamch_(const char *); 25338 static real safmin; 25339 extern /* Subroutine */ int xerbla_(const char *, integer *), slascl_( 25340 const char *, integer *, integer *, real *, real *, integer *, integer * 25341 , real *, integer *, integer *), slasrt_(const char *, integer * 25342 , real *, integer *); 25343 static real eps; 25344 25345 25346 /* -- LAPACK routine (version 3.0) -- 25347 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 25348 Courant Institute, Argonne National Lab, and Rice University 25349 October 31, 1999 25350 25351 25352 Purpose 25353 ======= 25354 25355 SLASQ1 computes the singular values of a real N-by-N bidiagonal 25356 matrix with diagonal D and off-diagonal E. The singular values 25357 are computed to high relative accuracy, in the absence of 25358 denormalization, underflow and overflow. The algorithm was first 25359 presented in 25360 25361 "Accurate singular values and differential qd algorithms" by K. V. 25362 Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, 25363 1994, 25364 25365 and the present implementation is described in "An implementation of 25366 the dqds Algorithm (Positive Case)", LAPACK Working Note. 25367 25368 Arguments 25369 ========= 25370 25371 N (input) INTEGER 25372 The number of rows and columns in the matrix. N >= 0. 25373 25374 D (input/output) REAL array, dimension (N) 25375 On entry, D contains the diagonal elements of the 25376 bidiagonal matrix whose SVD is desired. On normal exit, 25377 D contains the singular values in decreasing order. 25378 25379 E (input/output) REAL array, dimension (N) 25380 On entry, elements E(1:N-1) contain the off-diagonal elements 25381 of the bidiagonal matrix whose SVD is desired. 25382 On exit, E is overwritten. 25383 25384 WORK (workspace) REAL array, dimension (4*N) 25385 25386 INFO (output) INTEGER 25387 = 0: successful exit 25388 < 0: if INFO = -i, the i-th argument had an illegal value 25389 > 0: the algorithm failed 25390 = 1, a split was marked by a positive value in E 25391 = 2, current block of Z not diagonalized after 30*N 25392 iterations (in inner while loop) 25393 = 3, termination criterion of outer while loop not met 25394 (program created more than N unreduced blocks) 25395 25396 ===================================================================== 25397 25398 25399 Parameter adjustments */ 25400 --work; 25401 --e; 25402 --d__; 25403 25404 /* Function Body */ 25405 *info = 0; 25406 if (*n < 0) { 25407 *info = -2; 25408 i__1 = -(*info); 25409 xerbla_("SLASQ1", &i__1); 25410 return 0; 25411 } else if (*n == 0) { 25412 return 0; 25413 } else if (*n == 1) { 25414 d__[1] = dabs(d__[1]); 25415 return 0; 25416 } else if (*n == 2) { 25417 slas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx); 25418 d__[1] = sigmx; 25419 d__[2] = sigmn; 25420 return 0; 25421 } 25422 25423 /* Estimate the largest singular value. 
*/ 25424 25425 sigmx = 0.f; 25426 i__1 = *n - 1; 25427 for (i__ = 1; i__ <= i__1; ++i__) { 25428 d__[i__] = (r__1 = d__[i__], dabs(r__1)); 25429 /* Computing MAX */ 25430 r__2 = sigmx, r__3 = (r__1 = e[i__], dabs(r__1)); 25431 sigmx = df2cmax(r__2,r__3); 25432 /* L10: */ 25433 } 25434 d__[*n] = (r__1 = d__[*n], dabs(r__1)); 25435 25436 /* Early return if SIGMX is zero (matrix is already diagonal). */ 25437 25438 if (sigmx == 0.f) { 25439 slasrt_("D", n, &d__[1], &iinfo); 25440 return 0; 25441 } 25442 25443 i__1 = *n; 25444 for (i__ = 1; i__ <= i__1; ++i__) { 25445 /* Computing MAX */ 25446 r__1 = sigmx, r__2 = d__[i__]; 25447 sigmx = df2cmax(r__1,r__2); 25448 /* L20: */ 25449 } 25450 25451 /* Copy D and E into WORK (in the Z format) and scale (squaring the 25452 input data makes scaling by a power of the radix pointless). */ 25453 25454 eps = slamch_("Precision"); 25455 safmin = slamch_("Safe minimum"); 25456 scale = sqrt(eps / safmin); 25457 scopy_(n, &d__[1], &c__1, &work[1], &c__2); 25458 i__1 = *n - 1; 25459 scopy_(&i__1, &e[1], &c__1, &work[2], &c__2); 25460 i__1 = (*n << 1) - 1; 25461 i__2 = (*n << 1) - 1; 25462 slascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2, 25463 &iinfo); 25464 25465 /* Compute the q's and e's. */ 25466 25467 i__1 = (*n << 1) - 1; 25468 for (i__ = 1; i__ <= i__1; ++i__) { 25469 /* Computing 2nd power */ 25470 r__1 = work[i__]; 25471 work[i__] = r__1 * r__1; 25472 /* L30: */ 25473 } 25474 work[*n * 2] = 0.f; 25475 25476 slasq2_(n, &work[1], info); 25477 25478 if (*info == 0) { 25479 i__1 = *n; 25480 for (i__ = 1; i__ <= i__1; ++i__) { 25481 d__[i__] = sqrt(work[i__]); 25482 /* L40: */ 25483 } 25484 slascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, & 25485 iinfo); 25486 } 25487 25488 return 0; 25489 25490 /* End of SLASQ1 */ 25491 25492 } /* slasq1_ */
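The body above shows the whole reduction performed by SLASQ1: take absolute values, find the largest entry SIGMX, copy D and E interleaved into WORK, scale so the largest entry is near sqrt(eps/safmin), square everything, and hand the result to SLASQ2. The following is a minimal plain-C sketch of that packing step, assuming ordinary double arrays instead of the f2c real type and the 1-based indexing used above; the names are illustrative only.

#include <math.h>
#include <float.h>

/* Pack |d|, |e| into the interleaved qd array z (length 2*n) consumed by the
   dqds routine: q's at even positions, e's at odd positions, scaled by
   scale/sigmx and then squared.  DBL_EPSILON and DBL_MIN stand in for the
   slamch_("Precision") and slamch_("Safe minimum") values used above, and the
   sigmx == 0 (already diagonal) early exit above is omitted here. */
void pack_bidiagonal_for_dqds(int n, const double *d, const double *e,
                              double *z)
{
    double sigmx = 0.0;
    for (int i = 0; i < n; i++)
        sigmx = fmax(sigmx, fabs(d[i]));
    for (int i = 0; i < n - 1; i++)
        sigmx = fmax(sigmx, fabs(e[i]));

    double scale = sqrt(DBL_EPSILON / DBL_MIN);
    for (int i = 0; i < n; i++)              /* q's at z[0], z[2], ...      */
        z[2 * i] = fabs(d[i]) * (scale / sigmx);
    for (int i = 0; i < n - 1; i++)          /* e's at z[1], z[3], ...      */
        z[2 * i + 1] = fabs(e[i]) * (scale / sigmx);
    z[2 * n - 1] = 0.0;                      /* trailing e is always zero   */

    for (int i = 0; i < 2 * n - 1; i++)      /* dqds works on the squares   */
        z[i] *= z[i];
}

After slasq2_ returns, SLASQ1 undoes this preparation by taking square roots of the first N entries and rescaling from SCALE back to SIGMX, as the final slascl_ call above shows.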
Definition at line 25503 of file lapackblas.cpp.
References dabs, df2cmax, df2cmin, f2cmax, ilaenv_(), integer, slamch_(), slasq3_(), slasrt_(), sqrt(), t, and xerbla_().
Referenced by slasq1_().
25504 { 25505 /* System generated locals */ 25506 integer i__1, i__2, i__3; 25507 real r__1, r__2; 25508 25509 /* Builtin functions */ 25510 //double sqrt(doublereal); 25511 25512 /* Local variables */ 25513 static logical ieee; 25514 static integer nbig; 25515 static real dmin__, emin, emax; 25516 static integer ndiv, iter; 25517 static real qmin, temp, qmax, zmax; 25518 static integer splt; 25519 static real d__, e; 25520 static integer k; 25521 static real s, t; 25522 static integer nfail; 25523 static real desig, trace, sigma; 25524 static integer iinfo, i0, i4, n0; 25525 extern /* Subroutine */ int slasq3_(integer *, integer *, real *, integer 25526 *, real *, real *, real *, real *, integer *, integer *, integer * 25527 , logical *); 25528 static integer pp; 25529 extern doublereal slamch_(const char *); 25530 static integer iwhila, iwhilb; 25531 static real oldemn, safmin; 25532 extern /* Subroutine */ int xerbla_(const char *, integer *); 25533 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 25534 integer *, integer *, ftnlen, ftnlen); 25535 extern /* Subroutine */ int slasrt_(const char *, integer *, real *, integer *); 25536 static real eps, tol; 25537 static integer ipn4; 25538 static real tol2; 25539 25540 25541 /* -- LAPACK routine (version 3.0) -- 25542 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 25543 Courant Institute, Argonne National Lab, and Rice University 25544 October 31, 1999 25545 25546 25547 Purpose 25548 ======= 25549 25550 SLASQ2 computes all the eigenvalues of the symmetric positive 25551 definite tridiagonal matrix associated with the qd array Z to high 25552 relative accuracy are computed to high relative accuracy, in the 25553 absence of denormalization, underflow and overflow. 25554 25555 To see the relation of Z to the tridiagonal matrix, let L be a 25556 unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and 25557 let U be an upper bidiagonal matrix with 1's above and diagonal 25558 Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the 25559 symmetric tridiagonal to which it is similar. 25560 25561 Note : SLASQ2 defines a logical variable, IEEE, which is true 25562 on machines which follow ieee-754 floating-point standard in their 25563 handling of infinities and NaNs, and false otherwise. This variable 25564 is passed to SLASQ3. 25565 25566 Arguments 25567 ========= 25568 25569 N (input) INTEGER 25570 The number of rows and columns in the matrix. N >= 0. 25571 25572 Z (workspace) REAL array, dimension ( 4*N ) 25573 On entry Z holds the qd array. On exit, entries 1 to N hold 25574 the eigenvalues in decreasing order, Z( 2*N+1 ) holds the 25575 trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If 25576 N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) 25577 holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of 25578 shifts that failed. 
25579 25580 INFO (output) INTEGER 25581 = 0: successful exit 25582 < 0: if the i-th argument is a scalar and had an illegal 25583 value, then INFO = -i, if the i-th argument is an 25584 array and the j-entry had an illegal value, then 25585 INFO = -(i*100+j) 25586 > 0: the algorithm failed 25587 = 1, a split was marked by a positive value in E 25588 = 2, current block of Z not diagonalized after 30*N 25589 iterations (in inner while loop) 25590 = 3, termination criterion of outer while loop not met 25591 (program created more than N unreduced blocks) 25592 25593 Further Details 25594 =============== 25595 Local Variables: I0:N0 defines a current unreduced segment of Z. 25596 The shifts are accumulated in SIGMA. Iteration count is in ITER. 25597 Ping-pong is controlled by PP (alternates between 0 and 1). 25598 25599 ===================================================================== 25600 25601 25602 Test the input arguments. 25603 (in case SLASQ2 is not called by SLASQ1) 25604 25605 Parameter adjustments */ 25606 --z__; 25607 25608 /* Function Body */ 25609 *info = 0; 25610 eps = slamch_("Precision"); 25611 safmin = slamch_("Safe minimum"); 25612 tol = eps * 100.f; 25613 /* Computing 2nd power */ 25614 r__1 = tol; 25615 tol2 = r__1 * r__1; 25616 25617 if (*n < 0) { 25618 *info = -1; 25619 xerbla_("SLASQ2", &c__1); 25620 return 0; 25621 } else if (*n == 0) { 25622 return 0; 25623 } else if (*n == 1) { 25624 25625 /* 1-by-1 case. */ 25626 25627 if (z__[1] < 0.f) { 25628 *info = -201; 25629 xerbla_("SLASQ2", &c__2); 25630 } 25631 return 0; 25632 } else if (*n == 2) { 25633 25634 /* 2-by-2 case. */ 25635 25636 if (z__[2] < 0.f || z__[3] < 0.f) { 25637 *info = -2; 25638 xerbla_("SLASQ2", &c__2); 25639 return 0; 25640 } else if (z__[3] > z__[1]) { 25641 d__ = z__[3]; 25642 z__[3] = z__[1]; 25643 z__[1] = d__; 25644 } 25645 z__[5] = z__[1] + z__[2] + z__[3]; 25646 if (z__[2] > z__[3] * tol2) { 25647 t = (z__[1] - z__[3] + z__[2]) * .5f; 25648 s = z__[3] * (z__[2] / t); 25649 if (s <= t) { 25650 s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.f) + 1.f))); 25651 } else { 25652 s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s))); 25653 } 25654 t = z__[1] + (s + z__[2]); 25655 z__[3] *= z__[1] / t; 25656 z__[1] = t; 25657 } 25658 z__[2] = z__[3]; 25659 z__[6] = z__[2] + z__[1]; 25660 return 0; 25661 } 25662 25663 /* Check for negative data and compute sums of q's and e's. */ 25664 25665 z__[*n * 2] = 0.f; 25666 emin = z__[2]; 25667 qmax = 0.f; 25668 zmax = 0.f; 25669 d__ = 0.f; 25670 e = 0.f; 25671 25672 i__1 = *n - 1 << 1; 25673 for (k = 1; k <= i__1; k += 2) { 25674 if (z__[k] < 0.f) { 25675 *info = -(k + 200); 25676 xerbla_("SLASQ2", &c__2); 25677 return 0; 25678 } else if (z__[k + 1] < 0.f) { 25679 *info = -(k + 201); 25680 xerbla_("SLASQ2", &c__2); 25681 return 0; 25682 } 25683 d__ += z__[k]; 25684 e += z__[k + 1]; 25685 /* Computing MAX */ 25686 r__1 = qmax, r__2 = z__[k]; 25687 qmax = df2cmax(r__1,r__2); 25688 /* Computing MIN */ 25689 r__1 = emin, r__2 = z__[k + 1]; 25690 emin = df2cmin(r__1,r__2); 25691 /* Computing MAX */ 25692 r__1 = f2cmax(qmax,zmax), r__2 = z__[k + 1]; 25693 zmax = df2cmax(r__1,r__2); 25694 /* L10: */ 25695 } 25696 if (z__[(*n << 1) - 1] < 0.f) { 25697 *info = -((*n << 1) + 199); 25698 xerbla_("SLASQ2", &c__2); 25699 return 0; 25700 } 25701 d__ += z__[(*n << 1) - 1]; 25702 /* Computing MAX */ 25703 r__1 = qmax, r__2 = z__[(*n << 1) - 1]; 25704 qmax = df2cmax(r__1,r__2); 25705 zmax = df2cmax(qmax,zmax); 25706 25707 /* Check for diagonality. 
*/ 25708 25709 if (e == 0.f) { 25710 i__1 = *n; 25711 for (k = 2; k <= i__1; ++k) { 25712 z__[k] = z__[(k << 1) - 1]; 25713 /* L20: */ 25714 } 25715 slasrt_("D", n, &z__[1], &iinfo); 25716 z__[(*n << 1) - 1] = d__; 25717 return 0; 25718 } 25719 25720 trace = d__ + e; 25721 25722 /* Check for zero data. */ 25723 25724 if (trace == 0.f) { 25725 z__[(*n << 1) - 1] = 0.f; 25726 return 0; 25727 } 25728 25729 /* Check whether the machine is IEEE conformable. */ 25730 25731 ieee = ilaenv_(&c__10, "SLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen) 25732 6, (ftnlen)1) == 1 && ilaenv_(&c__11, "SLASQ2", "N", &c__1, &c__2, 25733 &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1; 25734 25735 /* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */ 25736 25737 for (k = *n << 1; k >= 2; k += -2) { 25738 z__[k * 2] = 0.f; 25739 z__[(k << 1) - 1] = z__[k]; 25740 z__[(k << 1) - 2] = 0.f; 25741 z__[(k << 1) - 3] = z__[k - 1]; 25742 /* L30: */ 25743 } 25744 25745 i0 = 1; 25746 n0 = *n; 25747 25748 /* Reverse the qd-array, if warranted. */ 25749 25750 if (z__[(i0 << 2) - 3] * 1.5f < z__[(n0 << 2) - 3]) { 25751 ipn4 = i0 + n0 << 2; 25752 i__1 = i0 + n0 - 1 << 1; 25753 for (i4 = i0 << 2; i4 <= i__1; i4 += 4) { 25754 temp = z__[i4 - 3]; 25755 z__[i4 - 3] = z__[ipn4 - i4 - 3]; 25756 z__[ipn4 - i4 - 3] = temp; 25757 temp = z__[i4 - 1]; 25758 z__[i4 - 1] = z__[ipn4 - i4 - 5]; 25759 z__[ipn4 - i4 - 5] = temp; 25760 /* L40: */ 25761 } 25762 } 25763 25764 /* Initial split checking via dqd and Li's test. */ 25765 25766 pp = 0; 25767 25768 for (k = 1; k <= 2; ++k) { 25769 25770 d__ = z__[(n0 << 2) + pp - 3]; 25771 i__1 = (i0 << 2) + pp; 25772 for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) { 25773 if (z__[i4 - 1] <= tol2 * d__) { 25774 z__[i4 - 1] = 0.f; 25775 d__ = z__[i4 - 3]; 25776 } else { 25777 d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1])); 25778 } 25779 /* L50: */ 25780 } 25781 25782 /* dqd maps Z to ZZ plus Li's test. */ 25783 25784 emin = z__[(i0 << 2) + pp + 1]; 25785 d__ = z__[(i0 << 2) + pp - 3]; 25786 i__1 = (n0 - 1 << 2) + pp; 25787 for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) { 25788 z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1]; 25789 if (z__[i4 - 1] <= tol2 * d__) { 25790 z__[i4 - 1] = 0.f; 25791 z__[i4 - (pp << 1) - 2] = d__; 25792 z__[i4 - (pp << 1)] = 0.f; 25793 d__ = z__[i4 + 1]; 25794 } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] && 25795 safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) { 25796 temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2]; 25797 z__[i4 - (pp << 1)] = z__[i4 - 1] * temp; 25798 d__ *= temp; 25799 } else { 25800 z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - ( 25801 pp << 1) - 2]); 25802 d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]); 25803 } 25804 /* Computing MIN */ 25805 r__1 = emin, r__2 = z__[i4 - (pp << 1)]; 25806 emin = df2cmin(r__1,r__2); 25807 /* L60: */ 25808 } 25809 z__[(n0 << 2) - pp - 2] = d__; 25810 25811 /* Now find qmax. */ 25812 25813 qmax = z__[(i0 << 2) - pp - 2]; 25814 i__1 = (n0 << 2) - pp - 2; 25815 for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) { 25816 /* Computing MAX */ 25817 r__1 = qmax, r__2 = z__[i4]; 25818 qmax = df2cmax(r__1,r__2); 25819 /* L70: */ 25820 } 25821 25822 /* Prepare for the next iteration on K. 
*/ 25823 25824 pp = 1 - pp; 25825 /* L80: */ 25826 } 25827 25828 iter = 2; 25829 nfail = 0; 25830 ndiv = n0 - i0 << 1; 25831 25832 i__1 = *n + 1; 25833 for (iwhila = 1; iwhila <= i__1; ++iwhila) { 25834 if (n0 < 1) { 25835 goto L150; 25836 } 25837 25838 /* While array unfinished do 25839 25840 E(N0) holds the value of SIGMA when submatrix in I0:N0 25841 splits from the rest of the array, but is negated. */ 25842 25843 desig = 0.f; 25844 if (n0 == *n) { 25845 sigma = 0.f; 25846 } else { 25847 sigma = -z__[(n0 << 2) - 1]; 25848 } 25849 if (sigma < 0.f) { 25850 *info = 1; 25851 return 0; 25852 } 25853 25854 /* Find last unreduced submatrix's top index I0, find QMAX and 25855 EMIN. Find Gershgorin-type bound if Q's much greater than E's. */ 25856 25857 emax = 0.f; 25858 if (n0 > i0) { 25859 emin = (r__1 = z__[(n0 << 2) - 5], dabs(r__1)); 25860 } else { 25861 emin = 0.f; 25862 } 25863 qmin = z__[(n0 << 2) - 3]; 25864 qmax = qmin; 25865 for (i4 = n0 << 2; i4 >= 8; i4 += -4) { 25866 if (z__[i4 - 5] <= 0.f) { 25867 goto L100; 25868 } 25869 if (qmin >= emax * 4.f) { 25870 /* Computing MIN */ 25871 r__1 = qmin, r__2 = z__[i4 - 3]; 25872 qmin = df2cmin(r__1,r__2); 25873 /* Computing MAX */ 25874 r__1 = emax, r__2 = z__[i4 - 5]; 25875 emax = df2cmax(r__1,r__2); 25876 } 25877 /* Computing MAX */ 25878 r__1 = qmax, r__2 = z__[i4 - 7] + z__[i4 - 5]; 25879 qmax = df2cmax(r__1,r__2); 25880 /* Computing MIN */ 25881 r__1 = emin, r__2 = z__[i4 - 5]; 25882 emin = df2cmin(r__1,r__2); 25883 /* L90: */ 25884 } 25885 i4 = 4; 25886 25887 L100: 25888 i0 = i4 / 4; 25889 25890 /* Store EMIN for passing to SLASQ3. */ 25891 25892 z__[(n0 << 2) - 1] = emin; 25893 25894 /* Put -(initial shift) into DMIN. 25895 25896 Computing MAX */ 25897 r__1 = 0.f, r__2 = qmin - sqrt(qmin) * 2.f * sqrt(emax); 25898 dmin__ = -df2cmax(r__1,r__2); 25899 25900 /* Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong. */ 25901 25902 pp = 0; 25903 25904 nbig = (n0 - i0 + 1) * 30; 25905 i__2 = nbig; 25906 for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) { 25907 if (i0 > n0) { 25908 goto L130; 25909 } 25910 25911 /* While submatrix unfinished take a good dqds step. */ 25912 25913 slasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & 25914 nfail, &iter, &ndiv, &ieee); 25915 25916 pp = 1 - pp; 25917 25918 /* When EMIN is very small check for splits. 
*/ 25919 25920 if (pp == 0 && n0 - i0 >= 3) { 25921 if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 * 25922 sigma) { 25923 splt = i0 - 1; 25924 qmax = z__[(i0 << 2) - 3]; 25925 emin = z__[(i0 << 2) - 1]; 25926 oldemn = z__[i0 * 4]; 25927 i__3 = n0 - 3 << 2; 25928 for (i4 = i0 << 2; i4 <= i__3; i4 += 4) { 25929 if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <= 25930 tol2 * sigma) { 25931 z__[i4 - 1] = -sigma; 25932 splt = i4 / 4; 25933 qmax = 0.f; 25934 emin = z__[i4 + 3]; 25935 oldemn = z__[i4 + 4]; 25936 } else { 25937 /* Computing MAX */ 25938 r__1 = qmax, r__2 = z__[i4 + 1]; 25939 qmax = df2cmax(r__1,r__2); 25940 /* Computing MIN */ 25941 r__1 = emin, r__2 = z__[i4 - 1]; 25942 emin = df2cmin(r__1,r__2); 25943 /* Computing MIN */ 25944 r__1 = oldemn, r__2 = z__[i4]; 25945 oldemn = df2cmin(r__1,r__2); 25946 } 25947 /* L110: */ 25948 } 25949 z__[(n0 << 2) - 1] = emin; 25950 z__[n0 * 4] = oldemn; 25951 i0 = splt + 1; 25952 } 25953 } 25954 25955 /* L120: */ 25956 } 25957 25958 *info = 2; 25959 return 0; 25960 25961 /* end IWHILB */ 25962 25963 L130: 25964 25965 /* L140: */ 25966 ; 25967 } 25968 25969 *info = 3; 25970 return 0; 25971 25972 /* end IWHILA */ 25973 25974 L150: 25975 25976 /* Move q's to the front. */ 25977 25978 i__1 = *n; 25979 for (k = 2; k <= i__1; ++k) { 25980 z__[k] = z__[(k << 2) - 3]; 25981 /* L160: */ 25982 } 25983 25984 /* Sort and compute sum of eigenvalues. */ 25985 25986 slasrt_("D", n, &z__[1], &iinfo); 25987 25988 e = 0.f; 25989 for (k = *n; k >= 1; --k) { 25990 e += z__[k]; 25991 /* L170: */ 25992 } 25993 25994 /* Store trace, sum(eigenvalues) and information on performance. */ 25995 25996 z__[(*n << 1) + 1] = trace; 25997 z__[(*n << 1) + 2] = e; 25998 z__[(*n << 1) + 3] = (real) iter; 25999 /* Computing 2nd power */ 26000 i__1 = *n; 26001 z__[(*n << 1) + 4] = (real) ndiv / (real) (i__1 * i__1); 26002 z__[(*n << 1) + 5] = nfail * 100.f / (real) iter; 26003 return 0; 26004 26005 /* End of SLASQ2 */ 26006 26007 } /* slasq2_ */
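The N = 2 branch above (and the two-eigenvalue deflation in SLASQ3 below) solves a 2-by-2 qd problem in closed form, choosing between two algebraically equivalent expressions to avoid cancellation. The following is a hedged plain-C restatement of that formula, with DBL_EPSILON standing in for slamch_("Precision") and ordinary doubles instead of real.

#include <math.h>
#include <float.h>

/* Eigenvalues of the 2-by-2 qd problem with entries q1, q2 and coupling e,
   i.e. the roots of lambda^2 - (q1 + q2 + e)*lambda + q1*q2.
   On return *lam1 >= *lam2. */
void qd_2x2_eigenvalues(double q1, double q2, double e,
                        double *lam1, double *lam2)
{
    if (q2 > q1) { double tmp = q1; q1 = q2; q2 = tmp; }   /* ensure q1 >= q2 */

    double tol2 = (100.0 * DBL_EPSILON) * (100.0 * DBL_EPSILON);
    double big = q1, small = q2;
    if (e > q2 * tol2) {                 /* coupling not negligible           */
        double t = 0.5 * (q1 - q2 + e);
        double s = q2 * (e / t);
        if (s <= t)
            s = q2 * (e / (t * (sqrt(s / t + 1.0) + 1.0)));
        else
            s = q2 * (e / (t + sqrt(t) * sqrt(t + s)));
        big   = q1 + (s + e);            /* larger root, stabilized form      */
        small = q2 * (q1 / big);         /* smaller root via the determinant  */
    }
    *lam1 = big;
    *lam2 = small;
}

The larger root comes from a cancellation-free form of the quadratic formula and the smaller one from the determinant q1*q2, which keeps both eigenvalues accurate even when they differ greatly in magnitude.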
int slasq3_ ( integer * i0, integer * n0, real * z__, integer * pp, real * dmin__, real * sigma, real * desig, real * qmax, integer * nfail, integer * iter, integer * ndiv, logical * ieee )
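(The remaining slasq3_ arguments follow the Arguments list in the body below.)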
Definition at line 26011 of file lapackblas.cpp.
References dabs, df2cmax, df2cmin, f2cmax, f2cmin, integer, nn(), slamch_(), slasq4_(), slasq5_(), slasq6_(), sqrt(), and t.
Referenced by slasq2_().
26014 { 26015 /* -- LAPACK auxiliary routine (version 3.0) -- 26016 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 26017 Courant Institute, Argonne National Lab, and Rice University 26018 May 17, 2000 26019 26020 26021 Purpose 26022 ======= 26023 26024 SLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. 26025 In case of failure it changes shifts, and tries again until output 26026 is positive. 26027 26028 Arguments 26029 ========= 26030 26031 I0 (input) INTEGER 26032 First index. 26033 26034 N0 (input) INTEGER 26035 Last index. 26036 26037 Z (input) REAL array, dimension ( 4*N ) 26038 Z holds the qd array. 26039 26040 PP (input) INTEGER 26041 PP=0 for ping, PP=1 for pong. 26042 26043 DMIN (output) REAL 26044 Minimum value of d. 26045 26046 SIGMA (output) REAL 26047 Sum of shifts used in current segment. 26048 26049 DESIG (input/output) REAL 26050 Lower order part of SIGMA 26051 26052 QMAX (input) REAL 26053 Maximum value of q. 26054 26055 NFAIL (output) INTEGER 26056 Number of times shift was too big. 26057 26058 ITER (output) INTEGER 26059 Number of iterations. 26060 26061 NDIV (output) INTEGER 26062 Number of divisions. 26063 26064 TTYPE (output) INTEGER 26065 Shift type. 26066 26067 IEEE (input) LOGICAL 26068 Flag for IEEE or non IEEE arithmetic (passed to SLASQ5). 26069 26070 ===================================================================== 26071 26072 Parameter adjustments */ 26073 /* Initialized data */ 26074 static integer ttype = 0; 26075 static real dmin1 = 0.f; 26076 static real dmin2 = 0.f; 26077 static real dn = 0.f; 26078 static real dn1 = 0.f; 26079 static real dn2 = 0.f; 26080 static real tau = 0.f; 26081 /* System generated locals */ 26082 integer i__1; 26083 real r__1, r__2; 26084 /* Builtin functions */ 26085 //double sqrt(doublereal); 26086 /* Local variables */ 26087 static real temp, s, t; 26088 static integer j4; 26089 extern /* Subroutine */ int slasq4_(integer *, integer *, real *, integer 26090 *, integer *, real *, real *, real *, real *, real *, real *, 26091 real *, integer *), slasq5_(integer *, integer *, real *, integer 26092 *, real *, real *, real *, real *, real *, real *, real *, 26093 logical *), slasq6_(integer *, integer *, real *, integer *, real 26094 *, real *, real *, real *, real *, real *); 26095 static integer nn; 26096 extern doublereal slamch_(const char *); 26097 static real safmin, eps, tol; 26098 static integer n0in, ipn4; 26099 static real tol2; 26100 26101 --z__; 26102 26103 /* Function Body */ 26104 26105 n0in = *n0; 26106 eps = slamch_("Precision"); 26107 safmin = slamch_("Safe minimum"); 26108 tol = eps * 100.f; 26109 /* Computing 2nd power */ 26110 r__1 = tol; 26111 tol2 = r__1 * r__1; 26112 26113 /* Check for deflation. */ 26114 26115 L10: 26116 26117 if (*n0 < *i0) { 26118 return 0; 26119 } 26120 if (*n0 == *i0) { 26121 goto L20; 26122 } 26123 nn = (*n0 << 2) + *pp; 26124 if (*n0 == *i0 + 1) { 26125 goto L40; 26126 } 26127 26128 /* Check whether E(N0-1) is negligible, 1 eigenvalue. */ 26129 26130 if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - 26131 4] > tol2 * z__[nn - 7]) { 26132 goto L30; 26133 } 26134 26135 L20: 26136 26137 z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma; 26138 --(*n0); 26139 goto L10; 26140 26141 /* Check whether E(N0-2) is negligible, 2 eigenvalues. 
*/ 26142 26143 L30: 26144 26145 if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[ 26146 nn - 11]) { 26147 goto L50; 26148 } 26149 26150 L40: 26151 26152 if (z__[nn - 3] > z__[nn - 7]) { 26153 s = z__[nn - 3]; 26154 z__[nn - 3] = z__[nn - 7]; 26155 z__[nn - 7] = s; 26156 } 26157 if (z__[nn - 5] > z__[nn - 3] * tol2) { 26158 t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5f; 26159 s = z__[nn - 3] * (z__[nn - 5] / t); 26160 if (s <= t) { 26161 s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.f) + 1.f))); 26162 } else { 26163 s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s))); 26164 } 26165 t = z__[nn - 7] + (s + z__[nn - 5]); 26166 z__[nn - 3] *= z__[nn - 7] / t; 26167 z__[nn - 7] = t; 26168 } 26169 z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma; 26170 z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma; 26171 *n0 += -2; 26172 goto L10; 26173 26174 L50: 26175 26176 /* Reverse the qd-array, if warranted. */ 26177 26178 if (*dmin__ <= 0.f || *n0 < n0in) { 26179 if (z__[(*i0 << 2) + *pp - 3] * 1.5f < z__[(*n0 << 2) + *pp - 3]) { 26180 ipn4 = *i0 + *n0 << 2; 26181 i__1 = *i0 + *n0 - 1 << 1; 26182 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { 26183 temp = z__[j4 - 3]; 26184 z__[j4 - 3] = z__[ipn4 - j4 - 3]; 26185 z__[ipn4 - j4 - 3] = temp; 26186 temp = z__[j4 - 2]; 26187 z__[j4 - 2] = z__[ipn4 - j4 - 2]; 26188 z__[ipn4 - j4 - 2] = temp; 26189 temp = z__[j4 - 1]; 26190 z__[j4 - 1] = z__[ipn4 - j4 - 5]; 26191 z__[ipn4 - j4 - 5] = temp; 26192 temp = z__[j4]; 26193 z__[j4] = z__[ipn4 - j4 - 4]; 26194 z__[ipn4 - j4 - 4] = temp; 26195 /* L60: */ 26196 } 26197 if (*n0 - *i0 <= 4) { 26198 z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1]; 26199 z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; 26200 } 26201 /* Computing MIN */ 26202 r__1 = dmin2, r__2 = z__[(*n0 << 2) + *pp - 1]; 26203 dmin2 = df2cmin(r__1,r__2); 26204 /* Computing MIN */ 26205 r__1 = z__[(*n0 << 2) + *pp - 1], r__2 = z__[(*i0 << 2) + *pp - 1] 26206 , r__1 = f2cmin(r__1,r__2), r__2 = z__[(*i0 << 2) + *pp + 3]; 26207 z__[(*n0 << 2) + *pp - 1] = df2cmin(r__1,r__2); 26208 /* Computing MIN */ 26209 r__1 = z__[(*n0 << 2) - *pp], r__2 = z__[(*i0 << 2) - *pp], r__1 = 26210 f2cmin(r__1,r__2), r__2 = z__[(*i0 << 2) - *pp + 4]; 26211 z__[(*n0 << 2) - *pp] = df2cmin(r__1,r__2); 26212 /* Computing MAX */ 26213 r__1 = *qmax, r__2 = z__[(*i0 << 2) + *pp - 3], r__1 = f2cmax(r__1, 26214 r__2), r__2 = z__[(*i0 << 2) + *pp + 1]; 26215 *qmax = df2cmax(r__1,r__2); 26216 *dmin__ = 0.f; 26217 } 26218 } 26219 26220 /* L70: 26221 26222 Computing MIN */ 26223 r__1 = z__[(*n0 << 2) + *pp - 1], r__2 = z__[(*n0 << 2) + *pp - 9], r__1 = 26224 f2cmin(r__1,r__2), r__2 = dmin2 + z__[(*n0 << 2) - *pp]; 26225 if (*dmin__ < 0.f || safmin * *qmax < df2cmin(r__1,r__2)) { 26226 26227 /* Choose a shift. */ 26228 26229 slasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, &dmin1, &dmin2, &dn, &dn1, 26230 &dn2, &tau, &ttype); 26231 26232 /* Call dqds until DMIN > 0. */ 26233 26234 L80: 26235 26236 slasq5_(i0, n0, &z__[1], pp, &tau, dmin__, &dmin1, &dmin2, &dn, &dn1, 26237 &dn2, ieee); 26238 26239 *ndiv += *n0 - *i0 + 2; 26240 ++(*iter); 26241 26242 /* Check status. */ 26243 26244 if (*dmin__ >= 0.f && dmin1 > 0.f) { 26245 26246 /* Success. */ 26247 26248 goto L100; 26249 26250 } else if (*dmin__ < 0.f && dmin1 > 0.f && z__[(*n0 - 1 << 2) - *pp] < 26251 tol * (*sigma + dn1) && dabs(dn) < tol * *sigma) { 26252 26253 /* Convergence hidden by negative DN. 
*/ 26254 26255 z__[(*n0 - 1 << 2) - *pp + 2] = 0.f; 26256 *dmin__ = 0.f; 26257 goto L100; 26258 } else if (*dmin__ < 0.f) { 26259 26260 /* TAU too big. Select new TAU and try again. */ 26261 26262 ++(*nfail); 26263 if (ttype < -22) { 26264 26265 /* Failed twice. Play it safe. */ 26266 26267 tau = 0.f; 26268 } else if (dmin1 > 0.f) { 26269 26270 /* Late failure. Gives excellent shift. */ 26271 26272 tau = (tau + *dmin__) * (1.f - eps * 2.f); 26273 ttype += -11; 26274 } else { 26275 26276 /* Early failure. Divide by 4. */ 26277 26278 tau *= .25f; 26279 ttype += -12; 26280 } 26281 goto L80; 26282 } else if (*dmin__ != *dmin__) { 26283 26284 /* NaN. */ 26285 26286 tau = 0.f; 26287 goto L80; 26288 } else { 26289 26290 /* Possible underflow. Play it safe. */ 26291 26292 goto L90; 26293 } 26294 } 26295 26296 /* Risk of underflow. */ 26297 26298 L90: 26299 slasq6_(i0, n0, &z__[1], pp, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2); 26300 *ndiv += *n0 - *i0 + 2; 26301 ++(*iter); 26302 tau = 0.f; 26303 26304 L100: 26305 if (tau < *sigma) { 26306 *desig += tau; 26307 t = *sigma + *desig; 26308 *desig -= t - *sigma; 26309 } else { 26310 t = *sigma + tau; 26311 *desig = *sigma - (t - tau) + *desig; 26312 } 26313 *sigma = t; 26314 26315 return 0; 26316 26317 /* End of SLASQ3 */ 26318 26319 } /* slasq3_ */
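The update at label L100 above accumulates the shifts in SIGMA using a two-piece (compensated) sum, so that many tiny values of TAU are not lost to rounding once SIGMA has grown large. A direct plain-C restatement of just that step, with doubles in place of real:

/* SIGMA is carried as a high part (*sigma) plus a low-order correction
   (*desig); each new shift tau is folded in without losing its low bits. */
void accumulate_shift(double tau, double *sigma, double *desig)
{
    double t;
    if (tau < *sigma) {
        *desig += tau;
        t = *sigma + *desig;
        *desig -= t - *sigma;            /* recover the part lost in the add */
    } else {
        t = *sigma + tau;
        *desig = *sigma - (t - tau) + *desig;
    }
    *sigma = t;
}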
int slasq4_ ( integer * i0, integer * n0, real * z__, integer * pp, integer * n0in, real * dmin__, real * dmin1, real * dmin2, real * dn, real * dn1, real * dn2, real * tau, integer * ttype )
Definition at line 26323 of file lapackblas.cpp.
References df2cmax, df2cmin, integer, nn(), and sqrt().
Referenced by slasq3_().
26326 { 26327 /* Initialized data */ 26328 26329 static real g = 0.f; 26330 26331 /* System generated locals */ 26332 integer i__1; 26333 real r__1, r__2; 26334 26335 /* Builtin functions */ 26336 //double sqrt(doublereal); 26337 26338 /* Local variables */ 26339 static real s, a2, b1, b2; 26340 static integer i4, nn, np; 26341 static real gam, gap1, gap2; 26342 26343 26344 /* -- LAPACK auxiliary routine (version 3.0) -- 26345 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 26346 Courant Institute, Argonne National Lab, and Rice University 26347 October 31, 1999 26348 26349 26350 Purpose 26351 ======= 26352 26353 SLASQ4 computes an approximation TAU to the smallest eigenvalue 26354 using values of d from the previous transform. 26355 26356 I0 (input) INTEGER 26357 First index. 26358 26359 N0 (input) INTEGER 26360 Last index. 26361 26362 Z (input) REAL array, dimension ( 4*N ) 26363 Z holds the qd array. 26364 26365 PP (input) INTEGER 26366 PP=0 for ping, PP=1 for pong. 26367 26368 NOIN (input) INTEGER 26369 The value of N0 at start of EIGTEST. 26370 26371 DMIN (input) REAL 26372 Minimum value of d. 26373 26374 DMIN1 (input) REAL 26375 Minimum value of d, excluding D( N0 ). 26376 26377 DMIN2 (input) REAL 26378 Minimum value of d, excluding D( N0 ) and D( N0-1 ). 26379 26380 DN (input) REAL 26381 d(N) 26382 26383 DN1 (input) REAL 26384 d(N-1) 26385 26386 DN2 (input) REAL 26387 d(N-2) 26388 26389 TAU (output) REAL 26390 This is the shift. 26391 26392 TTYPE (output) INTEGER 26393 Shift type. 26394 26395 Further Details 26396 =============== 26397 CNST1 = 9/16 26398 26399 ===================================================================== 26400 26401 Parameter adjustments */ 26402 --z__; 26403 26404 /* Function Body 26405 26406 A negative DMIN forces the shift to take that absolute value 26407 TTYPE records the type of shift. */ 26408 26409 if (*dmin__ <= 0.f) { 26410 *tau = -(*dmin__); 26411 *ttype = -1; 26412 return 0; 26413 } 26414 26415 nn = (*n0 << 2) + *pp; 26416 if (*n0in == *n0) { 26417 26418 /* No eigenvalues deflated. */ 26419 26420 if (*dmin__ == *dn || *dmin__ == *dn1) { 26421 26422 b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]); 26423 b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]); 26424 a2 = z__[nn - 7] + z__[nn - 5]; 26425 26426 /* Cases 2 and 3. */ 26427 26428 if (*dmin__ == *dn && *dmin1 == *dn1) { 26429 gap2 = *dmin2 - a2 - *dmin2 * .25f; 26430 if (gap2 > 0.f && gap2 > b2) { 26431 gap1 = a2 - *dn - b2 / gap2 * b2; 26432 } else { 26433 gap1 = a2 - *dn - (b1 + b2); 26434 } 26435 if (gap1 > 0.f && gap1 > b1) { 26436 /* Computing MAX */ 26437 r__1 = *dn - b1 / gap1 * b1, r__2 = *dmin__ * .5f; 26438 s = df2cmax(r__1,r__2); 26439 *ttype = -2; 26440 } else { 26441 s = 0.f; 26442 if (*dn > b1) { 26443 s = *dn - b1; 26444 } 26445 if (a2 > b1 + b2) { 26446 /* Computing MIN */ 26447 r__1 = s, r__2 = a2 - (b1 + b2); 26448 s = df2cmin(r__1,r__2); 26449 } 26450 /* Computing MAX */ 26451 r__1 = s, r__2 = *dmin__ * .333f; 26452 s = df2cmax(r__1,r__2); 26453 *ttype = -3; 26454 } 26455 } else { 26456 26457 /* Case 4. 
*/ 26458 26459 *ttype = -4; 26460 s = *dmin__ * .25f; 26461 if (*dmin__ == *dn) { 26462 gam = *dn; 26463 a2 = 0.f; 26464 if (z__[nn - 5] > z__[nn - 7]) { 26465 return 0; 26466 } 26467 b2 = z__[nn - 5] / z__[nn - 7]; 26468 np = nn - 9; 26469 } else { 26470 np = nn - (*pp << 1); 26471 b2 = z__[np - 2]; 26472 gam = *dn1; 26473 if (z__[np - 4] > z__[np - 2]) { 26474 return 0; 26475 } 26476 a2 = z__[np - 4] / z__[np - 2]; 26477 if (z__[nn - 9] > z__[nn - 11]) { 26478 return 0; 26479 } 26480 b2 = z__[nn - 9] / z__[nn - 11]; 26481 np = nn - 13; 26482 } 26483 26484 /* Approximate contribution to norm squared from I < NN-1. */ 26485 26486 a2 += b2; 26487 i__1 = (*i0 << 2) - 1 + *pp; 26488 for (i4 = np; i4 >= i__1; i4 += -4) { 26489 if (b2 == 0.f) { 26490 goto L20; 26491 } 26492 b1 = b2; 26493 if (z__[i4] > z__[i4 - 2]) { 26494 return 0; 26495 } 26496 b2 *= z__[i4] / z__[i4 - 2]; 26497 a2 += b2; 26498 if (df2cmax(b2,b1) * 100.f < a2 || .563f < a2) { 26499 goto L20; 26500 } 26501 /* L10: */ 26502 } 26503 L20: 26504 a2 *= 1.05f; 26505 26506 /* Rayleigh quotient residual bound. */ 26507 26508 if (a2 < .563f) { 26509 s = gam * (1.f - sqrt(a2)) / (a2 + 1.f); 26510 } 26511 } 26512 } else if (*dmin__ == *dn2) { 26513 26514 /* Case 5. */ 26515 26516 *ttype = -5; 26517 s = *dmin__ * .25f; 26518 26519 /* Compute contribution to norm squared from I > NN-2. */ 26520 26521 np = nn - (*pp << 1); 26522 b1 = z__[np - 2]; 26523 b2 = z__[np - 6]; 26524 gam = *dn2; 26525 if (z__[np - 8] > b2 || z__[np - 4] > b1) { 26526 return 0; 26527 } 26528 a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.f); 26529 26530 /* Approximate contribution to norm squared from I < NN-2. */ 26531 26532 if (*n0 - *i0 > 2) { 26533 b2 = z__[nn - 13] / z__[nn - 15]; 26534 a2 += b2; 26535 i__1 = (*i0 << 2) - 1 + *pp; 26536 for (i4 = nn - 17; i4 >= i__1; i4 += -4) { 26537 if (b2 == 0.f) { 26538 goto L40; 26539 } 26540 b1 = b2; 26541 if (z__[i4] > z__[i4 - 2]) { 26542 return 0; 26543 } 26544 b2 *= z__[i4] / z__[i4 - 2]; 26545 a2 += b2; 26546 if (df2cmax(b2,b1) * 100.f < a2 || .563f < a2) { 26547 goto L40; 26548 } 26549 /* L30: */ 26550 } 26551 L40: 26552 a2 *= 1.05f; 26553 } 26554 26555 if (a2 < .563f) { 26556 s = gam * (1.f - sqrt(a2)) / (a2 + 1.f); 26557 } 26558 } else { 26559 26560 /* Case 6, no information to guide us. */ 26561 26562 if (*ttype == -6) { 26563 g += (1.f - g) * .333f; 26564 } else if (*ttype == -18) { 26565 g = .083250000000000005f; 26566 } else { 26567 g = .25f; 26568 } 26569 s = g * *dmin__; 26570 *ttype = -6; 26571 } 26572 26573 } else if (*n0in == *n0 + 1) { 26574 26575 /* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */ 26576 26577 if (*dmin1 == *dn1 && *dmin2 == *dn2) { 26578 26579 /* Cases 7 and 8. 
*/ 26580 26581 *ttype = -7; 26582 s = *dmin1 * .333f; 26583 if (z__[nn - 5] > z__[nn - 7]) { 26584 return 0; 26585 } 26586 b1 = z__[nn - 5] / z__[nn - 7]; 26587 b2 = b1; 26588 if (b2 == 0.f) { 26589 goto L60; 26590 } 26591 i__1 = (*i0 << 2) - 1 + *pp; 26592 for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { 26593 a2 = b1; 26594 if (z__[i4] > z__[i4 - 2]) { 26595 return 0; 26596 } 26597 b1 *= z__[i4] / z__[i4 - 2]; 26598 b2 += b1; 26599 if (df2cmax(b1,a2) * 100.f < b2) { 26600 goto L60; 26601 } 26602 /* L50: */ 26603 } 26604 L60: 26605 b2 = sqrt(b2 * 1.05f); 26606 /* Computing 2nd power */ 26607 r__1 = b2; 26608 a2 = *dmin1 / (r__1 * r__1 + 1.f); 26609 gap2 = *dmin2 * .5f - a2; 26610 if (gap2 > 0.f && gap2 > b2 * a2) { 26611 /* Computing MAX */ 26612 r__1 = s, r__2 = a2 * (1.f - a2 * 1.01f * (b2 / gap2) * b2); 26613 s = df2cmax(r__1,r__2); 26614 } else { 26615 /* Computing MAX */ 26616 r__1 = s, r__2 = a2 * (1.f - b2 * 1.01f); 26617 s = df2cmax(r__1,r__2); 26618 *ttype = -8; 26619 } 26620 } else { 26621 26622 /* Case 9. */ 26623 26624 s = *dmin1 * .25f; 26625 if (*dmin1 == *dn1) { 26626 s = *dmin1 * .5f; 26627 } 26628 *ttype = -9; 26629 } 26630 26631 } else if (*n0in == *n0 + 2) { 26632 26633 /* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. 26634 26635 Cases 10 and 11. */ 26636 26637 if (*dmin2 == *dn2 && z__[nn - 5] * 2.f < z__[nn - 7]) { 26638 *ttype = -10; 26639 s = *dmin2 * .333f; 26640 if (z__[nn - 5] > z__[nn - 7]) { 26641 return 0; 26642 } 26643 b1 = z__[nn - 5] / z__[nn - 7]; 26644 b2 = b1; 26645 if (b2 == 0.f) { 26646 goto L80; 26647 } 26648 i__1 = (*i0 << 2) - 1 + *pp; 26649 for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { 26650 if (z__[i4] > z__[i4 - 2]) { 26651 return 0; 26652 } 26653 b1 *= z__[i4] / z__[i4 - 2]; 26654 b2 += b1; 26655 if (b1 * 100.f < b2) { 26656 goto L80; 26657 } 26658 /* L70: */ 26659 } 26660 L80: 26661 b2 = sqrt(b2 * 1.05f); 26662 /* Computing 2nd power */ 26663 r__1 = b2; 26664 a2 = *dmin2 / (r__1 * r__1 + 1.f); 26665 gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[ 26666 nn - 9]) - a2; 26667 if (gap2 > 0.f && gap2 > b2 * a2) { 26668 /* Computing MAX */ 26669 r__1 = s, r__2 = a2 * (1.f - a2 * 1.01f * (b2 / gap2) * b2); 26670 s = df2cmax(r__1,r__2); 26671 } else { 26672 /* Computing MAX */ 26673 r__1 = s, r__2 = a2 * (1.f - b2 * 1.01f); 26674 s = df2cmax(r__1,r__2); 26675 } 26676 } else { 26677 s = *dmin2 * .25f; 26678 *ttype = -11; 26679 } 26680 } else if (*n0in > *n0 + 2) { 26681 26682 /* Case 12, more than two eigenvalues deflated. No information. */ 26683 26684 s = 0.f; 26685 *ttype = -12; 26686 } 26687 26688 *tau = s; 26689 return 0; 26690 26691 /* End of SLASQ4 */ 26692 26693 } /* slasq4_ */
int slasq5_ ( integer * i0, integer * n0, real * z__, integer * pp, real * tau, real * dmin__, real * dmin1, real * dmin2, real * dn, real * dnm1, real * dnm2, logical * ieee )
Definition at line 26697 of file lapackblas.cpp.
References df2cmin, and integer.
Referenced by slasq3_().
26700 { 26701 /* -- LAPACK auxiliary routine (version 3.0) -- 26702 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 26703 Courant Institute, Argonne National Lab, and Rice University 26704 May 17, 2000 26705 26706 26707 Purpose 26708 ======= 26709 26710 SLASQ5 computes one dqds transform in ping-pong form, one 26711 version for IEEE machines another for non IEEE machines. 26712 26713 Arguments 26714 ========= 26715 26716 I0 (input) INTEGER 26717 First index. 26718 26719 N0 (input) INTEGER 26720 Last index. 26721 26722 Z (input) REAL array, dimension ( 4*N ) 26723 Z holds the qd array. EMIN is stored in Z(4*N0) to avoid 26724 an extra argument. 26725 26726 PP (input) INTEGER 26727 PP=0 for ping, PP=1 for pong. 26728 26729 TAU (input) REAL 26730 This is the shift. 26731 26732 DMIN (output) REAL 26733 Minimum value of d. 26734 26735 DMIN1 (output) REAL 26736 Minimum value of d, excluding D( N0 ). 26737 26738 DMIN2 (output) REAL 26739 Minimum value of d, excluding D( N0 ) and D( N0-1 ). 26740 26741 DN (output) REAL 26742 d(N0), the last value of d. 26743 26744 DNM1 (output) REAL 26745 d(N0-1). 26746 26747 DNM2 (output) REAL 26748 d(N0-2). 26749 26750 IEEE (input) LOGICAL 26751 Flag for IEEE or non IEEE arithmetic. 26752 26753 ===================================================================== 26754 26755 26756 Parameter adjustments */ 26757 /* System generated locals */ 26758 integer i__1; 26759 real r__1, r__2; 26760 /* Local variables */ 26761 static real emin, temp, d__; 26762 static integer j4, j4p2; 26763 26764 --z__; 26765 26766 /* Function Body */ 26767 if (*n0 - *i0 - 1 <= 0) { 26768 return 0; 26769 } 26770 26771 j4 = (*i0 << 2) + *pp - 3; 26772 emin = z__[j4 + 4]; 26773 d__ = z__[j4] - *tau; 26774 *dmin__ = d__; 26775 *dmin1 = -z__[j4]; 26776 26777 if (*ieee) { 26778 26779 /* Code for IEEE arithmetic. */ 26780 26781 if (*pp == 0) { 26782 i__1 = *n0 - 3 << 2; 26783 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { 26784 z__[j4 - 2] = d__ + z__[j4 - 1]; 26785 temp = z__[j4 + 1] / z__[j4 - 2]; 26786 d__ = d__ * temp - *tau; 26787 *dmin__ = df2cmin(*dmin__,d__); 26788 z__[j4] = z__[j4 - 1] * temp; 26789 /* Computing MIN */ 26790 r__1 = z__[j4]; 26791 emin = df2cmin(r__1,emin); 26792 /* L10: */ 26793 } 26794 } else { 26795 i__1 = *n0 - 3 << 2; 26796 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { 26797 z__[j4 - 3] = d__ + z__[j4]; 26798 temp = z__[j4 + 2] / z__[j4 - 3]; 26799 d__ = d__ * temp - *tau; 26800 *dmin__ = df2cmin(*dmin__,d__); 26801 z__[j4 - 1] = z__[j4] * temp; 26802 /* Computing MIN */ 26803 r__1 = z__[j4 - 1]; 26804 emin = df2cmin(r__1,emin); 26805 /* L20: */ 26806 } 26807 } 26808 26809 /* Unroll last two steps. */ 26810 26811 *dnm2 = d__; 26812 *dmin2 = *dmin__; 26813 j4 = (*n0 - 2 << 2) - *pp; 26814 j4p2 = j4 + (*pp << 1) - 1; 26815 z__[j4 - 2] = *dnm2 + z__[j4p2]; 26816 z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); 26817 *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; 26818 *dmin__ = df2cmin(*dmin__,*dnm1); 26819 26820 *dmin1 = *dmin__; 26821 j4 += 4; 26822 j4p2 = j4 + (*pp << 1) - 1; 26823 z__[j4 - 2] = *dnm1 + z__[j4p2]; 26824 z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); 26825 *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; 26826 *dmin__ = df2cmin(*dmin__,*dn); 26827 26828 } else { 26829 26830 /* Code for non IEEE arithmetic. 
*/ 26831 26832 if (*pp == 0) { 26833 i__1 = *n0 - 3 << 2; 26834 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { 26835 z__[j4 - 2] = d__ + z__[j4 - 1]; 26836 if (d__ < 0.f) { 26837 return 0; 26838 } else { 26839 z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); 26840 d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; 26841 } 26842 *dmin__ = df2cmin(*dmin__,d__); 26843 /* Computing MIN */ 26844 r__1 = emin, r__2 = z__[j4]; 26845 emin = df2cmin(r__1,r__2); 26846 /* L30: */ 26847 } 26848 } else { 26849 i__1 = *n0 - 3 << 2; 26850 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { 26851 z__[j4 - 3] = d__ + z__[j4]; 26852 if (d__ < 0.f) { 26853 return 0; 26854 } else { 26855 z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); 26856 d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; 26857 } 26858 *dmin__ = df2cmin(*dmin__,d__); 26859 /* Computing MIN */ 26860 r__1 = emin, r__2 = z__[j4 - 1]; 26861 emin = df2cmin(r__1,r__2); 26862 /* L40: */ 26863 } 26864 } 26865 26866 /* Unroll last two steps. */ 26867 26868 *dnm2 = d__; 26869 *dmin2 = *dmin__; 26870 j4 = (*n0 - 2 << 2) - *pp; 26871 j4p2 = j4 + (*pp << 1) - 1; 26872 z__[j4 - 2] = *dnm2 + z__[j4p2]; 26873 if (*dnm2 < 0.f) { 26874 return 0; 26875 } else { 26876 z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); 26877 *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; 26878 } 26879 *dmin__ = df2cmin(*dmin__,*dnm1); 26880 26881 *dmin1 = *dmin__; 26882 j4 += 4; 26883 j4p2 = j4 + (*pp << 1) - 1; 26884 z__[j4 - 2] = *dnm1 + z__[j4p2]; 26885 if (*dnm1 < 0.f) { 26886 return 0; 26887 } else { 26888 z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); 26889 *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; 26890 } 26891 *dmin__ = df2cmin(*dmin__,*dn); 26892 26893 } 26894 26895 z__[j4 + 2] = *dn; 26896 z__[(*n0 << 2) - *pp] = emin; 26897 return 0; 26898 26899 /* End of SLASQ5 */ 26900 26901 } /* slasq5_ */
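Stripped of the interleaved Z storage, the ping-pong layout and the unrolled last two steps, the loop above is the basic shifted dqds recurrence. A hedged plain-C sketch over separate q and e arrays (illustrative names, double precision, IEEE branch only):

/* One shifted dqds transform: given q[0..n-1], e[0..n-2] and a shift tau,
   produce qhat, ehat and return the smallest intermediate d. */
double dqds_step(int n, const double *q, const double *e,
                 double *qhat, double *ehat, double tau)
{
    double d = q[0] - tau;
    double dmin = d;
    for (int i = 0; i < n - 1; i++) {
        qhat[i] = d + e[i];                  /* new q                         */
        double t = q[i + 1] / qhat[i];
        ehat[i] = e[i] * t;                  /* new e                         */
        d = d * t - tau;                     /* next d                        */
        if (d < dmin) dmin = d;
    }
    qhat[n - 1] = d;
    return dmin;
}

SLASQ3 inspects the returned minimum d: a negative value means the shift TAU was too large and a smaller one must be tried, which is exactly the failure path handled after the slasq5_ call above.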
int slasq6_ ( integer * i0, integer * n0, real * z__, integer * pp, real * dmin__, real * dmin1, real * dmin2, real * dn, real * dnm1, real * dnm2 )
Definition at line 26905 of file lapackblas.cpp.
References df2cmin, integer, and slamch_().
Referenced by slasq3_().
26908 { 26909 /* System generated locals */ 26910 integer i__1; 26911 real r__1, r__2; 26912 26913 /* Local variables */ 26914 static real emin, temp, d__; 26915 static integer j4; 26916 extern doublereal slamch_(const char *); 26917 static real safmin; 26918 static integer j4p2; 26919 26920 26921 /* -- LAPACK auxiliary routine (version 3.0) -- 26922 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 26923 Courant Institute, Argonne National Lab, and Rice University 26924 October 31, 1999 26925 26926 26927 Purpose 26928 ======= 26929 26930 SLASQ6 computes one dqd (shift equal to zero) transform in 26931 ping-pong form, with protection against underflow and overflow. 26932 26933 Arguments 26934 ========= 26935 26936 I0 (input) INTEGER 26937 First index. 26938 26939 N0 (input) INTEGER 26940 Last index. 26941 26942 Z (input) REAL array, dimension ( 4*N ) 26943 Z holds the qd array. EMIN is stored in Z(4*N0) to avoid 26944 an extra argument. 26945 26946 PP (input) INTEGER 26947 PP=0 for ping, PP=1 for pong. 26948 26949 DMIN (output) REAL 26950 Minimum value of d. 26951 26952 DMIN1 (output) REAL 26953 Minimum value of d, excluding D( N0 ). 26954 26955 DMIN2 (output) REAL 26956 Minimum value of d, excluding D( N0 ) and D( N0-1 ). 26957 26958 DN (output) REAL 26959 d(N0), the last value of d. 26960 26961 DNM1 (output) REAL 26962 d(N0-1). 26963 26964 DNM2 (output) REAL 26965 d(N0-2). 26966 26967 ===================================================================== 26968 26969 26970 Parameter adjustments */ 26971 --z__; 26972 26973 /* Function Body */ 26974 if (*n0 - *i0 - 1 <= 0) { 26975 return 0; 26976 } 26977 26978 safmin = slamch_("Safe minimum"); 26979 j4 = (*i0 << 2) + *pp - 3; 26980 emin = z__[j4 + 4]; 26981 d__ = z__[j4]; 26982 *dmin__ = d__; 26983 26984 if (*pp == 0) { 26985 i__1 = *n0 - 3 << 2; 26986 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { 26987 z__[j4 - 2] = d__ + z__[j4 - 1]; 26988 if (z__[j4 - 2] == 0.f) { 26989 z__[j4] = 0.f; 26990 d__ = z__[j4 + 1]; 26991 *dmin__ = d__; 26992 emin = 0.f; 26993 } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 26994 - 2] < z__[j4 + 1]) { 26995 temp = z__[j4 + 1] / z__[j4 - 2]; 26996 z__[j4] = z__[j4 - 1] * temp; 26997 d__ *= temp; 26998 } else { 26999 z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); 27000 d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]); 27001 } 27002 *dmin__ = df2cmin(*dmin__,d__); 27003 /* Computing MIN */ 27004 r__1 = emin, r__2 = z__[j4]; 27005 emin = df2cmin(r__1,r__2); 27006 /* L10: */ 27007 } 27008 } else { 27009 i__1 = *n0 - 3 << 2; 27010 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { 27011 z__[j4 - 3] = d__ + z__[j4]; 27012 if (z__[j4 - 3] == 0.f) { 27013 z__[j4 - 1] = 0.f; 27014 d__ = z__[j4 + 2]; 27015 *dmin__ = d__; 27016 emin = 0.f; 27017 } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 27018 - 3] < z__[j4 + 2]) { 27019 temp = z__[j4 + 2] / z__[j4 - 3]; 27020 z__[j4 - 1] = z__[j4] * temp; 27021 d__ *= temp; 27022 } else { 27023 z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); 27024 d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]); 27025 } 27026 *dmin__ = df2cmin(*dmin__,d__); 27027 /* Computing MIN */ 27028 r__1 = emin, r__2 = z__[j4 - 1]; 27029 emin = df2cmin(r__1,r__2); 27030 /* L20: */ 27031 } 27032 } 27033 27034 /* Unroll last two steps. 
*/ 27035 27036 *dnm2 = d__; 27037 *dmin2 = *dmin__; 27038 j4 = (*n0 - 2 << 2) - *pp; 27039 j4p2 = j4 + (*pp << 1) - 1; 27040 z__[j4 - 2] = *dnm2 + z__[j4p2]; 27041 if (z__[j4 - 2] == 0.f) { 27042 z__[j4] = 0.f; 27043 *dnm1 = z__[j4p2 + 2]; 27044 *dmin__ = *dnm1; 27045 emin = 0.f; 27046 } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < 27047 z__[j4p2 + 2]) { 27048 temp = z__[j4p2 + 2] / z__[j4 - 2]; 27049 z__[j4] = z__[j4p2] * temp; 27050 *dnm1 = *dnm2 * temp; 27051 } else { 27052 z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); 27053 *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]); 27054 } 27055 *dmin__ = df2cmin(*dmin__,*dnm1); 27056 27057 *dmin1 = *dmin__; 27058 j4 += 4; 27059 j4p2 = j4 + (*pp << 1) - 1; 27060 z__[j4 - 2] = *dnm1 + z__[j4p2]; 27061 if (z__[j4 - 2] == 0.f) { 27062 z__[j4] = 0.f; 27063 *dn = z__[j4p2 + 2]; 27064 *dmin__ = *dn; 27065 emin = 0.f; 27066 } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < 27067 z__[j4p2 + 2]) { 27068 temp = z__[j4p2 + 2] / z__[j4 - 2]; 27069 z__[j4] = z__[j4p2] * temp; 27070 *dn = *dnm1 * temp; 27071 } else { 27072 z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); 27073 *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]); 27074 } 27075 *dmin__ = df2cmin(*dmin__,*dn); 27076 27077 z__[j4 + 2] = *dn; 27078 z__[(*n0 << 2) - *pp] = emin; 27079 return 0; 27080 27081 /* End of SLASQ6 */ 27082 27083 } /* slasq6_ */
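The distinctive part of SLASQ6 is the three-way guard around each division: an exact zero denominator restarts the segment, a safely representable ratio is reused for both updates, and otherwise the products are re-associated so that neither overflow nor underflow can occur. A hedged plain-C restatement of one such update, with DBL_MIN standing in for slamch_("Safe minimum"):

#include <float.h>

/* One guarded zero-shift (dqd) update: from the current d, e and the next q,
   form the new qhat = d + e, the new ehat and the next d. */
void guarded_dqd_update(double d, double e, double qnext,
                        double *qhat, double *ehat, double *dnext)
{
    double safmin = DBL_MIN;                 /* role of slamch_("Safe minimum") */
    *qhat = d + e;
    if (*qhat == 0.0) {                      /* exact split: restart            */
        *ehat = 0.0;
        *dnext = qnext;
    } else if (safmin * qnext < *qhat && safmin * *qhat < qnext) {
        double t = qnext / *qhat;            /* ratio is safe to form           */
        *ehat = e * t;
        *dnext = d * t;
    } else {                                 /* re-associate to stay in range   */
        *ehat = qnext * (e / *qhat);
        *dnext = qnext * (d / *qhat);
    }
}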
int slasr_ ( const char * side, const char * pivot, const char * direct, integer * m, integer * n, real * c__, real * s, real * a, integer * lda )
Definition at line 5499 of file lapackblas.cpp.
References a_ref, f2cmax, integer, lsame_(), and xerbla_().
Referenced by sbdsqr_(), and ssteqr_().
05501 { 05502 /* -- LAPACK auxiliary routine (version 3.0) -- 05503 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 05504 Courant Institute, Argonne National Lab, and Rice University 05505 October 31, 1992 05506 05507 05508 Purpose 05509 ======= 05510 05511 SLASR performs the transformation 05512 05513 A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) 05514 05515 A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) 05516 05517 where A is an m by n real matrix and P is an orthogonal matrix, 05518 consisting of a sequence of plane rotations determined by the 05519 parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' 05520 and z = n when SIDE = 'R' or 'r' ): 05521 05522 When DIRECT = 'F' or 'f' ( Forward sequence ) then 05523 05524 P = P( z - 1 )*...*P( 2 )*P( 1 ), 05525 05526 and when DIRECT = 'B' or 'b' ( Backward sequence ) then 05527 05528 P = P( 1 )*P( 2 )*...*P( z - 1 ), 05529 05530 where P( k ) is a plane rotation matrix for the following planes: 05531 05532 when PIVOT = 'V' or 'v' ( Variable pivot ), 05533 the plane ( k, k + 1 ) 05534 05535 when PIVOT = 'T' or 't' ( Top pivot ), 05536 the plane ( 1, k + 1 ) 05537 05538 when PIVOT = 'B' or 'b' ( Bottom pivot ), 05539 the plane ( k, z ) 05540 05541 c( k ) and s( k ) must contain the cosine and sine that define the 05542 matrix P( k ). The two by two plane rotation part of the matrix 05543 P( k ), R( k ), is assumed to be of the form 05544 05545 R( k ) = ( c( k ) s( k ) ). 05546 ( -s( k ) c( k ) ) 05547 05548 This version vectorises across rows of the array A when SIDE = 'L'. 05549 05550 Arguments 05551 ========= 05552 05553 SIDE (input) CHARACTER*1 05554 Specifies whether the plane rotation matrix P is applied to 05555 A on the left or the right. 05556 = 'L': Left, compute A := P*A 05557 = 'R': Right, compute A:= A*P' 05558 05559 DIRECT (input) CHARACTER*1 05560 Specifies whether P is a forward or backward sequence of 05561 plane rotations. 05562 = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) 05563 = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) 05564 05565 PIVOT (input) CHARACTER*1 05566 Specifies the plane for which P(k) is a plane rotation 05567 matrix. 05568 = 'V': Variable pivot, the plane (k,k+1) 05569 = 'T': Top pivot, the plane (1,k+1) 05570 = 'B': Bottom pivot, the plane (k,z) 05571 05572 M (input) INTEGER 05573 The number of rows of the matrix A. If m <= 1, an immediate 05574 return is effected. 05575 05576 N (input) INTEGER 05577 The number of columns of the matrix A. If n <= 1, an 05578 immediate return is effected. 05579 05580 C, S (input) REAL arrays, dimension 05581 (M-1) if SIDE = 'L' 05582 (N-1) if SIDE = 'R' 05583 c(k) and s(k) contain the cosine and sine that define the 05584 matrix P(k). The two by two plane rotation part of the 05585 matrix P(k), R(k), is assumed to be of the form 05586 R( k ) = ( c( k ) s( k ) ). 05587 ( -s( k ) c( k ) ) 05588 05589 A (input/output) REAL array, dimension (LDA,N) 05590 The m by n matrix A. On exit, A is overwritten by P*A if 05591 SIDE = 'R' or by A*P' if SIDE = 'L'. 05592 05593 LDA (input) INTEGER 05594 The leading dimension of the array A. LDA >= f2cmax(1,M). 
05595 05596 ===================================================================== 05597 05598 05599 Test the input parameters 05600 05601 Parameter adjustments */ 05602 /* System generated locals */ 05603 integer a_dim1, a_offset, i__1, i__2; 05604 /* Local variables */ 05605 static integer info; 05606 static real temp; 05607 static integer i__, j; 05608 extern logical lsame_(const char *, const char *); 05609 static real ctemp, stemp; 05610 extern /* Subroutine */ int xerbla_(const char *, integer *); 05611 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 05612 05613 --c__; 05614 --s; 05615 a_dim1 = *lda; 05616 a_offset = 1 + a_dim1 * 1; 05617 a -= a_offset; 05618 05619 /* Function Body */ 05620 info = 0; 05621 if (! (lsame_(side, "L") || lsame_(side, "R"))) { 05622 info = 1; 05623 } else if (! (lsame_(pivot, "V") || lsame_(pivot, 05624 "T") || lsame_(pivot, "B"))) { 05625 info = 2; 05626 } else if (! (lsame_(direct, "F") || lsame_(direct, 05627 "B"))) { 05628 info = 3; 05629 } else if (*m < 0) { 05630 info = 4; 05631 } else if (*n < 0) { 05632 info = 5; 05633 } else if (*lda < f2cmax(1,*m)) { 05634 info = 9; 05635 } 05636 if (info != 0) { 05637 xerbla_("SLASR ", &info); 05638 return 0; 05639 } 05640 05641 /* Quick return if possible */ 05642 05643 if (*m == 0 || *n == 0) { 05644 return 0; 05645 } 05646 if (lsame_(side, "L")) { 05647 05648 /* Form P * A */ 05649 05650 if (lsame_(pivot, "V")) { 05651 if (lsame_(direct, "F")) { 05652 i__1 = *m - 1; 05653 for (j = 1; j <= i__1; ++j) { 05654 ctemp = c__[j]; 05655 stemp = s[j]; 05656 if (ctemp != 1.f || stemp != 0.f) { 05657 i__2 = *n; 05658 for (i__ = 1; i__ <= i__2; ++i__) { 05659 temp = a_ref(j + 1, i__); 05660 a_ref(j + 1, i__) = ctemp * temp - stemp * a_ref( 05661 j, i__); 05662 a_ref(j, i__) = stemp * temp + ctemp * a_ref(j, 05663 i__); 05664 /* L10: */ 05665 } 05666 } 05667 /* L20: */ 05668 } 05669 } else if (lsame_(direct, "B")) { 05670 for (j = *m - 1; j >= 1; --j) { 05671 ctemp = c__[j]; 05672 stemp = s[j]; 05673 if (ctemp != 1.f || stemp != 0.f) { 05674 i__1 = *n; 05675 for (i__ = 1; i__ <= i__1; ++i__) { 05676 temp = a_ref(j + 1, i__); 05677 a_ref(j + 1, i__) = ctemp * temp - stemp * a_ref( 05678 j, i__); 05679 a_ref(j, i__) = stemp * temp + ctemp * a_ref(j, 05680 i__); 05681 /* L30: */ 05682 } 05683 } 05684 /* L40: */ 05685 } 05686 } 05687 } else if (lsame_(pivot, "T")) { 05688 if (lsame_(direct, "F")) { 05689 i__1 = *m; 05690 for (j = 2; j <= i__1; ++j) { 05691 ctemp = c__[j - 1]; 05692 stemp = s[j - 1]; 05693 if (ctemp != 1.f || stemp != 0.f) { 05694 i__2 = *n; 05695 for (i__ = 1; i__ <= i__2; ++i__) { 05696 temp = a_ref(j, i__); 05697 a_ref(j, i__) = ctemp * temp - stemp * a_ref(1, 05698 i__); 05699 a_ref(1, i__) = stemp * temp + ctemp * a_ref(1, 05700 i__); 05701 /* L50: */ 05702 } 05703 } 05704 /* L60: */ 05705 } 05706 } else if (lsame_(direct, "B")) { 05707 for (j = *m; j >= 2; --j) { 05708 ctemp = c__[j - 1]; 05709 stemp = s[j - 1]; 05710 if (ctemp != 1.f || stemp != 0.f) { 05711 i__1 = *n; 05712 for (i__ = 1; i__ <= i__1; ++i__) { 05713 temp = a_ref(j, i__); 05714 a_ref(j, i__) = ctemp * temp - stemp * a_ref(1, 05715 i__); 05716 a_ref(1, i__) = stemp * temp + ctemp * a_ref(1, 05717 i__); 05718 /* L70: */ 05719 } 05720 } 05721 /* L80: */ 05722 } 05723 } 05724 } else if (lsame_(pivot, "B")) { 05725 if (lsame_(direct, "F")) { 05726 i__1 = *m - 1; 05727 for (j = 1; j <= i__1; ++j) { 05728 ctemp = c__[j]; 05729 stemp = s[j]; 05730 if (ctemp != 1.f || stemp != 0.f) { 05731 i__2 = *n; 05732 for (i__ = 1; i__ <= i__2; ++i__) { 05733 
temp = a_ref(j, i__); 05734 a_ref(j, i__) = stemp * a_ref(*m, i__) + ctemp * 05735 temp; 05736 a_ref(*m, i__) = ctemp * a_ref(*m, i__) - stemp * 05737 temp; 05738 /* L90: */ 05739 } 05740 } 05741 /* L100: */ 05742 } 05743 } else if (lsame_(direct, "B")) { 05744 for (j = *m - 1; j >= 1; --j) { 05745 ctemp = c__[j]; 05746 stemp = s[j]; 05747 if (ctemp != 1.f || stemp != 0.f) { 05748 i__1 = *n; 05749 for (i__ = 1; i__ <= i__1; ++i__) { 05750 temp = a_ref(j, i__); 05751 a_ref(j, i__) = stemp * a_ref(*m, i__) + ctemp * 05752 temp; 05753 a_ref(*m, i__) = ctemp * a_ref(*m, i__) - stemp * 05754 temp; 05755 /* L110: */ 05756 } 05757 } 05758 /* L120: */ 05759 } 05760 } 05761 } 05762 } else if (lsame_(side, "R")) { 05763 05764 /* Form A * P' */ 05765 05766 if (lsame_(pivot, "V")) { 05767 if (lsame_(direct, "F")) { 05768 i__1 = *n - 1; 05769 for (j = 1; j <= i__1; ++j) { 05770 ctemp = c__[j]; 05771 stemp = s[j]; 05772 if (ctemp != 1.f || stemp != 0.f) { 05773 i__2 = *m; 05774 for (i__ = 1; i__ <= i__2; ++i__) { 05775 temp = a_ref(i__, j + 1); 05776 a_ref(i__, j + 1) = ctemp * temp - stemp * a_ref( 05777 i__, j); 05778 a_ref(i__, j) = stemp * temp + ctemp * a_ref(i__, 05779 j); 05780 /* L130: */ 05781 } 05782 } 05783 /* L140: */ 05784 } 05785 } else if (lsame_(direct, "B")) { 05786 for (j = *n - 1; j >= 1; --j) { 05787 ctemp = c__[j]; 05788 stemp = s[j]; 05789 if (ctemp != 1.f || stemp != 0.f) { 05790 i__1 = *m; 05791 for (i__ = 1; i__ <= i__1; ++i__) { 05792 temp = a_ref(i__, j + 1); 05793 a_ref(i__, j + 1) = ctemp * temp - stemp * a_ref( 05794 i__, j); 05795 a_ref(i__, j) = stemp * temp + ctemp * a_ref(i__, 05796 j); 05797 /* L150: */ 05798 } 05799 } 05800 /* L160: */ 05801 } 05802 } 05803 } else if (lsame_(pivot, "T")) { 05804 if (lsame_(direct, "F")) { 05805 i__1 = *n; 05806 for (j = 2; j <= i__1; ++j) { 05807 ctemp = c__[j - 1]; 05808 stemp = s[j - 1]; 05809 if (ctemp != 1.f || stemp != 0.f) { 05810 i__2 = *m; 05811 for (i__ = 1; i__ <= i__2; ++i__) { 05812 temp = a_ref(i__, j); 05813 a_ref(i__, j) = ctemp * temp - stemp * a_ref(i__, 05814 1); 05815 a_ref(i__, 1) = stemp * temp + ctemp * a_ref(i__, 05816 1); 05817 /* L170: */ 05818 } 05819 } 05820 /* L180: */ 05821 } 05822 } else if (lsame_(direct, "B")) { 05823 for (j = *n; j >= 2; --j) { 05824 ctemp = c__[j - 1]; 05825 stemp = s[j - 1]; 05826 if (ctemp != 1.f || stemp != 0.f) { 05827 i__1 = *m; 05828 for (i__ = 1; i__ <= i__1; ++i__) { 05829 temp = a_ref(i__, j); 05830 a_ref(i__, j) = ctemp * temp - stemp * a_ref(i__, 05831 1); 05832 a_ref(i__, 1) = stemp * temp + ctemp * a_ref(i__, 05833 1); 05834 /* L190: */ 05835 } 05836 } 05837 /* L200: */ 05838 } 05839 } 05840 } else if (lsame_(pivot, "B")) { 05841 if (lsame_(direct, "F")) { 05842 i__1 = *n - 1; 05843 for (j = 1; j <= i__1; ++j) { 05844 ctemp = c__[j]; 05845 stemp = s[j]; 05846 if (ctemp != 1.f || stemp != 0.f) { 05847 i__2 = *m; 05848 for (i__ = 1; i__ <= i__2; ++i__) { 05849 temp = a_ref(i__, j); 05850 a_ref(i__, j) = stemp * a_ref(i__, *n) + ctemp * 05851 temp; 05852 a_ref(i__, *n) = ctemp * a_ref(i__, *n) - stemp * 05853 temp; 05854 /* L210: */ 05855 } 05856 } 05857 /* L220: */ 05858 } 05859 } else if (lsame_(direct, "B")) { 05860 for (j = *n - 1; j >= 1; --j) { 05861 ctemp = c__[j]; 05862 stemp = s[j]; 05863 if (ctemp != 1.f || stemp != 0.f) { 05864 i__1 = *m; 05865 for (i__ = 1; i__ <= i__1; ++i__) { 05866 temp = a_ref(i__, j); 05867 a_ref(i__, j) = stemp * a_ref(i__, *n) + ctemp * 05868 temp; 05869 a_ref(i__, *n) = ctemp * a_ref(i__, *n) - stemp * 05870 temp; 05871 /* L230: */ 05872 
} 05873 } 05874 /* L240: */ 05875 } 05876 } 05877 } 05878 } 05879 05880 return 0; 05881 05882 /* End of SLASR */ 05883 05884 } /* slasr_ */
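As a concrete illustration of the SIDE = 'L', PIVOT = 'V', DIRECT = 'F' case above, the sketch below applies the rotations P(1), ..., P(m-1) from the left to a row-major double matrix. The library code itself works column-major on the f2c real type, so this is only a restatement of the arithmetic, not a drop-in replacement.

/* Apply plane rotations P(k), each acting on rows k and k+1, to the m-by-n
   matrix A stored row-major.  c[k], s[k] define R(k) = ( c  s ; -s  c ). */
void apply_left_rotations(int m, int n, const double *c, const double *s,
                          double *A)
{
    for (int k = 0; k < m - 1; k++) {
        if (c[k] == 1.0 && s[k] == 0.0) continue;     /* identity rotation */
        for (int j = 0; j < n; j++) {
            double top = A[k * n + j];
            double bot = A[(k + 1) * n + j];
            A[(k + 1) * n + j] = c[k] * bot - s[k] * top;
            A[k * n + j]       = s[k] * bot + c[k] * top;
        }
    }
}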
Definition at line 5892 of file lapackblas.cpp.
References integer, lsame_(), stack_ref, and xerbla_().
Referenced by slasq1_(), slasq2_(), sstedc_(), ssteqr_(), and ssterf_().
05893 { 05894 /* -- LAPACK routine (version 3.0) -- 05895 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 05896 Courant Institute, Argonne National Lab, and Rice University 05897 September 30, 1994 05898 05899 05900 Purpose 05901 ======= 05902 05903 Sort the numbers in D in increasing order (if ID = 'I') or 05904 in decreasing order (if ID = 'D' ). 05905 05906 Use Quick Sort, reverting to Insertion sort on arrays of 05907 size <= 20. Dimension of STACK limits N to about 2**32. 05908 05909 Arguments 05910 ========= 05911 05912 ID (input) CHARACTER*1 05913 = 'I': sort D in increasing order; 05914 = 'D': sort D in decreasing order. 05915 05916 N (input) INTEGER 05917 The length of the array D. 05918 05919 D (input/output) REAL array, dimension (N) 05920 On entry, the array to be sorted. 05921 On exit, D has been sorted into increasing order 05922 (D(1) <= ... <= D(N) ) or into decreasing order 05923 (D(1) >= ... >= D(N) ), depending on ID. 05924 05925 INFO (output) INTEGER 05926 = 0: successful exit 05927 < 0: if INFO = -i, the i-th argument had an illegal value 05928 05929 ===================================================================== 05930 05931 05932 Test the input paramters. 05933 05934 Parameter adjustments */ 05935 /* System generated locals */ 05936 integer i__1, i__2; 05937 /* Local variables */ 05938 static integer endd, i__, j; 05939 extern logical lsame_(const char *, const char *); 05940 static integer stack[64] /* was [2][32] */; 05941 static real dmnmx, d1, d2, d3; 05942 static integer start; 05943 extern /* Subroutine */ int xerbla_(const char *, integer *); 05944 static integer stkpnt, dir; 05945 static real tmp; 05946 #define stack_ref(a_1,a_2) stack[(a_2)*2 + a_1 - 3] 05947 05948 --d__; 05949 05950 /* Function Body */ 05951 *info = 0; 05952 dir = -1; 05953 if (lsame_(id, "D")) { 05954 dir = 0; 05955 } else if (lsame_(id, "I")) { 05956 dir = 1; 05957 } 05958 if (dir == -1) { 05959 *info = -1; 05960 } else if (*n < 0) { 05961 *info = -2; 05962 } 05963 if (*info != 0) { 05964 i__1 = -(*info); 05965 xerbla_("SLASRT", &i__1); 05966 return 0; 05967 } 05968 05969 /* Quick return if possible */ 05970 05971 if (*n <= 1) { 05972 return 0; 05973 } 05974 05975 stkpnt = 1; 05976 stack_ref(1, 1) = 1; 05977 stack_ref(2, 1) = *n; 05978 L10: 05979 start = stack_ref(1, stkpnt); 05980 endd = stack_ref(2, stkpnt); 05981 --stkpnt; 05982 if (endd - start <= 20 && endd - start > 0) { 05983 05984 /* Do Insertion sort on D( START:ENDD ) */ 05985 05986 if (dir == 0) { 05987 05988 /* Sort into decreasing order */ 05989 05990 i__1 = endd; 05991 for (i__ = start + 1; i__ <= i__1; ++i__) { 05992 i__2 = start + 1; 05993 for (j = i__; j >= i__2; --j) { 05994 if (d__[j] > d__[j - 1]) { 05995 dmnmx = d__[j]; 05996 d__[j] = d__[j - 1]; 05997 d__[j - 1] = dmnmx; 05998 } else { 05999 goto L30; 06000 } 06001 /* L20: */ 06002 } 06003 L30: 06004 ; 06005 } 06006 06007 } else { 06008 06009 /* Sort into increasing order */ 06010 06011 i__1 = endd; 06012 for (i__ = start + 1; i__ <= i__1; ++i__) { 06013 i__2 = start + 1; 06014 for (j = i__; j >= i__2; --j) { 06015 if (d__[j] < d__[j - 1]) { 06016 dmnmx = d__[j]; 06017 d__[j] = d__[j - 1]; 06018 d__[j - 1] = dmnmx; 06019 } else { 06020 goto L50; 06021 } 06022 /* L40: */ 06023 } 06024 L50: 06025 ; 06026 } 06027 06028 } 06029 06030 } else if (endd - start > 20) { 06031 06032 /* Partition D( START:ENDD ) and stack parts, largest one first 06033 06034 Choose partition entry as median of 3 */ 06035 06036 d1 = d__[start]; 06037 d2 = d__[endd]; 06038 i__ = 
(start + endd) / 2; 06039 d3 = d__[i__]; 06040 if (d1 < d2) { 06041 if (d3 < d1) { 06042 dmnmx = d1; 06043 } else if (d3 < d2) { 06044 dmnmx = d3; 06045 } else { 06046 dmnmx = d2; 06047 } 06048 } else { 06049 if (d3 < d2) { 06050 dmnmx = d2; 06051 } else if (d3 < d1) { 06052 dmnmx = d3; 06053 } else { 06054 dmnmx = d1; 06055 } 06056 } 06057 06058 if (dir == 0) { 06059 06060 /* Sort into decreasing order */ 06061 06062 i__ = start - 1; 06063 j = endd + 1; 06064 L60: 06065 L70: 06066 --j; 06067 if (d__[j] < dmnmx) { 06068 goto L70; 06069 } 06070 L80: 06071 ++i__; 06072 if (d__[i__] > dmnmx) { 06073 goto L80; 06074 } 06075 if (i__ < j) { 06076 tmp = d__[i__]; 06077 d__[i__] = d__[j]; 06078 d__[j] = tmp; 06079 goto L60; 06080 } 06081 if (j - start > endd - j - 1) { 06082 ++stkpnt; 06083 stack_ref(1, stkpnt) = start; 06084 stack_ref(2, stkpnt) = j; 06085 ++stkpnt; 06086 stack_ref(1, stkpnt) = j + 1; 06087 stack_ref(2, stkpnt) = endd; 06088 } else { 06089 ++stkpnt; 06090 stack_ref(1, stkpnt) = j + 1; 06091 stack_ref(2, stkpnt) = endd; 06092 ++stkpnt; 06093 stack_ref(1, stkpnt) = start; 06094 stack_ref(2, stkpnt) = j; 06095 } 06096 } else { 06097 06098 /* Sort into increasing order */ 06099 06100 i__ = start - 1; 06101 j = endd + 1; 06102 L90: 06103 L100: 06104 --j; 06105 if (d__[j] > dmnmx) { 06106 goto L100; 06107 } 06108 L110: 06109 ++i__; 06110 if (d__[i__] < dmnmx) { 06111 goto L110; 06112 } 06113 if (i__ < j) { 06114 tmp = d__[i__]; 06115 d__[i__] = d__[j]; 06116 d__[j] = tmp; 06117 goto L90; 06118 } 06119 if (j - start > endd - j - 1) { 06120 ++stkpnt; 06121 stack_ref(1, stkpnt) = start; 06122 stack_ref(2, stkpnt) = j; 06123 ++stkpnt; 06124 stack_ref(1, stkpnt) = j + 1; 06125 stack_ref(2, stkpnt) = endd; 06126 } else { 06127 ++stkpnt; 06128 stack_ref(1, stkpnt) = j + 1; 06129 stack_ref(2, stkpnt) = endd; 06130 ++stkpnt; 06131 stack_ref(1, stkpnt) = start; 06132 stack_ref(2, stkpnt) = j; 06133 } 06134 } 06135 } 06136 if (stkpnt > 0) { 06137 goto L10; 06138 } 06139 return 0; 06140 06141 /* End of SLASRT */ 06142 06143 } /* slasrt_ */
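As with the other f2c-translated routines in this file, every argument of SLASRT is passed by pointer and the sort direction is selected by a single character tested with lsame_(). A minimal usage sketch, assuming the integer/real typedefs and the slasrt_ declaration from lapackblas.h:

/* Hedged sketch: sort five values in increasing order with slasrt_. */
char id[] = "I";                         /* 'I' = increasing, 'D' = decreasing */
real d[5] = { 3.f, 1.f, 4.f, 1.f, 5.f };
integer n = 5, info = 0;
slasrt_(id, &n, d, &info);
/* On success info == 0 and d now holds { 1, 1, 3, 4, 5 }. */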
Definition at line 6151 of file lapackblas.cpp.
Referenced by slange_(), slanst_(), and slansy_().
06153 { 06154 /* -- LAPACK auxiliary routine (version 3.0) -- 06155 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 06156 Courant Institute, Argonne National Lab, and Rice University 06157 June 30, 1999 06158 06159 06160 Purpose 06161 ======= 06162 06163 SLASSQ returns the values scl and smsq such that 06164 06165 ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, 06166 06167 where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is 06168 assumed to be non-negative and scl returns the value 06169 06170 scl = f2cmax( scale, abs( x( i ) ) ). 06171 06172 scale and sumsq must be supplied in SCALE and SUMSQ and 06173 scl and smsq are overwritten on SCALE and SUMSQ respectively. 06174 06175 The routine makes only one pass through the vector x. 06176 06177 Arguments 06178 ========= 06179 06180 N (input) INTEGER 06181 The number of elements to be used from the vector X. 06182 06183 X (input) REAL array, dimension (N) 06184 The vector for which a scaled sum of squares is computed. 06185 x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. 06186 06187 INCX (input) INTEGER 06188 The increment between successive values of the vector X. 06189 INCX > 0. 06190 06191 SCALE (input/output) REAL 06192 On entry, the value scale in the equation above. 06193 On exit, SCALE is overwritten with scl , the scaling factor 06194 for the sum of squares. 06195 06196 SUMSQ (input/output) REAL 06197 On entry, the value sumsq in the equation above. 06198 On exit, SUMSQ is overwritten with smsq , the basic sum of 06199 squares from which scl has been factored out. 06200 06201 ===================================================================== 06202 06203 06204 Parameter adjustments */ 06205 /* System generated locals */ 06206 integer i__1, i__2; 06207 real r__1; 06208 /* Local variables */ 06209 static real absxi; 06210 static integer ix; 06211 06212 --x; 06213 06214 /* Function Body */ 06215 if (*n > 0) { 06216 i__1 = (*n - 1) * *incx + 1; 06217 i__2 = *incx; 06218 for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { 06219 if (x[ix] != 0.f) { 06220 absxi = (r__1 = x[ix], dabs(r__1)); 06221 if (*scale < absxi) { 06222 /* Computing 2nd power */ 06223 r__1 = *scale / absxi; 06224 *sumsq = *sumsq * (r__1 * r__1) + 1; 06225 *scale = absxi; 06226 } else { 06227 /* Computing 2nd power */ 06228 r__1 = absxi / *scale; 06229 *sumsq += r__1 * r__1; 06230 } 06231 } 06232 /* L10: */ 06233 } 06234 } 06235 return 0; 06236 06237 /* End of SLASSQ */ 06238 06239 } /* slassq_ */
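Because SCALE and SUMSQ are updated in place, SLASSQ is normally seeded with scale = 0 and sumsq = 1 and then called once per vector (or once per column, which is how the norm routines listed above use it). A minimal sketch, assuming the typedefs and the slassq_ declaration from lapackblas.h:

/* Hedged sketch: accumulate a scaled sum of squares with slassq_. */
real x[4] = { 1.f, 2.f, 3.f, 4.f };
integer n = 4, incx = 1;
real scale = 0.f, sumsq = 1.f;           /* standard seed: scale^2 * sumsq == 0 */
slassq_(&n, x, &incx, &scale, &sumsq);
/* Now scale*scale*sumsq == 30 (= 1+4+9+16), barring rounding, and the
   2-norm of x can be formed safely as scale * sqrt(sumsq). */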
int slasv2_(real *f, real *g, real *h__, real *ssmin, real *ssmax, real *snr, real *csr, real *snl, real *csl)
Definition at line 27087 of file lapackblas.cpp.
References dabs, FALSE_, integer, r_sign(), slamch_(), sqrt(), t, and TRUE_.
Referenced by sbdsqr_().
27089 { 27090 /* -- LAPACK auxiliary routine (version 3.0) -- 27091 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 27092 Courant Institute, Argonne National Lab, and Rice University 27093 October 31, 1992 27094 27095 27096 Purpose 27097 ======= 27098 27099 SLASV2 computes the singular value decomposition of a 2-by-2 27100 triangular matrix 27101 [ F G ] 27102 [ 0 H ]. 27103 On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the 27104 smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and 27105 right singular vectors for abs(SSMAX), giving the decomposition 27106 27107 [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] 27108 [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. 27109 27110 Arguments 27111 ========= 27112 27113 F (input) REAL 27114 The (1,1) element of the 2-by-2 matrix. 27115 27116 G (input) REAL 27117 The (1,2) element of the 2-by-2 matrix. 27118 27119 H (input) REAL 27120 The (2,2) element of the 2-by-2 matrix. 27121 27122 SSMIN (output) REAL 27123 abs(SSMIN) is the smaller singular value. 27124 27125 SSMAX (output) REAL 27126 abs(SSMAX) is the larger singular value. 27127 27128 SNL (output) REAL 27129 CSL (output) REAL 27130 The vector (CSL, SNL) is a unit left singular vector for the 27131 singular value abs(SSMAX). 27132 27133 SNR (output) REAL 27134 CSR (output) REAL 27135 The vector (CSR, SNR) is a unit right singular vector for the 27136 singular value abs(SSMAX). 27137 27138 Further Details 27139 =============== 27140 27141 Any input parameter may be aliased with any output parameter. 27142 27143 Barring over/underflow and assuming a guard digit in subtraction, all 27144 output quantities are correct to within a few units in the last 27145 place (ulps). 27146 27147 In IEEE arithmetic, the code works correctly if one matrix element is 27148 infinite. 27149 27150 Overflow will not occur unless the largest singular value itself 27151 overflows or is within a few ulps of overflow. (On machines with 27152 partial overflow, like the Cray, overflow may occur if the largest 27153 singular value is within a factor of 2 of overflow.) 27154 27155 Underflow is harmless if underflow is gradual. Otherwise, results 27156 may correspond to a matrix modified by perturbations of size near 27157 the underflow threshold. 27158 27159 ===================================================================== */ 27160 /* Table of constant values */ 27161 static real c_b3 = 2.f; 27162 static real c_b4 = 1.f; 27163 27164 /* System generated locals */ 27165 real r__1; 27166 /* Builtin functions */ 27167 //double sqrt(doublereal), r_sign(real *, real *); 27168 /* Local variables */ 27169 static integer pmax; 27170 static real temp; 27171 static logical swap; 27172 static real a, d__, l, m, r__, s, t, tsign, fa, ga, ha, ft, gt, ht, mm; 27173 static logical gasmal; 27174 extern doublereal slamch_(const char *); 27175 static real tt, clt, crt, slt, srt; 27176 27177 27178 27179 27180 ft = *f; 27181 fa = dabs(ft); 27182 ht = *h__; 27183 ha = dabs(*h__); 27184 27185 /* PMAX points to the maximum absolute element of matrix 27186 PMAX = 1 if F largest in absolute values 27187 PMAX = 2 if G largest in absolute values 27188 PMAX = 3 if H largest in absolute values */ 27189 27190 pmax = 1; 27191 swap = ha > fa; 27192 if (swap) { 27193 pmax = 3; 27194 temp = ft; 27195 ft = ht; 27196 ht = temp; 27197 temp = fa; 27198 fa = ha; 27199 ha = temp; 27200 27201 /* Now FA .ge. 
HA */ 27202 27203 } 27204 gt = *g; 27205 ga = dabs(gt); 27206 if (ga == 0.f) { 27207 27208 /* Diagonal matrix */ 27209 27210 *ssmin = ha; 27211 *ssmax = fa; 27212 clt = 1.f; 27213 crt = 1.f; 27214 slt = 0.f; 27215 srt = 0.f; 27216 } else { 27217 gasmal = TRUE_; 27218 if (ga > fa) { 27219 pmax = 2; 27220 if (fa / ga < slamch_("EPS")) { 27221 27222 /* Case of very large GA */ 27223 27224 gasmal = FALSE_; 27225 *ssmax = ga; 27226 if (ha > 1.f) { 27227 *ssmin = fa / (ga / ha); 27228 } else { 27229 *ssmin = fa / ga * ha; 27230 } 27231 clt = 1.f; 27232 slt = ht / gt; 27233 srt = 1.f; 27234 crt = ft / gt; 27235 } 27236 } 27237 if (gasmal) { 27238 27239 /* Normal case */ 27240 27241 d__ = fa - ha; 27242 if (d__ == fa) { 27243 27244 /* Copes with infinite F or H */ 27245 27246 l = 1.f; 27247 } else { 27248 l = d__ / fa; 27249 } 27250 27251 /* Note that 0 .le. L .le. 1 */ 27252 27253 m = gt / ft; 27254 27255 /* Note that abs(M) .le. 1/macheps */ 27256 27257 t = 2.f - l; 27258 27259 /* Note that T .ge. 1 */ 27260 27261 mm = m * m; 27262 tt = t * t; 27263 s = sqrt(tt + mm); 27264 27265 /* Note that 1 .le. S .le. 1 + 1/macheps */ 27266 27267 if (l == 0.f) { 27268 r__ = dabs(m); 27269 } else { 27270 r__ = sqrt(l * l + mm); 27271 } 27272 27273 /* Note that 0 .le. R .le. 1 + 1/macheps */ 27274 27275 a = (s + r__) * .5f; 27276 27277 /* Note that 1 .le. A .le. 1 + abs(M) */ 27278 27279 *ssmin = ha / a; 27280 *ssmax = fa * a; 27281 if (mm == 0.f) { 27282 27283 /* Note that M is very tiny */ 27284 27285 if (l == 0.f) { 27286 t = r_sign(&c_b3, &ft) * r_sign(&c_b4, >); 27287 } else { 27288 t = gt / r_sign(&d__, &ft) + m / t; 27289 } 27290 } else { 27291 t = (m / (s + t) + m / (r__ + l)) * (a + 1.f); 27292 } 27293 l = sqrt(t * t + 4.f); 27294 crt = 2.f / l; 27295 srt = t / l; 27296 clt = (crt + srt * m) / a; 27297 slt = ht / ft * srt / a; 27298 } 27299 } 27300 if (swap) { 27301 *csl = srt; 27302 *snl = crt; 27303 *csr = slt; 27304 *snr = clt; 27305 } else { 27306 *csl = clt; 27307 *snl = slt; 27308 *csr = crt; 27309 *snr = srt; 27310 } 27311 27312 /* Correct signs of SSMAX and SSMIN */ 27313 27314 if (pmax == 1) { 27315 tsign = r_sign(&c_b4, csr) * r_sign(&c_b4, csl) * r_sign(&c_b4, f); 27316 } 27317 if (pmax == 2) { 27318 tsign = r_sign(&c_b4, snr) * r_sign(&c_b4, csl) * r_sign(&c_b4, g); 27319 } 27320 if (pmax == 3) { 27321 tsign = r_sign(&c_b4, snr) * r_sign(&c_b4, snl) * r_sign(&c_b4, h__); 27322 } 27323 *ssmax = r_sign(ssmax, &tsign); 27324 r__1 = tsign * r_sign(&c_b4, f) * r_sign(&c_b4, h__); 27325 *ssmin = r_sign(ssmin, &r__1); 27326 return 0; 27327 27328 /* End of SLASV2 */ 27329 27330 } /* slasv2_ */
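Since every argument of slasv2_ is a scalar passed by pointer, a call needs nothing beyond the six output variables. A minimal sketch, assuming the real typedef from lapackblas.h:

/* Hedged sketch: singular values and vectors of the 2-by-2 upper
   triangular matrix [ f g ; 0 h ] via slasv2_. */
real f = 4.f, g = 3.f, h = 2.f;
real ssmin, ssmax, snr, csr, snl, csl;
slasv2_(&f, &g, &h, &ssmin, &ssmax, &snr, &csr, &snl, &csl);
/* |ssmax| >= |ssmin| are the singular values; (csl,snl) and (csr,snr)
   are the left and right singular vectors for |ssmax|. */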
int slatrd_(char *uplo, integer *n, integer *nb, real *a, integer *lda, real *e, real *tau, real *w, integer *ldw)
Definition at line 6244 of file lapackblas.cpp.
References a_ref, c__1, f2cmin, integer, lsame_(), saxpy_(), sdot_(), sgemv_(), slarfg_(), sscal_(), ssymv_(), and w_ref.
Referenced by ssytrd_().
06246 { 06247 /* -- LAPACK auxiliary routine (version 3.0) -- 06248 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 06249 Courant Institute, Argonne National Lab, and Rice University 06250 October 31, 1992 06251 06252 06253 Purpose 06254 ======= 06255 06256 SLATRD reduces NB rows and columns of a real symmetric matrix A to 06257 symmetric tridiagonal form by an orthogonal similarity 06258 transformation Q' * A * Q, and returns the matrices V and W which are 06259 needed to apply the transformation to the unreduced part of A. 06260 06261 If UPLO = 'U', SLATRD reduces the last NB rows and columns of a 06262 matrix, of which the upper triangle is supplied; 06263 if UPLO = 'L', SLATRD reduces the first NB rows and columns of a 06264 matrix, of which the lower triangle is supplied. 06265 06266 This is an auxiliary routine called by SSYTRD. 06267 06268 Arguments 06269 ========= 06270 06271 UPLO (input) CHARACTER 06272 Specifies whether the upper or lower triangular part of the 06273 symmetric matrix A is stored: 06274 = 'U': Upper triangular 06275 = 'L': Lower triangular 06276 06277 N (input) INTEGER 06278 The order of the matrix A. 06279 06280 NB (input) INTEGER 06281 The number of rows and columns to be reduced. 06282 06283 A (input/output) REAL array, dimension (LDA,N) 06284 On entry, the symmetric matrix A. If UPLO = 'U', the leading 06285 n-by-n upper triangular part of A contains the upper 06286 triangular part of the matrix A, and the strictly lower 06287 triangular part of A is not referenced. If UPLO = 'L', the 06288 leading n-by-n lower triangular part of A contains the lower 06289 triangular part of the matrix A, and the strictly upper 06290 triangular part of A is not referenced. 06291 On exit: 06292 if UPLO = 'U', the last NB columns have been reduced to 06293 tridiagonal form, with the diagonal elements overwriting 06294 the diagonal elements of A; the elements above the diagonal 06295 with the array TAU, represent the orthogonal matrix Q as a 06296 product of elementary reflectors; 06297 if UPLO = 'L', the first NB columns have been reduced to 06298 tridiagonal form, with the diagonal elements overwriting 06299 the diagonal elements of A; the elements below the diagonal 06300 with the array TAU, represent the orthogonal matrix Q as a 06301 product of elementary reflectors. 06302 See Further Details. 06303 06304 LDA (input) INTEGER 06305 The leading dimension of the array A. LDA >= (1,N). 06306 06307 E (output) REAL array, dimension (N-1) 06308 If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal 06309 elements of the last NB columns of the reduced matrix; 06310 if UPLO = 'L', E(1:nb) contains the subdiagonal elements of 06311 the first NB columns of the reduced matrix. 06312 06313 TAU (output) REAL array, dimension (N-1) 06314 The scalar factors of the elementary reflectors, stored in 06315 TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. 06316 See Further Details. 06317 06318 W (output) REAL array, dimension (LDW,NB) 06319 The n-by-nb matrix W required to update the unreduced part 06320 of A. 06321 06322 LDW (input) INTEGER 06323 The leading dimension of the array W. LDW >= f2cmax(1,N). 06324 06325 Further Details 06326 =============== 06327 06328 If UPLO = 'U', the matrix Q is represented as a product of elementary 06329 reflectors 06330 06331 Q = H(n) H(n-1) . . . H(n-nb+1). 
06332 06333 Each H(i) has the form 06334 06335 H(i) = I - tau * v * v' 06336 06337 where tau is a real scalar, and v is a real vector with 06338 v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), 06339 and tau in TAU(i-1). 06340 06341 If UPLO = 'L', the matrix Q is represented as a product of elementary 06342 reflectors 06343 06344 Q = H(1) H(2) . . . H(nb). 06345 06346 Each H(i) has the form 06347 06348 H(i) = I - tau * v * v' 06349 06350 where tau is a real scalar, and v is a real vector with 06351 v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), 06352 and tau in TAU(i). 06353 06354 The elements of the vectors v together form the n-by-nb matrix V 06355 which is needed, with W, to apply the transformation to the unreduced 06356 part of the matrix, using a symmetric rank-2k update of the form: 06357 A := A - V*W' - W*V'. 06358 06359 The contents of A on exit are illustrated by the following examples 06360 with n = 5 and nb = 2: 06361 06362 if UPLO = 'U': if UPLO = 'L': 06363 06364 ( a a a v4 v5 ) ( d ) 06365 ( a a v4 v5 ) ( 1 d ) 06366 ( a 1 v5 ) ( v1 1 a ) 06367 ( d 1 ) ( v1 v2 a a ) 06368 ( d ) ( v1 v2 a a a ) 06369 06370 where d denotes a diagonal element of the reduced matrix, a denotes 06371 an element of the original matrix that is unchanged, and vi denotes 06372 an element of the vector defining H(i). 06373 06374 ===================================================================== 06375 06376 06377 Quick return if possible 06378 06379 Parameter adjustments */ 06380 /* Table of constant values */ 06381 static real c_b5 = -1.f; 06382 static real c_b6 = 1.f; 06383 static integer c__1 = 1; 06384 static real c_b16 = 0.f; 06385 06386 /* System generated locals */ 06387 integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; 06388 /* Local variables */ 06389 extern doublereal sdot_(integer *, real *, integer *, real *, integer *); 06390 static integer i__; 06391 static real alpha; 06392 extern logical lsame_(const char *, const char *); 06393 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 06394 sgemv_(const char *, integer *, integer *, real *, real *, integer *, 06395 real *, integer *, real *, real *, integer *), saxpy_( 06396 integer *, real *, real *, integer *, real *, integer *), ssymv_( 06397 const char *, integer *, real *, real *, integer *, real *, integer *, 06398 real *, real *, integer *); 06399 static integer iw; 06400 extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, 06401 real *); 06402 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 06403 #define w_ref(a_1,a_2) w[(a_2)*w_dim1 + a_1] 06404 06405 06406 a_dim1 = *lda; 06407 a_offset = 1 + a_dim1 * 1; 06408 a -= a_offset; 06409 --e; 06410 --tau; 06411 w_dim1 = *ldw; 06412 w_offset = 1 + w_dim1 * 1; 06413 w -= w_offset; 06414 06415 /* Function Body */ 06416 if (*n <= 0) { 06417 return 0; 06418 } 06419 06420 if (lsame_(uplo, "U")) { 06421 06422 /* Reduce last NB columns of upper triangle */ 06423 06424 i__1 = *n - *nb + 1; 06425 for (i__ = *n; i__ >= i__1; --i__) { 06426 iw = i__ - *n + *nb; 06427 if (i__ < *n) { 06428 06429 /* Update A(1:i,i) */ 06430 06431 i__2 = *n - i__; 06432 sgemv_("No transpose", &i__, &i__2, &c_b5, &a_ref(1, i__ + 1), 06433 lda, &w_ref(i__, iw + 1), ldw, &c_b6, &a_ref(1, i__), 06434 &c__1); 06435 i__2 = *n - i__; 06436 sgemv_("No transpose", &i__, &i__2, &c_b5, &w_ref(1, iw + 1), 06437 ldw, &a_ref(i__, i__ + 1), lda, &c_b6, &a_ref(1, i__), 06438 &c__1); 06439 } 06440 if (i__ > 1) { 06441 06442 /* Generate 
elementary reflector H(i) to annihilate 06443 A(1:i-2,i) */ 06444 06445 i__2 = i__ - 1; 06446 slarfg_(&i__2, &a_ref(i__ - 1, i__), &a_ref(1, i__), &c__1, & 06447 tau[i__ - 1]); 06448 e[i__ - 1] = a_ref(i__ - 1, i__); 06449 a_ref(i__ - 1, i__) = 1.f; 06450 06451 /* Compute W(1:i-1,i) */ 06452 06453 i__2 = i__ - 1; 06454 ssymv_("Upper", &i__2, &c_b6, &a[a_offset], lda, &a_ref(1, 06455 i__), &c__1, &c_b16, &w_ref(1, iw), &c__1); 06456 if (i__ < *n) { 06457 i__2 = i__ - 1; 06458 i__3 = *n - i__; 06459 sgemv_("Transpose", &i__2, &i__3, &c_b6, &w_ref(1, iw + 1) 06460 , ldw, &a_ref(1, i__), &c__1, &c_b16, &w_ref(i__ 06461 + 1, iw), &c__1); 06462 i__2 = i__ - 1; 06463 i__3 = *n - i__; 06464 sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(1, i__ 06465 + 1), lda, &w_ref(i__ + 1, iw), &c__1, &c_b6, & 06466 w_ref(1, iw), &c__1); 06467 i__2 = i__ - 1; 06468 i__3 = *n - i__; 06469 sgemv_("Transpose", &i__2, &i__3, &c_b6, &a_ref(1, i__ + 06470 1), lda, &a_ref(1, i__), &c__1, &c_b16, &w_ref( 06471 i__ + 1, iw), &c__1); 06472 i__2 = i__ - 1; 06473 i__3 = *n - i__; 06474 sgemv_("No transpose", &i__2, &i__3, &c_b5, &w_ref(1, iw 06475 + 1), ldw, &w_ref(i__ + 1, iw), &c__1, &c_b6, & 06476 w_ref(1, iw), &c__1); 06477 } 06478 i__2 = i__ - 1; 06479 sscal_(&i__2, &tau[i__ - 1], &w_ref(1, iw), &c__1); 06480 i__2 = i__ - 1; 06481 alpha = tau[i__ - 1] * -.5f * sdot_(&i__2, &w_ref(1, iw), & 06482 c__1, &a_ref(1, i__), &c__1); 06483 i__2 = i__ - 1; 06484 saxpy_(&i__2, &alpha, &a_ref(1, i__), &c__1, &w_ref(1, iw), & 06485 c__1); 06486 } 06487 06488 /* L10: */ 06489 } 06490 } else { 06491 06492 /* Reduce first NB columns of lower triangle */ 06493 06494 i__1 = *nb; 06495 for (i__ = 1; i__ <= i__1; ++i__) { 06496 06497 /* Update A(i:n,i) */ 06498 06499 i__2 = *n - i__ + 1; 06500 i__3 = i__ - 1; 06501 sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__, 1), lda, & 06502 w_ref(i__, 1), ldw, &c_b6, &a_ref(i__, i__), &c__1); 06503 i__2 = *n - i__ + 1; 06504 i__3 = i__ - 1; 06505 sgemv_("No transpose", &i__2, &i__3, &c_b5, &w_ref(i__, 1), ldw, & 06506 a_ref(i__, 1), lda, &c_b6, &a_ref(i__, i__), &c__1); 06507 if (i__ < *n) { 06508 06509 /* Generate elementary reflector H(i) to annihilate 06510 A(i+2:n,i) 06511 06512 Computing MIN */ 06513 i__2 = i__ + 2; 06514 i__3 = *n - i__; 06515 slarfg_(&i__3, &a_ref(i__ + 1, i__), &a_ref(f2cmin(i__2,*n), i__) 06516 , &c__1, &tau[i__]); 06517 e[i__] = a_ref(i__ + 1, i__); 06518 a_ref(i__ + 1, i__) = 1.f; 06519 06520 /* Compute W(i+1:n,i) */ 06521 06522 i__2 = *n - i__; 06523 ssymv_("Lower", &i__2, &c_b6, &a_ref(i__ + 1, i__ + 1), lda, & 06524 a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(i__ + 1, 06525 i__), &c__1); 06526 i__2 = *n - i__; 06527 i__3 = i__ - 1; 06528 sgemv_("Transpose", &i__2, &i__3, &c_b6, &w_ref(i__ + 1, 1), 06529 ldw, &a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(1, 06530 i__), &c__1); 06531 i__2 = *n - i__; 06532 i__3 = i__ - 1; 06533 sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1, 1) 06534 , lda, &w_ref(1, i__), &c__1, &c_b6, &w_ref(i__ + 1, 06535 i__), &c__1); 06536 i__2 = *n - i__; 06537 i__3 = i__ - 1; 06538 sgemv_("Transpose", &i__2, &i__3, &c_b6, &a_ref(i__ + 1, 1), 06539 lda, &a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(1, 06540 i__), &c__1); 06541 i__2 = *n - i__; 06542 i__3 = i__ - 1; 06543 sgemv_("No transpose", &i__2, &i__3, &c_b5, &w_ref(i__ + 1, 1) 06544 , ldw, &w_ref(1, i__), &c__1, &c_b6, &w_ref(i__ + 1, 06545 i__), &c__1); 06546 i__2 = *n - i__; 06547 sscal_(&i__2, &tau[i__], &w_ref(i__ + 1, i__), &c__1); 06548 i__2 = *n - i__; 06549 alpha = 
tau[i__] * -.5f * sdot_(&i__2, &w_ref(i__ + 1, i__), & 06550 c__1, &a_ref(i__ + 1, i__), &c__1); 06551 i__2 = *n - i__; 06552 saxpy_(&i__2, &alpha, &a_ref(i__ + 1, i__), &c__1, &w_ref(i__ 06553 + 1, i__), &c__1); 06554 } 06555 06556 /* L20: */ 06557 } 06558 } 06559 06560 return 0; 06561 06562 /* End of SLATRD */ 06563 06564 } /* slatrd_ */
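SLATRD is only a building block for SSYTRD, but it can be exercised directly on a small symmetric matrix. The sketch below is illustrative only; it assumes column-major storage, the typedefs from lapackblas.h, and hand-picked data values:

/* Hedged sketch: reduce the first NB = 1 column of a 3x3 symmetric
   matrix stored in its lower triangle (column-major). */
char uplo[] = "L";
integer n = 3, nb = 1, lda = 3, ldw = 3;
real a[9] = { 4.f, 1.f, 2.f,     /* column 1 */
              0.f, 3.f, 5.f,     /* column 2 (only the lower part is referenced) */
              0.f, 0.f, 6.f };   /* column 3 */
real e[2], tau[2], w[3];         /* E, TAU: dimension N-1; W: LDW x NB */
slatrd_(uplo, &n, &nb, a, &lda, e, tau, w, &ldw);
/* e[0] and tau[0] now describe the first reflector H(1); A and W hold the
   quantities needed for the rank-2 update A := A - V*W' - W*V'. */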
doublereal snrm2_(integer *n, real *x, integer *incx)
Definition at line 6573 of file lapackblas.cpp.
References dabs, integer, norm(), and sqrt().
Referenced by EMAN::PCA::Lanczos(), EMAN::PCAlarge::Lanczos(), EMAN::PCA::Lanczos_ooc(), slaed3_(), slaed9_(), and slarfg_().
06574 { 06575 /* The following loop is equivalent to this call to the LAPACK 06576 auxiliary routine: 06577 CALL SLASSQ( N, X, INCX, SCALE, SSQ ) */ 06578 /* System generated locals */ 06579 integer i__1, i__2; 06580 real ret_val, r__1; 06581 /* Builtin functions */ 06582 // double sqrt(doublereal); 06583 /* Local variables */ 06584 static real norm, scale, absxi; 06585 static integer ix; 06586 static real ssq; 06587 /* SNRM2 returns the euclidean norm of a vector via the function 06588 name, so that 06589 SNRM2 := sqrt( x'*x ) 06590 -- This version written on 25-October-1982. 06591 Modified on 14-October-1993 to inline the call to SLASSQ. 06592 Sven Hammarling, Nag Ltd. 06593 Parameter adjustments */ 06594 --x; 06595 /* Function Body */ 06596 if (*n < 1 || *incx < 1) { 06597 norm = 0.f; 06598 } else if (*n == 1) { 06599 norm = dabs(x[1]); 06600 } else { 06601 scale = 0.f; 06602 ssq = 1.f; 06603 06604 06605 i__1 = (*n - 1) * *incx + 1; 06606 i__2 = *incx; 06607 for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { 06608 if (x[ix] != 0.f) { 06609 absxi = (r__1 = x[ix], dabs(r__1)); 06610 if (scale < absxi) { 06611 /* Computing 2nd power */ 06612 r__1 = scale / absxi; 06613 ssq = ssq * (r__1 * r__1) + 1.f; 06614 scale = absxi; 06615 } else { 06616 /* Computing 2nd power */ 06617 r__1 = absxi / scale; 06618 ssq += r__1 * r__1; 06619 } 06620 } 06621 /* L10: */ 06622 } 06623 norm = scale * sqrt(ssq); 06624 } 06625 06626 ret_val = norm; 06627 return ret_val; 06628 06629 /* End of SNRM2. */ 06630 06631 } /* snrm2_ */
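Unlike the subroutines above, snrm2_ returns its result as a doublereal, so it composes directly into expressions. A minimal sketch under the same typedef assumptions:

/* Hedged sketch: Euclidean norm of a short vector via snrm2_. */
real x[3] = { 3.f, 0.f, 4.f };
integer n = 3, incx = 1;
doublereal nrm = snrm2_(&n, x, &incx);
/* nrm == 5 up to rounding; the scale/ssq accumulation above avoids
   overflow and underflow for very large or very small entries. */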
int sorg2l_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info)
Definition at line 6636 of file lapackblas.cpp.
References a_ref, c__1, f2cmax, integer, slarf_(), sscal_(), and xerbla_().
Referenced by sorgql_().
06638 { 06639 /* -- LAPACK routine (version 3.0) -- 06640 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 06641 Courant Institute, Argonne National Lab, and Rice University 06642 February 29, 1992 06643 06644 06645 Purpose 06646 ======= 06647 06648 SORG2L generates an m by n real matrix Q with orthonormal columns, 06649 which is defined as the last n columns of a product of k elementary 06650 reflectors of order m 06651 06652 Q = H(k) . . . H(2) H(1) 06653 06654 as returned by SGEQLF. 06655 06656 Arguments 06657 ========= 06658 06659 M (input) INTEGER 06660 The number of rows of the matrix Q. M >= 0. 06661 06662 N (input) INTEGER 06663 The number of columns of the matrix Q. M >= N >= 0. 06664 06665 K (input) INTEGER 06666 The number of elementary reflectors whose product defines the 06667 matrix Q. N >= K >= 0. 06668 06669 A (input/output) REAL array, dimension (LDA,N) 06670 On entry, the (n-k+i)-th column must contain the vector which 06671 defines the elementary reflector H(i), for i = 1,2,...,k, as 06672 returned by SGEQLF in the last k columns of its array 06673 argument A. 06674 On exit, the m by n matrix Q. 06675 06676 LDA (input) INTEGER 06677 The first dimension of the array A. LDA >= f2cmax(1,M). 06678 06679 TAU (input) REAL array, dimension (K) 06680 TAU(i) must contain the scalar factor of the elementary 06681 reflector H(i), as returned by SGEQLF. 06682 06683 WORK (workspace) REAL array, dimension (N) 06684 06685 INFO (output) INTEGER 06686 = 0: successful exit 06687 < 0: if INFO = -i, the i-th argument has an illegal value 06688 06689 ===================================================================== 06690 06691 06692 Test the input arguments 06693 06694 Parameter adjustments */ 06695 /* Table of constant values */ 06696 static integer c__1 = 1; 06697 06698 /* System generated locals */ 06699 integer a_dim1, a_offset, i__1, i__2, i__3; 06700 real r__1; 06701 /* Local variables */ 06702 static integer i__, j, l; 06703 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 06704 slarf_(const char *, integer *, integer *, real *, integer *, real *, 06705 real *, integer *, real *); 06706 static integer ii; 06707 extern /* Subroutine */ int xerbla_(const char *, integer *); 06708 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 06709 06710 06711 a_dim1 = *lda; 06712 a_offset = 1 + a_dim1 * 1; 06713 a -= a_offset; 06714 --tau; 06715 --work; 06716 06717 /* Function Body */ 06718 *info = 0; 06719 if (*m < 0) { 06720 *info = -1; 06721 } else if (*n < 0 || *n > *m) { 06722 *info = -2; 06723 } else if (*k < 0 || *k > *n) { 06724 *info = -3; 06725 } else if (*lda < f2cmax(1,*m)) { 06726 *info = -5; 06727 } 06728 if (*info != 0) { 06729 i__1 = -(*info); 06730 xerbla_("SORG2L", &i__1); 06731 return 0; 06732 } 06733 06734 /* Quick return if possible */ 06735 06736 if (*n <= 0) { 06737 return 0; 06738 } 06739 06740 /* Initialise columns 1:n-k to columns of the unit matrix */ 06741 06742 i__1 = *n - *k; 06743 for (j = 1; j <= i__1; ++j) { 06744 i__2 = *m; 06745 for (l = 1; l <= i__2; ++l) { 06746 a_ref(l, j) = 0.f; 06747 /* L10: */ 06748 } 06749 a_ref(*m - *n + j, j) = 1.f; 06750 /* L20: */ 06751 } 06752 06753 i__1 = *k; 06754 for (i__ = 1; i__ <= i__1; ++i__) { 06755 ii = *n - *k + i__; 06756 06757 /* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left */ 06758 06759 a_ref(*m - *n + ii, ii) = 1.f; 06760 i__2 = *m - *n + ii; 06761 i__3 = ii - 1; 06762 slarf_("Left", &i__2, &i__3, &a_ref(1, ii), &c__1, &tau[i__], &a[ 06763 a_offset], lda, &work[1]); 06764 i__2 = *m - 
*n + ii - 1; 06765 r__1 = -tau[i__]; 06766 sscal_(&i__2, &r__1, &a_ref(1, ii), &c__1); 06767 a_ref(*m - *n + ii, ii) = 1.f - tau[i__]; 06768 06769 /* Set A(m-k+i+1:m,n-k+i) to zero */ 06770 06771 i__2 = *m; 06772 for (l = *m - *n + ii + 1; l <= i__2; ++l) { 06773 a_ref(l, ii) = 0.f; 06774 /* L30: */ 06775 } 06776 /* L40: */ 06777 } 06778 return 0; 06779 06780 /* End of SORG2L */ 06781 06782 } /* sorg2l_ */
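One way to see the QL storage convention is the degenerate case K = 0: the reflector loop is empty and the routine only executes the "Initialise columns 1:n-k" block above, producing the last N columns of the M-by-M identity. A sketch (TAU and WORK are never read when K = 0, but valid storage is still passed):

/* Hedged sketch: with K = 0 reflectors, sorg2l_ returns the last N
   columns of the M-by-M identity matrix. */
integer m = 3, n = 2, k = 0, lda = 3, info = 0;
real a[3 * 2];                 /* contents on entry are irrelevant when k = 0 */
real tau[1], work[2];          /* TAU(K) and WORK(N); unused here */
sorg2l_(&m, &n, &k, a, &lda, tau, work, &info);
/* a now holds, column-major, (0,1,0) and (0,0,1): columns 2:3 of I_3. */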
int sorg2r_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info)
Definition at line 6790 of file lapackblas.cpp.
References a_ref, c__1, f2cmax, integer, slarf_(), sscal_(), and xerbla_().
Referenced by sorgqr_().
06792 { 06793 /* -- LAPACK routine (version 3.0) -- 06794 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 06795 Courant Institute, Argonne National Lab, and Rice University 06796 February 29, 1992 06797 06798 06799 Purpose 06800 ======= 06801 06802 SORG2R generates an m by n real matrix Q with orthonormal columns, 06803 which is defined as the first n columns of a product of k elementary 06804 reflectors of order m 06805 06806 Q = H(1) H(2) . . . H(k) 06807 06808 as returned by SGEQRF. 06809 06810 Arguments 06811 ========= 06812 06813 M (input) INTEGER 06814 The number of rows of the matrix Q. M >= 0. 06815 06816 N (input) INTEGER 06817 The number of columns of the matrix Q. M >= N >= 0. 06818 06819 K (input) INTEGER 06820 The number of elementary reflectors whose product defines the 06821 matrix Q. N >= K >= 0. 06822 06823 A (input/output) REAL array, dimension (LDA,N) 06824 On entry, the i-th column must contain the vector which 06825 defines the elementary reflector H(i), for i = 1,2,...,k, as 06826 returned by SGEQRF in the first k columns of its array 06827 argument A. 06828 On exit, the m-by-n matrix Q. 06829 06830 LDA (input) INTEGER 06831 The first dimension of the array A. LDA >= f2cmax(1,M). 06832 06833 TAU (input) REAL array, dimension (K) 06834 TAU(i) must contain the scalar factor of the elementary 06835 reflector H(i), as returned by SGEQRF. 06836 06837 WORK (workspace) REAL array, dimension (N) 06838 06839 INFO (output) INTEGER 06840 = 0: successful exit 06841 < 0: if INFO = -i, the i-th argument has an illegal value 06842 06843 ===================================================================== 06844 06845 06846 Test the input arguments 06847 06848 Parameter adjustments */ 06849 /* Table of constant values */ 06850 static integer c__1 = 1; 06851 06852 /* System generated locals */ 06853 integer a_dim1, a_offset, i__1, i__2; 06854 real r__1; 06855 /* Local variables */ 06856 static integer i__, j, l; 06857 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 06858 slarf_(const char *, integer *, integer *, real *, integer *, real *, 06859 real *, integer *, real *), xerbla_(const char *, integer *); 06860 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 06861 06862 06863 a_dim1 = *lda; 06864 a_offset = 1 + a_dim1 * 1; 06865 a -= a_offset; 06866 --tau; 06867 --work; 06868 06869 /* Function Body */ 06870 *info = 0; 06871 if (*m < 0) { 06872 *info = -1; 06873 } else if (*n < 0 || *n > *m) { 06874 *info = -2; 06875 } else if (*k < 0 || *k > *n) { 06876 *info = -3; 06877 } else if (*lda < f2cmax(1,*m)) { 06878 *info = -5; 06879 } 06880 if (*info != 0) { 06881 i__1 = -(*info); 06882 xerbla_("SORG2R", &i__1); 06883 return 0; 06884 } 06885 06886 /* Quick return if possible */ 06887 06888 if (*n <= 0) { 06889 return 0; 06890 } 06891 06892 /* Initialise columns k+1:n to columns of the unit matrix */ 06893 06894 i__1 = *n; 06895 for (j = *k + 1; j <= i__1; ++j) { 06896 i__2 = *m; 06897 for (l = 1; l <= i__2; ++l) { 06898 a_ref(l, j) = 0.f; 06899 /* L10: */ 06900 } 06901 a_ref(j, j) = 1.f; 06902 /* L20: */ 06903 } 06904 06905 for (i__ = *k; i__ >= 1; --i__) { 06906 06907 /* Apply H(i) to A(i:m,i:n) from the left */ 06908 06909 if (i__ < *n) { 06910 a_ref(i__, i__) = 1.f; 06911 i__1 = *m - i__ + 1; 06912 i__2 = *n - i__; 06913 slarf_("Left", &i__1, &i__2, &a_ref(i__, i__), &c__1, &tau[i__], & 06914 a_ref(i__, i__ + 1), lda, &work[1]); 06915 } 06916 if (i__ < *m) { 06917 i__1 = *m - i__; 06918 r__1 = -tau[i__]; 06919 sscal_(&i__1, &r__1, &a_ref(i__ + 1, 
i__), &c__1); 06920 } 06921 a_ref(i__, i__) = 1.f - tau[i__]; 06922 06923 /* Set A(1:i-1,i) to zero */ 06924 06925 i__1 = i__ - 1; 06926 for (l = 1; l <= i__1; ++l) { 06927 a_ref(l, i__) = 0.f; 06928 /* L30: */ 06929 } 06930 /* L40: */ 06931 } 06932 return 0; 06933 06934 /* End of SORG2R */ 06935 06936 } /* sorg2r_ */
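For SORG2R a single explicit reflector makes the storage convention concrete: column i of A holds v(i+1:m) below the diagonal and v(i) = 1 is implicit. With v = (1, 1)^T and tau = 1, H(1) = I - tau*v*v' = [ 0 -1 ; -1 0 ], and the routine reproduces exactly that matrix. A sketch with hand-built input (not taken from SGEQRF), assuming the typedefs from lapackblas.h:

/* Hedged sketch: generate Q = H(1) for one hand-built reflector. */
integer m = 2, n = 2, k = 1, lda = 2, info = 0;
real a[2 * 2] = { 0.f, 1.f,    /* column 1: a(2,1) = v(2) = 1 */
                  0.f, 0.f };  /* column 2: overwritten by the routine */
real tau[1] = { 1.f };         /* tau = 2 / (v'v) for v = (1,1) */
real work[2];                  /* WORK(N) */
sorg2r_(&m, &n, &k, a, &lda, tau, work, &info);
/* a is now { 0, -1, -1, 0 } column-major, i.e. Q = [ 0 -1 ; -1 0 ]. */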
int sorgbr_(const char *vect, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info)
Definition at line 25035 of file lapackblas.cpp.
References a_ref, f2cmax, f2cmin, ilaenv_(), integer, lsame_(), sorglq_(), sorgqr_(), and xerbla_().
Referenced by sgesvd_().
25038 { 25039 /* -- LAPACK routine (version 3.0) -- 25040 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 25041 Courant Institute, Argonne National Lab, and Rice University 25042 June 30, 1999 25043 25044 25045 Purpose 25046 ======= 25047 25048 SORGBR generates one of the real orthogonal matrices Q or P**T 25049 determined by SGEBRD when reducing a real matrix A to bidiagonal 25050 form: A = Q * B * P**T. Q and P**T are defined as products of 25051 elementary reflectors H(i) or G(i) respectively. 25052 25053 If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q 25054 is of order M: 25055 if m >= k, Q = H(1) H(2) . . . H(k) and SORGBR returns the first n 25056 columns of Q, where m >= n >= k; 25057 if m < k, Q = H(1) H(2) . . . H(m-1) and SORGBR returns Q as an 25058 M-by-M matrix. 25059 25060 If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T 25061 is of order N: 25062 if k < n, P**T = G(k) . . . G(2) G(1) and SORGBR returns the first m 25063 rows of P**T, where n >= m >= k; 25064 if k >= n, P**T = G(n-1) . . . G(2) G(1) and SORGBR returns P**T as 25065 an N-by-N matrix. 25066 25067 Arguments 25068 ========= 25069 25070 VECT (input) CHARACTER*1 25071 Specifies whether the matrix Q or the matrix P**T is 25072 required, as defined in the transformation applied by SGEBRD: 25073 = 'Q': generate Q; 25074 = 'P': generate P**T. 25075 25076 M (input) INTEGER 25077 The number of rows of the matrix Q or P**T to be returned. 25078 M >= 0. 25079 25080 N (input) INTEGER 25081 The number of columns of the matrix Q or P**T to be returned. 25082 N >= 0. 25083 If VECT = 'Q', M >= N >= min(M,K); 25084 if VECT = 'P', N >= M >= min(N,K). 25085 25086 K (input) INTEGER 25087 If VECT = 'Q', the number of columns in the original M-by-K 25088 matrix reduced by SGEBRD. 25089 If VECT = 'P', the number of rows in the original K-by-N 25090 matrix reduced by SGEBRD. 25091 K >= 0. 25092 25093 A (input/output) REAL array, dimension (LDA,N) 25094 On entry, the vectors which define the elementary reflectors, 25095 as returned by SGEBRD. 25096 On exit, the M-by-N matrix Q or P**T. 25097 25098 LDA (input) INTEGER 25099 The leading dimension of the array A. LDA >= max(1,M). 25100 25101 TAU (input) REAL array, dimension 25102 (min(M,K)) if VECT = 'Q' 25103 (min(N,K)) if VECT = 'P' 25104 TAU(i) must contain the scalar factor of the elementary 25105 reflector H(i) or G(i), which determines Q or P**T, as 25106 returned by SGEBRD in its array argument TAUQ or TAUP. 25107 25108 WORK (workspace/output) REAL array, dimension (LWORK) 25109 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 25110 25111 LWORK (input) INTEGER 25112 The dimension of the array WORK. LWORK >= max(1,min(M,N)). 25113 For optimum performance LWORK >= min(M,N)*NB, where NB 25114 is the optimal blocksize. 25115 25116 If LWORK = -1, then a workspace query is assumed; the routine 25117 only calculates the optimal size of the WORK array, returns 25118 this value as the first entry of the WORK array, and no error 25119 message related to LWORK is issued by XERBLA. 
25120 25121 INFO (output) INTEGER 25122 = 0: successful exit 25123 < 0: if INFO = -i, the i-th argument had an illegal value 25124 25125 ===================================================================== 25126 25127 25128 Test the input arguments 25129 25130 Parameter adjustments */ 25131 /* Table of constant values */ 25132 static integer c__1 = 1; 25133 static integer c_n1 = -1; 25134 25135 /* System generated locals */ 25136 integer a_dim1, a_offset, i__1, i__2, i__3; 25137 /* Local variables */ 25138 static integer i__, j; 25139 extern logical lsame_(const char *, const char *); 25140 static integer iinfo; 25141 static logical wantq; 25142 static integer nb, mn; 25143 extern /* Subroutine */ int xerbla_(const char *, integer *); 25144 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 25145 integer *, integer *, ftnlen, ftnlen); 25146 extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real 25147 *, integer *, real *, real *, integer *, integer *), sorgqr_( 25148 integer *, integer *, integer *, real *, integer *, real *, real * 25149 , integer *, integer *); 25150 static integer lwkopt; 25151 static logical lquery; 25152 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 25153 25154 25155 a_dim1 = *lda; 25156 a_offset = 1 + a_dim1 * 1; 25157 a -= a_offset; 25158 --tau; 25159 --work; 25160 25161 /* Function Body */ 25162 *info = 0; 25163 wantq = lsame_(vect, "Q"); 25164 mn = f2cmin(*m,*n); 25165 lquery = *lwork == -1; 25166 if (! wantq && ! lsame_(vect, "P")) { 25167 *info = -1; 25168 } else if (*m < 0) { 25169 *info = -2; 25170 } else if (*n < 0 || wantq && (*n > *m || *n < f2cmin(*m,*k)) || ! wantq && ( 25171 *m > *n || *m < f2cmin(*n,*k))) { 25172 *info = -3; 25173 } else if (*k < 0) { 25174 *info = -4; 25175 } else if (*lda < f2cmax(1,*m)) { 25176 *info = -6; 25177 } else if (*lwork < f2cmax(1,mn) && ! 
lquery) { 25178 *info = -9; 25179 } 25180 25181 if (*info == 0) { 25182 if (wantq) { 25183 nb = ilaenv_(&c__1, "SORGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( 25184 ftnlen)1); 25185 } else { 25186 nb = ilaenv_(&c__1, "SORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( 25187 ftnlen)1); 25188 } 25189 lwkopt = f2cmax(1,mn) * nb; 25190 work[1] = (real) lwkopt; 25191 } 25192 25193 if (*info != 0) { 25194 i__1 = -(*info); 25195 xerbla_("SORGBR", &i__1); 25196 return 0; 25197 } else if (lquery) { 25198 return 0; 25199 } 25200 25201 /* Quick return if possible */ 25202 25203 if (*m == 0 || *n == 0) { 25204 work[1] = 1.f; 25205 return 0; 25206 } 25207 25208 if (wantq) { 25209 25210 /* Form Q, determined by a call to SGEBRD to reduce an m-by-k 25211 matrix */ 25212 25213 if (*m >= *k) { 25214 25215 /* If m >= k, assume m >= n >= k */ 25216 25217 sorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & 25218 iinfo); 25219 25220 } else { 25221 25222 /* If m < k, assume m = n 25223 25224 Shift the vectors which define the elementary reflectors one 25225 column to the right, and set the first row and column of Q 25226 to those of the unit matrix */ 25227 25228 for (j = *m; j >= 2; --j) { 25229 a_ref(1, j) = 0.f; 25230 i__1 = *m; 25231 for (i__ = j + 1; i__ <= i__1; ++i__) { 25232 a_ref(i__, j) = a_ref(i__, j - 1); 25233 /* L10: */ 25234 } 25235 /* L20: */ 25236 } 25237 a_ref(1, 1) = 1.f; 25238 i__1 = *m; 25239 for (i__ = 2; i__ <= i__1; ++i__) { 25240 a_ref(i__, 1) = 0.f; 25241 /* L30: */ 25242 } 25243 if (*m > 1) { 25244 25245 /* Form Q(2:m,2:m) */ 25246 25247 i__1 = *m - 1; 25248 i__2 = *m - 1; 25249 i__3 = *m - 1; 25250 sorgqr_(&i__1, &i__2, &i__3, &a_ref(2, 2), lda, &tau[1], & 25251 work[1], lwork, &iinfo); 25252 } 25253 } 25254 } else { 25255 25256 /* Form P', determined by a call to SGEBRD to reduce a k-by-n 25257 matrix */ 25258 25259 if (*k < *n) { 25260 25261 /* If k < n, assume k <= m <= n */ 25262 25263 sorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & 25264 iinfo); 25265 25266 } else { 25267 25268 /* If k >= n, assume m = n 25269 25270 Shift the vectors which define the elementary reflectors one 25271 row downward, and set the first row and column of P' to 25272 those of the unit matrix */ 25273 25274 a_ref(1, 1) = 1.f; 25275 i__1 = *n; 25276 for (i__ = 2; i__ <= i__1; ++i__) { 25277 a_ref(i__, 1) = 0.f; 25278 /* L40: */ 25279 } 25280 i__1 = *n; 25281 for (j = 2; j <= i__1; ++j) { 25282 for (i__ = j - 1; i__ >= 2; --i__) { 25283 a_ref(i__, j) = a_ref(i__ - 1, j); 25284 /* L50: */ 25285 } 25286 a_ref(1, j) = 0.f; 25287 /* L60: */ 25288 } 25289 if (*n > 1) { 25290 25291 /* Form P'(2:n,2:n) */ 25292 25293 i__1 = *n - 1; 25294 i__2 = *n - 1; 25295 i__3 = *n - 1; 25296 sorglq_(&i__1, &i__2, &i__3, &a_ref(2, 2), lda, &tau[1], & 25297 work[1], lwork, &iinfo); 25298 } 25299 } 25300 } 25301 work[1] = (real) lwkopt; 25302 return 0; 25303 25304 /* End of SORGBR */ 25305 25306 } /* sorgbr_ */
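The LWORK = -1 convention described above means the optimal workspace size can be obtained with a dry run before the real call. A sketch of that two-step pattern; the array contents stand in for the output of a prior SGEBRD, which is not shown here:

/* Hedged sketch: workspace query followed by the actual call. */
integer m = 8, n = 8, k = 8, lda = 8, info = 0;
real a[8 * 8];                 /* would hold reflectors from SGEBRD */
real tau[8];                   /* corresponding scalar factors */
real wkopt;
integer lwork = -1;
sorgbr_("Q", &m, &n, &k, a, &lda, tau, &wkopt, &lwork, &info);  /* query only */
lwork = (integer) wkopt;
/* ...allocate a work array of length lwork, then repeat the call with it
   (and with A, TAU filled by SGEBRD) to overwrite A by Q. */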
int sorgl2_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info)
Definition at line 20442 of file lapackblas.cpp.
References a_ref, f2cmax, integer, slarf_(), sscal_(), and xerbla_().
Referenced by sorglq_().
20445 { 20446 /* -- LAPACK routine (version 3.0) -- 20447 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 20448 Courant Institute, Argonne National Lab, and Rice University 20449 June 30, 1999 20450 20451 20452 Purpose 20453 ======= 20454 20455 SORGL2 generates an m by n real matrix Q with orthonormal rows, 20456 which is defined as the first m rows of a product of k elementary 20457 reflectors of order n 20458 20459 Q = H(k) . . . H(2) H(1) 20460 20461 as returned by SGELQF. 20462 20463 Arguments 20464 ========= 20465 20466 M (input) INTEGER 20467 The number of rows of the matrix Q. M >= 0. 20468 20469 N (input) INTEGER 20470 The number of columns of the matrix Q. N >= M. 20471 20472 K (input) INTEGER 20473 The number of elementary reflectors whose product defines the 20474 matrix Q. M >= K >= 0. 20475 20476 A (input/output) REAL array, dimension (LDA,N) 20477 On entry, the i-th row must contain the vector which defines 20478 the elementary reflector H(i), for i = 1,2,...,k, as returned 20479 by SGELQF in the first k rows of its array argument A. 20480 On exit, the m-by-n matrix Q. 20481 20482 LDA (input) INTEGER 20483 The first dimension of the array A. LDA >= max(1,M). 20484 20485 TAU (input) REAL array, dimension (K) 20486 TAU(i) must contain the scalar factor of the elementary 20487 reflector H(i), as returned by SGELQF. 20488 20489 WORK (workspace) REAL array, dimension (M) 20490 20491 INFO (output) INTEGER 20492 = 0: successful exit 20493 < 0: if INFO = -i, the i-th argument has an illegal value 20494 20495 ===================================================================== 20496 20497 20498 Test the input arguments 20499 20500 Parameter adjustments */ 20501 /* System generated locals */ 20502 integer a_dim1, a_offset, i__1, i__2; 20503 real r__1; 20504 /* Local variables */ 20505 static integer i__, j, l; 20506 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 20507 slarf_(const char *, integer *, integer *, real *, integer *, real *, 20508 real *, integer *, real *), xerbla_(const char *, integer *); 20509 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 20510 20511 a_dim1 = *lda; 20512 a_offset = 1 + a_dim1 * 1; 20513 a -= a_offset; 20514 --tau; 20515 --work; 20516 20517 /* Function Body */ 20518 *info = 0; 20519 if (*m < 0) { 20520 *info = -1; 20521 } else if (*n < *m) { 20522 *info = -2; 20523 } else if (*k < 0 || *k > *m) { 20524 *info = -3; 20525 } else if (*lda < f2cmax(1,*m)) { 20526 *info = -5; 20527 } 20528 if (*info != 0) { 20529 i__1 = -(*info); 20530 xerbla_("SORGL2", &i__1); 20531 return 0; 20532 } 20533 20534 /* Quick return if possible */ 20535 20536 if (*m <= 0) { 20537 return 0; 20538 } 20539 20540 if (*k < *m) { 20541 20542 /* Initialise rows k+1:m to rows of the unit matrix */ 20543 20544 i__1 = *n; 20545 for (j = 1; j <= i__1; ++j) { 20546 i__2 = *m; 20547 for (l = *k + 1; l <= i__2; ++l) { 20548 a_ref(l, j) = 0.f; 20549 /* L10: */ 20550 } 20551 if (j > *k && j <= *m) { 20552 a_ref(j, j) = 1.f; 20553 } 20554 /* L20: */ 20555 } 20556 } 20557 20558 for (i__ = *k; i__ >= 1; --i__) { 20559 20560 /* Apply H(i) to A(i:m,i:n) from the right */ 20561 20562 if (i__ < *n) { 20563 if (i__ < *m) { 20564 a_ref(i__, i__) = 1.f; 20565 i__1 = *m - i__; 20566 i__2 = *n - i__ + 1; 20567 slarf_("Right", &i__1, &i__2, &a_ref(i__, i__), lda, &tau[i__] 20568 , &a_ref(i__ + 1, i__), lda, &work[1]); 20569 } 20570 i__1 = *n - i__; 20571 r__1 = -tau[i__]; 20572 sscal_(&i__1, &r__1, &a_ref(i__, i__ + 1), lda); 20573 } 20574 a_ref(i__, i__) = 1.f - 
tau[i__]; 20575 20576 /* Set A(i,1:i-1) to zero */ 20577 20578 i__1 = i__ - 1; 20579 for (l = 1; l <= i__1; ++l) { 20580 a_ref(i__, l) = 0.f; 20581 /* L30: */ 20582 } 20583 /* L40: */ 20584 } 20585 return 0; 20586 20587 /* End of SORGL2 */ 20588 20589 } /* sorgl2_ */
int sorglq_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info)
Definition at line 20595 of file lapackblas.cpp.
References a_ref, c__3, f2cmax, f2cmin, ilaenv_(), integer, nx, slarfb_(), slarft_(), sorgl2_(), and xerbla_().
Referenced by sgesvd_(), and sorgbr_().
20597 { 20598 /* -- LAPACK routine (version 3.0) -- 20599 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 20600 Courant Institute, Argonne National Lab, and Rice University 20601 June 30, 1999 20602 20603 20604 Purpose 20605 ======= 20606 20607 SORGLQ generates an M-by-N real matrix Q with orthonormal rows, 20608 which is defined as the first M rows of a product of K elementary 20609 reflectors of order N 20610 20611 Q = H(k) . . . H(2) H(1) 20612 20613 as returned by SGELQF. 20614 20615 Arguments 20616 ========= 20617 20618 M (input) INTEGER 20619 The number of rows of the matrix Q. M >= 0. 20620 20621 N (input) INTEGER 20622 The number of columns of the matrix Q. N >= M. 20623 20624 K (input) INTEGER 20625 The number of elementary reflectors whose product defines the 20626 matrix Q. M >= K >= 0. 20627 20628 A (input/output) REAL array, dimension (LDA,N) 20629 On entry, the i-th row must contain the vector which defines 20630 the elementary reflector H(i), for i = 1,2,...,k, as returned 20631 by SGELQF in the first k rows of its array argument A. 20632 On exit, the M-by-N matrix Q. 20633 20634 LDA (input) INTEGER 20635 The first dimension of the array A. LDA >= max(1,M). 20636 20637 TAU (input) REAL array, dimension (K) 20638 TAU(i) must contain the scalar factor of the elementary 20639 reflector H(i), as returned by SGELQF. 20640 20641 WORK (workspace/output) REAL array, dimension (LWORK) 20642 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 20643 20644 LWORK (input) INTEGER 20645 The dimension of the array WORK. LWORK >= max(1,M). 20646 For optimum performance LWORK >= M*NB, where NB is 20647 the optimal blocksize. 20648 20649 If LWORK = -1, then a workspace query is assumed; the routine 20650 only calculates the optimal size of the WORK array, returns 20651 this value as the first entry of the WORK array, and no error 20652 message related to LWORK is issued by XERBLA. 
20653 20654 INFO (output) INTEGER 20655 = 0: successful exit 20656 < 0: if INFO = -i, the i-th argument has an illegal value 20657 20658 ===================================================================== 20659 20660 20661 Test the input arguments 20662 20663 Parameter adjustments */ 20664 /* Table of constant values */ 20665 static integer c__1 = 1; 20666 static integer c_n1 = -1; 20667 static integer c__3 = 3; 20668 static integer c__2 = 2; 20669 20670 /* System generated locals */ 20671 integer a_dim1, a_offset, i__1, i__2, i__3; 20672 /* Local variables */ 20673 static integer i__, j, l, nbmin, iinfo; 20674 extern /* Subroutine */ int sorgl2_(integer *, integer *, integer *, real 20675 *, integer *, real *, real *, integer *); 20676 static integer ib, nb, ki, kk, nx; 20677 extern /* Subroutine */ int slarfb_(const char *, const char *, const char *, const char *, 20678 integer *, integer *, integer *, real *, integer *, real *, 20679 integer *, real *, integer *, real *, integer *), xerbla_(const char *, integer *); 20680 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 20681 integer *, integer *, ftnlen, ftnlen); 20682 extern /* Subroutine */ int slarft_(const char *, const char *, integer *, integer *, 20683 real *, integer *, real *, real *, integer *); 20684 static integer ldwork, lwkopt; 20685 static logical lquery; 20686 static integer iws; 20687 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 20688 20689 20690 a_dim1 = *lda; 20691 a_offset = 1 + a_dim1 * 1; 20692 a -= a_offset; 20693 --tau; 20694 --work; 20695 20696 /* Function Body */ 20697 *info = 0; 20698 nb = ilaenv_(&c__1, "SORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); 20699 lwkopt = f2cmax(1,*m) * nb; 20700 work[1] = (real) lwkopt; 20701 lquery = *lwork == -1; 20702 if (*m < 0) { 20703 *info = -1; 20704 } else if (*n < *m) { 20705 *info = -2; 20706 } else if (*k < 0 || *k > *m) { 20707 *info = -3; 20708 } else if (*lda < f2cmax(1,*m)) { 20709 *info = -5; 20710 } else if (*lwork < f2cmax(1,*m) && ! lquery) { 20711 *info = -8; 20712 } 20713 if (*info != 0) { 20714 i__1 = -(*info); 20715 xerbla_("SORGLQ", &i__1); 20716 return 0; 20717 } else if (lquery) { 20718 return 0; 20719 } 20720 20721 /* Quick return if possible */ 20722 20723 if (*m <= 0) { 20724 work[1] = 1.f; 20725 return 0; 20726 } 20727 20728 nbmin = 2; 20729 nx = 0; 20730 iws = *m; 20731 if (nb > 1 && nb < *k) { 20732 20733 /* Determine when to cross over from blocked to unblocked code. 20734 20735 Computing MAX */ 20736 i__1 = 0, i__2 = ilaenv_(&c__3, "SORGLQ", " ", m, n, k, &c_n1, ( 20737 ftnlen)6, (ftnlen)1); 20738 nx = f2cmax(i__1,i__2); 20739 if (nx < *k) { 20740 20741 /* Determine if workspace is large enough for blocked code. */ 20742 20743 ldwork = *m; 20744 iws = ldwork * nb; 20745 if (*lwork < iws) { 20746 20747 /* Not enough workspace to use optimal NB: reduce NB and 20748 determine the minimum value of NB. */ 20749 20750 nb = *lwork / ldwork; 20751 /* Computing MAX */ 20752 i__1 = 2, i__2 = ilaenv_(&c__2, "SORGLQ", " ", m, n, k, &c_n1, 20753 (ftnlen)6, (ftnlen)1); 20754 nbmin = f2cmax(i__1,i__2); 20755 } 20756 } 20757 } 20758 20759 if (nb >= nbmin && nb < *k && nx < *k) { 20760 20761 /* Use blocked code after the last block. 20762 The first kk rows are handled by the block method. */ 20763 20764 ki = (*k - nx - 1) / nb * nb; 20765 /* Computing MIN */ 20766 i__1 = *k, i__2 = ki + nb; 20767 kk = f2cmin(i__1,i__2); 20768 20769 /* Set A(kk+1:m,1:kk) to zero. 
*/ 20770 20771 i__1 = kk; 20772 for (j = 1; j <= i__1; ++j) { 20773 i__2 = *m; 20774 for (i__ = kk + 1; i__ <= i__2; ++i__) { 20775 a_ref(i__, j) = 0.f; 20776 /* L10: */ 20777 } 20778 /* L20: */ 20779 } 20780 } else { 20781 kk = 0; 20782 } 20783 20784 /* Use unblocked code for the last or only block. */ 20785 20786 if (kk < *m) { 20787 i__1 = *m - kk; 20788 i__2 = *n - kk; 20789 i__3 = *k - kk; 20790 sorgl2_(&i__1, &i__2, &i__3, &a_ref(kk + 1, kk + 1), lda, &tau[kk + 1] 20791 , &work[1], &iinfo); 20792 } 20793 20794 if (kk > 0) { 20795 20796 /* Use blocked code */ 20797 20798 i__1 = -nb; 20799 for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { 20800 /* Computing MIN */ 20801 i__2 = nb, i__3 = *k - i__ + 1; 20802 ib = f2cmin(i__2,i__3); 20803 if (i__ + ib <= *m) { 20804 20805 /* Form the triangular factor of the block reflector 20806 H = H(i) H(i+1) . . . H(i+ib-1) */ 20807 20808 i__2 = *n - i__ + 1; 20809 slarft_("Forward", "Rowwise", &i__2, &ib, &a_ref(i__, i__), 20810 lda, &tau[i__], &work[1], &ldwork); 20811 20812 /* Apply H' to A(i+ib:m,i:n) from the right */ 20813 20814 i__2 = *m - i__ - ib + 1; 20815 i__3 = *n - i__ + 1; 20816 slarfb_("Right", "Transpose", "Forward", "Rowwise", &i__2, & 20817 i__3, &ib, &a_ref(i__, i__), lda, &work[1], &ldwork, & 20818 a_ref(i__ + ib, i__), lda, &work[ib + 1], &ldwork); 20819 } 20820 20821 /* Apply H' to columns i:n of current block */ 20822 20823 i__2 = *n - i__ + 1; 20824 sorgl2_(&ib, &i__2, &ib, &a_ref(i__, i__), lda, &tau[i__], &work[ 20825 1], &iinfo); 20826 20827 /* Set columns 1:i-1 of current block to zero */ 20828 20829 i__2 = i__ - 1; 20830 for (j = 1; j <= i__2; ++j) { 20831 i__3 = i__ + ib - 1; 20832 for (l = i__; l <= i__3; ++l) { 20833 a_ref(l, j) = 0.f; 20834 /* L30: */ 20835 } 20836 /* L40: */ 20837 } 20838 /* L50: */ 20839 } 20840 } 20841 20842 work[1] = (real) iws; 20843 return 0; 20844 20845 /* End of SORGLQ */ 20846 20847 } /* sorglq_ */
int sorgql_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info)
Definition at line 6944 of file lapackblas.cpp.
References a_ref, c__1, c__2, c__3, c_n1, f2cmax, f2cmin, ilaenv_(), integer, nx, slarfb_(), slarft_(), sorg2l_(), and xerbla_().
Referenced by sorgtr_().
06946 { 06947 /* -- LAPACK routine (version 3.0) -- 06948 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 06949 Courant Institute, Argonne National Lab, and Rice University 06950 June 30, 1999 06951 06952 06953 Purpose 06954 ======= 06955 06956 SORGQL generates an M-by-N real matrix Q with orthonormal columns, 06957 which is defined as the last N columns of a product of K elementary 06958 reflectors of order M 06959 06960 Q = H(k) . . . H(2) H(1) 06961 06962 as returned by SGEQLF. 06963 06964 Arguments 06965 ========= 06966 06967 M (input) INTEGER 06968 The number of rows of the matrix Q. M >= 0. 06969 06970 N (input) INTEGER 06971 The number of columns of the matrix Q. M >= N >= 0. 06972 06973 K (input) INTEGER 06974 The number of elementary reflectors whose product defines the 06975 matrix Q. N >= K >= 0. 06976 06977 A (input/output) REAL array, dimension (LDA,N) 06978 On entry, the (n-k+i)-th column must contain the vector which 06979 defines the elementary reflector H(i), for i = 1,2,...,k, as 06980 returned by SGEQLF in the last k columns of its array 06981 argument A. 06982 On exit, the M-by-N matrix Q. 06983 06984 LDA (input) INTEGER 06985 The first dimension of the array A. LDA >= f2cmax(1,M). 06986 06987 TAU (input) REAL array, dimension (K) 06988 TAU(i) must contain the scalar factor of the elementary 06989 reflector H(i), as returned by SGEQLF. 06990 06991 WORK (workspace/output) REAL array, dimension (LWORK) 06992 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 06993 06994 LWORK (input) INTEGER 06995 The dimension of the array WORK. LWORK >= f2cmax(1,N). 06996 For optimum performance LWORK >= N*NB, where NB is the 06997 optimal blocksize. 06998 06999 If LWORK = -1, then a workspace query is assumed; the routine 07000 only calculates the optimal size of the WORK array, returns 07001 this value as the first entry of the WORK array, and no error 07002 message related to LWORK is issued by XERBLA. 
07003 07004 INFO (output) INTEGER 07005 = 0: successful exit 07006 < 0: if INFO = -i, the i-th argument has an illegal value 07007 07008 ===================================================================== 07009 07010 07011 Test the input arguments 07012 07013 Parameter adjustments */ 07014 /* Table of constant values */ 07015 static integer c__1 = 1; 07016 static integer c_n1 = -1; 07017 static integer c__3 = 3; 07018 static integer c__2 = 2; 07019 07020 /* System generated locals */ 07021 integer a_dim1, a_offset, i__1, i__2, i__3, i__4; 07022 /* Local variables */ 07023 static integer i__, j, l, nbmin, iinfo; 07024 extern /* Subroutine */ int sorg2l_(integer *, integer *, integer *, real 07025 *, integer *, real *, real *, integer *); 07026 static integer ib, nb, kk, nx; 07027 extern /* Subroutine */ int slarfb_(const char *, const char *, const char *, const char *, 07028 integer *, integer *, integer *, real *, integer *, real *, 07029 integer *, real *, integer *, real *, integer *), xerbla_(const char *, integer *); 07030 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 07031 integer *, integer *, ftnlen, ftnlen); 07032 extern /* Subroutine */ int slarft_(const char *, const char *, integer *, integer *, 07033 real *, integer *, real *, real *, integer *); 07034 static integer ldwork, lwkopt; 07035 static logical lquery; 07036 static integer iws; 07037 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 07038 07039 07040 a_dim1 = *lda; 07041 a_offset = 1 + a_dim1 * 1; 07042 a -= a_offset; 07043 --tau; 07044 --work; 07045 07046 /* Function Body */ 07047 *info = 0; 07048 nb = ilaenv_(&c__1, "SORGQL", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); 07049 lwkopt = f2cmax(1,*n) * nb; 07050 work[1] = (real) lwkopt; 07051 lquery = *lwork == -1; 07052 if (*m < 0) { 07053 *info = -1; 07054 } else if (*n < 0 || *n > *m) { 07055 *info = -2; 07056 } else if (*k < 0 || *k > *n) { 07057 *info = -3; 07058 } else if (*lda < f2cmax(1,*m)) { 07059 *info = -5; 07060 } else if (*lwork < f2cmax(1,*n) && ! lquery) { 07061 *info = -8; 07062 } 07063 if (*info != 0) { 07064 i__1 = -(*info); 07065 xerbla_("SORGQL", &i__1); 07066 return 0; 07067 } else if (lquery) { 07068 return 0; 07069 } 07070 07071 /* Quick return if possible */ 07072 07073 if (*n <= 0) { 07074 work[1] = 1.f; 07075 return 0; 07076 } 07077 07078 nbmin = 2; 07079 nx = 0; 07080 iws = *n; 07081 if (nb > 1 && nb < *k) { 07082 07083 /* Determine when to cross over from blocked to unblocked code. 07084 07085 Computing MAX */ 07086 i__1 = 0, i__2 = ilaenv_(&c__3, "SORGQL", " ", m, n, k, &c_n1, ( 07087 ftnlen)6, (ftnlen)1); 07088 nx = f2cmax(i__1,i__2); 07089 if (nx < *k) { 07090 07091 /* Determine if workspace is large enough for blocked code. */ 07092 07093 ldwork = *n; 07094 iws = ldwork * nb; 07095 if (*lwork < iws) { 07096 07097 /* Not enough workspace to use optimal NB: reduce NB and 07098 determine the minimum value of NB. */ 07099 07100 nb = *lwork / ldwork; 07101 /* Computing MAX */ 07102 i__1 = 2, i__2 = ilaenv_(&c__2, "SORGQL", " ", m, n, k, &c_n1, 07103 (ftnlen)6, (ftnlen)1); 07104 nbmin = f2cmax(i__1,i__2); 07105 } 07106 } 07107 } 07108 07109 if (nb >= nbmin && nb < *k && nx < *k) { 07110 07111 /* Use blocked code after the first block. 07112 The last kk columns are handled by the block method. 07113 07114 Computing MIN */ 07115 i__1 = *k, i__2 = (*k - nx + nb - 1) / nb * nb; 07116 kk = f2cmin(i__1,i__2); 07117 07118 /* Set A(m-kk+1:m,1:n-kk) to zero. 
*/ 07119 07120 i__1 = *n - kk; 07121 for (j = 1; j <= i__1; ++j) { 07122 i__2 = *m; 07123 for (i__ = *m - kk + 1; i__ <= i__2; ++i__) { 07124 a_ref(i__, j) = 0.f; 07125 /* L10: */ 07126 } 07127 /* L20: */ 07128 } 07129 } else { 07130 kk = 0; 07131 } 07132 07133 /* Use unblocked code for the first or only block. */ 07134 07135 i__1 = *m - kk; 07136 i__2 = *n - kk; 07137 i__3 = *k - kk; 07138 sorg2l_(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], &iinfo) 07139 ; 07140 07141 if (kk > 0) { 07142 07143 /* Use blocked code */ 07144 07145 i__1 = *k; 07146 i__2 = nb; 07147 for (i__ = *k - kk + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += 07148 i__2) { 07149 /* Computing MIN */ 07150 i__3 = nb, i__4 = *k - i__ + 1; 07151 ib = f2cmin(i__3,i__4); 07152 if (*n - *k + i__ > 1) { 07153 07154 /* Form the triangular factor of the block reflector 07155 H = H(i+ib-1) . . . H(i+1) H(i) */ 07156 07157 i__3 = *m - *k + i__ + ib - 1; 07158 slarft_("Backward", "Columnwise", &i__3, &ib, &a_ref(1, *n - * 07159 k + i__), lda, &tau[i__], &work[1], &ldwork); 07160 07161 /* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */ 07162 07163 i__3 = *m - *k + i__ + ib - 1; 07164 i__4 = *n - *k + i__ - 1; 07165 slarfb_("Left", "No transpose", "Backward", "Columnwise", & 07166 i__3, &i__4, &ib, &a_ref(1, *n - *k + i__), lda, & 07167 work[1], &ldwork, &a[a_offset], lda, &work[ib + 1], & 07168 ldwork); 07169 } 07170 07171 /* Apply H to rows 1:m-k+i+ib-1 of current block */ 07172 07173 i__3 = *m - *k + i__ + ib - 1; 07174 sorg2l_(&i__3, &ib, &ib, &a_ref(1, *n - *k + i__), lda, &tau[i__], 07175 &work[1], &iinfo); 07176 07177 /* Set rows m-k+i+ib:m of current block to zero */ 07178 07179 i__3 = *n - *k + i__ + ib - 1; 07180 for (j = *n - *k + i__; j <= i__3; ++j) { 07181 i__4 = *m; 07182 for (l = *m - *k + i__ + ib; l <= i__4; ++l) { 07183 a_ref(l, j) = 0.f; 07184 /* L30: */ 07185 } 07186 /* L40: */ 07187 } 07188 /* L50: */ 07189 } 07190 } 07191 07192 work[1] = (real) iws; 07193 return 0; 07194 07195 /* End of SORGQL */ 07196 07197 } /* sorgql_ */
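The LWORK = -1 query described in the argument list above is the intended way to size WORK before generating Q. The following sketch is illustrative only: it assumes lapackblas.h provides the integer/real typedefs and the sorgql_ prototype, and that A and TAU already contain elementary reflectors stored in the SGEQLF layout.

#include <cstdio>
#include <vector>
#include "lapackblas.h"

// Sketch: overwrite A with the M-by-N matrix Q whose columns are the last N
// columns of H(k) ... H(2) H(1).  A (lda x n, column-major) and TAU (k
// entries) are expected to hold the output of SGEQLF; M >= N >= K.
void form_q_from_ql(integer m, integer n, integer k,
                    std::vector<real>& a, integer lda,
                    std::vector<real>& tau)
{
    integer info = 0;

    // 1. Workspace query: with LWORK = -1 only WORK(1) is written, holding
    //    the optimal workspace size (max(1,N)*NB).
    real wkopt = 0.f;
    integer lwork = -1;
    sorgql_(&m, &n, &k, a.data(), &lda, tau.data(), &wkopt, &lwork, &info);

    // 2. Real call with the recommended workspace.
    lwork = (integer) wkopt;
    std::vector<real> work((size_t) lwork);
    sorgql_(&m, &n, &k, a.data(), &lda, tau.data(), work.data(), &lwork, &info);

    if (info != 0) {
        std::printf("sorgql_: argument %ld had an illegal value\n", (long) -(info));
    }
}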
int sorgqr_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info)
Definition at line 7205 of file lapackblas.cpp.
References a_ref, c__1, c__2, c__3, c_n1, f2cmax, f2cmin, ilaenv_(), integer, nx, slarfb_(), slarft_(), sorg2r_(), and xerbla_().
Referenced by sgesvd_(), sorgbr_(), and sorgtr_().
07207 { 07208 /* -- LAPACK routine (version 3.0) -- 07209 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 07210 Courant Institute, Argonne National Lab, and Rice University 07211 June 30, 1999 07212 07213 07214 Purpose 07215 ======= 07216 07217 SORGQR generates an M-by-N real matrix Q with orthonormal columns, 07218 which is defined as the first N columns of a product of K elementary 07219 reflectors of order M 07220 07221 Q = H(1) H(2) . . . H(k) 07222 07223 as returned by SGEQRF. 07224 07225 Arguments 07226 ========= 07227 07228 M (input) INTEGER 07229 The number of rows of the matrix Q. M >= 0. 07230 07231 N (input) INTEGER 07232 The number of columns of the matrix Q. M >= N >= 0. 07233 07234 K (input) INTEGER 07235 The number of elementary reflectors whose product defines the 07236 matrix Q. N >= K >= 0. 07237 07238 A (input/output) REAL array, dimension (LDA,N) 07239 On entry, the i-th column must contain the vector which 07240 defines the elementary reflector H(i), for i = 1,2,...,k, as 07241 returned by SGEQRF in the first k columns of its array 07242 argument A. 07243 On exit, the M-by-N matrix Q. 07244 07245 LDA (input) INTEGER 07246 The first dimension of the array A. LDA >= f2cmax(1,M). 07247 07248 TAU (input) REAL array, dimension (K) 07249 TAU(i) must contain the scalar factor of the elementary 07250 reflector H(i), as returned by SGEQRF. 07251 07252 WORK (workspace/output) REAL array, dimension (LWORK) 07253 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 07254 07255 LWORK (input) INTEGER 07256 The dimension of the array WORK. LWORK >= f2cmax(1,N). 07257 For optimum performance LWORK >= N*NB, where NB is the 07258 optimal blocksize. 07259 07260 If LWORK = -1, then a workspace query is assumed; the routine 07261 only calculates the optimal size of the WORK array, returns 07262 this value as the first entry of the WORK array, and no error 07263 message related to LWORK is issued by XERBLA. 
07264 07265 INFO (output) INTEGER 07266 = 0: successful exit 07267 < 0: if INFO = -i, the i-th argument has an illegal value 07268 07269 ===================================================================== 07270 07271 07272 Test the input arguments 07273 07274 Parameter adjustments */ 07275 /* Table of constant values */ 07276 static integer c__1 = 1; 07277 static integer c_n1 = -1; 07278 static integer c__3 = 3; 07279 static integer c__2 = 2; 07280 07281 /* System generated locals */ 07282 integer a_dim1, a_offset, i__1, i__2, i__3; 07283 /* Local variables */ 07284 static integer i__, j, l, nbmin, iinfo, ib; 07285 extern /* Subroutine */ int sorg2r_(integer *, integer *, integer *, real 07286 *, integer *, real *, real *, integer *); 07287 static integer nb, ki, kk, nx; 07288 extern /* Subroutine */ int slarfb_(const char *, const char *, const char *, const char *, 07289 integer *, integer *, integer *, real *, integer *, real *, 07290 integer *, real *, integer *, real *, integer *), xerbla_(const char *, integer *); 07291 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 07292 integer *, integer *, ftnlen, ftnlen); 07293 extern /* Subroutine */ int slarft_(const char *, const char *, integer *, integer *, 07294 real *, integer *, real *, real *, integer *); 07295 static integer ldwork, lwkopt; 07296 static logical lquery; 07297 static integer iws; 07298 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 07299 07300 07301 a_dim1 = *lda; 07302 a_offset = 1 + a_dim1 * 1; 07303 a -= a_offset; 07304 --tau; 07305 --work; 07306 07307 /* Function Body */ 07308 *info = 0; 07309 nb = ilaenv_(&c__1, "SORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); 07310 lwkopt = f2cmax(1,*n) * nb; 07311 work[1] = (real) lwkopt; 07312 lquery = *lwork == -1; 07313 if (*m < 0) { 07314 *info = -1; 07315 } else if (*n < 0 || *n > *m) { 07316 *info = -2; 07317 } else if (*k < 0 || *k > *n) { 07318 *info = -3; 07319 } else if (*lda < f2cmax(1,*m)) { 07320 *info = -5; 07321 } else if (*lwork < f2cmax(1,*n) && ! lquery) { 07322 *info = -8; 07323 } 07324 if (*info != 0) { 07325 i__1 = -(*info); 07326 xerbla_("SORGQR", &i__1); 07327 return 0; 07328 } else if (lquery) { 07329 return 0; 07330 } 07331 07332 /* Quick return if possible */ 07333 07334 if (*n <= 0) { 07335 work[1] = 1.f; 07336 return 0; 07337 } 07338 07339 nbmin = 2; 07340 nx = 0; 07341 iws = *n; 07342 if (nb > 1 && nb < *k) { 07343 07344 /* Determine when to cross over from blocked to unblocked code. 07345 07346 Computing MAX */ 07347 i__1 = 0, i__2 = ilaenv_(&c__3, "SORGQR", " ", m, n, k, &c_n1, ( 07348 ftnlen)6, (ftnlen)1); 07349 nx = f2cmax(i__1,i__2); 07350 if (nx < *k) { 07351 07352 /* Determine if workspace is large enough for blocked code. */ 07353 07354 ldwork = *n; 07355 iws = ldwork * nb; 07356 if (*lwork < iws) { 07357 07358 /* Not enough workspace to use optimal NB: reduce NB and 07359 determine the minimum value of NB. */ 07360 07361 nb = *lwork / ldwork; 07362 /* Computing MAX */ 07363 i__1 = 2, i__2 = ilaenv_(&c__2, "SORGQR", " ", m, n, k, &c_n1, 07364 (ftnlen)6, (ftnlen)1); 07365 nbmin = f2cmax(i__1,i__2); 07366 } 07367 } 07368 } 07369 07370 if (nb >= nbmin && nb < *k && nx < *k) { 07371 07372 /* Use blocked code after the last block. 07373 The first kk columns are handled by the block method. */ 07374 07375 ki = (*k - nx - 1) / nb * nb; 07376 /* Computing MIN */ 07377 i__1 = *k, i__2 = ki + nb; 07378 kk = f2cmin(i__1,i__2); 07379 07380 /* Set A(1:kk,kk+1:n) to zero. 
*/ 07381 07382 i__1 = *n; 07383 for (j = kk + 1; j <= i__1; ++j) { 07384 i__2 = kk; 07385 for (i__ = 1; i__ <= i__2; ++i__) { 07386 a_ref(i__, j) = 0.f; 07387 /* L10: */ 07388 } 07389 /* L20: */ 07390 } 07391 } else { 07392 kk = 0; 07393 } 07394 07395 /* Use unblocked code for the last or only block. */ 07396 07397 if (kk < *n) { 07398 i__1 = *m - kk; 07399 i__2 = *n - kk; 07400 i__3 = *k - kk; 07401 sorg2r_(&i__1, &i__2, &i__3, &a_ref(kk + 1, kk + 1), lda, &tau[kk + 1] 07402 , &work[1], &iinfo); 07403 } 07404 07405 if (kk > 0) { 07406 07407 /* Use blocked code */ 07408 07409 i__1 = -nb; 07410 for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { 07411 /* Computing MIN */ 07412 i__2 = nb, i__3 = *k - i__ + 1; 07413 ib = f2cmin(i__2,i__3); 07414 if (i__ + ib <= *n) { 07415 07416 /* Form the triangular factor of the block reflector 07417 H = H(i) H(i+1) . . . H(i+ib-1) */ 07418 07419 i__2 = *m - i__ + 1; 07420 slarft_("Forward", "Columnwise", &i__2, &ib, &a_ref(i__, i__), 07421 lda, &tau[i__], &work[1], &ldwork); 07422 07423 /* Apply H to A(i:m,i+ib:n) from the left */ 07424 07425 i__2 = *m - i__ + 1; 07426 i__3 = *n - i__ - ib + 1; 07427 slarfb_("Left", "No transpose", "Forward", "Columnwise", & 07428 i__2, &i__3, &ib, &a_ref(i__, i__), lda, &work[1], & 07429 ldwork, &a_ref(i__, i__ + ib), lda, &work[ib + 1], & 07430 ldwork); 07431 } 07432 07433 /* Apply H to rows i:m of current block */ 07434 07435 i__2 = *m - i__ + 1; 07436 sorg2r_(&i__2, &ib, &ib, &a_ref(i__, i__), lda, &tau[i__], &work[ 07437 1], &iinfo); 07438 07439 /* Set rows 1:i-1 of current block to zero */ 07440 07441 i__2 = i__ + ib - 1; 07442 for (j = i__; j <= i__2; ++j) { 07443 i__3 = i__ - 1; 07444 for (l = 1; l <= i__3; ++l) { 07445 a_ref(l, j) = 0.f; 07446 /* L30: */ 07447 } 07448 /* L40: */ 07449 } 07450 /* L50: */ 07451 } 07452 } 07453 07454 work[1] = (real) iws; 07455 return 0; 07456 07457 /* End of SORGQR */ 07458 07459 } /* sorgqr_ */
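A common pattern is to pair SORGQR with the QR factorization that produced the reflectors. The sketch below assumes sgeqrf_ is available with its usual f2c signature (it is referenced above but not documented on this page) and that m >= n.

#include <algorithm>
#include <vector>
#include "lapackblas.h"

// Sketch: factor A = Q*R with SGEQRF, then overwrite A (lda x n, m >= n
// assumed) with the explicit M-by-N orthonormal factor Q via SORGQR.
void explicit_q_from_qr(integer m, integer n,
                        std::vector<real>& a, integer lda)
{
    integer k = std::min(m, n);          // number of elementary reflectors
    std::vector<real> tau((size_t) k);
    integer info = 0, lwork = -1;
    real wkopt = 0.f;

    // Factor A = Q*R (workspace query, then the real call).
    sgeqrf_(&m, &n, a.data(), &lda, tau.data(), &wkopt, &lwork, &info);
    lwork = (integer) wkopt;
    std::vector<real> work((size_t) lwork);
    sgeqrf_(&m, &n, a.data(), &lda, tau.data(), work.data(), &lwork, &info);

    // Expand Q = H(1) H(2) ... H(k): its first n columns replace A.
    lwork = -1;
    sorgqr_(&m, &n, &k, a.data(), &lda, tau.data(), &wkopt, &lwork, &info);
    lwork = (integer) wkopt;
    work.resize((size_t) std::max<integer>(lwork, 1));
    sorgqr_(&m, &n, &k, a.data(), &lda, tau.data(), work.data(), &lwork, &info);
}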
int sorgtr_(char *uplo, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info)
Definition at line 7467 of file lapackblas.cpp.
References a_ref, c__1, c_n1, f2cmax, ilaenv_(), integer, lsame_(), sorgql_(), sorgqr_(), and xerbla_().
Referenced by ssyev_().
07469 { 07470 /* -- LAPACK routine (version 3.0) -- 07471 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 07472 Courant Institute, Argonne National Lab, and Rice University 07473 June 30, 1999 07474 07475 07476 Purpose 07477 ======= 07478 07479 SORGTR generates a real orthogonal matrix Q which is defined as the 07480 product of n-1 elementary reflectors of order N, as returned by 07481 SSYTRD: 07482 07483 if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), 07484 07485 if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). 07486 07487 Arguments 07488 ========= 07489 07490 UPLO (input) CHARACTER*1 07491 = 'U': Upper triangle of A contains elementary reflectors 07492 from SSYTRD; 07493 = 'L': Lower triangle of A contains elementary reflectors 07494 from SSYTRD. 07495 07496 N (input) INTEGER 07497 The order of the matrix Q. N >= 0. 07498 07499 A (input/output) REAL array, dimension (LDA,N) 07500 On entry, the vectors which define the elementary reflectors, 07501 as returned by SSYTRD. 07502 On exit, the N-by-N orthogonal matrix Q. 07503 07504 LDA (input) INTEGER 07505 The leading dimension of the array A. LDA >= f2cmax(1,N). 07506 07507 TAU (input) REAL array, dimension (N-1) 07508 TAU(i) must contain the scalar factor of the elementary 07509 reflector H(i), as returned by SSYTRD. 07510 07511 WORK (workspace/output) REAL array, dimension (LWORK) 07512 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 07513 07514 LWORK (input) INTEGER 07515 The dimension of the array WORK. LWORK >= f2cmax(1,N-1). 07516 For optimum performance LWORK >= (N-1)*NB, where NB is 07517 the optimal blocksize. 07518 07519 If LWORK = -1, then a workspace query is assumed; the routine 07520 only calculates the optimal size of the WORK array, returns 07521 this value as the first entry of the WORK array, and no error 07522 message related to LWORK is issued by XERBLA. 07523 07524 INFO (output) INTEGER 07525 = 0: successful exit 07526 < 0: if INFO = -i, the i-th argument had an illegal value 07527 07528 ===================================================================== 07529 07530 07531 Test the input arguments 07532 07533 Parameter adjustments */ 07534 /* Table of constant values */ 07535 static integer c__1 = 1; 07536 static integer c_n1 = -1; 07537 07538 /* System generated locals */ 07539 integer a_dim1, a_offset, i__1, i__2, i__3; 07540 /* Local variables */ 07541 static integer i__, j; 07542 extern logical lsame_(const char *, const char *); 07543 static integer iinfo; 07544 static logical upper; 07545 static integer nb; 07546 extern /* Subroutine */ int xerbla_(const char *, integer *); 07547 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 07548 integer *, integer *, ftnlen, ftnlen); 07549 extern /* Subroutine */ int sorgql_(integer *, integer *, integer *, real 07550 *, integer *, real *, real *, integer *, integer *), sorgqr_( 07551 integer *, integer *, integer *, real *, integer *, real *, real * 07552 , integer *, integer *); 07553 static logical lquery; 07554 static integer lwkopt; 07555 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 07556 07557 07558 a_dim1 = *lda; 07559 a_offset = 1 + a_dim1 * 1; 07560 a -= a_offset; 07561 --tau; 07562 --work; 07563 07564 /* Function Body */ 07565 *info = 0; 07566 lquery = *lwork == -1; 07567 upper = lsame_(uplo, "U"); 07568 if (! upper && ! 
lsame_(uplo, "L")) { 07569 *info = -1; 07570 } else if (*n < 0) { 07571 *info = -2; 07572 } else if (*lda < f2cmax(1,*n)) { 07573 *info = -4; 07574 } else /* if(complicated condition) */ { 07575 /* Computing MAX */ 07576 i__1 = 1, i__2 = *n - 1; 07577 if (*lwork < f2cmax(i__1,i__2) && ! lquery) { 07578 *info = -7; 07579 } 07580 } 07581 07582 if (*info == 0) { 07583 if (upper) { 07584 i__1 = *n - 1; 07585 i__2 = *n - 1; 07586 i__3 = *n - 1; 07587 nb = ilaenv_(&c__1, "SORGQL", " ", &i__1, &i__2, &i__3, &c_n1, ( 07588 ftnlen)6, (ftnlen)1); 07589 } else { 07590 i__1 = *n - 1; 07591 i__2 = *n - 1; 07592 i__3 = *n - 1; 07593 nb = ilaenv_(&c__1, "SORGQR", " ", &i__1, &i__2, &i__3, &c_n1, ( 07594 ftnlen)6, (ftnlen)1); 07595 } 07596 /* Computing MAX */ 07597 i__1 = 1, i__2 = *n - 1; 07598 lwkopt = f2cmax(i__1,i__2) * nb; 07599 work[1] = (real) lwkopt; 07600 } 07601 07602 if (*info != 0) { 07603 i__1 = -(*info); 07604 xerbla_("SORGTR", &i__1); 07605 return 0; 07606 } else if (lquery) { 07607 return 0; 07608 } 07609 07610 /* Quick return if possible */ 07611 07612 if (*n == 0) { 07613 work[1] = 1.f; 07614 return 0; 07615 } 07616 07617 if (upper) { 07618 07619 /* Q was determined by a call to SSYTRD with UPLO = 'U' 07620 07621 Shift the vectors which define the elementary reflectors one 07622 column to the left, and set the last row and column of Q to 07623 those of the unit matrix */ 07624 07625 i__1 = *n - 1; 07626 for (j = 1; j <= i__1; ++j) { 07627 i__2 = j - 1; 07628 for (i__ = 1; i__ <= i__2; ++i__) { 07629 a_ref(i__, j) = a_ref(i__, j + 1); 07630 /* L10: */ 07631 } 07632 a_ref(*n, j) = 0.f; 07633 /* L20: */ 07634 } 07635 i__1 = *n - 1; 07636 for (i__ = 1; i__ <= i__1; ++i__) { 07637 a_ref(i__, *n) = 0.f; 07638 /* L30: */ 07639 } 07640 a_ref(*n, *n) = 1.f; 07641 07642 /* Generate Q(1:n-1,1:n-1) */ 07643 07644 i__1 = *n - 1; 07645 i__2 = *n - 1; 07646 i__3 = *n - 1; 07647 sorgql_(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], 07648 lwork, &iinfo); 07649 07650 } else { 07651 07652 /* Q was determined by a call to SSYTRD with UPLO = 'L'. 07653 07654 Shift the vectors which define the elementary reflectors one 07655 column to the right, and set the first row and column of Q to 07656 those of the unit matrix */ 07657 07658 for (j = *n; j >= 2; --j) { 07659 a_ref(1, j) = 0.f; 07660 i__1 = *n; 07661 for (i__ = j + 1; i__ <= i__1; ++i__) { 07662 a_ref(i__, j) = a_ref(i__, j - 1); 07663 /* L40: */ 07664 } 07665 /* L50: */ 07666 } 07667 a_ref(1, 1) = 1.f; 07668 i__1 = *n; 07669 for (i__ = 2; i__ <= i__1; ++i__) { 07670 a_ref(i__, 1) = 0.f; 07671 /* L60: */ 07672 } 07673 if (*n > 1) { 07674 07675 /* Generate Q(2:n,2:n) */ 07676 07677 i__1 = *n - 1; 07678 i__2 = *n - 1; 07679 i__3 = *n - 1; 07680 sorgqr_(&i__1, &i__2, &i__3, &a_ref(2, 2), lda, &tau[1], &work[1], 07681 lwork, &iinfo); 07682 } 07683 } 07684 work[1] = (real) lwkopt; 07685 return 0; 07686 07687 /* End of SORGTR */ 07688 07689 } /* sorgtr_ */
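SORGTR is the companion of SSYTRD in the symmetric eigensolver path (see the ssyev_ reference above). The sketch below assumes ssytrd_ is available with its usual f2c signature; only sorgtr_ is documented here. The same UPLO must be passed to both calls.

#include <vector>
#include "lapackblas.h"

// Sketch: reduce a symmetric N-by-N matrix A to tridiagonal form T with
// SSYTRD (UPLO = 'L'), then overwrite A with the orthogonal Q such that
// A = Q * T * Q**T.  Caller supplies d (n), e (n-1) and tau (n-1) entries.
void tridiagonalize_and_form_q(integer n, std::vector<real>& a, integer lda,
                               std::vector<real>& d, std::vector<real>& e,
                               std::vector<real>& tau)
{
    char uplo = 'L';
    integer info = 0, lwork = -1;
    real wkopt = 0.f;

    ssytrd_(&uplo, &n, a.data(), &lda, d.data(), e.data(), tau.data(),
            &wkopt, &lwork, &info);
    lwork = (integer) wkopt;
    std::vector<real> work((size_t) lwork);
    ssytrd_(&uplo, &n, a.data(), &lda, d.data(), e.data(), tau.data(),
            work.data(), &lwork, &info);

    // Generate Q = H(1) H(2) ... H(n-1) from the reflectors left in A.
    lwork = -1;
    sorgtr_(&uplo, &n, a.data(), &lda, tau.data(), &wkopt, &lwork, &info);
    lwork = (integer) wkopt;
    work.assign((size_t) lwork, 0.f);
    sorgtr_(&uplo, &n, a.data(), &lda, tau.data(), work.data(), &lwork, &info);
}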
int sorm2r_(const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info)
Definition at line 24823 of file lapackblas.cpp.
References a_ref, c___ref, f2cmax, integer, lsame_(), slarf_(), and xerbla_().
Referenced by sormqr_().
24826 { 24827 /* -- LAPACK routine (version 3.0) -- 24828 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 24829 Courant Institute, Argonne National Lab, and Rice University 24830 February 29, 1992 24831 24832 24833 Purpose 24834 ======= 24835 24836 SORM2R overwrites the general real m by n matrix C with 24837 24838 Q * C if SIDE = 'L' and TRANS = 'N', or 24839 24840 Q'* C if SIDE = 'L' and TRANS = 'T', or 24841 24842 C * Q if SIDE = 'R' and TRANS = 'N', or 24843 24844 C * Q' if SIDE = 'R' and TRANS = 'T', 24845 24846 where Q is a real orthogonal matrix defined as the product of k 24847 elementary reflectors 24848 24849 Q = H(1) H(2) . . . H(k) 24850 24851 as returned by SGEQRF. Q is of order m if SIDE = 'L' and of order n 24852 if SIDE = 'R'. 24853 24854 Arguments 24855 ========= 24856 24857 SIDE (input) CHARACTER*1 24858 = 'L': apply Q or Q' from the Left 24859 = 'R': apply Q or Q' from the Right 24860 24861 TRANS (input) CHARACTER*1 24862 = 'N': apply Q (No transpose) 24863 = 'T': apply Q' (Transpose) 24864 24865 M (input) INTEGER 24866 The number of rows of the matrix C. M >= 0. 24867 24868 N (input) INTEGER 24869 The number of columns of the matrix C. N >= 0. 24870 24871 K (input) INTEGER 24872 The number of elementary reflectors whose product defines 24873 the matrix Q. 24874 If SIDE = 'L', M >= K >= 0; 24875 if SIDE = 'R', N >= K >= 0. 24876 24877 A (input) REAL array, dimension (LDA,K) 24878 The i-th column must contain the vector which defines the 24879 elementary reflector H(i), for i = 1,2,...,k, as returned by 24880 SGEQRF in the first k columns of its array argument A. 24881 A is modified by the routine but restored on exit. 24882 24883 LDA (input) INTEGER 24884 The leading dimension of the array A. 24885 If SIDE = 'L', LDA >= max(1,M); 24886 if SIDE = 'R', LDA >= max(1,N). 24887 24888 TAU (input) REAL array, dimension (K) 24889 TAU(i) must contain the scalar factor of the elementary 24890 reflector H(i), as returned by SGEQRF. 24891 24892 C (input/output) REAL array, dimension (LDC,N) 24893 On entry, the m by n matrix C. 24894 On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. 24895 24896 LDC (input) INTEGER 24897 The leading dimension of the array C. LDC >= max(1,M). 
24898 24899 WORK (workspace) REAL array, dimension 24900 (N) if SIDE = 'L', 24901 (M) if SIDE = 'R' 24902 24903 INFO (output) INTEGER 24904 = 0: successful exit 24905 < 0: if INFO = -i, the i-th argument had an illegal value 24906 24907 ===================================================================== 24908 24909 24910 Test the input arguments 24911 24912 Parameter adjustments */ 24913 /* Table of constant values */ 24914 static integer c__1 = 1; 24915 24916 /* System generated locals */ 24917 integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; 24918 /* Local variables */ 24919 static logical left; 24920 static integer i__; 24921 extern logical lsame_(const char *, const char *); 24922 extern /* Subroutine */ int slarf_(const char *, integer *, integer *, real *, 24923 integer *, real *, real *, integer *, real *); 24924 static integer i1, i2, i3, ic, jc, mi, ni, nq; 24925 extern /* Subroutine */ int xerbla_(const char *, integer *); 24926 static logical notran; 24927 static real aii; 24928 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 24929 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] 24930 24931 24932 a_dim1 = *lda; 24933 a_offset = 1 + a_dim1 * 1; 24934 a -= a_offset; 24935 --tau; 24936 c_dim1 = *ldc; 24937 c_offset = 1 + c_dim1 * 1; 24938 c__ -= c_offset; 24939 --work; 24940 24941 /* Function Body */ 24942 *info = 0; 24943 left = lsame_(side, "L"); 24944 notran = lsame_(trans, "N"); 24945 24946 /* NQ is the order of Q */ 24947 24948 if (left) { 24949 nq = *m; 24950 } else { 24951 nq = *n; 24952 } 24953 if (! left && ! lsame_(side, "R")) { 24954 *info = -1; 24955 } else if (! notran && ! lsame_(trans, "T")) { 24956 *info = -2; 24957 } else if (*m < 0) { 24958 *info = -3; 24959 } else if (*n < 0) { 24960 *info = -4; 24961 } else if (*k < 0 || *k > nq) { 24962 *info = -5; 24963 } else if (*lda < f2cmax(1,nq)) { 24964 *info = -7; 24965 } else if (*ldc < f2cmax(1,*m)) { 24966 *info = -10; 24967 } 24968 if (*info != 0) { 24969 i__1 = -(*info); 24970 xerbla_("SORM2R", &i__1); 24971 return 0; 24972 } 24973 24974 /* Quick return if possible */ 24975 24976 if (*m == 0 || *n == 0 || *k == 0) { 24977 return 0; 24978 } 24979 24980 if (left && ! notran || ! left && notran) { 24981 i1 = 1; 24982 i2 = *k; 24983 i3 = 1; 24984 } else { 24985 i1 = *k; 24986 i2 = 1; 24987 i3 = -1; 24988 } 24989 24990 if (left) { 24991 ni = *n; 24992 jc = 1; 24993 } else { 24994 mi = *m; 24995 ic = 1; 24996 } 24997 24998 i__1 = i2; 24999 i__2 = i3; 25000 for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { 25001 if (left) { 25002 25003 /* H(i) is applied to C(i:m,1:n) */ 25004 25005 mi = *m - i__ + 1; 25006 ic = i__; 25007 } else { 25008 25009 /* H(i) is applied to C(1:m,i:n) */ 25010 25011 ni = *n - i__ + 1; 25012 jc = i__; 25013 } 25014 25015 /* Apply H(i) */ 25016 25017 aii = a_ref(i__, i__); 25018 a_ref(i__, i__) = 1.f; 25019 slarf_(side, &mi, &ni, &a_ref(i__, i__), &c__1, &tau[i__], &c___ref( 25020 ic, jc), ldc, &work[1]); 25021 a_ref(i__, i__) = aii; 25022 /* L10: */ 25023 } 25024 return 0; 25025 25026 /* End of SORM2R */ 25027 25028 } /* sorm2r_ */
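Because SORM2R takes a fixed-size WORK array and no LWORK argument, it is convenient when C is a single vector. A minimal sketch, assuming A and TAU hold SGEQRF reflectors (K <= M, LDA >= M) as described above:

#include <vector>
#include "lapackblas.h"

// Sketch: overwrite the m-vector b with Q**T * b, where Q is the product of
// the k reflectors stored column-wise in A.  SIDE = 'L' needs N = 1 workspace
// entries and, unlike the blocked SORMQR documented below, no LWORK argument.
void apply_qt_to_vector(integer m, integer k,
                        std::vector<real>& a, integer lda,
                        std::vector<real>& tau,
                        std::vector<real>& b)
{
    integer n = 1;                       // C is m-by-1
    integer ldc = m;
    integer info = 0;
    std::vector<real> work((size_t) n);  // (N) entries for SIDE = 'L'

    sorm2r_("L", "T", &m, &n, &k, a.data(), &lda, tau.data(),
            b.data(), &ldc, work.data(), &info);
}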
int sormbr_(const char *vect, const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info)
Definition at line 21618 of file lapackblas.cpp.
References a_ref, c___ref, f2cmax, f2cmin, ilaenv_(), integer, lsame_(), s_cat(), sormlq_(), sormqr_(), and xerbla_().
Referenced by sgesvd_().
21621 { 21622 /* -- LAPACK routine (version 3.0) -- 21623 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 21624 Courant Institute, Argonne National Lab, and Rice University 21625 June 30, 1999 21626 21627 21628 Purpose 21629 ======= 21630 21631 If VECT = 'Q', SORMBR overwrites the general real M-by-N matrix C 21632 with 21633 SIDE = 'L' SIDE = 'R' 21634 TRANS = 'N': Q * C C * Q 21635 TRANS = 'T': Q**T * C C * Q**T 21636 21637 If VECT = 'P', SORMBR overwrites the general real M-by-N matrix C 21638 with 21639 SIDE = 'L' SIDE = 'R' 21640 TRANS = 'N': P * C C * P 21641 TRANS = 'T': P**T * C C * P**T 21642 21643 Here Q and P**T are the orthogonal matrices determined by SGEBRD when 21644 reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and 21645 P**T are defined as products of elementary reflectors H(i) and G(i) 21646 respectively. 21647 21648 Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the 21649 order of the orthogonal matrix Q or P**T that is applied. 21650 21651 If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: 21652 if nq >= k, Q = H(1) H(2) . . . H(k); 21653 if nq < k, Q = H(1) H(2) . . . H(nq-1). 21654 21655 If VECT = 'P', A is assumed to have been a K-by-NQ matrix: 21656 if k < nq, P = G(1) G(2) . . . G(k); 21657 if k >= nq, P = G(1) G(2) . . . G(nq-1). 21658 21659 Arguments 21660 ========= 21661 21662 VECT (input) CHARACTER*1 21663 = 'Q': apply Q or Q**T; 21664 = 'P': apply P or P**T. 21665 21666 SIDE (input) CHARACTER*1 21667 = 'L': apply Q, Q**T, P or P**T from the Left; 21668 = 'R': apply Q, Q**T, P or P**T from the Right. 21669 21670 TRANS (input) CHARACTER*1 21671 = 'N': No transpose, apply Q or P; 21672 = 'T': Transpose, apply Q**T or P**T. 21673 21674 M (input) INTEGER 21675 The number of rows of the matrix C. M >= 0. 21676 21677 N (input) INTEGER 21678 The number of columns of the matrix C. N >= 0. 21679 21680 K (input) INTEGER 21681 If VECT = 'Q', the number of columns in the original 21682 matrix reduced by SGEBRD. 21683 If VECT = 'P', the number of rows in the original 21684 matrix reduced by SGEBRD. 21685 K >= 0. 21686 21687 A (input) REAL array, dimension 21688 (LDA,min(nq,K)) if VECT = 'Q' 21689 (LDA,nq) if VECT = 'P' 21690 The vectors which define the elementary reflectors H(i) and 21691 G(i), whose products determine the matrices Q and P, as 21692 returned by SGEBRD. 21693 21694 LDA (input) INTEGER 21695 The leading dimension of the array A. 21696 If VECT = 'Q', LDA >= max(1,nq); 21697 if VECT = 'P', LDA >= max(1,min(nq,K)). 21698 21699 TAU (input) REAL array, dimension (min(nq,K)) 21700 TAU(i) must contain the scalar factor of the elementary 21701 reflector H(i) or G(i) which determines Q or P, as returned 21702 by SGEBRD in the array argument TAUQ or TAUP. 21703 21704 C (input/output) REAL array, dimension (LDC,N) 21705 On entry, the M-by-N matrix C. 21706 On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q 21707 or P*C or P**T*C or C*P or C*P**T. 21708 21709 LDC (input) INTEGER 21710 The leading dimension of the array C. LDC >= max(1,M). 21711 21712 WORK (workspace/output) REAL array, dimension (LWORK) 21713 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 21714 21715 LWORK (input) INTEGER 21716 The dimension of the array WORK. 21717 If SIDE = 'L', LWORK >= max(1,N); 21718 if SIDE = 'R', LWORK >= max(1,M). 21719 For optimum performance LWORK >= N*NB if SIDE = 'L', and 21720 LWORK >= M*NB if SIDE = 'R', where NB is the optimal 21721 blocksize. 
21722 21723 If LWORK = -1, then a workspace query is assumed; the routine 21724 only calculates the optimal size of the WORK array, returns 21725 this value as the first entry of the WORK array, and no error 21726 message related to LWORK is issued by XERBLA. 21727 21728 INFO (output) INTEGER 21729 = 0: successful exit 21730 < 0: if INFO = -i, the i-th argument had an illegal value 21731 21732 ===================================================================== 21733 21734 21735 Test the input arguments 21736 21737 Parameter adjustments */ 21738 /* Table of constant values */ 21739 static integer c__1 = 1; 21740 static integer c_n1 = -1; 21741 static integer c__2 = 2; 21742 21743 typedef const char *address; 21744 21745 /* System generated locals */ 21746 address a__1[2]; 21747 integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; 21748 char ch__1[2]; 21749 /* Builtin functions 21750 Subroutine */ int s_cat(char *, const char **, integer *, integer *, ftnlen); 21751 /* Local variables */ 21752 static logical left; 21753 extern logical lsame_(const char *, const char *); 21754 static integer iinfo, i1, i2, nb, mi, ni, nq, nw; 21755 extern /* Subroutine */ int xerbla_(const char *, integer *); 21756 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 21757 integer *, integer *, ftnlen, ftnlen); 21758 static logical notran, applyq; 21759 static char transt[1]; 21760 extern /* Subroutine */ int sormlq_(const char *, const char *, integer *, integer *, 21761 integer *, real *, integer *, real *, real *, integer *, real *, 21762 integer *, integer *); 21763 static integer lwkopt; 21764 static logical lquery; 21765 extern /* Subroutine */ int sormqr_(const char *, const char *, integer *, integer *, 21766 integer *, real *, integer *, real *, real *, integer *, real *, 21767 integer *, integer *); 21768 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 21769 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] 21770 21771 21772 a_dim1 = *lda; 21773 a_offset = 1 + a_dim1 * 1; 21774 a -= a_offset; 21775 --tau; 21776 c_dim1 = *ldc; 21777 c_offset = 1 + c_dim1 * 1; 21778 c__ -= c_offset; 21779 --work; 21780 21781 /* Function Body */ 21782 *info = 0; 21783 applyq = lsame_(vect, "Q"); 21784 left = lsame_(side, "L"); 21785 notran = lsame_(trans, "N"); 21786 lquery = *lwork == -1; 21787 21788 /* NQ is the order of Q or P and NW is the minimum dimension of WORK */ 21789 21790 if (left) { 21791 nq = *m; 21792 nw = *n; 21793 } else { 21794 nq = *n; 21795 nw = *m; 21796 } 21797 if (! applyq && ! lsame_(vect, "P")) { 21798 *info = -1; 21799 } else if (! left && ! lsame_(side, "R")) { 21800 *info = -2; 21801 } else if (! notran && ! lsame_(trans, "T")) { 21802 *info = -3; 21803 } else if (*m < 0) { 21804 *info = -4; 21805 } else if (*n < 0) { 21806 *info = -5; 21807 } else if (*k < 0) { 21808 *info = -6; 21809 } else /* if(complicated condition) */ { 21810 /* Computing MAX */ 21811 i__1 = 1, i__2 = f2cmin(nq,*k); 21812 if (applyq && *lda < f2cmax(1,nq) || ! applyq && *lda < f2cmax(i__1,i__2)) { 21813 *info = -8; 21814 } else if (*ldc < f2cmax(1,*m)) { 21815 *info = -11; 21816 } else if (*lwork < f2cmax(1,nw) && ! 
lquery) { 21817 *info = -13; 21818 } 21819 } 21820 21821 if (*info == 0) { 21822 if (applyq) { 21823 if (left) { 21824 /* Writing concatenation */ 21825 i__3[0] = 1, a__1[0] = side; 21826 i__3[1] = 1, a__1[1] = trans; 21827 s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); 21828 i__1 = *m - 1; 21829 i__2 = *m - 1; 21830 nb = ilaenv_(&c__1, "SORMQR", ch__1, &i__1, n, &i__2, &c_n1, ( 21831 ftnlen)6, (ftnlen)2); 21832 } else { 21833 /* Writing concatenation */ 21834 i__3[0] = 1, a__1[0] = side; 21835 i__3[1] = 1, a__1[1] = trans; 21836 s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); 21837 i__1 = *n - 1; 21838 i__2 = *n - 1; 21839 nb = ilaenv_(&c__1, "SORMQR", ch__1, m, &i__1, &i__2, &c_n1, ( 21840 ftnlen)6, (ftnlen)2); 21841 } 21842 } else { 21843 if (left) { 21844 /* Writing concatenation */ 21845 i__3[0] = 1, a__1[0] = side; 21846 i__3[1] = 1, a__1[1] = trans; 21847 s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); 21848 i__1 = *m - 1; 21849 i__2 = *m - 1; 21850 nb = ilaenv_(&c__1, "SORMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( 21851 ftnlen)6, (ftnlen)2); 21852 } else { 21853 /* Writing concatenation */ 21854 i__3[0] = 1, a__1[0] = side; 21855 i__3[1] = 1, a__1[1] = trans; 21856 s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); 21857 i__1 = *n - 1; 21858 i__2 = *n - 1; 21859 nb = ilaenv_(&c__1, "SORMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( 21860 ftnlen)6, (ftnlen)2); 21861 } 21862 } 21863 lwkopt = f2cmax(1,nw) * nb; 21864 work[1] = (real) lwkopt; 21865 } 21866 21867 if (*info != 0) { 21868 i__1 = -(*info); 21869 xerbla_("SORMBR", &i__1); 21870 return 0; 21871 } else if (lquery) { 21872 return 0; 21873 } 21874 21875 /* Quick return if possible */ 21876 21877 work[1] = 1.f; 21878 if (*m == 0 || *n == 0) { 21879 return 0; 21880 } 21881 21882 if (applyq) { 21883 21884 /* Apply Q */ 21885 21886 if (nq >= *k) { 21887 21888 /* Q was determined by a call to SGEBRD with nq >= k */ 21889 21890 sormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ 21891 c_offset], ldc, &work[1], lwork, &iinfo); 21892 } else if (nq > 1) { 21893 21894 /* Q was determined by a call to SGEBRD with nq < k */ 21895 21896 if (left) { 21897 mi = *m - 1; 21898 ni = *n; 21899 i1 = 2; 21900 i2 = 1; 21901 } else { 21902 mi = *m; 21903 ni = *n - 1; 21904 i1 = 1; 21905 i2 = 2; 21906 } 21907 i__1 = nq - 1; 21908 sormqr_(side, trans, &mi, &ni, &i__1, &a_ref(2, 1), lda, &tau[1], 21909 &c___ref(i1, i2), ldc, &work[1], lwork, &iinfo); 21910 } 21911 } else { 21912 21913 /* Apply P */ 21914 21915 if (notran) { 21916 *(unsigned char *)transt = 'T'; 21917 } else { 21918 *(unsigned char *)transt = 'N'; 21919 } 21920 if (nq > *k) { 21921 21922 /* P was determined by a call to SGEBRD with nq > k */ 21923 21924 sormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ 21925 c_offset], ldc, &work[1], lwork, &iinfo); 21926 } else if (nq > 1) { 21927 21928 /* P was determined by a call to SGEBRD with nq <= k */ 21929 21930 if (left) { 21931 mi = *m - 1; 21932 ni = *n; 21933 i1 = 2; 21934 i2 = 1; 21935 } else { 21936 mi = *m; 21937 ni = *n - 1; 21938 i1 = 1; 21939 i2 = 2; 21940 } 21941 i__1 = nq - 1; 21942 sormlq_(side, transt, &mi, &ni, &i__1, &a_ref(1, 2), lda, &tau[1], 21943 &c___ref(i1, i2), ldc, &work[1], lwork, &iinfo); 21944 } 21945 } 21946 work[1] = (real) lwkopt; 21947 return 0; 21948 21949 /* End of SORMBR */ 21950 21951 } /* sormbr_ */
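In the SVD path (see the sgesvd_ reference above) SORMBR applies the orthogonal factors of the bidiagonal reduction without forming them explicitly. A minimal sketch, assuming A and TAUQ hold SGEBRD output for an m-by-n matrix with m >= n, applying Q**T from the left:

#include <vector>
#include "lapackblas.h"

// Sketch: form Q**T * C, where Q comes from SGEBRD (A = Q * B * P**T) and C
// is m-by-ncols.  VECT = 'Q' selects Q rather than P; K is the number of
// columns of the original matrix reduced by SGEBRD.
void apply_gebrd_q_transposed(integer m, integer n, integer ncols,
                              std::vector<real>& a, integer lda,
                              std::vector<real>& tauq,
                              std::vector<real>& c, integer ldc)
{
    integer k = n;
    integer info = 0, lwork = -1;
    real wkopt = 0.f;

    sormbr_("Q", "L", "T", &m, &ncols, &k, a.data(), &lda, tauq.data(),
            c.data(), &ldc, &wkopt, &lwork, &info);
    lwork = (integer) wkopt;
    std::vector<real> work((size_t) lwork);
    sormbr_("Q", "L", "T", &m, &ncols, &k, a.data(), &lda, tauq.data(),
            c.data(), &ldc, work.data(), &lwork, &info);
}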
int sorml2_(const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info)
Definition at line 24069 of file lapackblas.cpp.
References a_ref, c___ref, f2cmax, integer, lsame_(), slarf_(), and xerbla_().
Referenced by sormlq_().
24072 { 24073 /* -- LAPACK routine (version 3.0) -- 24074 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 24075 Courant Institute, Argonne National Lab, and Rice University 24076 February 29, 1992 24077 24078 24079 Purpose 24080 ======= 24081 24082 SORML2 overwrites the general real m by n matrix C with 24083 24084 Q * C if SIDE = 'L' and TRANS = 'N', or 24085 24086 Q'* C if SIDE = 'L' and TRANS = 'T', or 24087 24088 C * Q if SIDE = 'R' and TRANS = 'N', or 24089 24090 C * Q' if SIDE = 'R' and TRANS = 'T', 24091 24092 where Q is a real orthogonal matrix defined as the product of k 24093 elementary reflectors 24094 24095 Q = H(k) . . . H(2) H(1) 24096 24097 as returned by SGELQF. Q is of order m if SIDE = 'L' and of order n 24098 if SIDE = 'R'. 24099 24100 Arguments 24101 ========= 24102 24103 SIDE (input) CHARACTER*1 24104 = 'L': apply Q or Q' from the Left 24105 = 'R': apply Q or Q' from the Right 24106 24107 TRANS (input) CHARACTER*1 24108 = 'N': apply Q (No transpose) 24109 = 'T': apply Q' (Transpose) 24110 24111 M (input) INTEGER 24112 The number of rows of the matrix C. M >= 0. 24113 24114 N (input) INTEGER 24115 The number of columns of the matrix C. N >= 0. 24116 24117 K (input) INTEGER 24118 The number of elementary reflectors whose product defines 24119 the matrix Q. 24120 If SIDE = 'L', M >= K >= 0; 24121 if SIDE = 'R', N >= K >= 0. 24122 24123 A (input) REAL array, dimension 24124 (LDA,M) if SIDE = 'L', 24125 (LDA,N) if SIDE = 'R' 24126 The i-th row must contain the vector which defines the 24127 elementary reflector H(i), for i = 1,2,...,k, as returned by 24128 SGELQF in the first k rows of its array argument A. 24129 A is modified by the routine but restored on exit. 24130 24131 LDA (input) INTEGER 24132 The leading dimension of the array A. LDA >= max(1,K). 24133 24134 TAU (input) REAL array, dimension (K) 24135 TAU(i) must contain the scalar factor of the elementary 24136 reflector H(i), as returned by SGELQF. 24137 24138 C (input/output) REAL array, dimension (LDC,N) 24139 On entry, the m by n matrix C. 24140 On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. 24141 24142 LDC (input) INTEGER 24143 The leading dimension of the array C. LDC >= max(1,M). 
24144 24145 WORK (workspace) REAL array, dimension 24146 (N) if SIDE = 'L', 24147 (M) if SIDE = 'R' 24148 24149 INFO (output) INTEGER 24150 = 0: successful exit 24151 < 0: if INFO = -i, the i-th argument had an illegal value 24152 24153 ===================================================================== 24154 24155 24156 Test the input arguments 24157 24158 Parameter adjustments */ 24159 /* System generated locals */ 24160 integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; 24161 /* Local variables */ 24162 static logical left; 24163 static integer i__; 24164 extern logical lsame_(const char *, const char *); 24165 extern /* Subroutine */ int slarf_(const char *, integer *, integer *, real *, 24166 integer *, real *, real *, integer *, real *); 24167 static integer i1, i2, i3, ic, jc, mi, ni, nq; 24168 extern /* Subroutine */ int xerbla_(const char *, integer *); 24169 static logical notran; 24170 static real aii; 24171 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 24172 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] 24173 24174 a_dim1 = *lda; 24175 a_offset = 1 + a_dim1 * 1; 24176 a -= a_offset; 24177 --tau; 24178 c_dim1 = *ldc; 24179 c_offset = 1 + c_dim1 * 1; 24180 c__ -= c_offset; 24181 --work; 24182 24183 /* Function Body */ 24184 *info = 0; 24185 left = lsame_(side, "L"); 24186 notran = lsame_(trans, "N"); 24187 24188 /* NQ is the order of Q */ 24189 24190 if (left) { 24191 nq = *m; 24192 } else { 24193 nq = *n; 24194 } 24195 if (! left && ! lsame_(side, "R")) { 24196 *info = -1; 24197 } else if (! notran && ! lsame_(trans, "T")) { 24198 *info = -2; 24199 } else if (*m < 0) { 24200 *info = -3; 24201 } else if (*n < 0) { 24202 *info = -4; 24203 } else if (*k < 0 || *k > nq) { 24204 *info = -5; 24205 } else if (*lda < f2cmax(1,*k)) { 24206 *info = -7; 24207 } else if (*ldc < f2cmax(1,*m)) { 24208 *info = -10; 24209 } 24210 if (*info != 0) { 24211 i__1 = -(*info); 24212 xerbla_("SORML2", &i__1); 24213 return 0; 24214 } 24215 24216 /* Quick return if possible */ 24217 24218 if (*m == 0 || *n == 0 || *k == 0) { 24219 return 0; 24220 } 24221 24222 if (left && notran || ! left && ! notran) { 24223 i1 = 1; 24224 i2 = *k; 24225 i3 = 1; 24226 } else { 24227 i1 = *k; 24228 i2 = 1; 24229 i3 = -1; 24230 } 24231 24232 if (left) { 24233 ni = *n; 24234 jc = 1; 24235 } else { 24236 mi = *m; 24237 ic = 1; 24238 } 24239 24240 i__1 = i2; 24241 i__2 = i3; 24242 for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { 24243 if (left) { 24244 24245 /* H(i) is applied to C(i:m,1:n) */ 24246 24247 mi = *m - i__ + 1; 24248 ic = i__; 24249 } else { 24250 24251 /* H(i) is applied to C(1:m,i:n) */ 24252 24253 ni = *n - i__ + 1; 24254 jc = i__; 24255 } 24256 24257 /* Apply H(i) */ 24258 24259 aii = a_ref(i__, i__); 24260 a_ref(i__, i__) = 1.f; 24261 slarf_(side, &mi, &ni, &a_ref(i__, i__), lda, &tau[i__], &c___ref(ic, 24262 jc), ldc, &work[1]); 24263 a_ref(i__, i__) = aii; 24264 /* L10: */ 24265 } 24266 return 0; 24267 24268 /* End of SORML2 */ 24269 24270 } /* sorml2_ */
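The row-wise reflector storage (LDA >= max(1,K)) means A can be a short, wide block even when Q has large order. A minimal sketch, assuming A and TAU hold SGELQF output for a k-by-n matrix, multiplying a single 1-by-n row vector by Q from the right:

#include <vector>
#include "lapackblas.h"

// Sketch: overwrite the row vector c (1-by-n) with c * Q, where Q of order n
// is the product of k reflectors stored row-wise in A.  SIDE = 'R' needs only
// M = 1 workspace entries; as with SORM2R there is no LWORK argument.
void row_times_q(integer n, integer k,
                 std::vector<real>& a, integer lda,
                 std::vector<real>& tau,
                 std::vector<real>& c)
{
    integer m = 1;                       // C has a single row
    integer ldc = 1;
    integer info = 0;
    std::vector<real> work((size_t) m);  // (M) entries for SIDE = 'R'

    sorml2_("R", "N", &m, &n, &k, a.data(), &lda, tau.data(),
            c.data(), &ldc, work.data(), &info);
}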
int sormlq_(const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info)
Definition at line 22190 of file lapackblas.cpp.
References a_ref, c___ref, f2cmax, f2cmin, ilaenv_(), integer, lsame_(), s_cat(), slarfb_(), slarft_(), sorml2_(), t, and xerbla_().
Referenced by sormbr_().
22193 { 22194 /* -- LAPACK routine (version 3.0) -- 22195 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 22196 Courant Institute, Argonne National Lab, and Rice University 22197 June 30, 1999 22198 22199 22200 Purpose 22201 ======= 22202 22203 SORMLQ overwrites the general real M-by-N matrix C with 22204 22205 SIDE = 'L' SIDE = 'R' 22206 TRANS = 'N': Q * C C * Q 22207 TRANS = 'T': Q**T * C C * Q**T 22208 22209 where Q is a real orthogonal matrix defined as the product of k 22210 elementary reflectors 22211 22212 Q = H(k) . . . H(2) H(1) 22213 22214 as returned by SGELQF. Q is of order M if SIDE = 'L' and of order N 22215 if SIDE = 'R'. 22216 22217 Arguments 22218 ========= 22219 22220 SIDE (input) CHARACTER*1 22221 = 'L': apply Q or Q**T from the Left; 22222 = 'R': apply Q or Q**T from the Right. 22223 22224 TRANS (input) CHARACTER*1 22225 = 'N': No transpose, apply Q; 22226 = 'T': Transpose, apply Q**T. 22227 22228 M (input) INTEGER 22229 The number of rows of the matrix C. M >= 0. 22230 22231 N (input) INTEGER 22232 The number of columns of the matrix C. N >= 0. 22233 22234 K (input) INTEGER 22235 The number of elementary reflectors whose product defines 22236 the matrix Q. 22237 If SIDE = 'L', M >= K >= 0; 22238 if SIDE = 'R', N >= K >= 0. 22239 22240 A (input) REAL array, dimension 22241 (LDA,M) if SIDE = 'L', 22242 (LDA,N) if SIDE = 'R' 22243 The i-th row must contain the vector which defines the 22244 elementary reflector H(i), for i = 1,2,...,k, as returned by 22245 SGELQF in the first k rows of its array argument A. 22246 A is modified by the routine but restored on exit. 22247 22248 LDA (input) INTEGER 22249 The leading dimension of the array A. LDA >= max(1,K). 22250 22251 TAU (input) REAL array, dimension (K) 22252 TAU(i) must contain the scalar factor of the elementary 22253 reflector H(i), as returned by SGELQF. 22254 22255 C (input/output) REAL array, dimension (LDC,N) 22256 On entry, the M-by-N matrix C. 22257 On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. 22258 22259 LDC (input) INTEGER 22260 The leading dimension of the array C. LDC >= max(1,M). 22261 22262 WORK (workspace/output) REAL array, dimension (LWORK) 22263 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 22264 22265 LWORK (input) INTEGER 22266 The dimension of the array WORK. 22267 If SIDE = 'L', LWORK >= max(1,N); 22268 if SIDE = 'R', LWORK >= max(1,M). 22269 For optimum performance LWORK >= N*NB if SIDE = 'L', and 22270 LWORK >= M*NB if SIDE = 'R', where NB is the optimal 22271 blocksize. 22272 22273 If LWORK = -1, then a workspace query is assumed; the routine 22274 only calculates the optimal size of the WORK array, returns 22275 this value as the first entry of the WORK array, and no error 22276 message related to LWORK is issued by XERBLA. 
22277 22278 INFO (output) INTEGER 22279 = 0: successful exit 22280 < 0: if INFO = -i, the i-th argument had an illegal value 22281 22282 ===================================================================== 22283 22284 22285 Test the input arguments 22286 22287 Parameter adjustments */ 22288 /* Table of constant values */ 22289 static integer c__1 = 1; 22290 static integer c_n1 = -1; 22291 static integer c__2 = 2; 22292 static integer c__65 = 65; 22293 22294 typedef const char *address; 22295 /* System generated locals */ 22296 address a__1[2]; 22297 integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, 22298 i__5; 22299 char ch__1[2]; 22300 /* Builtin functions 22301 Subroutine */ int s_cat(char *, const char **, integer *, integer *, ftnlen); 22302 /* Local variables */ 22303 static logical left; 22304 static integer i__; 22305 static real t[4160] /* was [65][64] */; 22306 extern logical lsame_(const char *, const char *); 22307 static integer nbmin, iinfo, i1, i2, i3, ib, ic, jc; 22308 extern /* Subroutine */ int sorml2_(const char *, const char *, integer *, integer *, 22309 integer *, real *, integer *, real *, real *, integer *, real *, 22310 integer *); 22311 static integer nb, mi, ni, nq, nw; 22312 extern /* Subroutine */ int slarfb_(const char *, const char *, const char *, const char *, 22313 integer *, integer *, integer *, real *, integer *, real *, 22314 integer *, real *, integer *, real *, integer *), xerbla_(const char *, integer *); 22315 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 22316 integer *, integer *, ftnlen, ftnlen); 22317 extern /* Subroutine */ int slarft_(const char *, const char *, integer *, integer *, 22318 real *, integer *, real *, real *, integer *); 22319 static logical notran; 22320 static integer ldwork; 22321 static char transt[1]; 22322 static integer lwkopt; 22323 static logical lquery; 22324 static integer iws; 22325 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 22326 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] 22327 22328 22329 a_dim1 = *lda; 22330 a_offset = 1 + a_dim1 * 1; 22331 a -= a_offset; 22332 --tau; 22333 c_dim1 = *ldc; 22334 c_offset = 1 + c_dim1 * 1; 22335 c__ -= c_offset; 22336 --work; 22337 22338 /* Function Body */ 22339 *info = 0; 22340 left = lsame_(side, "L"); 22341 notran = lsame_(trans, "N"); 22342 lquery = *lwork == -1; 22343 22344 /* NQ is the order of Q and NW is the minimum dimension of WORK */ 22345 22346 if (left) { 22347 nq = *m; 22348 nw = *n; 22349 } else { 22350 nq = *n; 22351 nw = *m; 22352 } 22353 if (! left && ! lsame_(side, "R")) { 22354 *info = -1; 22355 } else if (! notran && ! lsame_(trans, "T")) { 22356 *info = -2; 22357 } else if (*m < 0) { 22358 *info = -3; 22359 } else if (*n < 0) { 22360 *info = -4; 22361 } else if (*k < 0 || *k > nq) { 22362 *info = -5; 22363 } else if (*lda < f2cmax(1,*k)) { 22364 *info = -7; 22365 } else if (*ldc < f2cmax(1,*m)) { 22366 *info = -10; 22367 } else if (*lwork < f2cmax(1,nw) && ! lquery) { 22368 *info = -12; 22369 } 22370 22371 if (*info == 0) { 22372 22373 /* Determine the block size. NB may be at most NBMAX, where NBMAX 22374 is used to define the local array T. 
22375 22376 Computing MIN 22377 Writing concatenation */ 22378 i__3[0] = 1, a__1[0] = side; 22379 i__3[1] = 1, a__1[1] = trans; 22380 s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); 22381 i__1 = 64, i__2 = ilaenv_(&c__1, "SORMLQ", ch__1, m, n, k, &c_n1, ( 22382 ftnlen)6, (ftnlen)2); 22383 nb = f2cmin(i__1,i__2); 22384 lwkopt = f2cmax(1,nw) * nb; 22385 work[1] = (real) lwkopt; 22386 } 22387 22388 if (*info != 0) { 22389 i__1 = -(*info); 22390 xerbla_("SORMLQ", &i__1); 22391 return 0; 22392 } else if (lquery) { 22393 return 0; 22394 } 22395 22396 /* Quick return if possible */ 22397 22398 if (*m == 0 || *n == 0 || *k == 0) { 22399 work[1] = 1.f; 22400 return 0; 22401 } 22402 22403 nbmin = 2; 22404 ldwork = nw; 22405 if (nb > 1 && nb < *k) { 22406 iws = nw * nb; 22407 if (*lwork < iws) { 22408 nb = *lwork / ldwork; 22409 /* Computing MAX 22410 Writing concatenation */ 22411 i__3[0] = 1, a__1[0] = side; 22412 i__3[1] = 1, a__1[1] = trans; 22413 s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); 22414 i__1 = 2, i__2 = ilaenv_(&c__2, "SORMLQ", ch__1, m, n, k, &c_n1, ( 22415 ftnlen)6, (ftnlen)2); 22416 nbmin = f2cmax(i__1,i__2); 22417 } 22418 } else { 22419 iws = nw; 22420 } 22421 22422 if (nb < nbmin || nb >= *k) { 22423 22424 /* Use unblocked code */ 22425 22426 sorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ 22427 c_offset], ldc, &work[1], &iinfo); 22428 } else { 22429 22430 /* Use blocked code */ 22431 22432 if (left && notran || ! left && ! notran) { 22433 i1 = 1; 22434 i2 = *k; 22435 i3 = nb; 22436 } else { 22437 i1 = (*k - 1) / nb * nb + 1; 22438 i2 = 1; 22439 i3 = -nb; 22440 } 22441 22442 if (left) { 22443 ni = *n; 22444 jc = 1; 22445 } else { 22446 mi = *m; 22447 ic = 1; 22448 } 22449 22450 if (notran) { 22451 *(unsigned char *)transt = 'T'; 22452 } else { 22453 *(unsigned char *)transt = 'N'; 22454 } 22455 22456 i__1 = i2; 22457 i__2 = i3; 22458 for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { 22459 /* Computing MIN */ 22460 i__4 = nb, i__5 = *k - i__ + 1; 22461 ib = f2cmin(i__4,i__5); 22462 22463 /* Form the triangular factor of the block reflector 22464 H = H(i) H(i+1) . . . H(i+ib-1) */ 22465 22466 i__4 = nq - i__ + 1; 22467 slarft_("Forward", "Rowwise", &i__4, &ib, &a_ref(i__, i__), lda, & 22468 tau[i__], t, &c__65); 22469 if (left) { 22470 22471 /* H or H' is applied to C(i:m,1:n) */ 22472 22473 mi = *m - i__ + 1; 22474 ic = i__; 22475 } else { 22476 22477 /* H or H' is applied to C(1:m,i:n) */ 22478 22479 ni = *n - i__ + 1; 22480 jc = i__; 22481 } 22482 22483 /* Apply H or H' */ 22484 22485 slarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a_ref( 22486 i__, i__), lda, t, &c__65, &c___ref(ic, jc), ldc, &work[1] 22487 , &ldwork); 22488 /* L10: */ 22489 } 22490 } 22491 work[1] = (real) lwkopt; 22492 return 0; 22493 22494 /* End of SORMLQ */ 22495 22496 } /* sormlq_ */
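A minimal sketch of the blocked driver, assuming A and TAU hold the SGELQF reflectors (stored row-wise, LDA >= max(1,K)) and C is a general m-by-n matrix; the example forms C * Q**T from the right:

#include <vector>
#include "lapackblas.h"

// Sketch: overwrite C (ldc x n) with C * Q**T, where Q of order n is defined
// by the k row-stored reflectors in A.  The LWORK = -1 query sizes WORK just
// as in the other blocked routines on this page.
void multiply_by_lq_q_transposed(integer m, integer n, integer k,
                                 std::vector<real>& a, integer lda,
                                 std::vector<real>& tau,
                                 std::vector<real>& c, integer ldc)
{
    integer info = 0, lwork = -1;
    real wkopt = 0.f;

    sormlq_("R", "T", &m, &n, &k, a.data(), &lda, tau.data(),
            c.data(), &ldc, &wkopt, &lwork, &info);
    lwork = (integer) wkopt;
    std::vector<real> work((size_t) lwork);
    sormlq_("R", "T", &m, &n, &k, a.data(), &lda, tau.data(),
            c.data(), &ldc, work.data(), &lwork, &info);
}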
int sormqr_(const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info)
Definition at line 22503 of file lapackblas.cpp.
References a_ref, c___ref, f2cmax, f2cmin, ilaenv_(), integer, lsame_(), s_cat(), slarfb_(), slarft_(), sorm2r_(), t, and xerbla_().
Referenced by sormbr_().
22506 { 22507 /* -- LAPACK routine (version 3.0) -- 22508 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 22509 Courant Institute, Argonne National Lab, and Rice University 22510 June 30, 1999 22511 22512 22513 Purpose 22514 ======= 22515 22516 SORMQR overwrites the general real M-by-N matrix C with 22517 22518 SIDE = 'L' SIDE = 'R' 22519 TRANS = 'N': Q * C C * Q 22520 TRANS = 'T': Q**T * C C * Q**T 22521 22522 where Q is a real orthogonal matrix defined as the product of k 22523 elementary reflectors 22524 22525 Q = H(1) H(2) . . . H(k) 22526 22527 as returned by SGEQRF. Q is of order M if SIDE = 'L' and of order N 22528 if SIDE = 'R'. 22529 22530 Arguments 22531 ========= 22532 22533 SIDE (input) CHARACTER*1 22534 = 'L': apply Q or Q**T from the Left; 22535 = 'R': apply Q or Q**T from the Right. 22536 22537 TRANS (input) CHARACTER*1 22538 = 'N': No transpose, apply Q; 22539 = 'T': Transpose, apply Q**T. 22540 22541 M (input) INTEGER 22542 The number of rows of the matrix C. M >= 0. 22543 22544 N (input) INTEGER 22545 The number of columns of the matrix C. N >= 0. 22546 22547 K (input) INTEGER 22548 The number of elementary reflectors whose product defines 22549 the matrix Q. 22550 If SIDE = 'L', M >= K >= 0; 22551 if SIDE = 'R', N >= K >= 0. 22552 22553 A (input) REAL array, dimension (LDA,K) 22554 The i-th column must contain the vector which defines the 22555 elementary reflector H(i), for i = 1,2,...,k, as returned by 22556 SGEQRF in the first k columns of its array argument A. 22557 A is modified by the routine but restored on exit. 22558 22559 LDA (input) INTEGER 22560 The leading dimension of the array A. 22561 If SIDE = 'L', LDA >= max(1,M); 22562 if SIDE = 'R', LDA >= max(1,N). 22563 22564 TAU (input) REAL array, dimension (K) 22565 TAU(i) must contain the scalar factor of the elementary 22566 reflector H(i), as returned by SGEQRF. 22567 22568 C (input/output) REAL array, dimension (LDC,N) 22569 On entry, the M-by-N matrix C. 22570 On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. 22571 22572 LDC (input) INTEGER 22573 The leading dimension of the array C. LDC >= max(1,M). 22574 22575 WORK (workspace/output) REAL array, dimension (LWORK) 22576 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 22577 22578 LWORK (input) INTEGER 22579 The dimension of the array WORK. 22580 If SIDE = 'L', LWORK >= max(1,N); 22581 if SIDE = 'R', LWORK >= max(1,M). 22582 For optimum performance LWORK >= N*NB if SIDE = 'L', and 22583 LWORK >= M*NB if SIDE = 'R', where NB is the optimal 22584 blocksize. 22585 22586 If LWORK = -1, then a workspace query is assumed; the routine 22587 only calculates the optimal size of the WORK array, returns 22588 this value as the first entry of the WORK array, and no error 22589 message related to LWORK is issued by XERBLA. 
22590 22591 INFO (output) INTEGER 22592 = 0: successful exit 22593 < 0: if INFO = -i, the i-th argument had an illegal value 22594 22595 ===================================================================== 22596 22597 22598 Test the input arguments 22599 22600 Parameter adjustments */ 22601 /* Table of constant values */ 22602 static integer c__1 = 1; 22603 static integer c_n1 = -1; 22604 static integer c__2 = 2; 22605 static integer c__65 = 65; 22606 22607 /* System generated locals */ 22608 typedef const char *address; 22609 address a__1[2]; 22610 integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, 22611 i__5; 22612 char ch__1[2]; 22613 /* Builtin functions 22614 Subroutine */ int s_cat(char *, const char **, integer *, integer *, ftnlen); 22615 /* Local variables */ 22616 static logical left; 22617 static integer i__; 22618 static real t[4160] /* was [65][64] */; 22619 extern logical lsame_(const char *, const char *); 22620 static integer nbmin, iinfo, i1, i2, i3, ib, ic, jc, nb; 22621 extern /* Subroutine */ int sorm2r_(const char *, const char *, integer *, integer *, 22622 integer *, real *, integer *, real *, real *, integer *, real *, 22623 integer *); 22624 static integer mi, ni, nq, nw; 22625 extern /* Subroutine */ int slarfb_(const char *, const char *, const char *, const char *, 22626 integer *, integer *, integer *, real *, integer *, real *, 22627 integer *, real *, integer *, real *, integer *), xerbla_(const char *, integer *); 22628 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 22629 integer *, integer *, ftnlen, ftnlen); 22630 extern /* Subroutine */ int slarft_(const char *, const char *, integer *, integer *, 22631 real *, integer *, real *, real *, integer *); 22632 static logical notran; 22633 static integer ldwork, lwkopt; 22634 static logical lquery; 22635 static integer iws; 22636 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 22637 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] 22638 22639 22640 a_dim1 = *lda; 22641 a_offset = 1 + a_dim1 * 1; 22642 a -= a_offset; 22643 --tau; 22644 c_dim1 = *ldc; 22645 c_offset = 1 + c_dim1 * 1; 22646 c__ -= c_offset; 22647 --work; 22648 22649 /* Function Body */ 22650 *info = 0; 22651 left = lsame_(side, "L"); 22652 notran = lsame_(trans, "N"); 22653 lquery = *lwork == -1; 22654 22655 /* NQ is the order of Q and NW is the minimum dimension of WORK */ 22656 22657 if (left) { 22658 nq = *m; 22659 nw = *n; 22660 } else { 22661 nq = *n; 22662 nw = *m; 22663 } 22664 if (! left && ! lsame_(side, "R")) { 22665 *info = -1; 22666 } else if (! notran && ! lsame_(trans, "T")) { 22667 *info = -2; 22668 } else if (*m < 0) { 22669 *info = -3; 22670 } else if (*n < 0) { 22671 *info = -4; 22672 } else if (*k < 0 || *k > nq) { 22673 *info = -5; 22674 } else if (*lda < f2cmax(1,nq)) { 22675 *info = -7; 22676 } else if (*ldc < f2cmax(1,*m)) { 22677 *info = -10; 22678 } else if (*lwork < f2cmax(1,nw) && ! lquery) { 22679 *info = -12; 22680 } 22681 22682 if (*info == 0) { 22683 22684 /* Determine the block size. NB may be at most NBMAX, where NBMAX 22685 is used to define the local array T. 
22686 22687 Computing MIN 22688 Writing concatenation */ 22689 i__3[0] = 1, a__1[0] = side; 22690 i__3[1] = 1, a__1[1] = trans; 22691 s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); 22692 i__1 = 64, i__2 = ilaenv_(&c__1, "SORMQR", ch__1, m, n, k, &c_n1, ( 22693 ftnlen)6, (ftnlen)2); 22694 nb = f2cmin(i__1,i__2); 22695 lwkopt = f2cmax(1,nw) * nb; 22696 work[1] = (real) lwkopt; 22697 } 22698 22699 if (*info != 0) { 22700 i__1 = -(*info); 22701 xerbla_("SORMQR", &i__1); 22702 return 0; 22703 } else if (lquery) { 22704 return 0; 22705 } 22706 22707 /* Quick return if possible */ 22708 22709 if (*m == 0 || *n == 0 || *k == 0) { 22710 work[1] = 1.f; 22711 return 0; 22712 } 22713 22714 nbmin = 2; 22715 ldwork = nw; 22716 if (nb > 1 && nb < *k) { 22717 iws = nw * nb; 22718 if (*lwork < iws) { 22719 nb = *lwork / ldwork; 22720 /* Computing MAX 22721 Writing concatenation */ 22722 i__3[0] = 1, a__1[0] = side; 22723 i__3[1] = 1, a__1[1] = trans; 22724 s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); 22725 i__1 = 2, i__2 = ilaenv_(&c__2, "SORMQR", ch__1, m, n, k, &c_n1, ( 22726 ftnlen)6, (ftnlen)2); 22727 nbmin = f2cmax(i__1,i__2); 22728 } 22729 } else { 22730 iws = nw; 22731 } 22732 22733 if (nb < nbmin || nb >= *k) { 22734 22735 /* Use unblocked code */ 22736 22737 sorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ 22738 c_offset], ldc, &work[1], &iinfo); 22739 } else { 22740 22741 /* Use blocked code */ 22742 22743 if (left && ! notran || ! left && notran) { 22744 i1 = 1; 22745 i2 = *k; 22746 i3 = nb; 22747 } else { 22748 i1 = (*k - 1) / nb * nb + 1; 22749 i2 = 1; 22750 i3 = -nb; 22751 } 22752 22753 if (left) { 22754 ni = *n; 22755 jc = 1; 22756 } else { 22757 mi = *m; 22758 ic = 1; 22759 } 22760 22761 i__1 = i2; 22762 i__2 = i3; 22763 for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { 22764 /* Computing MIN */ 22765 i__4 = nb, i__5 = *k - i__ + 1; 22766 ib = f2cmin(i__4,i__5); 22767 22768 /* Form the triangular factor of the block reflector 22769 H = H(i) H(i+1) . . . H(i+ib-1) */ 22770 22771 i__4 = nq - i__ + 1; 22772 slarft_("Forward", "Columnwise", &i__4, &ib, &a_ref(i__, i__), 22773 lda, &tau[i__], t, &c__65); 22774 if (left) { 22775 22776 /* H or H' is applied to C(i:m,1:n) */ 22777 22778 mi = *m - i__ + 1; 22779 ic = i__; 22780 } else { 22781 22782 /* H or H' is applied to C(1:m,i:n) */ 22783 22784 ni = *n - i__ + 1; 22785 jc = i__; 22786 } 22787 22788 /* Apply H or H' */ 22789 22790 slarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, & 22791 a_ref(i__, i__), lda, t, &c__65, &c___ref(ic, jc), ldc, & 22792 work[1], &ldwork); 22793 /* L10: */ 22794 } 22795 } 22796 work[1] = (real) lwkopt; 22797 return 0; 22798 22799 /* End of SORMQR */ 22800 22801 } /* sormqr_ */
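A typical use of SORMQR is the first half of a QR-based least-squares solve: multiply the right-hand sides by Q**T and then solve with the triangular factor R left in A (the backsolve itself is outside the routines documented on this page). A minimal sketch, assuming A and TAU hold SGEQRF output with m >= n:

#include <vector>
#include "lapackblas.h"

// Sketch: overwrite B (ldb x nrhs) with Q**T * B, where Q is defined by the
// n reflectors from SGEQRF stored in A.  A triangular solve with R would then
// give the least-squares solution of A*x = b.
void apply_qt_to_rhs(integer m, integer n, integer nrhs,
                     std::vector<real>& a, integer lda,
                     std::vector<real>& tau,
                     std::vector<real>& b, integer ldb)
{
    integer k = n;
    integer info = 0, lwork = -1;
    real wkopt = 0.f;

    sormqr_("L", "T", &m, &nrhs, &k, a.data(), &lda, tau.data(),
            b.data(), &ldb, &wkopt, &lwork, &info);
    lwork = (integer) wkopt;
    std::vector<real> work((size_t) lwork);
    sormqr_("L", "T", &m, &nrhs, &k, a.data(), &lda, tau.data(),
            b.data(), &ldb, work.data(), &lwork, &info);
}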
int srot_ | ( | integer * | n, | |
real * | sx, | |||
integer * | incx, | |||
real * | sy, | |||
integer * | incy, | |||
real * | c__, | |||
real * | s | |||
) |
Definition at line 14381 of file lapackblas.cpp.
References integer.
Referenced by sbdsqr_(), slaed2_(), slaed8_(), and slaeda_().
14383 { 14384 /* System generated locals */ 14385 integer i__1; 14386 /* Local variables */ 14387 static integer i__; 14388 static real stemp; 14389 static integer ix, iy; 14390 /* applies a plane rotation. 14391 jack dongarra, linpack, 3/11/78. 14392 modified 12/3/93, array(1) declarations changed to array(*) 14393 Parameter adjustments */ 14394 --sy; 14395 --sx; 14396 /* Function Body */ 14397 if (*n <= 0) { 14398 return 0; 14399 } 14400 if (*incx == 1 && *incy == 1) { 14401 goto L20; 14402 } 14403 /* code for unequal increments or equal increments not equal 14404 to 1 */ 14405 ix = 1; 14406 iy = 1; 14407 if (*incx < 0) { 14408 ix = (-(*n) + 1) * *incx + 1; 14409 } 14410 if (*incy < 0) { 14411 iy = (-(*n) + 1) * *incy + 1; 14412 } 14413 i__1 = *n; 14414 for (i__ = 1; i__ <= i__1; ++i__) { 14415 stemp = *c__ * sx[ix] + *s * sy[iy]; 14416 sy[iy] = *c__ * sy[iy] - *s * sx[ix]; 14417 sx[ix] = stemp; 14418 ix += *incx; 14419 iy += *incy; 14420 /* L10: */ 14421 } 14422 return 0; 14423 /* code for both increments equal to 1 */ 14424 L20: 14425 i__1 = *n; 14426 for (i__ = 1; i__ <= i__1; ++i__) { 14427 stemp = *c__ * sx[i__] + *s * sy[i__]; 14428 sy[i__] = *c__ * sy[i__] - *s * sx[i__]; 14429 sx[i__] = stemp; 14430 /* L30: */ 14431 } 14432 return 0; 14433 } /* srot_ */
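A call to srot_ only needs the two vectors and the cosine/sine pair; both vectors are updated in place. The following is a minimal sketch, assuming the integer/real typedefs and the srot_ prototype come from lapackblas.h, using a 90-degree rotation so the effect is easy to check by eye.

#include <cstdio>
#include "lapackblas.h"   /* assumed to declare integer, real and srot_ */

int main()
{
    integer n = 3, incx = 1, incy = 1;
    real x[3] = {1.f, 2.f, 3.f};
    real y[3] = {4.f, 5.f, 6.f};
    real c = 0.f, s = 1.f;   /* 90-degree rotation: x <- y, y <- -x */

    srot_(&n, x, &incx, y, &incy, &c, &s);

    for (int i = 0; i < 3; ++i)
        std::printf("x[%d] = %g   y[%d] = %g\n", i, (double) x[i], i, (double) y[i]);
    return 0;
}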
int sscal_ | ( | integer * | n, | |
real * | sa, | |||
real * | sx, | |||
integer * | incx | |||
) |
Definition at line 7697 of file lapackblas.cpp.
References integer.
Referenced by EMAN::PCA::Lanczos_ooc(), sbdsqr_(), slabrd_(), slaed2_(), slaed8_(), slarfg_(), slatrd_(), sorg2l_(), sorg2r_(), sorgl2_(), sstevd_(), and ssyev_().
07698 { 07699 /* System generated locals */ 07700 integer i__1, i__2; 07701 /* Local variables */ 07702 static integer i__, m, nincx, mp1; 07703 /* scales a vector by a constant. 07704 uses unrolled loops for increment equal to 1. 07705 jack dongarra, linpack, 3/11/78. 07706 modified 3/93 to return if incx .le. 0. 07707 modified 12/3/93, array(1) declarations changed to array(*) 07708 Parameter adjustments */ 07709 --sx; 07710 /* Function Body */ 07711 if (*n <= 0 || *incx <= 0) { 07712 return 0; 07713 } 07714 if (*incx == 1) { 07715 goto L20; 07716 } 07717 /* code for increment not equal to 1 */ 07718 nincx = *n * *incx; 07719 i__1 = nincx; 07720 i__2 = *incx; 07721 for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { 07722 sx[i__] = *sa * sx[i__]; 07723 /* L10: */ 07724 } 07725 return 0; 07726 /* code for increment equal to 1 07727 clean-up loop */ 07728 L20: 07729 m = *n % 5; 07730 if (m == 0) { 07731 goto L40; 07732 } 07733 i__2 = m; 07734 for (i__ = 1; i__ <= i__2; ++i__) { 07735 sx[i__] = *sa * sx[i__]; 07736 /* L30: */ 07737 } 07738 if (*n < 5) { 07739 return 0; 07740 } 07741 L40: 07742 mp1 = m + 1; 07743 i__2 = *n; 07744 for (i__ = mp1; i__ <= i__2; i__ += 5) { 07745 sx[i__] = *sa * sx[i__]; 07746 sx[i__ + 1] = *sa * sx[i__ + 1]; 07747 sx[i__ + 2] = *sa * sx[i__ + 2]; 07748 sx[i__ + 3] = *sa * sx[i__ + 3]; 07749 sx[i__ + 4] = *sa * sx[i__ + 4]; 07750 /* L50: */ 07751 } 07752 return 0; 07753 } /* sscal_ */
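The sketch below assumes the standard BLAS SSCAL argument order (n, sa, sx, incx), which is consistent with the names used in the body above and with the extern declarations elsewhere in this file, together with the usual lapackblas.h typedefs. N = 6 is chosen so that both the n % 5 clean-up loop and the unrolled loop are exercised.

#include <cstdio>
#include "lapackblas.h"   /* assumed to declare integer, real and sscal_ */

int main()
{
    integer n = 6, incx = 1;
    real alpha = 2.f;
    real x[6] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};

    /* N = 6 with unit stride hits the n % 5 clean-up loop (one element)
       followed by one pass of the unrolled loop in the listing above. */
    sscal_(&n, &alpha, x, &incx);

    for (int i = 0; i < 6; ++i)
        std::printf("x[%d] = %g\n", i, (double) x[i]);
    return 0;
}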
int sstedc_ | ( | const char * | compz, | |
integer * | n, | |||
real * | d__, | |||
real * | e, | |||
real * | z__, | |||
integer * | ldz, | |||
real * | work, | |||
integer * | lwork, | |||
integer * | iwork, | |||
integer * | liwork, | |||
integer * | info | |||
) |
Definition at line 11235 of file lapackblas.cpp.
References c__0, c__1, c__2, dabs, f2cmax, ilaenv_(), integer, log(), lsame_(), pow_ii(), sgemm_(), slacpy_(), slaed0_(), slamch_(), slanst_(), slascl_(), slaset_(), slasrt_(), sqrt(), ssteqr_(), ssterf_(), sswap_(), xerbla_(), and z___ref.
Referenced by sstevd_().
11238 { 11239 /* -- LAPACK driver routine (version 3.0) -- 11240 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 11241 Courant Institute, Argonne National Lab, and Rice University 11242 June 30, 1999 11243 11244 11245 Purpose 11246 ======= 11247 11248 SSTEDC computes all eigenvalues and, optionally, eigenvectors of a 11249 symmetric tridiagonal matrix using the divide and conquer method. 11250 The eigenvectors of a full or band real symmetric matrix can also be 11251 found if SSYTRD or SSPTRD or SSBTRD has been used to reduce this 11252 matrix to tridiagonal form. 11253 11254 This code makes very mild assumptions about floating point 11255 arithmetic. It will work on machines with a guard digit in 11256 add/subtract, or on those binary machines without guard digits 11257 which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. 11258 It could conceivably fail on hexadecimal or decimal machines 11259 without guard digits, but we know of none. See SLAED3 for details. 11260 11261 Arguments 11262 ========= 11263 11264 COMPZ (input) CHARACTER*1 11265 = 'N': Compute eigenvalues only. 11266 = 'I': Compute eigenvectors of tridiagonal matrix also. 11267 = 'V': Compute eigenvectors of original dense symmetric 11268 matrix also. On entry, Z contains the orthogonal 11269 matrix used to reduce the original matrix to 11270 tridiagonal form. 11271 11272 N (input) INTEGER 11273 The dimension of the symmetric tridiagonal matrix. N >= 0. 11274 11275 D (input/output) REAL array, dimension (N) 11276 On entry, the diagonal elements of the tridiagonal matrix. 11277 On exit, if INFO = 0, the eigenvalues in ascending order. 11278 11279 E (input/output) REAL array, dimension (N-1) 11280 On entry, the subdiagonal elements of the tridiagonal matrix. 11281 On exit, E has been destroyed. 11282 11283 Z (input/output) REAL array, dimension (LDZ,N) 11284 On entry, if COMPZ = 'V', then Z contains the orthogonal 11285 matrix used in the reduction to tridiagonal form. 11286 On exit, if INFO = 0, then if COMPZ = 'V', Z contains the 11287 orthonormal eigenvectors of the original symmetric matrix, 11288 and if COMPZ = 'I', Z contains the orthonormal eigenvectors 11289 of the symmetric tridiagonal matrix. 11290 If COMPZ = 'N', then Z is not referenced. 11291 11292 LDZ (input) INTEGER 11293 The leading dimension of the array Z. LDZ >= 1. 11294 If eigenvectors are desired, then LDZ >= max(1,N). 11295 11296 WORK (workspace/output) REAL array, 11297 dimension (LWORK) 11298 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 11299 11300 LWORK (input) INTEGER 11301 The dimension of the array WORK. 11302 If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. 11303 If COMPZ = 'V' and N > 1 then LWORK must be at least 11304 ( 1 + 3*N + 2*N*lg N + 3*N**2 ), 11305 where lg( N ) = smallest integer k such 11306 that 2**k >= N. 11307 If COMPZ = 'I' and N > 1 then LWORK must be at least 11308 ( 1 + 4*N + N**2 ). 11309 11310 If LWORK = -1, then a workspace query is assumed; the routine 11311 only calculates the optimal size of the WORK array, returns 11312 this value as the first entry of the WORK array, and no error 11313 message related to LWORK is issued by XERBLA. 11314 11315 IWORK (workspace/output) INTEGER array, dimension (LIWORK) 11316 On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. 11317 11318 LIWORK (input) INTEGER 11319 The dimension of the array IWORK. 11320 If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. 
11321 If COMPZ = 'V' and N > 1 then LIWORK must be at least 11322 ( 6 + 6*N + 5*N*lg N ). 11323 If COMPZ = 'I' and N > 1 then LIWORK must be at least 11324 ( 3 + 5*N ). 11325 11326 If LIWORK = -1, then a workspace query is assumed; the 11327 routine only calculates the optimal size of the IWORK array, 11328 returns this value as the first entry of the IWORK array, and 11329 no error message related to LIWORK is issued by XERBLA. 11330 11331 INFO (output) INTEGER 11332 = 0: successful exit. 11333 < 0: if INFO = -i, the i-th argument had an illegal value. 11334 > 0: The algorithm failed to compute an eigenvalue while 11335 working on the submatrix lying in rows and columns 11336 INFO/(N+1) through mod(INFO,N+1). 11337 11338 Further Details 11339 =============== 11340 11341 Based on contributions by 11342 Jeff Rutter, Computer Science Division, University of California 11343 at Berkeley, USA 11344 Modified by Francoise Tisseur, University of Tennessee. 11345 11346 ===================================================================== 11347 11348 11349 Test the input parameters. 11350 11351 Parameter adjustments */ 11352 /* Table of constant values */ 11353 static integer c__2 = 2; 11354 static integer c__9 = 9; 11355 static integer c__0 = 0; 11356 static real c_b18 = 0.f; 11357 static real c_b19 = 1.f; 11358 static integer c__1 = 1; 11359 11360 /* System generated locals */ 11361 integer z_dim1, z_offset, i__1, i__2; 11362 real r__1, r__2; 11363 /* Builtin functions */ 11364 //double log(doublereal); 11365 integer pow_ii(integer *, integer *); 11366 //double sqrt(doublereal); 11367 /* Local variables */ 11368 static real tiny; 11369 static integer i__, j, k, m; 11370 static real p; 11371 extern logical lsame_(const char *, const char *); 11372 extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *, 11373 integer *, real *, real *, integer *, real *, integer *, real *, 11374 real *, integer *); 11375 static integer lwmin, start; 11376 extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 11377 integer *), slaed0_(integer *, integer *, integer *, real *, real 11378 *, real *, integer *, real *, integer *, real *, integer *, 11379 integer *); 11380 static integer ii; 11381 extern doublereal slamch_(const char *); 11382 extern /* Subroutine */ int xerbla_(const char *, integer *); 11383 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 11384 integer *, integer *, ftnlen, ftnlen); 11385 extern /* Subroutine */ int slascl_(const char *, integer *, integer *, real *, 11386 real *, integer *, integer *, real *, integer *, integer *), slacpy_(const char *, integer *, integer *, real *, integer *, 11387 real *, integer *), slaset_(const char *, integer *, integer *, 11388 real *, real *, real *, integer *); 11389 static integer liwmin, icompz; 11390 static real orgnrm; 11391 extern doublereal slanst_(const char *, integer *, real *, real *); 11392 extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *), 11393 slasrt_(const char *, integer *, real *, integer *); 11394 static logical lquery; 11395 static integer smlsiz; 11396 extern /* Subroutine */ int ssteqr_(const char *, integer *, real *, real *, 11397 real *, integer *, real *, integer *); 11398 static integer storez, strtrw, end, lgn; 11399 static real eps; 11400 #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] 11401 11402 11403 --d__; 11404 --e; 11405 z_dim1 = *ldz; 11406 z_offset = 1 + z_dim1 * 1; 11407 z__ -= z_offset; 11408 --work; 11409 --iwork; 
11410 11411 /* Function Body */ 11412 *info = 0; 11413 lquery = *lwork == -1 || *liwork == -1; 11414 11415 if (lsame_(compz, "N")) { 11416 icompz = 0; 11417 } else if (lsame_(compz, "V")) { 11418 icompz = 1; 11419 } else if (lsame_(compz, "I")) { 11420 icompz = 2; 11421 } else { 11422 icompz = -1; 11423 } 11424 if (*n <= 1 || icompz <= 0) { 11425 liwmin = 1; 11426 lwmin = 1; 11427 } else { 11428 lgn = (integer) (log((real) (*n)) / log(2.f)); 11429 if (pow_ii(&c__2, &lgn) < *n) { 11430 ++lgn; 11431 } 11432 if (pow_ii(&c__2, &lgn) < *n) { 11433 ++lgn; 11434 } 11435 if (icompz == 1) { 11436 /* Computing 2nd power */ 11437 i__1 = *n; 11438 lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; 11439 liwmin = *n * 6 + 6 + *n * 5 * lgn; 11440 } else if (icompz == 2) { 11441 /* Computing 2nd power */ 11442 i__1 = *n; 11443 lwmin = (*n << 2) + 1 + i__1 * i__1; 11444 liwmin = *n * 5 + 3; 11445 } 11446 } 11447 if (icompz < 0) { 11448 *info = -1; 11449 } else if (*n < 0) { 11450 *info = -2; 11451 } else if (*ldz < 1 || icompz > 0 && *ldz < f2cmax(1,*n)) { 11452 *info = -6; 11453 } else if (*lwork < lwmin && ! lquery) { 11454 *info = -8; 11455 } else if (*liwork < liwmin && ! lquery) { 11456 *info = -10; 11457 } 11458 11459 if (*info == 0) { 11460 work[1] = (real) lwmin; 11461 iwork[1] = liwmin; 11462 } 11463 11464 if (*info != 0) { 11465 i__1 = -(*info); 11466 xerbla_("SSTEDC", &i__1); 11467 return 0; 11468 } else if (lquery) { 11469 return 0; 11470 } 11471 11472 /* Quick return if possible */ 11473 11474 if (*n == 0) { 11475 return 0; 11476 } 11477 if (*n == 1) { 11478 if (icompz != 0) { 11479 z___ref(1, 1) = 1.f; 11480 } 11481 return 0; 11482 } 11483 11484 smlsiz = ilaenv_(&c__9, "SSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( 11485 ftnlen)6, (ftnlen)1); 11486 11487 /* If the following conditional clause is removed, then the routine 11488 will use the Divide and Conquer routine to compute only the 11489 eigenvalues, which requires (3N + 3N**2) real workspace and 11490 (2 + 5N + 2N lg(N)) integer workspace. 11491 Since on many architectures SSTERF is much faster than any other 11492 algorithm for finding eigenvalues only, it is used here 11493 as the default. 11494 11495 If COMPZ = 'N', use SSTERF to compute the eigenvalues. */ 11496 11497 if (icompz == 0) { 11498 ssterf_(n, &d__[1], &e[1], info); 11499 return 0; 11500 } 11501 11502 /* If N is smaller than the minimum divide size (SMLSIZ+1), then 11503 solve the problem with another solver. */ 11504 11505 if (*n <= smlsiz) { 11506 if (icompz == 0) { 11507 ssterf_(n, &d__[1], &e[1], info); 11508 return 0; 11509 } else if (icompz == 2) { 11510 ssteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], 11511 info); 11512 return 0; 11513 } else { 11514 ssteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], 11515 info); 11516 return 0; 11517 } 11518 } 11519 11520 /* If COMPZ = 'V', the Z matrix must be stored elsewhere for later 11521 use. */ 11522 11523 if (icompz == 1) { 11524 storez = *n * *n + 1; 11525 } else { 11526 storez = 1; 11527 } 11528 11529 if (icompz == 2) { 11530 slaset_("Full", n, n, &c_b18, &c_b19, &z__[z_offset], ldz); 11531 } 11532 11533 /* Scale. 
*/ 11534 11535 orgnrm = slanst_("M", n, &d__[1], &e[1]); 11536 if (orgnrm == 0.f) { 11537 return 0; 11538 } 11539 11540 eps = slamch_("Epsilon"); 11541 11542 start = 1; 11543 11544 /* while ( START <= N ) */ 11545 11546 L10: 11547 if (start <= *n) { 11548 11549 /* Let END be the position of the next subdiagonal entry such that 11550 E( END ) <= TINY or END = N if no such subdiagonal exists. The 11551 matrix identified by the elements between START and END 11552 constitutes an independent sub-problem. */ 11553 11554 end = start; 11555 L20: 11556 if (end < *n) { 11557 tiny = eps * sqrt((r__1 = d__[end], dabs(r__1))) * sqrt((r__2 = 11558 d__[end + 1], dabs(r__2))); 11559 if ((r__1 = e[end], dabs(r__1)) > tiny) { 11560 ++end; 11561 goto L20; 11562 } 11563 } 11564 11565 /* (Sub) Problem determined. Compute its size and solve it. */ 11566 11567 m = end - start + 1; 11568 if (m == 1) { 11569 start = end + 1; 11570 goto L10; 11571 } 11572 if (m > smlsiz) { 11573 *info = smlsiz; 11574 11575 /* Scale. */ 11576 11577 orgnrm = slanst_("M", &m, &d__[start], &e[start]); 11578 slascl_("G", &c__0, &c__0, &orgnrm, &c_b19, &m, &c__1, &d__[start] 11579 , &m, info); 11580 i__1 = m - 1; 11581 i__2 = m - 1; 11582 slascl_("G", &c__0, &c__0, &orgnrm, &c_b19, &i__1, &c__1, &e[ 11583 start], &i__2, info); 11584 11585 if (icompz == 1) { 11586 strtrw = 1; 11587 } else { 11588 strtrw = start; 11589 } 11590 slaed0_(&icompz, n, &m, &d__[start], &e[start], &z___ref(strtrw, 11591 start), ldz, &work[1], n, &work[storez], &iwork[1], info); 11592 if (*info != 0) { 11593 *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m 11594 + 1) + start - 1; 11595 return 0; 11596 } 11597 11598 /* Scale back. */ 11599 11600 slascl_("G", &c__0, &c__0, &c_b19, &orgnrm, &m, &c__1, &d__[start] 11601 , &m, info); 11602 11603 } else { 11604 if (icompz == 1) { 11605 11606 /* Since QR won't update a Z matrix which is larger than the 11607 length of D, we must solve the sub-problem in a workspace and 11608 then multiply back into Z. */ 11609 11610 ssteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &work[ 11611 m * m + 1], info); 11612 slacpy_("A", n, &m, &z___ref(1, start), ldz, &work[storez], n); 11613 sgemm_("N", "N", n, &m, &m, &c_b19, &work[storez], ldz, &work[ 11614 1], &m, &c_b18, &z___ref(1, start), ldz); 11615 } else if (icompz == 2) { 11616 ssteqr_("I", &m, &d__[start], &e[start], &z___ref(start, 11617 start), ldz, &work[1], info); 11618 } else { 11619 ssterf_(&m, &d__[start], &e[start], info); 11620 } 11621 if (*info != 0) { 11622 *info = start * (*n + 1) + end; 11623 return 0; 11624 } 11625 } 11626 11627 start = end + 1; 11628 goto L10; 11629 } 11630 11631 /* endwhile 11632 11633 If the problem split any number of times, then the eigenvalues 11634 will not be properly ordered. Here we permute the eigenvalues 11635 (and the associated eigenvectors) into ascending order. 
*/ 11636 11637 if (m != *n) { 11638 if (icompz == 0) { 11639 11640 /* Use Quick Sort */ 11641 11642 slasrt_("I", n, &d__[1], info); 11643 11644 } else { 11645 11646 /* Use Selection Sort to minimize swaps of eigenvectors */ 11647 11648 i__1 = *n; 11649 for (ii = 2; ii <= i__1; ++ii) { 11650 i__ = ii - 1; 11651 k = i__; 11652 p = d__[i__]; 11653 i__2 = *n; 11654 for (j = ii; j <= i__2; ++j) { 11655 if (d__[j] < p) { 11656 k = j; 11657 p = d__[j]; 11658 } 11659 /* L30: */ 11660 } 11661 if (k != i__) { 11662 d__[k] = d__[i__]; 11663 d__[i__] = p; 11664 sswap_(n, &z___ref(1, i__), &c__1, &z___ref(1, k), &c__1); 11665 } 11666 /* L40: */ 11667 } 11668 } 11669 } 11670 11671 work[1] = (real) lwmin; 11672 iwork[1] = liwmin; 11673 11674 return 0; 11675 11676 /* End of SSTEDC */ 11677 11678 } /* sstedc_ */
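For COMPZ = 'I' the Arguments section above gives the minimum workspace sizes directly, so a small call needs no workspace query. The sketch below, assuming the f2c typedefs and the sstedc_ prototype from lapackblas.h, diagonalizes a 4-by-4 tridiagonal matrix with constant diagonal 2 and subdiagonal 1; the matrix data is illustrative only.

#include <cstdio>
#include "lapackblas.h"   /* assumed to declare integer, real and sstedc_ */

int main()
{
    integer n = 4, ldz = 4, info = 0;
    real d[4] = {2.f, 2.f, 2.f, 2.f};   /* diagonal */
    real e[3] = {1.f, 1.f, 1.f};        /* subdiagonal */
    real z[16];                         /* eigenvectors when COMPZ = 'I' */
    integer lwork = 33;                 /* 1 + 4*N + N*N for N = 4, COMPZ = 'I' */
    integer liwork = 23;                /* 3 + 5*N */
    real work[33];
    integer iwork[23];

    sstedc_("I", &n, d, e, z, &ldz, work, &lwork, iwork, &liwork, &info);

    std::printf("info = %ld\n", (long) info);
    for (int i = 0; i < 4; ++i)
        std::printf("lambda[%d] = %g\n", i, (double) d[i]);
    return 0;
}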
int ssteqr_ | ( | const char * | compz, | |
integer * | n, | |||
real * | d__, | |||
real * | e, | |||
real * | z__, | |||
integer * | ldz, | |||
real * | work, | |||
integer * | info | |||
) |
Definition at line 7758 of file lapackblas.cpp.
References c__0, c__1, c__2, c_b9, dabs, f2cmax, integer, lsame_(), r_sign(), slae2_(), slaev2_(), slamch_(), slanst_(), slapy2_(), slartg_(), slascl_(), slaset_(), slasr_(), slasrt_(), sqrt(), sswap_(), xerbla_(), and z___ref.
Referenced by slaed0_(), sstedc_(), and ssyev_().
07760 { 07761 /* -- LAPACK routine (version 3.0) -- 07762 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 07763 Courant Institute, Argonne National Lab, and Rice University 07764 September 30, 1994 07765 07766 07767 Purpose 07768 ======= 07769 07770 SSTEQR computes all eigenvalues and, optionally, eigenvectors of a 07771 symmetric tridiagonal matrix using the implicit QL or QR method. 07772 The eigenvectors of a full or band symmetric matrix can also be found 07773 if SSYTRD or SSPTRD or SSBTRD has been used to reduce this matrix to 07774 tridiagonal form. 07775 07776 Arguments 07777 ========= 07778 07779 COMPZ (input) CHARACTER*1 07780 = 'N': Compute eigenvalues only. 07781 = 'V': Compute eigenvalues and eigenvectors of the original 07782 symmetric matrix. On entry, Z must contain the 07783 orthogonal matrix used to reduce the original matrix 07784 to tridiagonal form. 07785 = 'I': Compute eigenvalues and eigenvectors of the 07786 tridiagonal matrix. Z is initialized to the identity 07787 matrix. 07788 07789 N (input) INTEGER 07790 The order of the matrix. N >= 0. 07791 07792 D (input/output) REAL array, dimension (N) 07793 On entry, the diagonal elements of the tridiagonal matrix. 07794 On exit, if INFO = 0, the eigenvalues in ascending order. 07795 07796 E (input/output) REAL array, dimension (N-1) 07797 On entry, the (n-1) subdiagonal elements of the tridiagonal 07798 matrix. 07799 On exit, E has been destroyed. 07800 07801 Z (input/output) REAL array, dimension (LDZ, N) 07802 On entry, if COMPZ = 'V', then Z contains the orthogonal 07803 matrix used in the reduction to tridiagonal form. 07804 On exit, if INFO = 0, then if COMPZ = 'V', Z contains the 07805 orthonormal eigenvectors of the original symmetric matrix, 07806 and if COMPZ = 'I', Z contains the orthonormal eigenvectors 07807 of the symmetric tridiagonal matrix. 07808 If COMPZ = 'N', then Z is not referenced. 07809 07810 LDZ (input) INTEGER 07811 The leading dimension of the array Z. LDZ >= 1, and if 07812 eigenvectors are desired, then LDZ >= f2cmax(1,N). 07813 07814 WORK (workspace) REAL array, dimension (f2cmax(1,2*N-2)) 07815 If COMPZ = 'N', then WORK is not referenced. 07816 07817 INFO (output) INTEGER 07818 = 0: successful exit 07819 < 0: if INFO = -i, the i-th argument had an illegal value 07820 > 0: the algorithm has failed to find all the eigenvalues in 07821 a total of 30*N iterations; if INFO = i, then i 07822 elements of E have not converged to zero; on exit, D 07823 and E contain the elements of a symmetric tridiagonal 07824 matrix which is orthogonally similar to the original 07825 matrix. 07826 07827 ===================================================================== 07828 07829 07830 Test the input parameters. 
07831 07832 Parameter adjustments */ 07833 /* Table of constant values */ 07834 static real c_b9 = 0.f; 07835 static real c_b10 = 1.f; 07836 static integer c__0 = 0; 07837 static integer c__1 = 1; 07838 static integer c__2 = 2; 07839 07840 /* System generated locals */ 07841 integer z_dim1, z_offset, i__1, i__2; 07842 real r__1, r__2; 07843 /* Builtin functions */ 07844 // double sqrt(doublereal), r_sign(real *, real *); 07845 double r_sign(real *, real *); 07846 /* Local variables */ 07847 static integer lend, jtot; 07848 extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *) 07849 ; 07850 static real b, c__, f, g; 07851 static integer i__, j, k, l, m; 07852 static real p, r__, s; 07853 extern logical lsame_(const char *, const char *); 07854 static real anorm; 07855 extern /* Subroutine */ int slasr_(const char *, const char *, const char *, integer *, 07856 integer *, real *, real *, real *, integer *); 07857 static integer l1; 07858 extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 07859 integer *); 07860 static integer lendm1, lendp1; 07861 extern /* Subroutine */ int slaev2_(real *, real *, real *, real *, real * 07862 , real *, real *); 07863 extern doublereal slapy2_(real *, real *); 07864 static integer ii, mm, iscale; 07865 extern doublereal slamch_(const char *); 07866 static real safmin; 07867 extern /* Subroutine */ int xerbla_(const char *, integer *); 07868 static real safmax; 07869 extern /* Subroutine */ int slascl_(const char *, integer *, integer *, real *, 07870 real *, integer *, integer *, real *, integer *, integer *); 07871 static integer lendsv; 07872 extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * 07873 ), slaset_(const char *, integer *, integer *, real *, real *, real *, 07874 integer *); 07875 static real ssfmin; 07876 static integer nmaxit, icompz; 07877 static real ssfmax; 07878 extern doublereal slanst_(const char *, integer *, real *, real *); 07879 extern /* Subroutine */ int slasrt_(const char *, integer *, real *, integer *); 07880 static integer lm1, mm1, nm1; 07881 static real rt1, rt2, eps; 07882 static integer lsv; 07883 static real tst, eps2; 07884 #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] 07885 07886 07887 --d__; 07888 --e; 07889 z_dim1 = *ldz; 07890 z_offset = 1 + z_dim1 * 1; 07891 z__ -= z_offset; 07892 --work; 07893 07894 /* Function Body */ 07895 *info = 0; 07896 07897 if (lsame_(compz, "N")) { 07898 icompz = 0; 07899 } else if (lsame_(compz, "V")) { 07900 icompz = 1; 07901 } else if (lsame_(compz, "I")) { 07902 icompz = 2; 07903 } else { 07904 icompz = -1; 07905 } 07906 if (icompz < 0) { 07907 *info = -1; 07908 } else if (*n < 0) { 07909 *info = -2; 07910 } else if (*ldz < 1 || icompz > 0 && *ldz < f2cmax(1,*n)) { 07911 *info = -6; 07912 } 07913 if (*info != 0) { 07914 i__1 = -(*info); 07915 xerbla_("SSTEQR", &i__1); 07916 return 0; 07917 } 07918 07919 /* Quick return if possible */ 07920 07921 if (*n == 0) { 07922 return 0; 07923 } 07924 07925 if (*n == 1) { 07926 if (icompz == 2) { 07927 z___ref(1, 1) = 1.f; 07928 } 07929 return 0; 07930 } 07931 07932 /* Determine the unit roundoff and over/underflow thresholds. */ 07933 07934 eps = slamch_("E"); 07935 /* Computing 2nd power */ 07936 r__1 = eps; 07937 eps2 = r__1 * r__1; 07938 safmin = slamch_("S"); 07939 safmax = 1.f / safmin; 07940 ssfmax = sqrt(safmax) / 3.f; 07941 ssfmin = sqrt(safmin) / eps2; 07942 07943 /* Compute the eigenvalues and eigenvectors of the tridiagonal 07944 matrix. 
*/ 07945 07946 if (icompz == 2) { 07947 slaset_("Full", n, n, &c_b9, &c_b10, &z__[z_offset], ldz); 07948 } 07949 07950 nmaxit = *n * 30; 07951 jtot = 0; 07952 07953 /* Determine where the matrix splits and choose QL or QR iteration 07954 for each block, according to whether top or bottom diagonal 07955 element is smaller. */ 07956 07957 l1 = 1; 07958 nm1 = *n - 1; 07959 07960 L10: 07961 if (l1 > *n) { 07962 goto L160; 07963 } 07964 if (l1 > 1) { 07965 e[l1 - 1] = 0.f; 07966 } 07967 if (l1 <= nm1) { 07968 i__1 = nm1; 07969 for (m = l1; m <= i__1; ++m) { 07970 tst = (r__1 = e[m], dabs(r__1)); 07971 if (tst == 0.f) { 07972 goto L30; 07973 } 07974 if (tst <= sqrt((r__1 = d__[m], dabs(r__1))) * sqrt((r__2 = d__[m 07975 + 1], dabs(r__2))) * eps) { 07976 e[m] = 0.f; 07977 goto L30; 07978 } 07979 /* L20: */ 07980 } 07981 } 07982 m = *n; 07983 07984 L30: 07985 l = l1; 07986 lsv = l; 07987 lend = m; 07988 lendsv = lend; 07989 l1 = m + 1; 07990 if (lend == l) { 07991 goto L10; 07992 } 07993 07994 /* Scale submatrix in rows and columns L to LEND */ 07995 07996 i__1 = lend - l + 1; 07997 anorm = slanst_("I", &i__1, &d__[l], &e[l]); 07998 iscale = 0; 07999 if (anorm == 0.f) { 08000 goto L10; 08001 } 08002 if (anorm > ssfmax) { 08003 iscale = 1; 08004 i__1 = lend - l + 1; 08005 slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, 08006 info); 08007 i__1 = lend - l; 08008 slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, 08009 info); 08010 } else if (anorm < ssfmin) { 08011 iscale = 2; 08012 i__1 = lend - l + 1; 08013 slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, 08014 info); 08015 i__1 = lend - l; 08016 slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, 08017 info); 08018 } 08019 08020 /* Choose between QL and QR iteration */ 08021 08022 if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) { 08023 lend = lsv; 08024 l = lendsv; 08025 } 08026 08027 if (lend > l) { 08028 08029 /* QL Iteration 08030 08031 Look for small subdiagonal element. */ 08032 08033 L40: 08034 if (l != lend) { 08035 lendm1 = lend - 1; 08036 i__1 = lendm1; 08037 for (m = l; m <= i__1; ++m) { 08038 /* Computing 2nd power */ 08039 r__2 = (r__1 = e[m], dabs(r__1)); 08040 tst = r__2 * r__2; 08041 if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m 08042 + 1], dabs(r__2)) + safmin) { 08043 goto L60; 08044 } 08045 /* L50: */ 08046 } 08047 } 08048 08049 m = lend; 08050 08051 L60: 08052 if (m < lend) { 08053 e[m] = 0.f; 08054 } 08055 p = d__[l]; 08056 if (m == l) { 08057 goto L80; 08058 } 08059 08060 /* If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 08061 to compute its eigensystem. */ 08062 08063 if (m == l + 1) { 08064 if (icompz > 0) { 08065 slaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); 08066 work[l] = c__; 08067 work[*n - 1 + l] = s; 08068 slasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & 08069 z___ref(1, l), ldz); 08070 } else { 08071 slae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); 08072 } 08073 d__[l] = rt1; 08074 d__[l + 1] = rt2; 08075 e[l] = 0.f; 08076 l += 2; 08077 if (l <= lend) { 08078 goto L40; 08079 } 08080 goto L140; 08081 } 08082 08083 if (jtot == nmaxit) { 08084 goto L140; 08085 } 08086 ++jtot; 08087 08088 /* Form shift. 
*/ 08089 08090 g = (d__[l + 1] - p) / (e[l] * 2.f); 08091 r__ = slapy2_(&g, &c_b10); 08092 g = d__[m] - p + e[l] / (g + r_sign(&r__, &g)); 08093 08094 s = 1.f; 08095 c__ = 1.f; 08096 p = 0.f; 08097 08098 /* Inner loop */ 08099 08100 mm1 = m - 1; 08101 i__1 = l; 08102 for (i__ = mm1; i__ >= i__1; --i__) { 08103 f = s * e[i__]; 08104 b = c__ * e[i__]; 08105 slartg_(&g, &f, &c__, &s, &r__); 08106 if (i__ != m - 1) { 08107 e[i__ + 1] = r__; 08108 } 08109 g = d__[i__ + 1] - p; 08110 r__ = (d__[i__] - g) * s + c__ * 2.f * b; 08111 p = s * r__; 08112 d__[i__ + 1] = g + p; 08113 g = c__ * r__ - b; 08114 08115 /* If eigenvectors are desired, then save rotations. */ 08116 08117 if (icompz > 0) { 08118 work[i__] = c__; 08119 work[*n - 1 + i__] = -s; 08120 } 08121 08122 /* L70: */ 08123 } 08124 08125 /* If eigenvectors are desired, then apply saved rotations. */ 08126 08127 if (icompz > 0) { 08128 mm = m - l + 1; 08129 slasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], & 08130 z___ref(1, l), ldz); 08131 } 08132 08133 d__[l] -= p; 08134 e[l] = g; 08135 goto L40; 08136 08137 /* Eigenvalue found. */ 08138 08139 L80: 08140 d__[l] = p; 08141 08142 ++l; 08143 if (l <= lend) { 08144 goto L40; 08145 } 08146 goto L140; 08147 08148 } else { 08149 08150 /* QR Iteration 08151 08152 Look for small superdiagonal element. */ 08153 08154 L90: 08155 if (l != lend) { 08156 lendp1 = lend + 1; 08157 i__1 = lendp1; 08158 for (m = l; m >= i__1; --m) { 08159 /* Computing 2nd power */ 08160 r__2 = (r__1 = e[m - 1], dabs(r__1)); 08161 tst = r__2 * r__2; 08162 if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m 08163 - 1], dabs(r__2)) + safmin) { 08164 goto L110; 08165 } 08166 /* L100: */ 08167 } 08168 } 08169 08170 m = lend; 08171 08172 L110: 08173 if (m > lend) { 08174 e[m - 1] = 0.f; 08175 } 08176 p = d__[l]; 08177 if (m == l) { 08178 goto L130; 08179 } 08180 08181 /* If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 08182 to compute its eigensystem. */ 08183 08184 if (m == l - 1) { 08185 if (icompz > 0) { 08186 slaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) 08187 ; 08188 work[m] = c__; 08189 work[*n - 1 + m] = s; 08190 slasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & 08191 z___ref(1, l - 1), ldz); 08192 } else { 08193 slae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); 08194 } 08195 d__[l - 1] = rt1; 08196 d__[l] = rt2; 08197 e[l - 1] = 0.f; 08198 l += -2; 08199 if (l >= lend) { 08200 goto L90; 08201 } 08202 goto L140; 08203 } 08204 08205 if (jtot == nmaxit) { 08206 goto L140; 08207 } 08208 ++jtot; 08209 08210 /* Form shift. */ 08211 08212 g = (d__[l - 1] - p) / (e[l - 1] * 2.f); 08213 r__ = slapy2_(&g, &c_b10); 08214 g = d__[m] - p + e[l - 1] / (g + r_sign(&r__, &g)); 08215 08216 s = 1.f; 08217 c__ = 1.f; 08218 p = 0.f; 08219 08220 /* Inner loop */ 08221 08222 lm1 = l - 1; 08223 i__1 = lm1; 08224 for (i__ = m; i__ <= i__1; ++i__) { 08225 f = s * e[i__]; 08226 b = c__ * e[i__]; 08227 slartg_(&g, &f, &c__, &s, &r__); 08228 if (i__ != m) { 08229 e[i__ - 1] = r__; 08230 } 08231 g = d__[i__] - p; 08232 r__ = (d__[i__ + 1] - g) * s + c__ * 2.f * b; 08233 p = s * r__; 08234 d__[i__] = g + p; 08235 g = c__ * r__ - b; 08236 08237 /* If eigenvectors are desired, then save rotations. */ 08238 08239 if (icompz > 0) { 08240 work[i__] = c__; 08241 work[*n - 1 + i__] = s; 08242 } 08243 08244 /* L120: */ 08245 } 08246 08247 /* If eigenvectors are desired, then apply saved rotations. 
*/ 08248 08249 if (icompz > 0) { 08250 mm = l - m + 1; 08251 slasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], & 08252 z___ref(1, m), ldz); 08253 } 08254 08255 d__[l] -= p; 08256 e[lm1] = g; 08257 goto L90; 08258 08259 /* Eigenvalue found. */ 08260 08261 L130: 08262 d__[l] = p; 08263 08264 --l; 08265 if (l >= lend) { 08266 goto L90; 08267 } 08268 goto L140; 08269 08270 } 08271 08272 /* Undo scaling if necessary */ 08273 08274 L140: 08275 if (iscale == 1) { 08276 i__1 = lendsv - lsv + 1; 08277 slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], 08278 n, info); 08279 i__1 = lendsv - lsv; 08280 slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, 08281 info); 08282 } else if (iscale == 2) { 08283 i__1 = lendsv - lsv + 1; 08284 slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], 08285 n, info); 08286 i__1 = lendsv - lsv; 08287 slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, 08288 info); 08289 } 08290 08291 /* Check for no convergence to an eigenvalue after a total 08292 of N*MAXIT iterations. */ 08293 08294 if (jtot < nmaxit) { 08295 goto L10; 08296 } 08297 i__1 = *n - 1; 08298 for (i__ = 1; i__ <= i__1; ++i__) { 08299 if (e[i__] != 0.f) { 08300 ++(*info); 08301 } 08302 /* L150: */ 08303 } 08304 goto L190; 08305 08306 /* Order eigenvalues and eigenvectors. */ 08307 08308 L160: 08309 if (icompz == 0) { 08310 08311 /* Use Quick Sort */ 08312 08313 slasrt_("I", n, &d__[1], info); 08314 08315 } else { 08316 08317 /* Use Selection Sort to minimize swaps of eigenvectors */ 08318 08319 i__1 = *n; 08320 for (ii = 2; ii <= i__1; ++ii) { 08321 i__ = ii - 1; 08322 k = i__; 08323 p = d__[i__]; 08324 i__2 = *n; 08325 for (j = ii; j <= i__2; ++j) { 08326 if (d__[j] < p) { 08327 k = j; 08328 p = d__[j]; 08329 } 08330 /* L170: */ 08331 } 08332 if (k != i__) { 08333 d__[k] = d__[i__]; 08334 d__[i__] = p; 08335 sswap_(n, &z___ref(1, i__), &c__1, &z___ref(1, k), &c__1); 08336 } 08337 /* L180: */ 08338 } 08339 } 08340 08341 L190: 08342 return 0; 08343 08344 /* End of SSTEQR */ 08345 08346 } /* ssteqr_ */
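When eigenvectors are requested, SSTEQR only needs a WORK array of length max(1, 2*N-2), as stated in the Arguments section above. A minimal sketch with COMPZ = 'I' (eigenvectors of the tridiagonal matrix itself), again assuming the f2c typedefs and the ssteqr_ prototype from lapackblas.h, with illustrative data:

#include <cstdio>
#include "lapackblas.h"   /* assumed to declare integer, real and ssteqr_ */

int main()
{
    integer n = 3, ldz = 3, info = 0;
    real d[3] = {1.f, 2.f, 3.f};   /* diagonal */
    real e[2] = {0.5f, 0.5f};      /* subdiagonal */
    real z[9];                     /* receives eigenvectors for COMPZ = 'I' */
    real work[4];                  /* max(1, 2*N - 2) */

    ssteqr_("I", &n, d, e, z, &ldz, work, &info);

    std::printf("info = %ld\n", (long) info);
    for (int i = 0; i < 3; ++i)
        std::printf("lambda[%d] = %g\n", i, (double) d[i]);
    return 0;
}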
int ssterf_ | ( | integer * | n, | |
real * | d__, | |||
real * | e, | |||
integer * | info | |||
) |
Definition at line 8354 of file lapackblas.cpp.
References c__0, c__1, dabs, integer, r_sign(), slae2_(), slamch_(), slanst_(), slapy2_(), slascl_(), slasrt_(), sqrt(), and xerbla_().
Referenced by sstedc_(), sstevd_(), and ssyev_().
08355 { 08356 /* -- LAPACK routine (version 3.0) -- 08357 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 08358 Courant Institute, Argonne National Lab, and Rice University 08359 June 30, 1999 08360 08361 08362 Purpose 08363 ======= 08364 08365 SSTERF computes all eigenvalues of a symmetric tridiagonal matrix 08366 using the Pal-Walker-Kahan variant of the QL or QR algorithm. 08367 08368 Arguments 08369 ========= 08370 08371 N (input) INTEGER 08372 The order of the matrix. N >= 0. 08373 08374 D (input/output) REAL array, dimension (N) 08375 On entry, the n diagonal elements of the tridiagonal matrix. 08376 On exit, if INFO = 0, the eigenvalues in ascending order. 08377 08378 E (input/output) REAL array, dimension (N-1) 08379 On entry, the (n-1) subdiagonal elements of the tridiagonal 08380 matrix. 08381 On exit, E has been destroyed. 08382 08383 INFO (output) INTEGER 08384 = 0: successful exit 08385 < 0: if INFO = -i, the i-th argument had an illegal value 08386 > 0: the algorithm failed to find all of the eigenvalues in 08387 a total of 30*N iterations; if INFO = i, then i 08388 elements of E have not converged to zero. 08389 08390 ===================================================================== 08391 08392 08393 Test the input parameters. 08394 08395 Parameter adjustments */ 08396 /* Table of constant values */ 08397 static integer c__0 = 0; 08398 static integer c__1 = 1; 08399 static real c_b32 = 1.f; 08400 08401 /* System generated locals */ 08402 integer i__1; 08403 real r__1, r__2, r__3; 08404 /* Builtin functions */ 08405 // double sqrt(doublereal), r_sign(real *, real *); 08406 double r_sign(real *, real *); 08407 /* Local variables */ 08408 static real oldc; 08409 static integer lend, jtot; 08410 extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *) 08411 ; 08412 static real c__; 08413 static integer i__, l, m; 08414 static real p, gamma, r__, s, alpha, sigma, anorm; 08415 static integer l1; 08416 static real bb; 08417 extern doublereal slapy2_(real *, real *); 08418 static integer iscale; 08419 static real oldgam; 08420 extern doublereal slamch_(const char *); 08421 static real safmin; 08422 extern /* Subroutine */ int xerbla_(const char *, integer *); 08423 static real safmax; 08424 extern /* Subroutine */ int slascl_(const char *, integer *, integer *, real *, 08425 real *, integer *, integer *, real *, integer *, integer *); 08426 static integer lendsv; 08427 static real ssfmin; 08428 static integer nmaxit; 08429 static real ssfmax; 08430 extern doublereal slanst_(const char *, integer *, real *, real *); 08431 extern /* Subroutine */ int slasrt_(const char *, integer *, real *, integer *); 08432 static real rt1, rt2, eps, rte; 08433 static integer lsv; 08434 static real eps2; 08435 08436 08437 --e; 08438 --d__; 08439 08440 /* Function Body */ 08441 *info = 0; 08442 08443 /* Quick return if possible */ 08444 08445 if (*n < 0) { 08446 *info = -1; 08447 i__1 = -(*info); 08448 xerbla_("SSTERF", &i__1); 08449 return 0; 08450 } 08451 if (*n <= 1) { 08452 return 0; 08453 } 08454 08455 /* Determine the unit roundoff for this environment. */ 08456 08457 eps = slamch_("E"); 08458 /* Computing 2nd power */ 08459 r__1 = eps; 08460 eps2 = r__1 * r__1; 08461 safmin = slamch_("S"); 08462 safmax = 1.f / safmin; 08463 ssfmax = sqrt(safmax) / 3.f; 08464 ssfmin = sqrt(safmin) / eps2; 08465 08466 /* Compute the eigenvalues of the tridiagonal matrix. 
*/ 08467 08468 nmaxit = *n * 30; 08469 sigma = 0.f; 08470 jtot = 0; 08471 08472 /* Determine where the matrix splits and choose QL or QR iteration 08473 for each block, according to whether top or bottom diagonal 08474 element is smaller. */ 08475 08476 l1 = 1; 08477 08478 L10: 08479 if (l1 > *n) { 08480 goto L170; 08481 } 08482 if (l1 > 1) { 08483 e[l1 - 1] = 0.f; 08484 } 08485 i__1 = *n - 1; 08486 for (m = l1; m <= i__1; ++m) { 08487 if ((r__3 = e[m], dabs(r__3)) <= sqrt((r__1 = d__[m], dabs(r__1))) * 08488 sqrt((r__2 = d__[m + 1], dabs(r__2))) * eps) { 08489 e[m] = 0.f; 08490 goto L30; 08491 } 08492 /* L20: */ 08493 } 08494 m = *n; 08495 08496 L30: 08497 l = l1; 08498 lsv = l; 08499 lend = m; 08500 lendsv = lend; 08501 l1 = m + 1; 08502 if (lend == l) { 08503 goto L10; 08504 } 08505 08506 /* Scale submatrix in rows and columns L to LEND */ 08507 08508 i__1 = lend - l + 1; 08509 anorm = slanst_("I", &i__1, &d__[l], &e[l]); 08510 iscale = 0; 08511 if (anorm > ssfmax) { 08512 iscale = 1; 08513 i__1 = lend - l + 1; 08514 slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, 08515 info); 08516 i__1 = lend - l; 08517 slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, 08518 info); 08519 } else if (anorm < ssfmin) { 08520 iscale = 2; 08521 i__1 = lend - l + 1; 08522 slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, 08523 info); 08524 i__1 = lend - l; 08525 slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, 08526 info); 08527 } 08528 08529 i__1 = lend - 1; 08530 for (i__ = l; i__ <= i__1; ++i__) { 08531 /* Computing 2nd power */ 08532 r__1 = e[i__]; 08533 e[i__] = r__1 * r__1; 08534 /* L40: */ 08535 } 08536 08537 /* Choose between QL and QR iteration */ 08538 08539 if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) { 08540 lend = lsv; 08541 l = lendsv; 08542 } 08543 08544 if (lend >= l) { 08545 08546 /* QL Iteration 08547 08548 Look for small subdiagonal element. */ 08549 08550 L50: 08551 if (l != lend) { 08552 i__1 = lend - 1; 08553 for (m = l; m <= i__1; ++m) { 08554 if ((r__2 = e[m], dabs(r__2)) <= eps2 * (r__1 = d__[m] * d__[ 08555 m + 1], dabs(r__1))) { 08556 goto L70; 08557 } 08558 /* L60: */ 08559 } 08560 } 08561 m = lend; 08562 08563 L70: 08564 if (m < lend) { 08565 e[m] = 0.f; 08566 } 08567 p = d__[l]; 08568 if (m == l) { 08569 goto L90; 08570 } 08571 08572 /* If remaining matrix is 2 by 2, use SLAE2 to compute its 08573 eigenvalues. */ 08574 08575 if (m == l + 1) { 08576 rte = sqrt(e[l]); 08577 slae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2); 08578 d__[l] = rt1; 08579 d__[l + 1] = rt2; 08580 e[l] = 0.f; 08581 l += 2; 08582 if (l <= lend) { 08583 goto L50; 08584 } 08585 goto L150; 08586 } 08587 08588 if (jtot == nmaxit) { 08589 goto L150; 08590 } 08591 ++jtot; 08592 08593 /* Form shift. 
*/ 08594 08595 rte = sqrt(e[l]); 08596 sigma = (d__[l + 1] - p) / (rte * 2.f); 08597 r__ = slapy2_(&sigma, &c_b32); 08598 sigma = p - rte / (sigma + r_sign(&r__, &sigma)); 08599 08600 c__ = 1.f; 08601 s = 0.f; 08602 gamma = d__[m] - sigma; 08603 p = gamma * gamma; 08604 08605 /* Inner loop */ 08606 08607 i__1 = l; 08608 for (i__ = m - 1; i__ >= i__1; --i__) { 08609 bb = e[i__]; 08610 r__ = p + bb; 08611 if (i__ != m - 1) { 08612 e[i__ + 1] = s * r__; 08613 } 08614 oldc = c__; 08615 c__ = p / r__; 08616 s = bb / r__; 08617 oldgam = gamma; 08618 alpha = d__[i__]; 08619 gamma = c__ * (alpha - sigma) - s * oldgam; 08620 d__[i__ + 1] = oldgam + (alpha - gamma); 08621 if (c__ != 0.f) { 08622 p = gamma * gamma / c__; 08623 } else { 08624 p = oldc * bb; 08625 } 08626 /* L80: */ 08627 } 08628 08629 e[l] = s * p; 08630 d__[l] = sigma + gamma; 08631 goto L50; 08632 08633 /* Eigenvalue found. */ 08634 08635 L90: 08636 d__[l] = p; 08637 08638 ++l; 08639 if (l <= lend) { 08640 goto L50; 08641 } 08642 goto L150; 08643 08644 } else { 08645 08646 /* QR Iteration 08647 08648 Look for small superdiagonal element. */ 08649 08650 L100: 08651 i__1 = lend + 1; 08652 for (m = l; m >= i__1; --m) { 08653 if ((r__2 = e[m - 1], dabs(r__2)) <= eps2 * (r__1 = d__[m] * d__[ 08654 m - 1], dabs(r__1))) { 08655 goto L120; 08656 } 08657 /* L110: */ 08658 } 08659 m = lend; 08660 08661 L120: 08662 if (m > lend) { 08663 e[m - 1] = 0.f; 08664 } 08665 p = d__[l]; 08666 if (m == l) { 08667 goto L140; 08668 } 08669 08670 /* If remaining matrix is 2 by 2, use SLAE2 to compute its 08671 eigenvalues. */ 08672 08673 if (m == l - 1) { 08674 rte = sqrt(e[l - 1]); 08675 slae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2); 08676 d__[l] = rt1; 08677 d__[l - 1] = rt2; 08678 e[l - 1] = 0.f; 08679 l += -2; 08680 if (l >= lend) { 08681 goto L100; 08682 } 08683 goto L150; 08684 } 08685 08686 if (jtot == nmaxit) { 08687 goto L150; 08688 } 08689 ++jtot; 08690 08691 /* Form shift. */ 08692 08693 rte = sqrt(e[l - 1]); 08694 sigma = (d__[l - 1] - p) / (rte * 2.f); 08695 r__ = slapy2_(&sigma, &c_b32); 08696 sigma = p - rte / (sigma + r_sign(&r__, &sigma)); 08697 08698 c__ = 1.f; 08699 s = 0.f; 08700 gamma = d__[m] - sigma; 08701 p = gamma * gamma; 08702 08703 /* Inner loop */ 08704 08705 i__1 = l - 1; 08706 for (i__ = m; i__ <= i__1; ++i__) { 08707 bb = e[i__]; 08708 r__ = p + bb; 08709 if (i__ != m) { 08710 e[i__ - 1] = s * r__; 08711 } 08712 oldc = c__; 08713 c__ = p / r__; 08714 s = bb / r__; 08715 oldgam = gamma; 08716 alpha = d__[i__ + 1]; 08717 gamma = c__ * (alpha - sigma) - s * oldgam; 08718 d__[i__] = oldgam + (alpha - gamma); 08719 if (c__ != 0.f) { 08720 p = gamma * gamma / c__; 08721 } else { 08722 p = oldc * bb; 08723 } 08724 /* L130: */ 08725 } 08726 08727 e[l - 1] = s * p; 08728 d__[l] = sigma + gamma; 08729 goto L100; 08730 08731 /* Eigenvalue found. */ 08732 08733 L140: 08734 d__[l] = p; 08735 08736 --l; 08737 if (l >= lend) { 08738 goto L100; 08739 } 08740 goto L150; 08741 08742 } 08743 08744 /* Undo scaling if necessary */ 08745 08746 L150: 08747 if (iscale == 1) { 08748 i__1 = lendsv - lsv + 1; 08749 slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], 08750 n, info); 08751 } 08752 if (iscale == 2) { 08753 i__1 = lendsv - lsv + 1; 08754 slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], 08755 n, info); 08756 } 08757 08758 /* Check for no convergence to an eigenvalue after a total 08759 of N*MAXIT iterations. 
*/ 08760 08761 if (jtot < nmaxit) { 08762 goto L10; 08763 } 08764 i__1 = *n - 1; 08765 for (i__ = 1; i__ <= i__1; ++i__) { 08766 if (e[i__] != 0.f) { 08767 ++(*info); 08768 } 08769 /* L160: */ 08770 } 08771 goto L180; 08772 08773 /* Sort eigenvalues in increasing order. */ 08774 08775 L170: 08776 slasrt_("I", n, &d__[1], info); 08777 08778 L180: 08779 return 0; 08780 08781 /* End of SSTERF */ 08782 08783 } /* ssterf_ */
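Because SSTERF computes eigenvalues only, it takes no workspace at all; D is overwritten with the eigenvalues in ascending order and E is destroyed. A short sketch, with the usual assumption that lapackblas.h declares ssterf_ and the f2c typedefs, and with illustrative data:

#include <cstdio>
#include "lapackblas.h"   /* assumed to declare integer, real and ssterf_ */

int main()
{
    integer n = 5, info = 0;
    real d[5] = {2.f, 2.f, 2.f, 2.f, 2.f};   /* diagonal */
    real e[4] = {1.f, 1.f, 1.f, 1.f};        /* subdiagonal; destroyed on exit */

    ssterf_(&n, d, e, &info);                /* eigenvalues only, no workspace */

    std::printf("info = %ld\n", (long) info);
    for (int i = 0; i < 5; ++i)
        std::printf("lambda[%d] = %g\n", i, (double) d[i]);
    return 0;
}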
int sstevd_ | ( | char * | jobz, | |
integer * | n, | |||
real * | d__, | |||
real * | e, | |||
real * | z__, | |||
integer * | ldz, | |||
real * | work, | |||
integer * | lwork, | |||
integer * | iwork, | |||
integer * | liwork, | |||
integer * | info | |||
) |
Definition at line 11683 of file lapackblas.cpp.
References c__1, integer, lsame_(), slamch_(), slanst_(), sqrt(), sscal_(), sstedc_(), ssterf_(), xerbla_(), and z___ref.
Referenced by EMAN::PCAlarge::analyze(), EMAN::PCA::dopca_lan(), and EMAN::PCA::dopca_ooc().
11686 { 11687 /* -- LAPACK driver routine (version 3.0) -- 11688 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 11689 Courant Institute, Argonne National Lab, and Rice University 11690 June 30, 1999 11691 11692 11693 Purpose 11694 ======= 11695 11696 SSTEVD computes all eigenvalues and, optionally, eigenvectors of a 11697 real symmetric tridiagonal matrix. If eigenvectors are desired, it 11698 uses a divide and conquer algorithm. 11699 11700 The divide and conquer algorithm makes very mild assumptions about 11701 floating point arithmetic. It will work on machines with a guard 11702 digit in add/subtract, or on those binary machines without guard 11703 digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or 11704 Cray-2. It could conceivably fail on hexadecimal or decimal machines 11705 without guard digits, but we know of none. 11706 11707 Arguments 11708 ========= 11709 11710 JOBZ (input) CHARACTER*1 11711 = 'N': Compute eigenvalues only; 11712 = 'V': Compute eigenvalues and eigenvectors. 11713 11714 N (input) INTEGER 11715 The order of the matrix. N >= 0. 11716 11717 D (input/output) REAL array, dimension (N) 11718 On entry, the n diagonal elements of the tridiagonal matrix 11719 A. 11720 On exit, if INFO = 0, the eigenvalues in ascending order. 11721 11722 E (input/output) REAL array, dimension (N) 11723 On entry, the (n-1) subdiagonal elements of the tridiagonal 11724 matrix A, stored in elements 1 to N-1 of E; E(N) need not 11725 be set, but is used by the routine. 11726 On exit, the contents of E are destroyed. 11727 11728 Z (output) REAL array, dimension (LDZ, N) 11729 If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal 11730 eigenvectors of the matrix A, with the i-th column of Z 11731 holding the eigenvector associated with D(i). 11732 If JOBZ = 'N', then Z is not referenced. 11733 11734 LDZ (input) INTEGER 11735 The leading dimension of the array Z. LDZ >= 1, and if 11736 JOBZ = 'V', LDZ >= max(1,N). 11737 11738 WORK (workspace/output) REAL array, 11739 dimension (LWORK) 11740 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 11741 11742 LWORK (input) INTEGER 11743 The dimension of the array WORK. 11744 If JOBZ = 'N' or N <= 1 then LWORK must be at least 1. 11745 If JOBZ = 'V' and N > 1 then LWORK must be at least 11746 ( 1 + 4*N + N**2 ). 11747 11748 If LWORK = -1, then a workspace query is assumed; the routine 11749 only calculates the optimal size of the WORK array, returns 11750 this value as the first entry of the WORK array, and no error 11751 message related to LWORK is issued by XERBLA. 11752 11753 IWORK (workspace/output) INTEGER array, dimension (LIWORK) 11754 On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. 11755 11756 LIWORK (input) INTEGER 11757 The dimension of the array IWORK. 11758 If JOBZ = 'N' or N <= 1 then LIWORK must be at least 1. 11759 If JOBZ = 'V' and N > 1 then LIWORK must be at least 3+5*N. 11760 11761 If LIWORK = -1, then a workspace query is assumed; the 11762 routine only calculates the optimal size of the IWORK array, 11763 returns this value as the first entry of the IWORK array, and 11764 no error message related to LIWORK is issued by XERBLA. 11765 11766 INFO (output) INTEGER 11767 = 0: successful exit 11768 < 0: if INFO = -i, the i-th argument had an illegal value 11769 > 0: if INFO = i, the algorithm failed to converge; i 11770 off-diagonal elements of E did not converge to zero. 
11771 11772 ===================================================================== 11773 11774 11775 Test the input parameters. 11776 11777 Parameter adjustments */ 11778 /* Table of constant values */ 11779 static integer c__1 = 1; 11780 11781 /* System generated locals */ 11782 integer z_dim1, z_offset, i__1; 11783 real r__1; 11784 /* Builtin functions */ 11785 // double sqrt(doublereal); 11786 /* Local variables */ 11787 static real rmin, rmax, tnrm, sigma; 11788 extern logical lsame_(const char *, const char *); 11789 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); 11790 static integer lwmin; 11791 static logical wantz; 11792 static integer iscale; 11793 extern doublereal slamch_(const char *); 11794 static real safmin; 11795 extern /* Subroutine */ int xerbla_(const char *, integer *); 11796 static real bignum; 11797 extern /* Subroutine */ int sstedc_(const char *, integer *, real *, real *, 11798 real *, integer *, real *, integer *, integer *, integer *, 11799 integer *); 11800 static integer liwmin; 11801 extern doublereal slanst_(const char *, integer *, real *, real *); 11802 extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); 11803 static real smlnum; 11804 static logical lquery; 11805 static real eps; 11806 #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] 11807 11808 11809 --d__; 11810 --e; 11811 z_dim1 = *ldz; 11812 z_offset = 1 + z_dim1 * 1; 11813 z__ -= z_offset; 11814 --work; 11815 --iwork; 11816 11817 /* Function Body */ 11818 wantz = lsame_(jobz, "V"); 11819 lquery = *lwork == -1 || *liwork == -1; 11820 11821 *info = 0; 11822 liwmin = 1; 11823 lwmin = 1; 11824 if (*n > 1 && wantz) { 11825 /* Computing 2nd power */ 11826 i__1 = *n; 11827 lwmin = (*n << 2) + 1 + i__1 * i__1; 11828 liwmin = *n * 5 + 3; 11829 } 11830 11831 if (! (wantz || lsame_(jobz, "N"))) { 11832 *info = -1; 11833 } else if (*n < 0) { 11834 *info = -2; 11835 } else if (*ldz < 1 || wantz && *ldz < *n) { 11836 *info = -6; 11837 } else if (*lwork < lwmin && ! lquery) { 11838 *info = -8; 11839 } else if (*liwork < liwmin && ! lquery) { 11840 *info = -10; 11841 } 11842 11843 if (*info == 0) { 11844 work[1] = (real) lwmin; 11845 iwork[1] = liwmin; 11846 } 11847 11848 if (*info != 0) { 11849 i__1 = -(*info); 11850 xerbla_("SSTEVD", &i__1); 11851 return 0; 11852 } else if (lquery) { 11853 return 0; 11854 } 11855 11856 /* Quick return if possible */ 11857 11858 if (*n == 0) { 11859 return 0; 11860 } 11861 11862 if (*n == 1) { 11863 if (wantz) { 11864 z___ref(1, 1) = 1.f; 11865 } 11866 return 0; 11867 } 11868 11869 /* Get machine constants. */ 11870 11871 safmin = slamch_("Safe minimum"); 11872 eps = slamch_("Precision"); 11873 smlnum = safmin / eps; 11874 bignum = 1.f / smlnum; 11875 rmin = sqrt(smlnum); 11876 rmax = sqrt(bignum); 11877 11878 /* Scale matrix to allowable range, if necessary. */ 11879 11880 iscale = 0; 11881 tnrm = slanst_("M", n, &d__[1], &e[1]); 11882 if (tnrm > 0.f && tnrm < rmin) { 11883 iscale = 1; 11884 sigma = rmin / tnrm; 11885 } else if (tnrm > rmax) { 11886 iscale = 1; 11887 sigma = rmax / tnrm; 11888 } 11889 if (iscale == 1) { 11890 sscal_(n, &sigma, &d__[1], &c__1); 11891 i__1 = *n - 1; 11892 sscal_(&i__1, &sigma, &e[1], &c__1); 11893 } 11894 11895 /* For eigenvalues only, call SSTERF. For eigenvalues and 11896 eigenvectors, call SSTEDC. */ 11897 11898 if (! 
wantz) { 11899 ssterf_(n, &d__[1], &e[1], info); 11900 } else { 11901 sstedc_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], lwork, 11902 &iwork[1], liwork, info); 11903 } 11904 11905 /* If matrix was scaled, then rescale eigenvalues appropriately. */ 11906 11907 if (iscale == 1) { 11908 r__1 = 1.f / sigma; 11909 sscal_(n, &r__1, &d__[1], &c__1); 11910 } 11911 11912 work[1] = (real) lwmin; 11913 iwork[1] = liwmin; 11914 11915 return 0; 11916 11917 /* End of SSTEVD */ 11918 11919 } /* sstevd_ */
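A direct call to sstevd_ with JOBZ = 'V' needs the workspace minimums quoted in the Arguments section (1 + 4*N + N**2 reals and 3 + 5*N integers). The sketch below assumes the lapackblas.h typedefs and prototype; note that JOBZ is declared char * here, so a writable string is passed rather than a literal. The tridiagonal data is illustrative only.

#include <cstdio>
#include "lapackblas.h"   /* assumed to declare integer, real and sstevd_ */

int main()
{
    integer n = 3, ldz = 3, info = 0;
    real d[3] = {4.f, 4.f, 4.f};
    real e[3] = {1.f, 1.f, 0.f};   /* length N; only E(1..N-1) carry data */
    real z[9];
    integer lwork = 22;            /* 1 + 4*N + N*N for N = 3, JOBZ = 'V' */
    integer liwork = 18;           /* 3 + 5*N */
    real work[22];
    integer iwork[18];
    char jobz[] = "V";             /* writable: JOBZ is declared char * */

    sstevd_(jobz, &n, d, e, z, &ldz, work, &lwork, iwork, &liwork, &info);

    std::printf("info = %ld, lambda = %g %g %g\n",
                (long) info, (double) d[0], (double) d[1], (double) d[2]);
    return 0;
}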
int sswap_ | ( | integer * | n, | |
real * | sx, | |||
integer * | incx, | |||
real * | sy, | |||
integer * | incy | |||
) |
Definition at line 8788 of file lapackblas.cpp.
References integer.
Referenced by sbdsqr_(), sstedc_(), and ssteqr_().
08790 { 08791 /* System generated locals */ 08792 integer i__1; 08793 /* Local variables */ 08794 static integer i__, m; 08795 static real stemp; 08796 static integer ix, iy, mp1; 08797 /* interchanges two vectors. 08798 uses unrolled loops for increments equal to 1. 08799 jack dongarra, linpack, 3/11/78. 08800 modified 12/3/93, array(1) declarations changed to array(*) 08801 Parameter adjustments */ 08802 --sy; 08803 --sx; 08804 /* Function Body */ 08805 if (*n <= 0) { 08806 return 0; 08807 } 08808 if (*incx == 1 && *incy == 1) { 08809 goto L20; 08810 } 08811 /* code for unequal increments or equal increments not equal 08812 to 1 */ 08813 ix = 1; 08814 iy = 1; 08815 if (*incx < 0) { 08816 ix = (-(*n) + 1) * *incx + 1; 08817 } 08818 if (*incy < 0) { 08819 iy = (-(*n) + 1) * *incy + 1; 08820 } 08821 i__1 = *n; 08822 for (i__ = 1; i__ <= i__1; ++i__) { 08823 stemp = sx[ix]; 08824 sx[ix] = sy[iy]; 08825 sy[iy] = stemp; 08826 ix += *incx; 08827 iy += *incy; 08828 /* L10: */ 08829 } 08830 return 0; 08831 /* code for both increments equal to 1 08832 clean-up loop */ 08833 L20: 08834 m = *n % 3; 08835 if (m == 0) { 08836 goto L40; 08837 } 08838 i__1 = m; 08839 for (i__ = 1; i__ <= i__1; ++i__) { 08840 stemp = sx[i__]; 08841 sx[i__] = sy[i__]; 08842 sy[i__] = stemp; 08843 /* L30: */ 08844 } 08845 if (*n < 3) { 08846 return 0; 08847 } 08848 L40: 08849 mp1 = m + 1; 08850 i__1 = *n; 08851 for (i__ = mp1; i__ <= i__1; i__ += 3) { 08852 stemp = sx[i__]; 08853 sx[i__] = sy[i__]; 08854 sy[i__] = stemp; 08855 stemp = sx[i__ + 1]; 08856 sx[i__ + 1] = sy[i__ + 1]; 08857 sy[i__ + 1] = stemp; 08858 stemp = sx[i__ + 2]; 08859 sx[i__ + 2] = sy[i__ + 2]; 08860 sy[i__ + 2] = stemp; 08861 /* L50: */ 08862 } 08863 return 0; 08864 } /* sswap_ */
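The sketch below assumes the standard BLAS SSWAP argument order (n, sx, incx, sy, incy), which is consistent with the extern declarations used elsewhere in this file, together with the usual lapackblas.h typedefs. N = 4 touches both the n % 3 clean-up loop and the unrolled-by-3 loop shown in the listing above.

#include <cstdio>
#include "lapackblas.h"   /* assumed to declare integer, real and sswap_ */

int main()
{
    integer n = 4, incx = 1, incy = 1;
    real x[4] = {1.f, 2.f, 3.f, 4.f};
    real y[4] = {9.f, 8.f, 7.f, 6.f};

    /* N = 4 with unit strides: one element through the n % 3 clean-up loop,
       then one pass of the unrolled-by-3 loop. */
    sswap_(&n, x, &incx, y, &incy);

    std::printf("x = %g %g %g %g\n", (double) x[0], (double) x[1], (double) x[2], (double) x[3]);
    std::printf("y = %g %g %g %g\n", (double) y[0], (double) y[1], (double) y[2], (double) y[3]);
    return 0;
}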
int ssyev_ | ( | char * | jobz, | |
char * | uplo, | |||
integer * | n, | |||
real * | a, | |||
integer * | lda, | |||
real * | w, | |||
real * | work, | |||
integer * | lwork, | |||
integer * | info | |||
) |
Definition at line 8869 of file lapackblas.cpp.
References a_ref, c__0, c__1, c_n1, f2cmax, ilaenv_(), integer, lsame_(), slamch_(), slansy_(), slascl_(), sorgtr_(), sqrt(), sscal_(), ssteqr_(), ssterf_(), ssytrd_(), and xerbla_().
Referenced by EMAN::Util::coveig().
08871 { 08872 /* -- LAPACK driver routine (version 3.0) -- 08873 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 08874 Courant Institute, Argonne National Lab, and Rice University 08875 June 30, 1999 08876 08877 08878 Purpose 08879 ======= 08880 08881 SSYEV computes all eigenvalues and, optionally, eigenvectors of a 08882 real symmetric matrix A. 08883 08884 Arguments 08885 ========= 08886 08887 JOBZ (input) CHARACTER*1 08888 = 'N': Compute eigenvalues only; 08889 = 'V': Compute eigenvalues and eigenvectors. 08890 08891 UPLO (input) CHARACTER*1 08892 = 'U': Upper triangle of A is stored; 08893 = 'L': Lower triangle of A is stored. 08894 08895 N (input) INTEGER 08896 The order of the matrix A. N >= 0. 08897 08898 A (input/output) REAL array, dimension (LDA, N) 08899 On entry, the symmetric matrix A. If UPLO = 'U', the 08900 leading N-by-N upper triangular part of A contains the 08901 upper triangular part of the matrix A. If UPLO = 'L', 08902 the leading N-by-N lower triangular part of A contains 08903 the lower triangular part of the matrix A. 08904 On exit, if JOBZ = 'V', then if INFO = 0, A contains the 08905 orthonormal eigenvectors of the matrix A. 08906 If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') 08907 or the upper triangle (if UPLO='U') of A, including the 08908 diagonal, is destroyed. 08909 08910 LDA (input) INTEGER 08911 The leading dimension of the array A. LDA >= f2cmax(1,N). 08912 08913 W (output) REAL array, dimension (N) 08914 If INFO = 0, the eigenvalues in ascending order. 08915 08916 WORK (workspace/output) REAL array, dimension (LWORK) 08917 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 08918 08919 LWORK (input) INTEGER 08920 The length of the array WORK. LWORK >= f2cmax(1,3*N-1). 08921 For optimal efficiency, LWORK >= (NB+2)*N, 08922 where NB is the blocksize for SSYTRD returned by ILAENV. 08923 08924 If LWORK = -1, then a workspace query is assumed; the routine 08925 only calculates the optimal size of the WORK array, returns 08926 this value as the first entry of the WORK array, and no error 08927 message related to LWORK is issued by XERBLA. 08928 08929 INFO (output) INTEGER 08930 = 0: successful exit 08931 < 0: if INFO = -i, the i-th argument had an illegal value 08932 > 0: if INFO = i, the algorithm failed to converge; i 08933 off-diagonal elements of an intermediate tridiagonal 08934 form did not converge to zero. 08935 08936 ===================================================================== 08937 08938 08939 Test the input parameters. 
08940 08941 Parameter adjustments */ 08942 /* Table of constant values */ 08943 static integer c__1 = 1; 08944 static integer c_n1 = -1; 08945 static integer c__0 = 0; 08946 static real c_b17 = 1.f; 08947 08948 /* System generated locals */ 08949 integer a_dim1, a_offset, i__1, i__2; 08950 real r__1; 08951 /* Builtin functions */ 08952 // double sqrt(doublereal); 08953 /* Local variables */ 08954 static integer inde; 08955 static real anrm; 08956 static integer imax; 08957 static real rmin, rmax; 08958 static integer lopt; 08959 static real sigma; 08960 extern logical lsame_(const char *, const char *); 08961 static integer iinfo; 08962 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); 08963 static logical lower, wantz; 08964 static integer nb, iscale; 08965 extern doublereal slamch_(const char *); 08966 static real safmin; 08967 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 08968 integer *, integer *, ftnlen, ftnlen); 08969 extern /* Subroutine */ int xerbla_(const char *, integer *); 08970 static real bignum; 08971 extern /* Subroutine */ int slascl_(const char *, integer *, integer *, real *, 08972 real *, integer *, integer *, real *, integer *, integer *); 08973 static integer indtau, indwrk; 08974 extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); 08975 extern doublereal slansy_(const char *, char *, integer *, real *, integer *, 08976 real *); 08977 static integer llwork; 08978 static real smlnum; 08979 static integer lwkopt; 08980 static logical lquery; 08981 extern /* Subroutine */ int sorgtr_(char *, integer *, real *, integer *, 08982 real *, real *, integer *, integer *), ssteqr_(const char *, 08983 integer *, real *, real *, real *, integer *, real *, integer *), ssytrd_(char *, integer *, real *, integer *, real *, 08984 real *, real *, real *, integer *, integer *); 08985 static real eps; 08986 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 08987 08988 08989 a_dim1 = *lda; 08990 a_offset = 1 + a_dim1 * 1; 08991 a -= a_offset; 08992 --w; 08993 --work; 08994 08995 /* Function Body */ 08996 wantz = lsame_(jobz, "V"); 08997 lower = lsame_(uplo, "L"); 08998 lquery = *lwork == -1; 08999 09000 *info = 0; 09001 if (! (wantz || lsame_(jobz, "N"))) { 09002 *info = -1; 09003 } else if (! (lower || lsame_(uplo, "U"))) { 09004 *info = -2; 09005 } else if (*n < 0) { 09006 *info = -3; 09007 } else if (*lda < f2cmax(1,*n)) { 09008 *info = -5; 09009 } else /* if(complicated condition) */ { 09010 /* Computing MAX */ 09011 i__1 = 1, i__2 = *n * 3 - 1; 09012 if (*lwork < f2cmax(i__1,i__2) && ! lquery) { 09013 *info = -8; 09014 } 09015 } 09016 09017 if (*info == 0) { 09018 nb = ilaenv_(&c__1, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, 09019 (ftnlen)1); 09020 /* Computing MAX */ 09021 i__1 = 1, i__2 = (nb + 2) * *n; 09022 lwkopt = f2cmax(i__1,i__2); 09023 work[1] = (real) lwkopt; 09024 } 09025 09026 if (*info != 0) { 09027 i__1 = -(*info); 09028 xerbla_("SSYEV ", &i__1); 09029 return 0; 09030 } else if (lquery) { 09031 return 0; 09032 } 09033 09034 /* Quick return if possible */ 09035 09036 if (*n == 0) { 09037 work[1] = 1.f; 09038 return 0; 09039 } 09040 09041 if (*n == 1) { 09042 w[1] = a_ref(1, 1); 09043 work[1] = 3.f; 09044 if (wantz) { 09045 a_ref(1, 1) = 1.f; 09046 } 09047 return 0; 09048 } 09049 09050 /* Get machine constants. 
*/ 09051 09052 safmin = slamch_("Safe minimum"); 09053 eps = slamch_("Precision"); 09054 smlnum = safmin / eps; 09055 bignum = 1.f / smlnum; 09056 rmin = sqrt(smlnum); 09057 rmax = sqrt(bignum); 09058 09059 /* Scale matrix to allowable range, if necessary. */ 09060 09061 anrm = slansy_("M", uplo, n, &a[a_offset], lda, &work[1]); 09062 iscale = 0; 09063 if (anrm > 0.f && anrm < rmin) { 09064 iscale = 1; 09065 sigma = rmin / anrm; 09066 } else if (anrm > rmax) { 09067 iscale = 1; 09068 sigma = rmax / anrm; 09069 } 09070 if (iscale == 1) { 09071 slascl_(uplo, &c__0, &c__0, &c_b17, &sigma, n, n, &a[a_offset], lda, 09072 info); 09073 } 09074 09075 /* Call SSYTRD to reduce symmetric matrix to tridiagonal form. */ 09076 09077 inde = 1; 09078 indtau = inde + *n; 09079 indwrk = indtau + *n; 09080 llwork = *lwork - indwrk + 1; 09081 ssytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & 09082 work[indwrk], &llwork, &iinfo); 09083 lopt = static_cast<integer>( (*n << 1) + work[indwrk] ); 09084 09085 /* For eigenvalues only, call SSTERF. For eigenvectors, first call 09086 SORGTR to generate the orthogonal matrix, then call SSTEQR. */ 09087 09088 if (! wantz) { 09089 ssterf_(n, &w[1], &work[inde], info); 09090 } else { 09091 sorgtr_(uplo, n, &a[a_offset], lda, &work[indtau], &work[indwrk], & 09092 llwork, &iinfo); 09093 ssteqr_(jobz, n, &w[1], &work[inde], &a[a_offset], lda, &work[indtau], 09094 info); 09095 } 09096 09097 /* If matrix was scaled, then rescale eigenvalues appropriately. */ 09098 09099 if (iscale == 1) { 09100 if (*info == 0) { 09101 imax = *n; 09102 } else { 09103 imax = *info - 1; 09104 } 09105 r__1 = 1.f / sigma; 09106 sscal_(&imax, &r__1, &w[1], &c__1); 09107 } 09108 09109 /* Set WORK(1) to optimal workspace size. */ 09110 09111 work[1] = (real) lwkopt; 09112 09113 return 0; 09114 09115 /* End of SSYEV */ 09116 09117 } /* ssyev_ */
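Usage sketch (illustrative, not part of lapackblas.cpp): a minimal, hypothetical main() that drives ssyev_ on a 3-by-3 symmetric matrix, assuming the argument order JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO documented above and the usual f2c typedefs integer and real from lapackblas.h. It performs the LWORK = -1 workspace query first, then computes all eigenvalues and eigenvectors.

#include <cstdio>
#include "lapackblas.h"

int main() {
    /* 3x3 symmetric matrix, column-major; the upper triangle is what UPLO = 'U' reads. */
    integer n = 3, lda = 3, info = 0;
    char jobz = 'V', uplo = 'U';            /* char variables avoid any const-ness mismatch */
    real a[9] = { 4.f, 1.f, 2.f,
                  1.f, 5.f, 3.f,
                  2.f, 3.f, 6.f };
    real w[3];

    /* Workspace query: LWORK = -1 returns the optimal size in WORK(1). */
    integer lwork = -1;
    real wkopt;
    ssyev_(&jobz, &uplo, &n, a, &lda, w, &wkopt, &lwork, &info);

    lwork = (integer) wkopt;
    real *work = new real[lwork];
    ssyev_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, &info);

    if (info == 0)
        for (int i = 0; i < (int) n; ++i)
            std::printf("lambda[%d] = %g\n", i, w[i]);   /* eigenvalues in ascending order */
    delete[] work;
    return 0;
}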
int ssymv_ ( const char * uplo, integer * n, real * alpha, real * a, integer * lda, real * x, integer * incx, real * beta, real * y, integer * incy )
Definition at line 9125 of file lapackblas.cpp.
References a_ref, f2cmax, integer, lsame_(), and xerbla_().
Referenced by slatrd_(), and ssytd2_().
09128 { 09129 /* System generated locals */ 09130 integer a_dim1, a_offset, i__1, i__2; 09131 /* Local variables */ 09132 static integer info; 09133 static real temp1, temp2; 09134 static integer i__, j; 09135 extern logical lsame_(const char *, const char *); 09136 static integer ix, iy, jx, jy, kx, ky; 09137 extern /* Subroutine */ int xerbla_(const char *, integer *); 09138 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 09139 /* Purpose 09140 ======= 09141 SSYMV performs the matrix-vector operation 09142 y := alpha*A*x + beta*y, 09143 where alpha and beta are scalars, x and y are n element vectors and 09144 A is an n by n symmetric matrix. 09145 Parameters 09146 ========== 09147 UPLO - CHARACTER*1. 09148 On entry, UPLO specifies whether the upper or lower 09149 triangular part of the array A is to be referenced as 09150 follows: 09151 UPLO = 'U' or 'u' Only the upper triangular part of A 09152 is to be referenced. 09153 UPLO = 'L' or 'l' Only the lower triangular part of A 09154 is to be referenced. 09155 Unchanged on exit. 09156 N - INTEGER. 09157 On entry, N specifies the order of the matrix A. 09158 N must be at least zero. 09159 Unchanged on exit. 09160 ALPHA - REAL . 09161 On entry, ALPHA specifies the scalar alpha. 09162 Unchanged on exit. 09163 A - REAL array of DIMENSION ( LDA, n ). 09164 Before entry with UPLO = 'U' or 'u', the leading n by n 09165 upper triangular part of the array A must contain the upper 09166 triangular part of the symmetric matrix and the strictly 09167 lower triangular part of A is not referenced. 09168 Before entry with UPLO = 'L' or 'l', the leading n by n 09169 lower triangular part of the array A must contain the lower 09170 triangular part of the symmetric matrix and the strictly 09171 upper triangular part of A is not referenced. 09172 Unchanged on exit. 09173 LDA - INTEGER. 09174 On entry, LDA specifies the first dimension of A as declared 09175 in the calling (sub) program. LDA must be at least 09176 f2cmax( 1, n ). 09177 Unchanged on exit. 09178 X - REAL array of dimension at least 09179 ( 1 + ( n - 1 )*abs( INCX ) ). 09180 Before entry, the incremented array X must contain the n 09181 element vector x. 09182 Unchanged on exit. 09183 INCX - INTEGER. 09184 On entry, INCX specifies the increment for the elements of 09185 X. INCX must not be zero. 09186 Unchanged on exit. 09187 BETA - REAL . 09188 On entry, BETA specifies the scalar beta. When BETA is 09189 supplied as zero then Y need not be set on input. 09190 Unchanged on exit. 09191 Y - REAL array of dimension at least 09192 ( 1 + ( n - 1 )*abs( INCY ) ). 09193 Before entry, the incremented array Y must contain the n 09194 element vector y. On exit, Y is overwritten by the updated 09195 vector y. 09196 INCY - INTEGER. 09197 On entry, INCY specifies the increment for the elements of 09198 Y. INCY must not be zero. 09199 Unchanged on exit. 09200 Level 2 Blas routine. 09201 -- Written on 22-October-1986. 09202 Jack Dongarra, Argonne National Lab. 09203 Jeremy Du Croz, Nag Central Office. 09204 Sven Hammarling, Nag Central Office. 09205 Richard Hanson, Sandia National Labs. 09206 Test the input parameters. 09207 Parameter adjustments */ 09208 a_dim1 = *lda; 09209 a_offset = 1 + a_dim1 * 1; 09210 a -= a_offset; 09211 --x; 09212 --y; 09213 /* Function Body */ 09214 info = 0; 09215 if (! lsame_(uplo, "U") && ! 
lsame_(uplo, "L")) { 09216 info = 1; 09217 } else if (*n < 0) { 09218 info = 2; 09219 } else if (*lda < f2cmax(1,*n)) { 09220 info = 5; 09221 } else if (*incx == 0) { 09222 info = 7; 09223 } else if (*incy == 0) { 09224 info = 10; 09225 } 09226 if (info != 0) { 09227 xerbla_("SSYMV ", &info); 09228 return 0; 09229 } 09230 /* Quick return if possible. */ 09231 if (*n == 0 || *alpha == 0.f && *beta == 1.f) { 09232 return 0; 09233 } 09234 /* Set up the start points in X and Y. */ 09235 if (*incx > 0) { 09236 kx = 1; 09237 } else { 09238 kx = 1 - (*n - 1) * *incx; 09239 } 09240 if (*incy > 0) { 09241 ky = 1; 09242 } else { 09243 ky = 1 - (*n - 1) * *incy; 09244 } 09245 /* Start the operations. In this version the elements of A are 09246 accessed sequentially with one pass through the triangular part 09247 of A. 09248 First form y := beta*y. */ 09249 if (*beta != 1.f) { 09250 if (*incy == 1) { 09251 if (*beta == 0.f) { 09252 i__1 = *n; 09253 for (i__ = 1; i__ <= i__1; ++i__) { 09254 y[i__] = 0.f; 09255 /* L10: */ 09256 } 09257 } else { 09258 i__1 = *n; 09259 for (i__ = 1; i__ <= i__1; ++i__) { 09260 y[i__] = *beta * y[i__]; 09261 /* L20: */ 09262 } 09263 } 09264 } else { 09265 iy = ky; 09266 if (*beta == 0.f) { 09267 i__1 = *n; 09268 for (i__ = 1; i__ <= i__1; ++i__) { 09269 y[iy] = 0.f; 09270 iy += *incy; 09271 /* L30: */ 09272 } 09273 } else { 09274 i__1 = *n; 09275 for (i__ = 1; i__ <= i__1; ++i__) { 09276 y[iy] = *beta * y[iy]; 09277 iy += *incy; 09278 /* L40: */ 09279 } 09280 } 09281 } 09282 } 09283 if (*alpha == 0.f) { 09284 return 0; 09285 } 09286 if (lsame_(uplo, "U")) { 09287 /* Form y when A is stored in upper triangle. */ 09288 if (*incx == 1 && *incy == 1) { 09289 i__1 = *n; 09290 for (j = 1; j <= i__1; ++j) { 09291 temp1 = *alpha * x[j]; 09292 temp2 = 0.f; 09293 i__2 = j - 1; 09294 for (i__ = 1; i__ <= i__2; ++i__) { 09295 y[i__] += temp1 * a_ref(i__, j); 09296 temp2 += a_ref(i__, j) * x[i__]; 09297 /* L50: */ 09298 } 09299 y[j] = y[j] + temp1 * a_ref(j, j) + *alpha * temp2; 09300 /* L60: */ 09301 } 09302 } else { 09303 jx = kx; 09304 jy = ky; 09305 i__1 = *n; 09306 for (j = 1; j <= i__1; ++j) { 09307 temp1 = *alpha * x[jx]; 09308 temp2 = 0.f; 09309 ix = kx; 09310 iy = ky; 09311 i__2 = j - 1; 09312 for (i__ = 1; i__ <= i__2; ++i__) { 09313 y[iy] += temp1 * a_ref(i__, j); 09314 temp2 += a_ref(i__, j) * x[ix]; 09315 ix += *incx; 09316 iy += *incy; 09317 /* L70: */ 09318 } 09319 y[jy] = y[jy] + temp1 * a_ref(j, j) + *alpha * temp2; 09320 jx += *incx; 09321 jy += *incy; 09322 /* L80: */ 09323 } 09324 } 09325 } else { 09326 /* Form y when A is stored in lower triangle. 
*/ 09327 if (*incx == 1 && *incy == 1) { 09328 i__1 = *n; 09329 for (j = 1; j <= i__1; ++j) { 09330 temp1 = *alpha * x[j]; 09331 temp2 = 0.f; 09332 y[j] += temp1 * a_ref(j, j); 09333 i__2 = *n; 09334 for (i__ = j + 1; i__ <= i__2; ++i__) { 09335 y[i__] += temp1 * a_ref(i__, j); 09336 temp2 += a_ref(i__, j) * x[i__]; 09337 /* L90: */ 09338 } 09339 y[j] += *alpha * temp2; 09340 /* L100: */ 09341 } 09342 } else { 09343 jx = kx; 09344 jy = ky; 09345 i__1 = *n; 09346 for (j = 1; j <= i__1; ++j) { 09347 temp1 = *alpha * x[jx]; 09348 temp2 = 0.f; 09349 y[jy] += temp1 * a_ref(j, j); 09350 ix = jx; 09351 iy = jy; 09352 i__2 = *n; 09353 for (i__ = j + 1; i__ <= i__2; ++i__) { 09354 ix += *incx; 09355 iy += *incy; 09356 y[iy] += temp1 * a_ref(i__, j); 09357 temp2 += a_ref(i__, j) * x[ix]; 09358 /* L110: */ 09359 } 09360 y[jy] += *alpha * temp2; 09361 jx += *incx; 09362 jy += *incy; 09363 /* L120: */ 09364 } 09365 } 09366 } 09367 return 0; 09368 /* End of SSYMV . */ 09369 } /* ssymv_ */
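Usage sketch (illustrative, not part of lapackblas.cpp): calling ssymv_ to form y := alpha*A*x + beta*y for a 2-by-2 symmetric matrix, assuming the f2c typedefs integer and real from lapackblas.h and a hypothetical main() as driver.

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer n = 2, lda = 2, incx = 1, incy = 1;
    real alpha = 1.f, beta = 0.f;
    /* A = [[2,1],[1,3]] stored column-major; only the upper triangle is read for UPLO = 'U'. */
    real a[4] = { 2.f, 1.f,
                  1.f, 3.f };
    real x[2] = { 1.f, 1.f };
    real y[2] = { 0.f, 0.f };
    ssymv_("U", &n, &alpha, a, &lda, x, &incx, &beta, y, &incy);
    std::printf("y = (%g, %g)\n", y[0], y[1]);   /* expected: (3, 4) */
    return 0;
}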
int ssyr2_ ( char * uplo, integer * n, real * alpha, real * x, integer * incx, real * y, integer * incy, real * a, integer * lda )
Definition at line 9375 of file lapackblas.cpp.
References a_ref, f2cmax, integer, lsame_(), and xerbla_().
Referenced by ssytd2_().
09377 { 09378 /* System generated locals */ 09379 integer a_dim1, a_offset, i__1, i__2; 09380 /* Local variables */ 09381 static integer info; 09382 static real temp1, temp2; 09383 static integer i__, j; 09384 extern logical lsame_(const char *, const char *); 09385 static integer ix, iy, jx, jy, kx, ky; 09386 extern /* Subroutine */ int xerbla_(const char *, integer *); 09387 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 09388 /* Purpose 09389 ======= 09390 SSYR2 performs the symmetric rank 2 operation 09391 A := alpha*x*y' + alpha*y*x' + A, 09392 where alpha is a scalar, x and y are n element vectors and A is an n 09393 by n symmetric matrix. 09394 Parameters 09395 ========== 09396 UPLO - CHARACTER*1. 09397 On entry, UPLO specifies whether the upper or lower 09398 triangular part of the array A is to be referenced as 09399 follows: 09400 UPLO = 'U' or 'u' Only the upper triangular part of A 09401 is to be referenced. 09402 UPLO = 'L' or 'l' Only the lower triangular part of A 09403 is to be referenced. 09404 Unchanged on exit. 09405 N - INTEGER. 09406 On entry, N specifies the order of the matrix A. 09407 N must be at least zero. 09408 Unchanged on exit. 09409 ALPHA - REAL . 09410 On entry, ALPHA specifies the scalar alpha. 09411 Unchanged on exit. 09412 X - REAL array of dimension at least 09413 ( 1 + ( n - 1 )*abs( INCX ) ). 09414 Before entry, the incremented array X must contain the n 09415 element vector x. 09416 Unchanged on exit. 09417 INCX - INTEGER. 09418 On entry, INCX specifies the increment for the elements of 09419 X. INCX must not be zero. 09420 Unchanged on exit. 09421 Y - REAL array of dimension at least 09422 ( 1 + ( n - 1 )*abs( INCY ) ). 09423 Before entry, the incremented array Y must contain the n 09424 element vector y. 09425 Unchanged on exit. 09426 INCY - INTEGER. 09427 On entry, INCY specifies the increment for the elements of 09428 Y. INCY must not be zero. 09429 Unchanged on exit. 09430 A - REAL array of DIMENSION ( LDA, n ). 09431 Before entry with UPLO = 'U' or 'u', the leading n by n 09432 upper triangular part of the array A must contain the upper 09433 triangular part of the symmetric matrix and the strictly 09434 lower triangular part of A is not referenced. On exit, the 09435 upper triangular part of the array A is overwritten by the 09436 upper triangular part of the updated matrix. 09437 Before entry with UPLO = 'L' or 'l', the leading n by n 09438 lower triangular part of the array A must contain the lower 09439 triangular part of the symmetric matrix and the strictly 09440 upper triangular part of A is not referenced. On exit, the 09441 lower triangular part of the array A is overwritten by the 09442 lower triangular part of the updated matrix. 09443 LDA - INTEGER. 09444 On entry, LDA specifies the first dimension of A as declared 09445 in the calling (sub) program. LDA must be at least 09446 f2cmax( 1, n ). 09447 Unchanged on exit. 09448 Level 2 Blas routine. 09449 -- Written on 22-October-1986. 09450 Jack Dongarra, Argonne National Lab. 09451 Jeremy Du Croz, Nag Central Office. 09452 Sven Hammarling, Nag Central Office. 09453 Richard Hanson, Sandia National Labs. 09454 Test the input parameters. 09455 Parameter adjustments */ 09456 --x; 09457 --y; 09458 a_dim1 = *lda; 09459 a_offset = 1 + a_dim1 * 1; 09460 a -= a_offset; 09461 /* Function Body */ 09462 info = 0; 09463 if (! lsame_(uplo, "U") && ! 
lsame_(uplo, "L")) { 09464 info = 1; 09465 } else if (*n < 0) { 09466 info = 2; 09467 } else if (*incx == 0) { 09468 info = 5; 09469 } else if (*incy == 0) { 09470 info = 7; 09471 } else if (*lda < f2cmax(1,*n)) { 09472 info = 9; 09473 } 09474 if (info != 0) { 09475 xerbla_("SSYR2 ", &info); 09476 return 0; 09477 } 09478 /* Quick return if possible. */ 09479 if (*n == 0 || *alpha == 0.f) { 09480 return 0; 09481 } 09482 /* Set up the start points in X and Y if the increments are not both 09483 unity. */ 09484 if (*incx != 1 || *incy != 1) { 09485 if (*incx > 0) { 09486 kx = 1; 09487 } else { 09488 kx = 1 - (*n - 1) * *incx; 09489 } 09490 if (*incy > 0) { 09491 ky = 1; 09492 } else { 09493 ky = 1 - (*n - 1) * *incy; 09494 } 09495 jx = kx; 09496 jy = ky; 09497 } 09498 /* Start the operations. In this version the elements of A are 09499 accessed sequentially with one pass through the triangular part 09500 of A. */ 09501 if (lsame_(uplo, "U")) { 09502 /* Form A when A is stored in the upper triangle. */ 09503 if (*incx == 1 && *incy == 1) { 09504 i__1 = *n; 09505 for (j = 1; j <= i__1; ++j) { 09506 if (x[j] != 0.f || y[j] != 0.f) { 09507 temp1 = *alpha * y[j]; 09508 temp2 = *alpha * x[j]; 09509 i__2 = j; 09510 for (i__ = 1; i__ <= i__2; ++i__) { 09511 a_ref(i__, j) = a_ref(i__, j) + x[i__] * temp1 + y[ 09512 i__] * temp2; 09513 /* L10: */ 09514 } 09515 } 09516 /* L20: */ 09517 } 09518 } else { 09519 i__1 = *n; 09520 for (j = 1; j <= i__1; ++j) { 09521 if (x[jx] != 0.f || y[jy] != 0.f) { 09522 temp1 = *alpha * y[jy]; 09523 temp2 = *alpha * x[jx]; 09524 ix = kx; 09525 iy = ky; 09526 i__2 = j; 09527 for (i__ = 1; i__ <= i__2; ++i__) { 09528 a_ref(i__, j) = a_ref(i__, j) + x[ix] * temp1 + y[iy] 09529 * temp2; 09530 ix += *incx; 09531 iy += *incy; 09532 /* L30: */ 09533 } 09534 } 09535 jx += *incx; 09536 jy += *incy; 09537 /* L40: */ 09538 } 09539 } 09540 } else { 09541 /* Form A when A is stored in the lower triangle. */ 09542 if (*incx == 1 && *incy == 1) { 09543 i__1 = *n; 09544 for (j = 1; j <= i__1; ++j) { 09545 if (x[j] != 0.f || y[j] != 0.f) { 09546 temp1 = *alpha * y[j]; 09547 temp2 = *alpha * x[j]; 09548 i__2 = *n; 09549 for (i__ = j; i__ <= i__2; ++i__) { 09550 a_ref(i__, j) = a_ref(i__, j) + x[i__] * temp1 + y[ 09551 i__] * temp2; 09552 /* L50: */ 09553 } 09554 } 09555 /* L60: */ 09556 } 09557 } else { 09558 i__1 = *n; 09559 for (j = 1; j <= i__1; ++j) { 09560 if (x[jx] != 0.f || y[jy] != 0.f) { 09561 temp1 = *alpha * y[jy]; 09562 temp2 = *alpha * x[jx]; 09563 ix = jx; 09564 iy = jy; 09565 i__2 = *n; 09566 for (i__ = j; i__ <= i__2; ++i__) { 09567 a_ref(i__, j) = a_ref(i__, j) + x[ix] * temp1 + y[iy] 09568 * temp2; 09569 ix += *incx; 09570 iy += *incy; 09571 /* L70: */ 09572 } 09573 } 09574 jx += *incx; 09575 jy += *incy; 09576 /* L80: */ 09577 } 09578 } 09579 } 09580 return 0; 09581 /* End of SSYR2 . */ 09582 } /* ssyr2_ */
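Usage sketch (illustrative, not part of lapackblas.cpp): a symmetric rank-2 update A := alpha*x*y' + alpha*y*x' + A via ssyr2_ on a 2-by-2 identity, assuming the f2c typedefs from lapackblas.h; uplo is passed through a char variable because the parameter is declared char* rather than const char*.

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer n = 2, lda = 2, incx = 1, incy = 1;
    real alpha = 1.f;
    char uplo = 'L';                          /* non-const char* parameter, so pass a variable */
    real a[4] = { 1.f, 0.f,
                  0.f, 1.f };                 /* identity, column-major */
    real x[2] = { 1.f, 2.f };
    real y[2] = { 3.f, 4.f };
    ssyr2_(&uplo, &n, &alpha, x, &incx, y, &incy, a, &lda);
    /* lower triangle of I + x*y' + y*x': a(1,1) = 7, a(2,1) = 10, a(2,2) = 17 */
    std::printf("a11 = %g, a21 = %g, a22 = %g\n", a[0], a[1], a[3]);
    return 0;
}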
int ssyr2k_ ( char * uplo, const char * trans, integer * n, integer * k, real * alpha, real * a, integer * lda, real * b, integer * ldb, real * beta, real * c__, integer * ldc )
Definition at line 9588 of file lapackblas.cpp.
References a_ref, b_ref, c___ref, f2cmax, integer, lsame_(), and xerbla_().
Referenced by ssytrd_().
09591 { 09592 /* System generated locals */ 09593 integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 09594 i__3; 09595 /* Local variables */ 09596 static integer info; 09597 static real temp1, temp2; 09598 static integer i__, j, l; 09599 extern logical lsame_(const char *, const char *); 09600 static integer nrowa; 09601 static logical upper; 09602 extern /* Subroutine */ int xerbla_(const char *, integer *); 09603 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 09604 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] 09605 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] 09606 /* Purpose 09607 ======= 09608 SSYR2K performs one of the symmetric rank 2k operations 09609 C := alpha*A*B' + alpha*B*A' + beta*C, 09610 or 09611 C := alpha*A'*B + alpha*B'*A + beta*C, 09612 where alpha and beta are scalars, C is an n by n symmetric matrix 09613 and A and B are n by k matrices in the first case and k by n 09614 matrices in the second case. 09615 Parameters 09616 ========== 09617 UPLO - CHARACTER*1. 09618 On entry, UPLO specifies whether the upper or lower 09619 triangular part of the array C is to be referenced as 09620 follows: 09621 UPLO = 'U' or 'u' Only the upper triangular part of C 09622 is to be referenced. 09623 UPLO = 'L' or 'l' Only the lower triangular part of C 09624 is to be referenced. 09625 Unchanged on exit. 09626 TRANS - CHARACTER*1. 09627 On entry, TRANS specifies the operation to be performed as 09628 follows: 09629 TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + 09630 beta*C. 09631 TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + 09632 beta*C. 09633 TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + 09634 beta*C. 09635 Unchanged on exit. 09636 N - INTEGER. 09637 On entry, N specifies the order of the matrix C. N must be 09638 at least zero. 09639 Unchanged on exit. 09640 K - INTEGER. 09641 On entry with TRANS = 'N' or 'n', K specifies the number 09642 of columns of the matrices A and B, and on entry with 09643 TRANS = 'T' or 't' or 'C' or 'c', K specifies the number 09644 of rows of the matrices A and B. K must be at least zero. 09645 Unchanged on exit. 09646 ALPHA - REAL . 09647 On entry, ALPHA specifies the scalar alpha. 09648 Unchanged on exit. 09649 A - REAL array of DIMENSION ( LDA, ka ), where ka is 09650 k when TRANS = 'N' or 'n', and is n otherwise. 09651 Before entry with TRANS = 'N' or 'n', the leading n by k 09652 part of the array A must contain the matrix A, otherwise 09653 the leading k by n part of the array A must contain the 09654 matrix A. 09655 Unchanged on exit. 09656 LDA - INTEGER. 09657 On entry, LDA specifies the first dimension of A as declared 09658 in the calling (sub) program. When TRANS = 'N' or 'n' 09659 then LDA must be at least f2cmax( 1, n ), otherwise LDA must 09660 be at least f2cmax( 1, k ). 09661 Unchanged on exit. 09662 B - REAL array of DIMENSION ( LDB, kb ), where kb is 09663 k when TRANS = 'N' or 'n', and is n otherwise. 09664 Before entry with TRANS = 'N' or 'n', the leading n by k 09665 part of the array B must contain the matrix B, otherwise 09666 the leading k by n part of the array B must contain the 09667 matrix B. 09668 Unchanged on exit. 09669 LDB - INTEGER. 09670 On entry, LDB specifies the first dimension of B as declared 09671 in the calling (sub) program. When TRANS = 'N' or 'n' 09672 then LDB must be at least f2cmax( 1, n ), otherwise LDB must 09673 be at least f2cmax( 1, k ). 09674 Unchanged on exit. 09675 BETA - REAL . 09676 On entry, BETA specifies the scalar beta. 09677 Unchanged on exit. 
09678 C - REAL array of DIMENSION ( LDC, n ). 09679 Before entry with UPLO = 'U' or 'u', the leading n by n 09680 upper triangular part of the array C must contain the upper 09681 triangular part of the symmetric matrix and the strictly 09682 lower triangular part of C is not referenced. On exit, the 09683 upper triangular part of the array C is overwritten by the 09684 upper triangular part of the updated matrix. 09685 Before entry with UPLO = 'L' or 'l', the leading n by n 09686 lower triangular part of the array C must contain the lower 09687 triangular part of the symmetric matrix and the strictly 09688 upper triangular part of C is not referenced. On exit, the 09689 lower triangular part of the array C is overwritten by the 09690 lower triangular part of the updated matrix. 09691 LDC - INTEGER. 09692 On entry, LDC specifies the first dimension of C as declared 09693 in the calling (sub) program. LDC must be at least 09694 f2cmax( 1, n ). 09695 Unchanged on exit. 09696 Level 3 Blas routine. 09697 -- Written on 8-February-1989. 09698 Jack Dongarra, Argonne National Laboratory. 09699 Iain Duff, AERE Harwell. 09700 Jeremy Du Croz, Numerical Algorithms Group Ltd. 09701 Sven Hammarling, Numerical Algorithms Group Ltd. 09702 Test the input parameters. 09703 Parameter adjustments */ 09704 a_dim1 = *lda; 09705 a_offset = 1 + a_dim1 * 1; 09706 a -= a_offset; 09707 b_dim1 = *ldb; 09708 b_offset = 1 + b_dim1 * 1; 09709 b -= b_offset; 09710 c_dim1 = *ldc; 09711 c_offset = 1 + c_dim1 * 1; 09712 c__ -= c_offset; 09713 /* Function Body */ 09714 if (lsame_(trans, "N")) { 09715 nrowa = *n; 09716 } else { 09717 nrowa = *k; 09718 } 09719 upper = lsame_(uplo, "U"); 09720 info = 0; 09721 if (! upper && ! lsame_(uplo, "L")) { 09722 info = 1; 09723 } else if (! lsame_(trans, "N") && ! lsame_(trans, 09724 "T") && ! lsame_(trans, "C")) { 09725 info = 2; 09726 } else if (*n < 0) { 09727 info = 3; 09728 } else if (*k < 0) { 09729 info = 4; 09730 } else if (*lda < f2cmax(1,nrowa)) { 09731 info = 7; 09732 } else if (*ldb < f2cmax(1,nrowa)) { 09733 info = 9; 09734 } else if (*ldc < f2cmax(1,*n)) { 09735 info = 12; 09736 } 09737 if (info != 0) { 09738 xerbla_("SSYR2K", &info); 09739 return 0; 09740 } 09741 /* Quick return if possible. */ 09742 if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { 09743 return 0; 09744 } 09745 /* And when alpha.eq.zero. */ 09746 if (*alpha == 0.f) { 09747 if (upper) { 09748 if (*beta == 0.f) { 09749 i__1 = *n; 09750 for (j = 1; j <= i__1; ++j) { 09751 i__2 = j; 09752 for (i__ = 1; i__ <= i__2; ++i__) { 09753 c___ref(i__, j) = 0.f; 09754 /* L10: */ 09755 } 09756 /* L20: */ 09757 } 09758 } else { 09759 i__1 = *n; 09760 for (j = 1; j <= i__1; ++j) { 09761 i__2 = j; 09762 for (i__ = 1; i__ <= i__2; ++i__) { 09763 c___ref(i__, j) = *beta * c___ref(i__, j); 09764 /* L30: */ 09765 } 09766 /* L40: */ 09767 } 09768 } 09769 } else { 09770 if (*beta == 0.f) { 09771 i__1 = *n; 09772 for (j = 1; j <= i__1; ++j) { 09773 i__2 = *n; 09774 for (i__ = j; i__ <= i__2; ++i__) { 09775 c___ref(i__, j) = 0.f; 09776 /* L50: */ 09777 } 09778 /* L60: */ 09779 } 09780 } else { 09781 i__1 = *n; 09782 for (j = 1; j <= i__1; ++j) { 09783 i__2 = *n; 09784 for (i__ = j; i__ <= i__2; ++i__) { 09785 c___ref(i__, j) = *beta * c___ref(i__, j); 09786 /* L70: */ 09787 } 09788 /* L80: */ 09789 } 09790 } 09791 } 09792 return 0; 09793 } 09794 /* Start the operations. */ 09795 if (lsame_(trans, "N")) { 09796 /* Form C := alpha*A*B' + alpha*B*A' + C. 
*/ 09797 if (upper) { 09798 i__1 = *n; 09799 for (j = 1; j <= i__1; ++j) { 09800 if (*beta == 0.f) { 09801 i__2 = j; 09802 for (i__ = 1; i__ <= i__2; ++i__) { 09803 c___ref(i__, j) = 0.f; 09804 /* L90: */ 09805 } 09806 } else if (*beta != 1.f) { 09807 i__2 = j; 09808 for (i__ = 1; i__ <= i__2; ++i__) { 09809 c___ref(i__, j) = *beta * c___ref(i__, j); 09810 /* L100: */ 09811 } 09812 } 09813 i__2 = *k; 09814 for (l = 1; l <= i__2; ++l) { 09815 if (a_ref(j, l) != 0.f || b_ref(j, l) != 0.f) { 09816 temp1 = *alpha * b_ref(j, l); 09817 temp2 = *alpha * a_ref(j, l); 09818 i__3 = j; 09819 for (i__ = 1; i__ <= i__3; ++i__) { 09820 c___ref(i__, j) = c___ref(i__, j) + a_ref(i__, l) 09821 * temp1 + b_ref(i__, l) * temp2; 09822 /* L110: */ 09823 } 09824 } 09825 /* L120: */ 09826 } 09827 /* L130: */ 09828 } 09829 } else { 09830 i__1 = *n; 09831 for (j = 1; j <= i__1; ++j) { 09832 if (*beta == 0.f) { 09833 i__2 = *n; 09834 for (i__ = j; i__ <= i__2; ++i__) { 09835 c___ref(i__, j) = 0.f; 09836 /* L140: */ 09837 } 09838 } else if (*beta != 1.f) { 09839 i__2 = *n; 09840 for (i__ = j; i__ <= i__2; ++i__) { 09841 c___ref(i__, j) = *beta * c___ref(i__, j); 09842 /* L150: */ 09843 } 09844 } 09845 i__2 = *k; 09846 for (l = 1; l <= i__2; ++l) { 09847 if (a_ref(j, l) != 0.f || b_ref(j, l) != 0.f) { 09848 temp1 = *alpha * b_ref(j, l); 09849 temp2 = *alpha * a_ref(j, l); 09850 i__3 = *n; 09851 for (i__ = j; i__ <= i__3; ++i__) { 09852 c___ref(i__, j) = c___ref(i__, j) + a_ref(i__, l) 09853 * temp1 + b_ref(i__, l) * temp2; 09854 /* L160: */ 09855 } 09856 } 09857 /* L170: */ 09858 } 09859 /* L180: */ 09860 } 09861 } 09862 } else { 09863 /* Form C := alpha*A'*B + alpha*B'*A + C. */ 09864 if (upper) { 09865 i__1 = *n; 09866 for (j = 1; j <= i__1; ++j) { 09867 i__2 = j; 09868 for (i__ = 1; i__ <= i__2; ++i__) { 09869 temp1 = 0.f; 09870 temp2 = 0.f; 09871 i__3 = *k; 09872 for (l = 1; l <= i__3; ++l) { 09873 temp1 += a_ref(l, i__) * b_ref(l, j); 09874 temp2 += b_ref(l, i__) * a_ref(l, j); 09875 /* L190: */ 09876 } 09877 if (*beta == 0.f) { 09878 c___ref(i__, j) = *alpha * temp1 + *alpha * temp2; 09879 } else { 09880 c___ref(i__, j) = *beta * c___ref(i__, j) + *alpha * 09881 temp1 + *alpha * temp2; 09882 } 09883 /* L200: */ 09884 } 09885 /* L210: */ 09886 } 09887 } else { 09888 i__1 = *n; 09889 for (j = 1; j <= i__1; ++j) { 09890 i__2 = *n; 09891 for (i__ = j; i__ <= i__2; ++i__) { 09892 temp1 = 0.f; 09893 temp2 = 0.f; 09894 i__3 = *k; 09895 for (l = 1; l <= i__3; ++l) { 09896 temp1 += a_ref(l, i__) * b_ref(l, j); 09897 temp2 += b_ref(l, i__) * a_ref(l, j); 09898 /* L220: */ 09899 } 09900 if (*beta == 0.f) { 09901 c___ref(i__, j) = *alpha * temp1 + *alpha * temp2; 09902 } else { 09903 c___ref(i__, j) = *beta * c___ref(i__, j) + *alpha * 09904 temp1 + *alpha * temp2; 09905 } 09906 /* L230: */ 09907 } 09908 /* L240: */ 09909 } 09910 } 09911 } 09912 return 0; 09913 /* End of SSYR2K. */ 09914 } /* ssyr2k_ */
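Usage sketch (illustrative, not part of lapackblas.cpp): a rank-2k update C := alpha*A*B' + alpha*B*A' + beta*C via ssyr2k_ with n = 2 and k = 1, assuming the f2c typedefs from lapackblas.h and a hypothetical main().

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer n = 2, k = 1, lda = 2, ldb = 2, ldc = 2;
    real alpha = 1.f, beta = 0.f;
    char uplo = 'U';                          /* non-const char* parameter */
    real a[2] = { 1.f, 2.f };                 /* A is n-by-k = 2-by-1 */
    real b[2] = { 3.f, 4.f };                 /* B is 2-by-1 */
    real c[4] = { 0.f, 0.f, 0.f, 0.f };       /* C is 2-by-2, column-major */
    ssyr2k_(&uplo, "N", &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc);
    /* upper triangle of A*B' + B*A': c(1,1) = 6, c(1,2) = 10, c(2,2) = 16 */
    std::printf("c11 = %g, c12 = %g, c22 = %g\n", c[0], c[2], c[3]);
    return 0;
}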
int ssytd2_ ( char * uplo, integer * n, real * a, integer * lda, real * d__, real * e, real * tau, integer * info )
Definition at line 9922 of file lapackblas.cpp.
References a_ref, c__1, f2cmax, f2cmin, integer, lsame_(), saxpy_(), sdot_(), slarfg_(), ssymv_(), ssyr2_(), and xerbla_().
Referenced by ssytrd_().
09924 { 09925 /* -- LAPACK routine (version 3.0) -- 09926 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 09927 Courant Institute, Argonne National Lab, and Rice University 09928 October 31, 1992 09929 09930 09931 Purpose 09932 ======= 09933 09934 SSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal 09935 form T by an orthogonal similarity transformation: Q' * A * Q = T. 09936 09937 Arguments 09938 ========= 09939 09940 UPLO (input) CHARACTER*1 09941 Specifies whether the upper or lower triangular part of the 09942 symmetric matrix A is stored: 09943 = 'U': Upper triangular 09944 = 'L': Lower triangular 09945 09946 N (input) INTEGER 09947 The order of the matrix A. N >= 0. 09948 09949 A (input/output) REAL array, dimension (LDA,N) 09950 On entry, the symmetric matrix A. If UPLO = 'U', the leading 09951 n-by-n upper triangular part of A contains the upper 09952 triangular part of the matrix A, and the strictly lower 09953 triangular part of A is not referenced. If UPLO = 'L', the 09954 leading n-by-n lower triangular part of A contains the lower 09955 triangular part of the matrix A, and the strictly upper 09956 triangular part of A is not referenced. 09957 On exit, if UPLO = 'U', the diagonal and first superdiagonal 09958 of A are overwritten by the corresponding elements of the 09959 tridiagonal matrix T, and the elements above the first 09960 superdiagonal, with the array TAU, represent the orthogonal 09961 matrix Q as a product of elementary reflectors; if UPLO 09962 = 'L', the diagonal and first subdiagonal of A are over- 09963 written by the corresponding elements of the tridiagonal 09964 matrix T, and the elements below the first subdiagonal, with 09965 the array TAU, represent the orthogonal matrix Q as a product 09966 of elementary reflectors. See Further Details. 09967 09968 LDA (input) INTEGER 09969 The leading dimension of the array A. LDA >= f2cmax(1,N). 09970 09971 D (output) REAL array, dimension (N) 09972 The diagonal elements of the tridiagonal matrix T: 09973 D(i) = A(i,i). 09974 09975 E (output) REAL array, dimension (N-1) 09976 The off-diagonal elements of the tridiagonal matrix T: 09977 E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. 09978 09979 TAU (output) REAL array, dimension (N-1) 09980 The scalar factors of the elementary reflectors (see Further 09981 Details). 09982 09983 INFO (output) INTEGER 09984 = 0: successful exit 09985 < 0: if INFO = -i, the i-th argument had an illegal value. 09986 09987 Further Details 09988 =============== 09989 09990 If UPLO = 'U', the matrix Q is represented as a product of elementary 09991 reflectors 09992 09993 Q = H(n-1) . . . H(2) H(1). 09994 09995 Each H(i) has the form 09996 09997 H(i) = I - tau * v * v' 09998 09999 where tau is a real scalar, and v is a real vector with 10000 v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in 10001 A(1:i-1,i+1), and tau in TAU(i). 10002 10003 If UPLO = 'L', the matrix Q is represented as a product of elementary 10004 reflectors 10005 10006 Q = H(1) H(2) . . . H(n-1). 10007 10008 Each H(i) has the form 10009 10010 H(i) = I - tau * v * v' 10011 10012 where tau is a real scalar, and v is a real vector with 10013 v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), 10014 and tau in TAU(i). 
10015 10016 The contents of A on exit are illustrated by the following examples 10017 with n = 5: 10018 10019 if UPLO = 'U': if UPLO = 'L': 10020 10021 ( d e v2 v3 v4 ) ( d ) 10022 ( d e v3 v4 ) ( e d ) 10023 ( d e v4 ) ( v1 e d ) 10024 ( d e ) ( v1 v2 e d ) 10025 ( d ) ( v1 v2 v3 e d ) 10026 10027 where d and e denote diagonal and off-diagonal elements of T, and vi 10028 denotes an element of the vector defining H(i). 10029 10030 ===================================================================== 10031 10032 10033 Test the input parameters 10034 10035 Parameter adjustments */ 10036 /* Table of constant values */ 10037 static integer c__1 = 1; 10038 static real c_b8 = 0.f; 10039 static real c_b14 = -1.f; 10040 10041 /* System generated locals */ 10042 integer a_dim1, a_offset, i__1, i__2, i__3; 10043 /* Local variables */ 10044 static real taui; 10045 extern doublereal sdot_(integer *, real *, integer *, real *, integer *); 10046 static integer i__; 10047 extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *, 10048 integer *, real *, integer *, real *, integer *); 10049 static real alpha; 10050 extern logical lsame_(const char *, const char *); 10051 static logical upper; 10052 extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 10053 real *, integer *), ssymv_(const char *, integer *, real *, real *, 10054 integer *, real *, integer *, real *, real *, integer *), 10055 xerbla_(const char *, integer *), slarfg_(integer *, real *, 10056 real *, integer *, real *); 10057 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 10058 10059 10060 a_dim1 = *lda; 10061 a_offset = 1 + a_dim1 * 1; 10062 a -= a_offset; 10063 --d__; 10064 --e; 10065 --tau; 10066 10067 /* Function Body */ 10068 *info = 0; 10069 upper = lsame_(uplo, "U"); 10070 if (! upper && ! 
lsame_(uplo, "L")) { 10071 *info = -1; 10072 } else if (*n < 0) { 10073 *info = -2; 10074 } else if (*lda < f2cmax(1,*n)) { 10075 *info = -4; 10076 } 10077 if (*info != 0) { 10078 i__1 = -(*info); 10079 xerbla_("SSYTD2", &i__1); 10080 return 0; 10081 } 10082 10083 /* Quick return if possible */ 10084 10085 if (*n <= 0) { 10086 return 0; 10087 } 10088 10089 if (upper) { 10090 10091 /* Reduce the upper triangle of A */ 10092 10093 for (i__ = *n - 1; i__ >= 1; --i__) { 10094 10095 /* Generate elementary reflector H(i) = I - tau * v * v' 10096 to annihilate A(1:i-1,i+1) */ 10097 10098 slarfg_(&i__, &a_ref(i__, i__ + 1), &a_ref(1, i__ + 1), &c__1, & 10099 taui); 10100 e[i__] = a_ref(i__, i__ + 1); 10101 10102 if (taui != 0.f) { 10103 10104 /* Apply H(i) from both sides to A(1:i,1:i) */ 10105 10106 a_ref(i__, i__ + 1) = 1.f; 10107 10108 /* Compute x := tau * A * v storing x in TAU(1:i) */ 10109 10110 ssymv_(uplo, &i__, &taui, &a[a_offset], lda, &a_ref(1, i__ + 10111 1), &c__1, &c_b8, &tau[1], &c__1); 10112 10113 /* Compute w := x - 1/2 * tau * (x'*v) * v */ 10114 10115 alpha = taui * -.5f * sdot_(&i__, &tau[1], &c__1, &a_ref(1, 10116 i__ + 1), &c__1); 10117 saxpy_(&i__, &alpha, &a_ref(1, i__ + 1), &c__1, &tau[1], & 10118 c__1); 10119 10120 /* Apply the transformation as a rank-2 update: 10121 A := A - v * w' - w * v' */ 10122 10123 ssyr2_(uplo, &i__, &c_b14, &a_ref(1, i__ + 1), &c__1, &tau[1], 10124 &c__1, &a[a_offset], lda); 10125 10126 a_ref(i__, i__ + 1) = e[i__]; 10127 } 10128 d__[i__ + 1] = a_ref(i__ + 1, i__ + 1); 10129 tau[i__] = taui; 10130 /* L10: */ 10131 } 10132 d__[1] = a_ref(1, 1); 10133 } else { 10134 10135 /* Reduce the lower triangle of A */ 10136 10137 i__1 = *n - 1; 10138 for (i__ = 1; i__ <= i__1; ++i__) { 10139 10140 /* Generate elementary reflector H(i) = I - tau * v * v' 10141 to annihilate A(i+2:n,i) 10142 10143 Computing MIN */ 10144 i__2 = i__ + 2; 10145 i__3 = *n - i__; 10146 slarfg_(&i__3, &a_ref(i__ + 1, i__), &a_ref(f2cmin(i__2,*n), i__), & 10147 c__1, &taui); 10148 e[i__] = a_ref(i__ + 1, i__); 10149 10150 if (taui != 0.f) { 10151 10152 /* Apply H(i) from both sides to A(i+1:n,i+1:n) */ 10153 10154 a_ref(i__ + 1, i__) = 1.f; 10155 10156 /* Compute x := tau * A * v storing y in TAU(i:n-1) */ 10157 10158 i__2 = *n - i__; 10159 ssymv_(uplo, &i__2, &taui, &a_ref(i__ + 1, i__ + 1), lda, & 10160 a_ref(i__ + 1, i__), &c__1, &c_b8, &tau[i__], &c__1); 10161 10162 /* Compute w := x - 1/2 * tau * (x'*v) * v */ 10163 10164 i__2 = *n - i__; 10165 alpha = taui * -.5f * sdot_(&i__2, &tau[i__], &c__1, &a_ref( 10166 i__ + 1, i__), &c__1); 10167 i__2 = *n - i__; 10168 saxpy_(&i__2, &alpha, &a_ref(i__ + 1, i__), &c__1, &tau[i__], 10169 &c__1); 10170 10171 /* Apply the transformation as a rank-2 update: 10172 A := A - v * w' - w * v' */ 10173 10174 i__2 = *n - i__; 10175 ssyr2_(uplo, &i__2, &c_b14, &a_ref(i__ + 1, i__), &c__1, &tau[ 10176 i__], &c__1, &a_ref(i__ + 1, i__ + 1), lda) 10177 ; 10178 10179 a_ref(i__ + 1, i__) = e[i__]; 10180 } 10181 d__[i__] = a_ref(i__, i__); 10182 tau[i__] = taui; 10183 /* L20: */ 10184 } 10185 d__[*n] = a_ref(*n, *n); 10186 } 10187 10188 return 0; 10189 10190 /* End of SSYTD2 */ 10191 10192 } /* ssytd2_ */
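Usage sketch (illustrative, not part of lapackblas.cpp): the unblocked reduction ssytd2_ applied to a 3-by-3 symmetric matrix, assuming the f2c typedefs from lapackblas.h and a hypothetical main(); D receives the diagonal of T, E its off-diagonal, and TAU the Householder scalars.

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer n = 3, lda = 3, info = 0;
    char uplo = 'L';                          /* non-const char* parameter */
    real a[9] = { 4.f, 1.f, 2.f,
                  1.f, 5.f, 3.f,
                  2.f, 3.f, 6.f };            /* column-major; lower triangle is used */
    real d[3], e[2], tau[2];
    ssytd2_(&uplo, &n, a, &lda, d, e, tau, &info);
    if (info == 0)
        std::printf("T: diag = (%g, %g, %g), offdiag = (%g, %g)\n",
                    d[0], d[1], d[2], e[0], e[1]);
    return 0;
}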
int ssytrd_ ( char * uplo, integer * n, real * a, integer * lda, real * d__, real * e, real * tau, real * work, integer * lwork, integer * info )
Definition at line 10200 of file lapackblas.cpp.
References a_ref, c__1, c__2, c__3, c_n1, f2cmax, ilaenv_(), integer, lsame_(), nx, slatrd_(), ssyr2k_(), ssytd2_(), and xerbla_().
Referenced by ssyev_().
10203 { 10204 /* -- LAPACK routine (version 3.0) -- 10205 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 10206 Courant Institute, Argonne National Lab, and Rice University 10207 June 30, 1999 10208 10209 10210 Purpose 10211 ======= 10212 10213 SSYTRD reduces a real symmetric matrix A to real symmetric 10214 tridiagonal form T by an orthogonal similarity transformation: 10215 Q**T * A * Q = T. 10216 10217 Arguments 10218 ========= 10219 10220 UPLO (input) CHARACTER*1 10221 = 'U': Upper triangle of A is stored; 10222 = 'L': Lower triangle of A is stored. 10223 10224 N (input) INTEGER 10225 The order of the matrix A. N >= 0. 10226 10227 A (input/output) REAL array, dimension (LDA,N) 10228 On entry, the symmetric matrix A. If UPLO = 'U', the leading 10229 N-by-N upper triangular part of A contains the upper 10230 triangular part of the matrix A, and the strictly lower 10231 triangular part of A is not referenced. If UPLO = 'L', the 10232 leading N-by-N lower triangular part of A contains the lower 10233 triangular part of the matrix A, and the strictly upper 10234 triangular part of A is not referenced. 10235 On exit, if UPLO = 'U', the diagonal and first superdiagonal 10236 of A are overwritten by the corresponding elements of the 10237 tridiagonal matrix T, and the elements above the first 10238 superdiagonal, with the array TAU, represent the orthogonal 10239 matrix Q as a product of elementary reflectors; if UPLO 10240 = 'L', the diagonal and first subdiagonal of A are over- 10241 written by the corresponding elements of the tridiagonal 10242 matrix T, and the elements below the first subdiagonal, with 10243 the array TAU, represent the orthogonal matrix Q as a product 10244 of elementary reflectors. See Further Details. 10245 10246 LDA (input) INTEGER 10247 The leading dimension of the array A. LDA >= f2cmax(1,N). 10248 10249 D (output) REAL array, dimension (N) 10250 The diagonal elements of the tridiagonal matrix T: 10251 D(i) = A(i,i). 10252 10253 E (output) REAL array, dimension (N-1) 10254 The off-diagonal elements of the tridiagonal matrix T: 10255 E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. 10256 10257 TAU (output) REAL array, dimension (N-1) 10258 The scalar factors of the elementary reflectors (see Further 10259 Details). 10260 10261 WORK (workspace/output) REAL array, dimension (LWORK) 10262 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 10263 10264 LWORK (input) INTEGER 10265 The dimension of the array WORK. LWORK >= 1. 10266 For optimum performance LWORK >= N*NB, where NB is the 10267 optimal blocksize. 10268 10269 If LWORK = -1, then a workspace query is assumed; the routine 10270 only calculates the optimal size of the WORK array, returns 10271 this value as the first entry of the WORK array, and no error 10272 message related to LWORK is issued by XERBLA. 10273 10274 INFO (output) INTEGER 10275 = 0: successful exit 10276 < 0: if INFO = -i, the i-th argument had an illegal value 10277 10278 Further Details 10279 =============== 10280 10281 If UPLO = 'U', the matrix Q is represented as a product of elementary 10282 reflectors 10283 10284 Q = H(n-1) . . . H(2) H(1). 10285 10286 Each H(i) has the form 10287 10288 H(i) = I - tau * v * v' 10289 10290 where tau is a real scalar, and v is a real vector with 10291 v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in 10292 A(1:i-1,i+1), and tau in TAU(i). 10293 10294 If UPLO = 'L', the matrix Q is represented as a product of elementary 10295 reflectors 10296 10297 Q = H(1) H(2) . . . 
H(n-1). 10298 10299 Each H(i) has the form 10300 10301 H(i) = I - tau * v * v' 10302 10303 where tau is a real scalar, and v is a real vector with 10304 v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), 10305 and tau in TAU(i). 10306 10307 The contents of A on exit are illustrated by the following examples 10308 with n = 5: 10309 10310 if UPLO = 'U': if UPLO = 'L': 10311 10312 ( d e v2 v3 v4 ) ( d ) 10313 ( d e v3 v4 ) ( e d ) 10314 ( d e v4 ) ( v1 e d ) 10315 ( d e ) ( v1 v2 e d ) 10316 ( d ) ( v1 v2 v3 e d ) 10317 10318 where d and e denote diagonal and off-diagonal elements of T, and vi 10319 denotes an element of the vector defining H(i). 10320 10321 ===================================================================== 10322 10323 10324 Test the input parameters 10325 10326 Parameter adjustments */ 10327 /* Table of constant values */ 10328 static integer c__1 = 1; 10329 static integer c_n1 = -1; 10330 static integer c__3 = 3; 10331 static integer c__2 = 2; 10332 static real c_b22 = -1.f; 10333 static real c_b23 = 1.f; 10334 10335 /* System generated locals */ 10336 integer a_dim1, a_offset, i__1, i__2, i__3; 10337 /* Local variables */ 10338 static integer i__, j; 10339 extern logical lsame_(const char *, const char *); 10340 static integer nbmin, iinfo; 10341 static logical upper; 10342 static integer nb, kk; 10343 extern /* Subroutine */ int ssytd2_(char *, integer *, real *, integer *, 10344 real *, real *, real *, integer *), ssyr2k_(char *, const char * 10345 , integer *, integer *, real *, real *, integer *, real *, 10346 integer *, real *, real *, integer *); 10347 static integer nx; 10348 extern /* Subroutine */ int xerbla_(const char *, integer *); 10349 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *, 10350 integer *, integer *, ftnlen, ftnlen); 10351 extern /* Subroutine */ int slatrd_(char *, integer *, integer *, real *, 10352 integer *, real *, real *, real *, integer *); 10353 static integer ldwork, lwkopt; 10354 static logical lquery; 10355 static integer iws; 10356 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 10357 10358 10359 a_dim1 = *lda; 10360 a_offset = 1 + a_dim1 * 1; 10361 a -= a_offset; 10362 --d__; 10363 --e; 10364 --tau; 10365 --work; 10366 10367 /* Function Body */ 10368 *info = 0; 10369 upper = lsame_(uplo, "U"); 10370 lquery = *lwork == -1; 10371 if (! upper && ! lsame_(uplo, "L")) { 10372 *info = -1; 10373 } else if (*n < 0) { 10374 *info = -2; 10375 } else if (*lda < f2cmax(1,*n)) { 10376 *info = -4; 10377 } else if (*lwork < 1 && ! lquery) { 10378 *info = -9; 10379 } 10380 10381 if (*info == 0) { 10382 10383 /* Determine the block size. */ 10384 10385 nb = ilaenv_(&c__1, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, 10386 (ftnlen)1); 10387 lwkopt = *n * nb; 10388 work[1] = (real) lwkopt; 10389 } 10390 10391 if (*info != 0) { 10392 i__1 = -(*info); 10393 xerbla_("SSYTRD", &i__1); 10394 return 0; 10395 } else if (lquery) { 10396 return 0; 10397 } 10398 10399 /* Quick return if possible */ 10400 10401 if (*n == 0) { 10402 work[1] = 1.f; 10403 return 0; 10404 } 10405 10406 nx = *n; 10407 iws = 1; 10408 if (nb > 1 && nb < *n) { 10409 10410 /* Determine when to cross over from blocked to unblocked code 10411 (last block is always handled by unblocked code). 
10412 10413 Computing MAX */ 10414 i__1 = nb, i__2 = ilaenv_(&c__3, "SSYTRD", uplo, n, &c_n1, &c_n1, & 10415 c_n1, (ftnlen)6, (ftnlen)1); 10416 nx = f2cmax(i__1,i__2); 10417 if (nx < *n) { 10418 10419 /* Determine if workspace is large enough for blocked code. */ 10420 10421 ldwork = *n; 10422 iws = ldwork * nb; 10423 if (*lwork < iws) { 10424 10425 /* Not enough workspace to use optimal NB: determine the 10426 minimum value of NB, and reduce NB or force use of 10427 unblocked code by setting NX = N. 10428 10429 Computing MAX */ 10430 i__1 = *lwork / ldwork; 10431 nb = f2cmax(i__1,1); 10432 nbmin = ilaenv_(&c__2, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, 10433 (ftnlen)6, (ftnlen)1); 10434 if (nb < nbmin) { 10435 nx = *n; 10436 } 10437 } 10438 } else { 10439 nx = *n; 10440 } 10441 } else { 10442 nb = 1; 10443 } 10444 10445 if (upper) { 10446 10447 /* Reduce the upper triangle of A. 10448 Columns 1:kk are handled by the unblocked method. */ 10449 10450 kk = *n - (*n - nx + nb - 1) / nb * nb; 10451 i__1 = kk + 1; 10452 i__2 = -nb; 10453 for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += 10454 i__2) { 10455 10456 /* Reduce columns i:i+nb-1 to tridiagonal form and form the 10457 matrix W which is needed to update the unreduced part of 10458 the matrix */ 10459 10460 i__3 = i__ + nb - 1; 10461 slatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & 10462 work[1], &ldwork); 10463 10464 /* Update the unreduced submatrix A(1:i-1,1:i-1), using an 10465 update of the form: A := A - V*W' - W*V' */ 10466 10467 i__3 = i__ - 1; 10468 ssyr2k_(uplo, "No transpose", &i__3, &nb, &c_b22, &a_ref(1, i__), 10469 lda, &work[1], &ldwork, &c_b23, &a[a_offset], lda); 10470 10471 /* Copy superdiagonal elements back into A, and diagonal 10472 elements into D */ 10473 10474 i__3 = i__ + nb - 1; 10475 for (j = i__; j <= i__3; ++j) { 10476 a_ref(j - 1, j) = e[j - 1]; 10477 d__[j] = a_ref(j, j); 10478 /* L10: */ 10479 } 10480 /* L20: */ 10481 } 10482 10483 /* Use unblocked code to reduce the last or only block */ 10484 10485 ssytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); 10486 } else { 10487 10488 /* Reduce the lower triangle of A */ 10489 10490 i__2 = *n - nx; 10491 i__1 = nb; 10492 for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { 10493 10494 /* Reduce columns i:i+nb-1 to tridiagonal form and form the 10495 matrix W which is needed to update the unreduced part of 10496 the matrix */ 10497 10498 i__3 = *n - i__ + 1; 10499 slatrd_(uplo, &i__3, &nb, &a_ref(i__, i__), lda, &e[i__], &tau[ 10500 i__], &work[1], &ldwork); 10501 10502 /* Update the unreduced submatrix A(i+ib:n,i+ib:n), using 10503 an update of the form: A := A - V*W' - W*V' */ 10504 10505 i__3 = *n - i__ - nb + 1; 10506 ssyr2k_(uplo, "No transpose", &i__3, &nb, &c_b22, &a_ref(i__ + nb, 10507 i__), lda, &work[nb + 1], &ldwork, &c_b23, &a_ref(i__ + 10508 nb, i__ + nb), lda); 10509 10510 /* Copy subdiagonal elements back into A, and diagonal 10511 elements into D */ 10512 10513 i__3 = i__ + nb - 1; 10514 for (j = i__; j <= i__3; ++j) { 10515 a_ref(j + 1, j) = e[j]; 10516 d__[j] = a_ref(j, j); 10517 /* L30: */ 10518 } 10519 /* L40: */ 10520 } 10521 10522 /* Use unblocked code to reduce the last or only block */ 10523 10524 i__1 = *n - i__ + 1; 10525 ssytd2_(uplo, &i__1, &a_ref(i__, i__), lda, &d__[i__], &e[i__], &tau[ 10526 i__], &iinfo); 10527 } 10528 10529 work[1] = (real) lwkopt; 10530 return 0; 10531 10532 /* End of SSYTRD */ 10533 10534 } /* ssytrd_ */
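Usage sketch (illustrative, not part of lapackblas.cpp): the blocked reduction ssytrd_, including the LWORK = -1 workspace query, assuming the f2c typedefs from lapackblas.h and a hypothetical main().

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer n = 3, lda = 3, info = 0;
    char uplo = 'U';                          /* non-const char* parameter */
    real a[9] = { 4.f, 1.f, 2.f,
                  1.f, 5.f, 3.f,
                  2.f, 3.f, 6.f };
    real d[3], e[2], tau[2];

    integer lwork = -1;                       /* workspace query */
    real wkopt;
    ssytrd_(&uplo, &n, a, &lda, d, e, tau, &wkopt, &lwork, &info);

    lwork = (integer) wkopt;
    real *work = new real[lwork];
    ssytrd_(&uplo, &n, a, &lda, d, e, tau, work, &lwork, &info);
    if (info == 0)
        std::printf("optimal LWORK = %d, T(1,1) = %g\n", (int) lwork, d[0]);
    delete[] work;
    return 0;
}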
int strmm_ ( const char * side, const char * uplo, const char * transa, const char * diag, integer * m, integer * n, real * alpha, real * a, integer * lda, real * b, integer * ldb )
Definition at line 10542 of file lapackblas.cpp.
References a_ref, b_ref, f2cmax, integer, lsame_(), and xerbla_().
Referenced by slarfb_().
10545 { 10546 /* System generated locals */ 10547 integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; 10548 /* Local variables */ 10549 static integer info; 10550 static real temp; 10551 static integer i__, j, k; 10552 static logical lside; 10553 extern logical lsame_(const char *, const char *); 10554 static integer nrowa; 10555 static logical upper; 10556 extern /* Subroutine */ int xerbla_(const char *, integer *); 10557 static logical nounit; 10558 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 10559 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] 10560 /* Purpose 10561 ======= 10562 STRMM performs one of the matrix-matrix operations 10563 B := alpha*op( A )*B, or B := alpha*B*op( A ), 10564 where alpha is a scalar, B is an m by n matrix, A is a unit, or 10565 non-unit, upper or lower triangular matrix and op( A ) is one of 10566 op( A ) = A or op( A ) = A'. 10567 Parameters 10568 ========== 10569 SIDE - CHARACTER*1. 10570 On entry, SIDE specifies whether op( A ) multiplies B from 10571 the left or right as follows: 10572 SIDE = 'L' or 'l' B := alpha*op( A )*B. 10573 SIDE = 'R' or 'r' B := alpha*B*op( A ). 10574 Unchanged on exit. 10575 UPLO - CHARACTER*1. 10576 On entry, UPLO specifies whether the matrix A is an upper or 10577 lower triangular matrix as follows: 10578 UPLO = 'U' or 'u' A is an upper triangular matrix. 10579 UPLO = 'L' or 'l' A is a lower triangular matrix. 10580 Unchanged on exit. 10581 TRANSA - CHARACTER*1. 10582 On entry, TRANSA specifies the form of op( A ) to be used in 10583 the matrix multiplication as follows: 10584 TRANSA = 'N' or 'n' op( A ) = A. 10585 TRANSA = 'T' or 't' op( A ) = A'. 10586 TRANSA = 'C' or 'c' op( A ) = A'. 10587 Unchanged on exit. 10588 DIAG - CHARACTER*1. 10589 On entry, DIAG specifies whether or not A is unit triangular 10590 as follows: 10591 DIAG = 'U' or 'u' A is assumed to be unit triangular. 10592 DIAG = 'N' or 'n' A is not assumed to be unit 10593 triangular. 10594 Unchanged on exit. 10595 M - INTEGER. 10596 On entry, M specifies the number of rows of B. M must be at 10597 least zero. 10598 Unchanged on exit. 10599 N - INTEGER. 10600 On entry, N specifies the number of columns of B. N must be 10601 at least zero. 10602 Unchanged on exit. 10603 ALPHA - REAL . 10604 On entry, ALPHA specifies the scalar alpha. When alpha is 10605 zero then A is not referenced and B need not be set before 10606 entry. 10607 Unchanged on exit. 10608 A - REAL array of DIMENSION ( LDA, k ), where k is m 10609 when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. 10610 Before entry with UPLO = 'U' or 'u', the leading k by k 10611 upper triangular part of the array A must contain the upper 10612 triangular matrix and the strictly lower triangular part of 10613 A is not referenced. 10614 Before entry with UPLO = 'L' or 'l', the leading k by k 10615 lower triangular part of the array A must contain the lower 10616 triangular matrix and the strictly upper triangular part of 10617 A is not referenced. 10618 Note that when DIAG = 'U' or 'u', the diagonal elements of 10619 A are not referenced either, but are assumed to be unity. 10620 Unchanged on exit. 10621 LDA - INTEGER. 10622 On entry, LDA specifies the first dimension of A as declared 10623 in the calling (sub) program. When SIDE = 'L' or 'l' then 10624 LDA must be at least f2cmax( 1, m ), when SIDE = 'R' or 'r' 10625 then LDA must be at least f2cmax( 1, n ). 10626 Unchanged on exit. 10627 B - REAL array of DIMENSION ( LDB, n ). 
10628 Before entry, the leading m by n part of the array B must 10629 contain the matrix B, and on exit is overwritten by the 10630 transformed matrix. 10631 LDB - INTEGER. 10632 On entry, LDB specifies the first dimension of B as declared 10633 in the calling (sub) program. LDB must be at least 10634 f2cmax( 1, m ). 10635 Unchanged on exit. 10636 Level 3 Blas routine. 10637 -- Written on 8-February-1989. 10638 Jack Dongarra, Argonne National Laboratory. 10639 Iain Duff, AERE Harwell. 10640 Jeremy Du Croz, Numerical Algorithms Group Ltd. 10641 Sven Hammarling, Numerical Algorithms Group Ltd. 10642 Test the input parameters. 10643 Parameter adjustments */ 10644 a_dim1 = *lda; 10645 a_offset = 1 + a_dim1 * 1; 10646 a -= a_offset; 10647 b_dim1 = *ldb; 10648 b_offset = 1 + b_dim1 * 1; 10649 b -= b_offset; 10650 /* Function Body */ 10651 lside = lsame_(side, "L"); 10652 if (lside) { 10653 nrowa = *m; 10654 } else { 10655 nrowa = *n; 10656 } 10657 nounit = lsame_(diag, "N"); 10658 upper = lsame_(uplo, "U"); 10659 info = 0; 10660 if (! lside && ! lsame_(side, "R")) { 10661 info = 1; 10662 } else if (! upper && ! lsame_(uplo, "L")) { 10663 info = 2; 10664 } else if (! lsame_(transa, "N") && ! lsame_(transa, 10665 "T") && ! lsame_(transa, "C")) { 10666 info = 3; 10667 } else if (! lsame_(diag, "U") && ! lsame_(diag, 10668 "N")) { 10669 info = 4; 10670 } else if (*m < 0) { 10671 info = 5; 10672 } else if (*n < 0) { 10673 info = 6; 10674 } else if (*lda < f2cmax(1,nrowa)) { 10675 info = 9; 10676 } else if (*ldb < f2cmax(1,*m)) { 10677 info = 11; 10678 } 10679 if (info != 0) { 10680 xerbla_("STRMM ", &info); 10681 return 0; 10682 } 10683 /* Quick return if possible. */ 10684 if (*n == 0) { 10685 return 0; 10686 } 10687 /* And when alpha.eq.zero. */ 10688 if (*alpha == 0.f) { 10689 i__1 = *n; 10690 for (j = 1; j <= i__1; ++j) { 10691 i__2 = *m; 10692 for (i__ = 1; i__ <= i__2; ++i__) { 10693 b_ref(i__, j) = 0.f; 10694 /* L10: */ 10695 } 10696 /* L20: */ 10697 } 10698 return 0; 10699 } 10700 /* Start the operations. */ 10701 if (lside) { 10702 if (lsame_(transa, "N")) { 10703 /* Form B := alpha*A*B. */ 10704 if (upper) { 10705 i__1 = *n; 10706 for (j = 1; j <= i__1; ++j) { 10707 i__2 = *m; 10708 for (k = 1; k <= i__2; ++k) { 10709 if (b_ref(k, j) != 0.f) { 10710 temp = *alpha * b_ref(k, j); 10711 i__3 = k - 1; 10712 for (i__ = 1; i__ <= i__3; ++i__) { 10713 b_ref(i__, j) = b_ref(i__, j) + temp * a_ref( 10714 i__, k); 10715 /* L30: */ 10716 } 10717 if (nounit) { 10718 temp *= a_ref(k, k); 10719 } 10720 b_ref(k, j) = temp; 10721 } 10722 /* L40: */ 10723 } 10724 /* L50: */ 10725 } 10726 } else { 10727 i__1 = *n; 10728 for (j = 1; j <= i__1; ++j) { 10729 for (k = *m; k >= 1; --k) { 10730 if (b_ref(k, j) != 0.f) { 10731 temp = *alpha * b_ref(k, j); 10732 b_ref(k, j) = temp; 10733 if (nounit) { 10734 b_ref(k, j) = b_ref(k, j) * a_ref(k, k); 10735 } 10736 i__2 = *m; 10737 for (i__ = k + 1; i__ <= i__2; ++i__) { 10738 b_ref(i__, j) = b_ref(i__, j) + temp * a_ref( 10739 i__, k); 10740 /* L60: */ 10741 } 10742 } 10743 /* L70: */ 10744 } 10745 /* L80: */ 10746 } 10747 } 10748 } else { 10749 /* Form B := alpha*A'*B. 
*/ 10750 if (upper) { 10751 i__1 = *n; 10752 for (j = 1; j <= i__1; ++j) { 10753 for (i__ = *m; i__ >= 1; --i__) { 10754 temp = b_ref(i__, j); 10755 if (nounit) { 10756 temp *= a_ref(i__, i__); 10757 } 10758 i__2 = i__ - 1; 10759 for (k = 1; k <= i__2; ++k) { 10760 temp += a_ref(k, i__) * b_ref(k, j); 10761 /* L90: */ 10762 } 10763 b_ref(i__, j) = *alpha * temp; 10764 /* L100: */ 10765 } 10766 /* L110: */ 10767 } 10768 } else { 10769 i__1 = *n; 10770 for (j = 1; j <= i__1; ++j) { 10771 i__2 = *m; 10772 for (i__ = 1; i__ <= i__2; ++i__) { 10773 temp = b_ref(i__, j); 10774 if (nounit) { 10775 temp *= a_ref(i__, i__); 10776 } 10777 i__3 = *m; 10778 for (k = i__ + 1; k <= i__3; ++k) { 10779 temp += a_ref(k, i__) * b_ref(k, j); 10780 /* L120: */ 10781 } 10782 b_ref(i__, j) = *alpha * temp; 10783 /* L130: */ 10784 } 10785 /* L140: */ 10786 } 10787 } 10788 } 10789 } else { 10790 if (lsame_(transa, "N")) { 10791 /* Form B := alpha*B*A. */ 10792 if (upper) { 10793 for (j = *n; j >= 1; --j) { 10794 temp = *alpha; 10795 if (nounit) { 10796 temp *= a_ref(j, j); 10797 } 10798 i__1 = *m; 10799 for (i__ = 1; i__ <= i__1; ++i__) { 10800 b_ref(i__, j) = temp * b_ref(i__, j); 10801 /* L150: */ 10802 } 10803 i__1 = j - 1; 10804 for (k = 1; k <= i__1; ++k) { 10805 if (a_ref(k, j) != 0.f) { 10806 temp = *alpha * a_ref(k, j); 10807 i__2 = *m; 10808 for (i__ = 1; i__ <= i__2; ++i__) { 10809 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref( 10810 i__, k); 10811 /* L160: */ 10812 } 10813 } 10814 /* L170: */ 10815 } 10816 /* L180: */ 10817 } 10818 } else { 10819 i__1 = *n; 10820 for (j = 1; j <= i__1; ++j) { 10821 temp = *alpha; 10822 if (nounit) { 10823 temp *= a_ref(j, j); 10824 } 10825 i__2 = *m; 10826 for (i__ = 1; i__ <= i__2; ++i__) { 10827 b_ref(i__, j) = temp * b_ref(i__, j); 10828 /* L190: */ 10829 } 10830 i__2 = *n; 10831 for (k = j + 1; k <= i__2; ++k) { 10832 if (a_ref(k, j) != 0.f) { 10833 temp = *alpha * a_ref(k, j); 10834 i__3 = *m; 10835 for (i__ = 1; i__ <= i__3; ++i__) { 10836 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref( 10837 i__, k); 10838 /* L200: */ 10839 } 10840 } 10841 /* L210: */ 10842 } 10843 /* L220: */ 10844 } 10845 } 10846 } else { 10847 /* Form B := alpha*B*A'. */ 10848 if (upper) { 10849 i__1 = *n; 10850 for (k = 1; k <= i__1; ++k) { 10851 i__2 = k - 1; 10852 for (j = 1; j <= i__2; ++j) { 10853 if (a_ref(j, k) != 0.f) { 10854 temp = *alpha * a_ref(j, k); 10855 i__3 = *m; 10856 for (i__ = 1; i__ <= i__3; ++i__) { 10857 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref( 10858 i__, k); 10859 /* L230: */ 10860 } 10861 } 10862 /* L240: */ 10863 } 10864 temp = *alpha; 10865 if (nounit) { 10866 temp *= a_ref(k, k); 10867 } 10868 if (temp != 1.f) { 10869 i__2 = *m; 10870 for (i__ = 1; i__ <= i__2; ++i__) { 10871 b_ref(i__, k) = temp * b_ref(i__, k); 10872 /* L250: */ 10873 } 10874 } 10875 /* L260: */ 10876 } 10877 } else { 10878 for (k = *n; k >= 1; --k) { 10879 i__1 = *n; 10880 for (j = k + 1; j <= i__1; ++j) { 10881 if (a_ref(j, k) != 0.f) { 10882 temp = *alpha * a_ref(j, k); 10883 i__2 = *m; 10884 for (i__ = 1; i__ <= i__2; ++i__) { 10885 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref( 10886 i__, k); 10887 /* L270: */ 10888 } 10889 } 10890 /* L280: */ 10891 } 10892 temp = *alpha; 10893 if (nounit) { 10894 temp *= a_ref(k, k); 10895 } 10896 if (temp != 1.f) { 10897 i__1 = *m; 10898 for (i__ = 1; i__ <= i__1; ++i__) { 10899 b_ref(i__, k) = temp * b_ref(i__, k); 10900 /* L290: */ 10901 } 10902 } 10903 /* L300: */ 10904 } 10905 } 10906 } 10907 } 10908 return 0; 10909 /* End of STRMM . 
*/ 10910 } /* strmm_ */
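Usage sketch (illustrative, not part of lapackblas.cpp): strmm_ forming B := alpha*A*B with A upper triangular and non-unit diagonal, assuming the f2c typedefs from lapackblas.h and a hypothetical main().

#include <cstdio>
#include "lapackblas.h"

int main() {
    integer m = 2, n = 1, lda = 2, ldb = 2;
    real alpha = 1.f;
    /* A = [[2,1],[0,3]] upper triangular, column-major; B is m-by-n = 2-by-1 */
    real a[4] = { 2.f, 0.f,
                  1.f, 3.f };
    real b[2] = { 1.f, 1.f };
    strmm_("L", "U", "N", "N", &m, &n, &alpha, a, &lda, b, &ldb);
    std::printf("B = (%g, %g)\n", b[0], b[1]);   /* A*B = (3, 3) */
    return 0;
}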
int strmv_ ( const char * uplo, const char * trans, const char * diag, integer * n, real * a, integer * lda, real * x, integer * incx )
Definition at line 10917 of file lapackblas.cpp.
References a_ref, f2cmax, integer, lsame_(), and xerbla_().
Referenced by slarft_().
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    /* Local variables */
    static integer info;
    static real temp;
    static integer i__, j;
    extern logical lsame_(const char *, const char *);
    static integer ix, jx, kx;
    extern /* Subroutine */ int xerbla_(const char *, integer *);
    static logical nounit;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
    /* Purpose
       =======
       STRMV performs one of the matrix-vector operations
          x := A*x, or x := A'*x,
       where x is an n element vector and A is an n by n unit, or non-unit,
       upper or lower triangular matrix.

       Parameters
       ==========
       UPLO   - CHARACTER*1.
                On entry, UPLO specifies whether the matrix is an upper or
                lower triangular matrix as follows:
                   UPLO = 'U' or 'u'   A is an upper triangular matrix.
                   UPLO = 'L' or 'l'   A is a lower triangular matrix.
                Unchanged on exit.
       TRANS  - CHARACTER*1.
                On entry, TRANS specifies the operation to be performed as
                follows:
                   TRANS = 'N' or 'n'   x := A*x.
                   TRANS = 'T' or 't'   x := A'*x.
                   TRANS = 'C' or 'c'   x := A'*x.
                Unchanged on exit.
       DIAG   - CHARACTER*1.
                On entry, DIAG specifies whether or not A is unit
                triangular as follows:
                   DIAG = 'U' or 'u'   A is assumed to be unit triangular.
                   DIAG = 'N' or 'n'   A is not assumed to be unit
                                       triangular.
                Unchanged on exit.
       N      - INTEGER.
                On entry, N specifies the order of the matrix A.
                N must be at least zero.
                Unchanged on exit.
       A      - REAL array of DIMENSION ( LDA, n ).
                Before entry with UPLO = 'U' or 'u', the leading n by n
                upper triangular part of the array A must contain the upper
                triangular matrix and the strictly lower triangular part of
                A is not referenced.
                Before entry with UPLO = 'L' or 'l', the leading n by n
                lower triangular part of the array A must contain the lower
                triangular matrix and the strictly upper triangular part of
                A is not referenced.
                Note that when DIAG = 'U' or 'u', the diagonal elements of
                A are not referenced either, but are assumed to be unity.
                Unchanged on exit.
       LDA    - INTEGER.
                On entry, LDA specifies the first dimension of A as declared
                in the calling (sub) program. LDA must be at least
                f2cmax( 1, n ).
                Unchanged on exit.
       X      - REAL array of dimension at least
                ( 1 + ( n - 1 )*abs( INCX ) ).
                Before entry, the incremented array X must contain the n
                element vector x. On exit, X is overwritten with the
                transformed vector x.
       INCX   - INTEGER.
                On entry, INCX specifies the increment for the elements of
                X. INCX must not be zero.
                Unchanged on exit.

       Level 2 Blas routine.
       -- Written on 22-October-1986.
          Jack Dongarra, Argonne National Lab.
          Jeremy Du Croz, Nag Central Office.
          Sven Hammarling, Nag Central Office.
          Richard Hanson, Sandia National Labs.

       Test the input parameters.
       Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --x;
    /* Function Body */
    info = 0;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
        info = 1;
    } else if (! lsame_(trans, "N") && ! lsame_(trans, "T") &&
               ! lsame_(trans, "C")) {
        info = 2;
    } else if (! lsame_(diag, "U") && ! lsame_(diag, "N")) {
        info = 3;
    } else if (*n < 0) {
        info = 4;
    } else if (*lda < f2cmax(1,*n)) {
        info = 6;
    } else if (*incx == 0) {
        info = 8;
    }
    if (info != 0) {
        xerbla_("STRMV ", &info);
        return 0;
    }
    /* Quick return if possible. */
    if (*n == 0) {
        return 0;
    }
    nounit = lsame_(diag, "N");
    /* Set up the start point in X if the increment is not unity. This
       will be ( N - 1 )*INCX too small for descending loops. */
    if (*incx <= 0) {
        kx = 1 - (*n - 1) * *incx;
    } else if (*incx != 1) {
        kx = 1;
    }
    /* Start the operations. In this version the elements of A are
       accessed sequentially with one pass through A. */
    if (lsame_(trans, "N")) {
        /* Form x := A*x. */
        if (lsame_(uplo, "U")) {
            if (*incx == 1) {
                i__1 = *n;
                for (j = 1; j <= i__1; ++j) {
                    if (x[j] != 0.f) {
                        temp = x[j];
                        i__2 = j - 1;
                        for (i__ = 1; i__ <= i__2; ++i__) {
                            x[i__] += temp * a_ref(i__, j);
/* L10: */
                        }
                        if (nounit) {
                            x[j] *= a_ref(j, j);
                        }
                    }
/* L20: */
                }
            } else {
                jx = kx;
                i__1 = *n;
                for (j = 1; j <= i__1; ++j) {
                    if (x[jx] != 0.f) {
                        temp = x[jx];
                        ix = kx;
                        i__2 = j - 1;
                        for (i__ = 1; i__ <= i__2; ++i__) {
                            x[ix] += temp * a_ref(i__, j);
                            ix += *incx;
/* L30: */
                        }
                        if (nounit) {
                            x[jx] *= a_ref(j, j);
                        }
                    }
                    jx += *incx;
/* L40: */
                }
            }
        } else {
            if (*incx == 1) {
                for (j = *n; j >= 1; --j) {
                    if (x[j] != 0.f) {
                        temp = x[j];
                        i__1 = j + 1;
                        for (i__ = *n; i__ >= i__1; --i__) {
                            x[i__] += temp * a_ref(i__, j);
/* L50: */
                        }
                        if (nounit) {
                            x[j] *= a_ref(j, j);
                        }
                    }
/* L60: */
                }
            } else {
                kx += (*n - 1) * *incx;
                jx = kx;
                for (j = *n; j >= 1; --j) {
                    if (x[jx] != 0.f) {
                        temp = x[jx];
                        ix = kx;
                        i__1 = j + 1;
                        for (i__ = *n; i__ >= i__1; --i__) {
                            x[ix] += temp * a_ref(i__, j);
                            ix -= *incx;
/* L70: */
                        }
                        if (nounit) {
                            x[jx] *= a_ref(j, j);
                        }
                    }
                    jx -= *incx;
/* L80: */
                }
            }
        }
    } else {
        /* Form x := A'*x. */
        if (lsame_(uplo, "U")) {
            if (*incx == 1) {
                for (j = *n; j >= 1; --j) {
                    temp = x[j];
                    if (nounit) {
                        temp *= a_ref(j, j);
                    }
                    for (i__ = j - 1; i__ >= 1; --i__) {
                        temp += a_ref(i__, j) * x[i__];
/* L90: */
                    }
                    x[j] = temp;
/* L100: */
                }
            } else {
                jx = kx + (*n - 1) * *incx;
                for (j = *n; j >= 1; --j) {
                    temp = x[jx];
                    ix = jx;
                    if (nounit) {
                        temp *= a_ref(j, j);
                    }
                    for (i__ = j - 1; i__ >= 1; --i__) {
                        ix -= *incx;
                        temp += a_ref(i__, j) * x[ix];
/* L110: */
                    }
                    x[jx] = temp;
                    jx -= *incx;
/* L120: */
                }
            }
        } else {
            if (*incx == 1) {
                i__1 = *n;
                for (j = 1; j <= i__1; ++j) {
                    temp = x[j];
                    if (nounit) {
                        temp *= a_ref(j, j);
                    }
                    i__2 = *n;
                    for (i__ = j + 1; i__ <= i__2; ++i__) {
                        temp += a_ref(i__, j) * x[i__];
/* L130: */
                    }
                    x[j] = temp;
/* L140: */
                }
            } else {
                jx = kx;
                i__1 = *n;
                for (j = 1; j <= i__1; ++j) {
                    temp = x[jx];
                    ix = jx;
                    if (nounit) {
                        temp *= a_ref(j, j);
                    }
                    i__2 = *n;
                    for (i__ = j + 1; i__ <= i__2; ++i__) {
                        ix += *incx;
                        temp += a_ref(i__, j) * x[ix];
/* L150: */
                    }
                    x[jx] = temp;
                    jx += *incx;
/* L160: */
                }
            }
        }
    }
    return 0;
/* End of STRMV . */
} /* strmv_ */
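The listing above makes the calling convention concrete: every argument, including scalars such as N and INCX, is passed by pointer, and A is addressed column-major through the a_ref macro. The driver below is a minimal sketch, not part of lapackblas.cpp; it assumes the prototype int strmv_(const char *uplo, const char *trans, const char *diag, integer *n, real *a, integer *lda, real *x, integer *incx) implied by the parameter list above, with integer and real typedef'd in lapackblas.h as in the usual f2c translation.

    /* Hypothetical driver, not part of lapackblas.cpp: multiplies an upper
       triangular 3x3 matrix by a vector in place using strmv_.
       Assumes lapackblas.h provides the integer/real typedefs and the
       strmv_ prototype named above. */
    #include <cstdio>
    #include "lapackblas.h"

    int main() {
        /* A is stored column-major with leading dimension 3, exactly as the
           a_ref macro expects; only the upper triangle is referenced. */
        real a[9] = {
            1.f, 0.f, 0.f,   /* column 1 */
            2.f, 4.f, 0.f,   /* column 2 */
            3.f, 5.f, 6.f    /* column 3 */
        };
        real x[3] = {1.f, 1.f, 1.f};
        integer n = 3, lda = 3, incx = 1;

        /* x := A*x  ->  {1+2+3, 4+5, 6} = {6, 9, 6} */
        strmv_("U", "N", "N", &n, a, &lda, x, &incx);
        printf("%g %g %g\n", (double)x[0], (double)x[1], (double)x[2]);
        return 0;
    }

Because the operation is performed in place, the input vector is overwritten; callers that still need the original x must copy it before the call.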
int xerbla_ (const char *srname, integer *info)
Definition at line 11194 of file lapackblas.cpp.
Referenced by sbdsqr_(), sgebd2_(), sgebrd_(), sgelq2_(), sgelqf_(), sgemm_(), sgemv_(), sgeqr2_(), sgeqrf_(), sger_(), sgesvd_(), slaed0_(), slaed1_(), slaed2_(), slaed3_(), slaed7_(), slaed8_(), slaed9_(), slaeda_(), slascl_(), slasq1_(), slasq2_(), slasr_(), slasrt_(), sorg2l_(), sorg2r_(), sorgbr_(), sorgl2_(), sorglq_(), sorgql_(), sorgqr_(), sorgtr_(), sorm2r_(), sormbr_(), sorml2_(), sormlq_(), sormqr_(), sstedc_(), ssteqr_(), ssterf_(), sstevd_(), ssyev_(), ssymv_(), ssyr2_(), ssyr2k_(), ssytd2_(), ssytrd_(), strmm_(), and strmv_().
{
    /* -- LAPACK auxiliary routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       September 30, 1994

       Purpose
       =======

       XERBLA is an error handler for the LAPACK routines.
       It is called by an LAPACK routine if an input parameter has an
       invalid value. A message is printed and execution stops.

       Installers may consider modifying the STOP statement in order to
       call system-specific exception-handling facilities.

       Arguments
       =========

       SRNAME  (input) CHARACTER*6
               The name of the routine which called XERBLA.

       INFO    (input) INTEGER
               The position of the invalid parameter in the parameter list
               of the calling routine.

       ===================================================================== */

    printf("** On entry to %6s, parameter number %2i had an illegal value\n",
           srname, *info);

    /* End of XERBLA */

    return 0;
} /* xerbla_ */
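As the cross-reference list shows, xerbla_ is reached only through the argument checks at the top of each routine. The hypothetical snippet below, assuming the same strmv_ prototype as in the earlier example, shows one such path: an illegal INCX makes strmv_ set info = 8 and call xerbla_("STRMV ", &info) before returning.

    /* Hypothetical snippet, not part of lapackblas.cpp: passing an illegal
       INCX to strmv_ routes the error through xerbla_. */
    #include "lapackblas.h"

    void demo_bad_call() {
        real a[1] = {1.f};
        real x[1] = {1.f};
        integer n = 1, lda = 1, incx = 0;   /* INCX must not be zero */

        /* strmv_'s parameter check sets info = 8 and calls
           xerbla_("STRMV ", &info), which prints:
           ** On entry to STRMV , parameter number  8 had an illegal value */
        strmv_("U", "N", "N", &n, a, &lda, x, &incx);
    }

Note that, unlike the reference Fortran XERBLA, this translation only prints the message and returns 0 rather than stopping execution, so the calling routine simply returns early with its output arguments untouched.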
Definition at line 13567 of file lapackblas.cpp.
Referenced by bmv_(), cauchy_(), dpofa_(), dtrsl_(), formk_(), ilaenv_(), lnsrlb_(), mainlb_(), matupd_(), slaed0_(), slaed1_(), slaed7_(), slaed8_(), slanst_(), slansy_(), slarf_(), slarfb_(), slarft_(), slatrd_(), sorg2l_(), sorg2r_(), sorgql_(), sorgqr_(), sorgtr_(), sstedc_(), ssteqr_(), ssterf_(), sstevd_(), ssyev_(), ssytd2_(), ssytrd_(), and subsm_().
Definition at line 25498 of file lapackblas.cpp.
Definition at line 25500 of file lapackblas.cpp.
Definition at line 16369 of file lapackblas.cpp.
doublereal c_b15 = -.125 [static] |
Definition at line 22949 of file lapackblas.cpp.
real c_b3 = -1.f [static] |
real c_b416 = 0.f [static] |
Definition at line 16374 of file lapackblas.cpp.
real c_b438 = 1.f [static] |
Definition at line 16375 of file lapackblas.cpp.
real c_b49 = 1.f [static] |
Definition at line 22951 of file lapackblas.cpp.
real c_b72 = -1.f [static] |
Definition at line 22952 of file lapackblas.cpp.
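The c_b* objects listed above are standard f2c artifacts: Fortran passes every actual argument by reference, so each numeric literal in the original source is materialized as a file-scope static whose address can be handed to the callee. A minimal sketch of the pattern follows; the helper scale_ and the constant c_one are hypothetical stand-ins for routines and constants in this file, not names taken from lapackblas.cpp.

    #include "lapackblas.h"

    /* Hypothetical helper mirroring the f2c calling convention: every
       argument, including the scalar alpha, arrives by pointer. */
    static int scale_(real *alpha, integer *n, real *x) {
        for (integer i = 0; i < *n; ++i) {
            x[i] *= *alpha;
        }
        return 0;
    }

    /* f2c emits a file-scope constant (like c_b438 = 1.f above) so that the
       literal 1.0 from the Fortran source has an address to pass. */
    static real c_one = 1.f;

    static int caller_(integer *n, real *x) {
        /* Fortran:  CALL SCALE( 1.0, N, X ) */
        return scale_(&c_one, n, x);
    }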