#include <cstdio>#include <cstdlib>#include "lapackblas.h"Include dependency graph for lapackblas.cpp:

Go to the source code of this file.
Defines | |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | b_ref(a_1, a_2) b[(a_2)*b_dim1 + a_1] |
| #define | c___ref(a_1, a_2) c__[(a_2)*c_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | work_ref(a_1, a_2) work[(a_2)*work_dim1 + a_1] |
| #define | c___ref(a_1, a_2) c__[(a_2)*c_dim1 + a_1] |
| #define | v_ref(a_1, a_2) v[(a_2)*v_dim1 + a_1] |
| #define | t_ref(a_1, a_2) t[(a_2)*t_dim1 + a_1] |
| #define | v_ref(a_1, a_2) v[(a_2)*v_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | stack_ref(a_1, a_2) stack[(a_2)*2 + a_1 - 3] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | w_ref(a_1, a_2) w[(a_2)*w_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | z___ref(a_1, a_2) z__[(a_2)*z_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | b_ref(a_1, a_2) b[(a_2)*b_dim1 + a_1] |
| #define | c___ref(a_1, a_2) c__[(a_2)*c_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | b_ref(a_1, a_2) b[(a_2)*b_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | z___ref(a_1, a_2) z__[(a_2)*z_dim1 + a_1] |
| #define | z___ref(a_1, a_2) z__[(a_2)*z_dim1 + a_1] |
| #define | q_ref(a_1, a_2) q[(a_2)*q_dim1 + a_1] |
| #define | qstore_ref(a_1, a_2) qstore[(a_2)*qstore_dim1 + a_1] |
| #define | givcol_ref(a_1, a_2) givcol[(a_2)*2 + a_1] |
| #define | givnum_ref(a_1, a_2) givnum[(a_2)*2 + a_1] |
| #define | q_ref(a_1, a_2) q[(a_2)*q_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | b_ref(a_1, a_2) b[(a_2)*b_dim1 + a_1] |
| #define | q_ref(a_1, a_2) q[(a_2)*q_dim1 + a_1] |
| #define | q2_ref(a_1, a_2) q2[(a_2)*q2_dim1 + a_1] |
| #define | givcol_ref(a_1, a_2) givcol[(a_2)*2 + a_1] |
| #define | givnum_ref(a_1, a_2) givnum[(a_2)*2 + a_1] |
| #define | q_ref(a_1, a_2) q[(a_2)*q_dim1 + a_1] |
| #define | q_ref(a_1, a_2) q[(a_2)*q_dim1 + a_1] |
| #define | s_ref(a_1, a_2) s[(a_2)*s_dim1 + a_1] |
| #define | givcol_ref(a_1, a_2) givcol[(a_2)*2 + a_1] |
| #define | givnum_ref(a_1, a_2) givnum[(a_2)*2 + a_1] |
| #define | q_ref(a_1, a_2) q[(a_2)*q_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | u_ref(a_1, a_2) u[(a_2)*u_dim1 + a_1] |
| #define | vt_ref(a_1, a_2) vt[(a_2)*vt_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | c___ref(a_1, a_2) c__[(a_2)*c_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | c___ref(a_1, a_2) c__[(a_2)*c_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | c___ref(a_1, a_2) c__[(a_2)*c_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | c___ref(a_1, a_2) c__[(a_2)*c_dim1 + a_1] |
| #define | u_ref(a_1, a_2) u[(a_2)*u_dim1 + a_1] |
| #define | vt_ref(a_1, a_2) vt[(a_2)*vt_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | c___ref(a_1, a_2) c__[(a_2)*c_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | x_ref(a_1, a_2) x[(a_2)*x_dim1 + a_1] |
| #define | y_ref(a_1, a_2) y[(a_2)*y_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
| #define | c___ref(a_1, a_2) c__[(a_2)*c_dim1 + a_1] |
| #define | a_ref(a_1, a_2) a[(a_2)*a_dim1 + a_1] |
Functions | |
| int | s_cat (char *lp, const char **rpp, integer *rnp, integer *np, ftnlen ll) |
| integer | ieeeck_ (integer *ispec, real *zero, real *one) |
| integer | ilaenv_ (integer *ispec, const char *name__, const char *, integer *n1, integer *n2, integer *, integer *n4, ftnlen name_len, ftnlen) |
| logical | lsame_ (const char *ca, const char *cb) |
| double | pow_ri (real *ap, integer *bp) |
| integer | pow_ii (integer *ap, integer *bp) |
| double | r_sign (real *a, real *b) |
| int | saxpy_ (integer *n, real *sa, real *sx, integer *incx, real *sy, integer *incy) |
| integer | s_cmp (char *a0, const char *b0, ftnlen la, ftnlen lb) |
| void | s_copy (char *a, const char *b, ftnlen la, ftnlen lb) |
| int | scopy_ (integer *n, real *sx, integer *incx, real *sy, integer *incy) |
| doublereal | sdot_ (integer *n, real *sx, integer *incx, real *sy, integer *incy) |
| int | sgemm_ (const char *transa, const char *transb, integer *m, integer *n, integer *k, real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, real *c__, integer *ldc) |
| int | sgemv_ (const char *trans, integer *m, integer *n, real *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, real *y, integer *incy) |
| int | sger_ (integer *m, integer *n, real *alpha, real *x, integer *incx, real *y, integer *incy, real *a, integer *lda) |
| int | slae2_ (real *a, real *b, real *c__, real *rt1, real *rt2) |
| int | slaev2_ (real *a, real *b, real *c__, real *rt1, real *rt2, real *cs1, real *sn1) |
| doublereal | slamch_ (const char *cmach) |
| int | slamc1_ (integer *beta, integer *t, logical *rnd, logical *ieee1) |
| int | slamc2_ (integer *beta, integer *t, logical *rnd, real *eps, integer *emin, real *rmin, integer *emax, real *rmax) |
| doublereal | slamc3_ (real *a, real *b) |
| int | slamc4_ (integer *emin, real *start, integer *base) |
| int | slamc5_ (integer *beta, integer *p, integer *emin, logical *ieee, integer *emax, real *rmax) |
| doublereal | slanst_ (const char *norm, integer *n, real *d__, real *e) |
| doublereal | slansy_ (const char *norm, char *uplo, integer *n, real *a, integer *lda, real *work) |
| doublereal | slapy2_ (real *x, real *y) |
| int | slarfb_ (const char *side, const char *trans, const char *direct, const char *storev, integer *m, integer *n, integer *k, real *v, integer *ldv, real *t, integer *ldt, real *c__, integer *ldc, real *work, integer *ldwork) |
| int | slarf_ (const char *side, integer *m, integer *n, real *v, integer *incv, real *tau, real *c__, integer *ldc, real *work) |
| int | slarfg_ (integer *n, real *alpha, real *x, integer *incx, real *tau) |
| int | slarft_ (const char *direct, const char *storev, integer *n, integer *k, real *v, integer *ldv, real *tau, real *t, integer *ldt) |
| int | slartg_ (real *f, real *g, real *cs, real *sn, real *r__) |
| int | slascl_ (const char *type__, integer *kl, integer *ku, real *cfrom, real *cto, integer *m, integer *n, real *a, integer *lda, integer *info) |
| int | slaset_ (const char *uplo, integer *m, integer *n, real *alpha, real *beta, real *a, integer *lda) |
| int | slasr_ (const char *side, const char *pivot, const char *direct, integer *m, integer *n, real *c__, real *s, real *a, integer *lda) |
| int | slasrt_ (const char *id, integer *n, real *d__, integer *info) |
| int | slassq_ (integer *n, real *x, integer *incx, real *scale, real *sumsq) |
| int | slatrd_ (char *uplo, integer *n, integer *nb, real *a, integer *lda, real *e, real *tau, real *w, integer *ldw) |
| doublereal | snrm2_ (integer *n, real *x, integer *incx) |
| int | sorg2l_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info) |
| int | sorg2r_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info) |
| int | sorgql_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info) |
| int | sorgqr_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info) |
| int | sorgtr_ (char *uplo, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info) |
| int | sscal_ (integer *n, real *sa, real *sx, integer *incx) |
| int | ssteqr_ (const char *compz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *info) |
| int | ssterf_ (integer *n, real *d__, real *e, integer *info) |
| int | sswap_ (integer *n, real *sx, integer *incx, real *sy, integer *incy) |
| int | ssyev_ (char *jobz, char *uplo, integer *n, real *a, integer *lda, real *w, real *work, integer *lwork, integer *info) |
| int | ssymv_ (const char *uplo, integer *n, real *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, real *y, integer *incy) |
| int | ssyr2_ (char *uplo, integer *n, real *alpha, real *x, integer *incx, real *y, integer *incy, real *a, integer *lda) |
| int | ssyr2k_ (char *uplo, const char *trans, integer *n, integer *k, real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, real *c__, integer *ldc) |
| int | ssytd2_ (char *uplo, integer *n, real *a, integer *lda, real *d__, real *e, real *tau, integer *info) |
| int | ssytrd_ (char *uplo, integer *n, real *a, integer *lda, real *d__, real *e, real *tau, real *work, integer *lwork, integer *info) |
| int | strmm_ (const char *side, const char *uplo, const char *transa, const char *diag, integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, integer *ldb) |
| int | strmv_ (const char *uplo, const char *trans, const char *diag, integer *n, real *a, integer *lda, real *x, integer *incx) |
| int | xerbla_ (const char *srname, integer *info) |
| int | sstedc_ (const char *compz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info) |
| int | sstevd_ (char *jobz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info) |
| int | slaed0_ (integer *icompq, integer *qsiz, integer *n, real *d__, real *e, real *q, integer *ldq, real *qstore, integer *ldqs, real *work, integer *iwork, integer *info) |
| int | slaed7_ (integer *icompq, integer *n, integer *qsiz, integer *tlvls, integer *curlvl, integer *curpbm, real *d__, real *q, integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *qstore, integer *qptr, integer *prmptr, integer *perm, integer *givptr, integer *givcol, real *givnum, real *work, integer *iwork, integer *info) |
| int | slaed1_ (integer *n, real *d__, real *q, integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *work, integer *iwork, integer *info) |
| int | slacpy_ (const char *uplo, integer *m, integer *n, real *a, integer *lda, real *b, integer *ldb) |
| int | slamrg_ (integer *n1, integer *n2, real *a, integer *strd1, integer *strd2, integer *index) |
| int | slaed8_ (integer *icompq, integer *k, integer *n, integer *qsiz, real *d__, real *q, integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *z__, real *dlamda, real *q2, integer *ldq2, real *w, integer *perm, integer *givptr, integer *givcol, real *givnum, integer *indxp, integer *indx, integer *info) |
| int | slaed2_ (integer *k, integer *n, integer *n1, real *d__, real *q, integer *ldq, integer *indxq, real *rho, real *z__, real *dlamda, real *w, real *q2, integer *indx, integer *indxc, integer *indxp, integer *coltyp, integer *info) |
| int | slaed9_ (integer *k, integer *kstart, integer *kstop, integer *n, real *d__, real *q, integer *ldq, real *rho, real *dlamda, real *w, real *s, integer *lds, integer *info) |
| integer | isamax_ (integer *n, real *sx, integer *incx) |
| int | srot_ (integer *n, real *sx, integer *incx, real *sy, integer *incy, real *c__, real *s) |
| int | slaed4_ (integer *n, integer *i__, real *d__, real *z__, real *delta, real *rho, real *dlam, integer *info) |
| int | slaeda_ (integer *n, integer *tlvls, integer *curlvl, integer *curpbm, integer *prmptr, integer *perm, integer *givptr, integer *givcol, real *givnum, real *q, integer *qptr, real *z__, real *ztemp, integer *info) |
| int | slaed3_ (integer *k, integer *n, integer *n1, real *d__, real *q, integer *ldq, real *rho, real *dlamda, real *q2, integer *indx, integer *ctot, real *w, real *s, integer *info) |
| int | slaed6_ (integer *kniter, logical *orgati, real *rho, real *d__, real *z__, real *finit, real *tau, integer *info) |
| int | slaed5_ (integer *i__, real *d__, real *z__, real *delta, real *rho, real *dlam) |
| int | sgesvd_ (char *jobu, char *jobvt, integer *m, integer *n, real *a, integer *lda, real *s, real *u, integer *ldu, real *vt, integer *ldvt, real *work, integer *lwork, integer *info) |
| int | sorgl2_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info) |
| int | sorglq_ (integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info) |
| doublereal | slange_ (const char *norm, integer *m, integer *n, real *a, integer *lda, real *work) |
| int | sgebrd_ (integer *m, integer *n, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *work, integer *lwork, integer *info) |
| int | sgebd2_ (integer *m, integer *n, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *work, integer *info) |
| int | sormbr_ (const char *vect, const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info) |
| int | sgelqf_ (integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info) |
| int | sormlq_ (const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info) |
| int | sormqr_ (const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info) |
| int | sgelq2_ (integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *info) |
| int | sbdsqr_ (const char *uplo, integer *n, integer *ncvt, integer *nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real *u, integer *ldu, real *c__, integer *ldc, real *work, integer *info) |
| int | sgeqrf_ (integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info) |
| int | sorml2_ (const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info) |
| int | slabrd_ (integer *m, integer *n, integer *nb, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *x, integer *ldx, real *y, integer *ldy) |
| int | sgeqr2_ (integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *info) |
| int | sorm2r_ (const char *side, const char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info) |
| int | sorgbr_ (const char *vect, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info) |
| int | slasq1_ (integer *n, real *d__, real *e, real *work, integer *info) |
| int | slasq2_ (integer *n, real *z__, integer *info) |
| int | slasq3_ (integer *i0, integer *n0, real *z__, integer *pp, real *dmin__, real *sigma, real *desig, real *qmax, integer *nfail, integer *iter, integer *ndiv, logical *ieee) |
| int | slasq4_ (integer *i0, integer *n0, real *z__, integer *pp, integer *n0in, real *dmin__, real *dmin1, real *dmin2, real *dn, real *dn1, real *dn2, real *tau, integer *ttype) |
| int | slasq5_ (integer *i0, integer *n0, real *z__, integer *pp, real *tau, real *dmin__, real *dmin1, real *dmin2, real *dn, real *dnm1, real *dnm2, logical *ieee) |
| int | slasq6_ (integer *i0, integer *n0, real *z__, integer *pp, real *dmin__, real *dmin1, real *dmin2, real *dn, real *dnm1, real *dnm2) |
| int | slasv2_ (real *f, real *g, real *h__, real *ssmin, real *ssmax, real *snr, real *csr, real *snl, real *csl) |
| int | slas2_ (real *f, real *g, real *h__, real *ssmin, real *ssmax) |
Variables | |
| real | c_b3 = -1.f |
| integer | c__1 = 1 |
| integer | c__6 = 6 |
| integer | c__0 = 0 |
| integer | c__2 = 2 |
| integer | c_n1 = -1 |
| real | c_b416 = 0.f |
| real | c_b438 = 1.f |
| doublereal | c_b15 = -.125 |
| real | c_b49 = 1.f |
| real | c_b72 = -1.f |
| integer | c__10 = 10 |
| integer | c__3 = 3 |
| integer | c__4 = 4 |
| integer | c__11 = 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Referenced by sgebd2_(), sgebrd_(), sgelq2_(), sgelqf_(), sgemm_(), sgemv_(), sgeqr2_(), sgeqrf_(), sger_(), sgesvd_(), slabrd_(), slacpy_(), slange_(), slansy_(), slascl_(), slaset_(), slasr_(), slatrd_(), sorg2l_(), sorg2r_(), sorgbr_(), sorgl2_(), sorglq_(), sorgql_(), sorgqr_(), sorgtr_(), sorm2r_(), sormbr_(), sorml2_(), sormlq_(), sormqr_(), ssyev_(), ssymv_(), ssyr2_(), ssyr2k_(), ssytd2_(), ssytrd_(), strmm_(), and strmv_(). |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Referenced by sbdsqr_(), sgemm_(), slarfb_(), sorm2r_(), sormbr_(), sorml2_(), sormlq_(), sormqr_(), and ssyr2k_(). |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Referenced by slaed8_(). |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Referenced by slaed0_(), slaed1_(), slaed2_(), slaed3_(), slaed8_(), and slaed9_(). |
|
|
Referenced by slaed0_(). |
|
|
Referenced by slaed9_(). |
|
|
Referenced by slasrt_(). |
|
|
Referenced by slarft_(). |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Referenced by slatrd_(). |
|
|
Referenced by slarfb_(). |
|
|
Referenced by slabrd_(). |
|
|
Referenced by slabrd_(). |
|
|
|
|
|
|
|
|
|
|
||||||||||||||||
|
Definition at line 56 of file lapackblas.cpp. Referenced by ilaenv_(). 00057 {
00058 /* -- LAPACK auxiliary routine (version 3.0) --
00059 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
00060 Courant Institute, Argonne National Lab, and Rice University
00061 June 30, 1998
00062
00063
00064 Purpose
00065 =======
00066
00067 IEEECK is called from the ILAENV to verify that Infinity and
00068 possibly NaN arithmetic is safe (i.e. will not trap).
00069
00070 Arguments
00071 =========
00072
00073 ISPEC (input) INTEGER
00074 Specifies whether to test just for inifinity arithmetic
00075 or whether to test for infinity and NaN arithmetic.
00076 = 0: Verify infinity arithmetic only.
00077 = 1: Verify infinity and NaN arithmetic.
00078
00079 ZERO (input) REAL
00080 Must contain the value 0.0
00081 This is passed to prevent the compiler from optimizing
00082 away this code.
00083
00084 ONE (input) REAL
00085 Must contain the value 1.0
00086 This is passed to prevent the compiler from optimizing
00087 away this code.
00088
00089 RETURN VALUE: INTEGER
00090 = 0: Arithmetic failed to produce the correct answers
00091 = 1: Arithmetic produced the correct answers */
00092 /* System generated locals */
00093 integer ret_val;
00094 /* Local variables */
00095 static real neginf, posinf, negzro, newzro, nan1, nan2, nan3, nan4, nan5,
00096 nan6;
00097
00098
00099 ret_val = 1;
00100
00101 posinf = *one / *zero;
00102 if (posinf <= *one) {
00103 ret_val = 0;
00104 return ret_val;
00105 }
00106
00107 neginf = -(*one) / *zero;
00108 if (neginf >= *zero) {
00109 ret_val = 0;
00110 return ret_val;
00111 }
00112
00113 negzro = *one / (neginf + *one);
00114 if (negzro != *zero) {
00115 ret_val = 0;
00116 return ret_val;
00117 }
00118
00119 neginf = *one / negzro;
00120 if (neginf >= *zero) {
00121 ret_val = 0;
00122 return ret_val;
00123 }
00124
00125 newzro = negzro + *zero;
00126 if (newzro != *zero) {
00127 ret_val = 0;
00128 return ret_val;
00129 }
00130
00131 posinf = *one / newzro;
00132 if (posinf <= *one) {
00133 ret_val = 0;
00134 return ret_val;
00135 }
00136
00137 neginf *= posinf;
00138 if (neginf >= *zero) {
00139 ret_val = 0;
00140 return ret_val;
00141 }
00142
00143 posinf *= posinf;
00144 if (posinf <= *one) {
00145 ret_val = 0;
00146 return ret_val;
00147 }
00148
00149
00150
00151
00152 /* Return if we were only asked to check infinity arithmetic */
00153
00154 if (*ispec == 0) {
00155 return ret_val;
00156 }
00157
00158 nan1 = posinf + neginf;
00159
00160 nan2 = posinf / neginf;
00161
00162 nan3 = posinf / posinf;
00163
00164 nan4 = posinf * *zero;
00165
00166 nan5 = neginf * negzro;
00167
00168 nan6 = nan5 * 0.f;
00169
00170 if (nan1 == nan1) {
00171 ret_val = 0;
00172 return ret_val;
00173 }
00174
00175 if (nan2 == nan2) {
00176 ret_val = 0;
00177 return ret_val;
00178 }
00179
00180 if (nan3 == nan3) {
00181 ret_val = 0;
00182 return ret_val;
00183 }
00184
00185 if (nan4 == nan4) {
00186 ret_val = 0;
00187 return ret_val;
00188 }
00189
00190 if (nan5 == nan5) {
00191 ret_val = 0;
00192 return ret_val;
00193 }
00194
00195 if (nan6 == nan6) {
00196 ret_val = 0;
00197 return ret_val;
00198 }
00199
00200 return ret_val;
00201 } /* ieeeck_ */
|
|
||||||||||||||||||||||||||||||||||||||||
|
Definition at line 206 of file lapackblas.cpp. References c__0, c__1, f2cmin, ftnlen, ieeeck_(), integer, logical, nx, real, s_cmp(), and s_copy(). Referenced by sgebrd_(), sgelqf_(), sgeqrf_(), sgesvd_(), slaed0_(), slasq2_(), sorgbr_(), sorglq_(), sorgql_(), sorgqr_(), sorgtr_(), sormbr_(), sormlq_(), sormqr_(), sstedc_(), ssyev_(), and ssytrd_(). 00208 {
00209 /* -- LAPACK auxiliary routine (version 3.0) --
00210 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
00211 Courant Institute, Argonne National Lab, and Rice University
00212 June 30, 1999
00213
00214
00215 Purpose
00216 =======
00217
00218 ILAENV is called from the LAPACK routines to choose problem-dependent
00219 parameters for the local environment. See ISPEC for a description of
00220 the parameters.
00221
00222 This version provides a set of parameters which should give good,
00223 but not optimal, performance on many of the currently available
00224 computers. Users are encouraged to modify this subroutine to set
00225 the tuning parameters for their particular machine using the option
00226 and problem size information in the arguments.
00227
00228 This routine will not function correctly if it is converted to all
00229 lower case. Converting it to all upper case is allowed.
00230
00231 Arguments
00232 =========
00233
00234 ISPEC (input) INTEGER
00235 Specifies the parameter to be returned as the value of
00236 ILAENV.
00237 = 1: the optimal blocksize; if this value is 1, an unblocked
00238 algorithm will give the best performance.
00239 = 2: the minimum block size for which the block routine
00240 should be used; if the usable block size is less than
00241 this value, an unblocked routine should be used.
00242 = 3: the crossover point (in a block routine, for N less
00243 than this value, an unblocked routine should be used)
00244 = 4: the number of shifts, used in the nonsymmetric
00245 eigenvalue routines
00246 = 5: the MINIMUM Column dimension for blocking to be used;
00247 rectangular blocks must have dimension at least k by m,
00248 where k is given by ILAENV(2,...) and m by ILAENV(5,...)
00249 = 6: the crossover point for the SVD (when reducing an m by n
00250 matrix to bidiagonal form, if f2cmax(m,n)/min(m,n) exceeds
00251 this value, a QR factorization is used first to reduce
00252 the matrix to a triangular form.)
00253 = 7: the number of processors
00254 = 8: the crossover point for the multishift QR and QZ methods
00255 for nonsymmetric eigenvalue problems.
00256 = 9: maximum size of the subproblems at the bottom of the
00257 computation tree in the divide-and-conquer algorithm
00258 (used by xGELSD and xGESDD)
00259 =10: ieee NaN arithmetic can be trusted not to trap
00260 =11: infinity arithmetic can be trusted not to trap
00261
00262 NAME (input) CHARACTER*(*)
00263 The name of the calling subroutine, in either upper case or
00264 lower case.
00265
00266 OPTS (input) CHARACTER*(*)
00267 The character options to the subroutine NAME, concatenated
00268 into a single character string. For example, UPLO = 'U',
00269 TRANS = 'T', and DIAG = 'N' for a triangular routine would
00270 be specified as OPTS = 'UTN'.
00271
00272 N1 (input) INTEGER
00273 N2 (input) INTEGER
00274 N3 (input) INTEGER
00275 N4 (input) INTEGER
00276 Problem dimensions for the subroutine NAME; these may not all
00277 be required.
00278
00279 (ILAENV) (output) INTEGER
00280 >= 0: the value of the parameter specified by ISPEC
00281 < 0: if ILAENV = -k, the k-th argument had an illegal value.
00282
00283 Further Details
00284 ===============
00285
00286 The following conventions have been used when calling ILAENV from the
00287 LAPACK routines:
00288 1) OPTS is a concatenation of all of the character options to
00289 subroutine NAME, in the same order that they appear in the
00290 argument list for NAME, even if they are not used in determining
00291 the value of the parameter specified by ISPEC.
00292 2) The problem dimensions N1, N2, N3, N4 are specified in the order
00293 that they appear in the argument list for NAME. N1 is used
00294 first, N2 second, and so on, and unused problem dimensions are
00295 passed a value of -1.
00296 3) The parameter value returned by ILAENV is checked for validity in
00297 the calling subroutine. For example, ILAENV is used to retrieve
00298 the optimal blocksize for STRTRI as follows:
00299
00300 NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 )
00301 IF( NB.LE.1 ) NB = MAX( 1, N )
00302
00303 ===================================================================== */
00304 /* Table of constant values */
00305 static integer c__0 = 0;
00306 static real c_b162 = 0.f;
00307 static real c_b163 = 1.f;
00308 static integer c__1 = 1;
00309
00310 /* System generated locals */
00311 integer ret_val;
00312 /* Builtin functions
00313 Subroutine */ void s_copy(char *, const char *, ftnlen, ftnlen);
00314 integer s_cmp(char *, const char *, ftnlen, ftnlen);
00315 /* Local variables */
00316 static integer i__;
00317 static logical cname, sname;
00318 static integer nbmin;
00319 static char c1[1], c2[2], c3[3], c4[2];
00320 static integer ic, nb;
00321 extern integer ieeeck_(integer *, real *, real *);
00322 static integer iz, nx;
00323 static char subnam[6];
00324
00325
00326
00327
00328 switch (*ispec) {
00329 case 1: goto L100;
00330 case 2: goto L100;
00331 case 3: goto L100;
00332 case 4: goto L400;
00333 case 5: goto L500;
00334 case 6: goto L600;
00335 case 7: goto L700;
00336 case 8: goto L800;
00337 case 9: goto L900;
00338 case 10: goto L1000;
00339 case 11: goto L1100;
00340 }
00341
00342 /* Invalid value for ISPEC */
00343
00344 ret_val = -1;
00345 return ret_val;
00346
00347 L100:
00348
00349 /* Convert NAME to upper case if the first character is lower case. */
00350
00351 ret_val = 1;
00352 s_copy(subnam, name__, (ftnlen)6, name_len);
00353 ic = *(unsigned char *)subnam;
00354 iz = 'Z';
00355 if (iz == 90 || iz == 122) {
00356
00357 /* ASCII character set */
00358
00359 if (ic >= 97 && ic <= 122) {
00360 *(unsigned char *)subnam = (char) (ic - 32);
00361 for (i__ = 2; i__ <= 6; ++i__) {
00362 ic = *(unsigned char *)&subnam[i__ - 1];
00363 if (ic >= 97 && ic <= 122) {
00364 *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32);
00365 }
00366 /* L10: */
00367 }
00368 }
00369
00370 } else if (iz == 233 || iz == 169) {
00371
00372 /* EBCDIC character set */
00373
00374 if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 &&
00375 ic <= 169) {
00376 *(unsigned char *)subnam = (char) (ic + 64);
00377 for (i__ = 2; i__ <= 6; ++i__) {
00378 ic = *(unsigned char *)&subnam[i__ - 1];
00379 if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >=
00380 162 && ic <= 169) {
00381 *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64);
00382 }
00383 /* L20: */
00384 }
00385 }
00386
00387 } else if (iz == 218 || iz == 250) {
00388
00389 /* Prime machines: ASCII+128 */
00390
00391 if (ic >= 225 && ic <= 250) {
00392 *(unsigned char *)subnam = (char) (ic - 32);
00393 for (i__ = 2; i__ <= 6; ++i__) {
00394 ic = *(unsigned char *)&subnam[i__ - 1];
00395 if (ic >= 225 && ic <= 250) {
00396 *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32);
00397 }
00398 /* L30: */
00399 }
00400 }
00401 }
00402
00403 *(unsigned char *)c1 = *(unsigned char *)subnam;
00404 sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D';
00405 cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z';
00406 if (! (cname || sname)) {
00407 return ret_val;
00408 }
00409 s_copy(c2, subnam + 1, (ftnlen)2, (ftnlen)2);
00410 s_copy(c3, subnam + 3, (ftnlen)3, (ftnlen)3);
00411 s_copy(c4, c3 + 1, (ftnlen)2, (ftnlen)2);
00412
00413 switch (*ispec) {
00414 case 1: goto L110;
00415 case 2: goto L200;
00416 case 3: goto L300;
00417 }
00418
00419 L110:
00420
00421 /* ISPEC = 1: block size
00422
00423 In these examples, separate code is provided for setting NB for
00424 real and complex. We assume that NB will take the same value in
00425 single or double precision. */
00426
00427 nb = 1;
00428
00429 if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) {
00430 if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
00431 if (sname) {
00432 nb = 64;
00433 } else {
00434 nb = 64;
00435 }
00436 } else if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3,
00437 "RQF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)
00438 3, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3)
00439 == 0) {
00440 if (sname) {
00441 nb = 32;
00442 } else {
00443 nb = 32;
00444 }
00445 } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) {
00446 if (sname) {
00447 nb = 32;
00448 } else {
00449 nb = 32;
00450 }
00451 } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) {
00452 if (sname) {
00453 nb = 32;
00454 } else {
00455 nb = 32;
00456 }
00457 } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) {
00458 if (sname) {
00459 nb = 64;
00460 } else {
00461 nb = 64;
00462 }
00463 }
00464 } else if (s_cmp(c2, "PO", (ftnlen)2, (ftnlen)2) == 0) {
00465 if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
00466 if (sname) {
00467 nb = 64;
00468 } else {
00469 nb = 64;
00470 }
00471 }
00472 } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) {
00473 if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
00474 if (sname) {
00475 nb = 64;
00476 } else {
00477 nb = 64;
00478 }
00479 } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
00480 nb = 32;
00481 } else if (sname && s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) {
00482 nb = 64;
00483 }
00484 } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) {
00485 if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
00486 nb = 64;
00487 } else if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
00488 nb = 32;
00489 } else if (s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) {
00490 nb = 64;
00491 }
00492 } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) {
00493 if (*(unsigned char *)c3 == 'G') {
00494 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
00495 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
00496 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
00497 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
00498 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
00499 ftnlen)2, (ftnlen)2) == 0) {
00500 nb = 32;
00501 }
00502 } else if (*(unsigned char *)c3 == 'M') {
00503 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
00504 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
00505 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
00506 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
00507 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
00508 ftnlen)2, (ftnlen)2) == 0) {
00509 nb = 32;
00510 }
00511 }
00512 } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) {
00513 if (*(unsigned char *)c3 == 'G') {
00514 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
00515 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
00516 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
00517 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
00518 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
00519 ftnlen)2, (ftnlen)2) == 0) {
00520 nb = 32;
00521 }
00522 } else if (*(unsigned char *)c3 == 'M') {
00523 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
00524 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
00525 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
00526 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
00527 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
00528 ftnlen)2, (ftnlen)2) == 0) {
00529 nb = 32;
00530 }
00531 }
00532 } else if (s_cmp(c2, "GB", (ftnlen)2, (ftnlen)2) == 0) {
00533 if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
00534 if (sname) {
00535 if (*n4 <= 64) {
00536 nb = 1;
00537 } else {
00538 nb = 32;
00539 }
00540 } else {
00541 if (*n4 <= 64) {
00542 nb = 1;
00543 } else {
00544 nb = 32;
00545 }
00546 }
00547 }
00548 } else if (s_cmp(c2, "PB", (ftnlen)2, (ftnlen)2) == 0) {
00549 if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
00550 if (sname) {
00551 if (*n2 <= 64) {
00552 nb = 1;
00553 } else {
00554 nb = 32;
00555 }
00556 } else {
00557 if (*n2 <= 64) {
00558 nb = 1;
00559 } else {
00560 nb = 32;
00561 }
00562 }
00563 }
00564 } else if (s_cmp(c2, "TR", (ftnlen)2, (ftnlen)2) == 0) {
00565 if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) {
00566 if (sname) {
00567 nb = 64;
00568 } else {
00569 nb = 64;
00570 }
00571 }
00572 } else if (s_cmp(c2, "LA", (ftnlen)2, (ftnlen)2) == 0) {
00573 if (s_cmp(c3, "UUM", (ftnlen)3, (ftnlen)3) == 0) {
00574 if (sname) {
00575 nb = 64;
00576 } else {
00577 nb = 64;
00578 }
00579 }
00580 } else if (sname && s_cmp(c2, "ST", (ftnlen)2, (ftnlen)2) == 0) {
00581 if (s_cmp(c3, "EBZ", (ftnlen)3, (ftnlen)3) == 0) {
00582 nb = 1;
00583 }
00584 }
00585 ret_val = nb;
00586 return ret_val;
00587
00588 L200:
00589
00590 /* ISPEC = 2: minimum block size */
00591
00592 nbmin = 2;
00593 if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) {
00594 if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", (
00595 ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, (
00596 ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0)
00597 {
00598 if (sname) {
00599 nbmin = 2;
00600 } else {
00601 nbmin = 2;
00602 }
00603 } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) {
00604 if (sname) {
00605 nbmin = 2;
00606 } else {
00607 nbmin = 2;
00608 }
00609 } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) {
00610 if (sname) {
00611 nbmin = 2;
00612 } else {
00613 nbmin = 2;
00614 }
00615 } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) {
00616 if (sname) {
00617 nbmin = 2;
00618 } else {
00619 nbmin = 2;
00620 }
00621 }
00622 } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) {
00623 if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
00624 if (sname) {
00625 nbmin = 8;
00626 } else {
00627 nbmin = 8;
00628 }
00629 } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
00630 nbmin = 2;
00631 }
00632 } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) {
00633 if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
00634 nbmin = 2;
00635 }
00636 } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) {
00637 if (*(unsigned char *)c3 == 'G') {
00638 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
00639 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
00640 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
00641 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
00642 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
00643 ftnlen)2, (ftnlen)2) == 0) {
00644 nbmin = 2;
00645 }
00646 } else if (*(unsigned char *)c3 == 'M') {
00647 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
00648 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
00649 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
00650 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
00651 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
00652 ftnlen)2, (ftnlen)2) == 0) {
00653 nbmin = 2;
00654 }
00655 }
00656 } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) {
00657 if (*(unsigned char *)c3 == 'G') {
00658 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
00659 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
00660 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
00661 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
00662 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
00663 ftnlen)2, (ftnlen)2) == 0) {
00664 nbmin = 2;
00665 }
00666 } else if (*(unsigned char *)c3 == 'M') {
00667 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
00668 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
00669 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
00670 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
00671 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
00672 ftnlen)2, (ftnlen)2) == 0) {
00673 nbmin = 2;
00674 }
00675 }
00676 }
00677 ret_val = nbmin;
00678 return ret_val;
00679
00680 L300:
00681
00682 /* ISPEC = 3: crossover point */
00683
00684 nx = 0;
00685 if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) {
00686 if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", (
00687 ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, (
00688 ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0)
00689 {
00690 if (sname) {
00691 nx = 128;
00692 } else {
00693 nx = 128;
00694 }
00695 } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) {
00696 if (sname) {
00697 nx = 128;
00698 } else {
00699 nx = 128;
00700 }
00701 } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) {
00702 if (sname) {
00703 nx = 128;
00704 } else {
00705 nx = 128;
00706 }
00707 }
00708 } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) {
00709 if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
00710 nx = 32;
00711 }
00712 } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) {
00713 if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
00714 nx = 32;
00715 }
00716 } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) {
00717 if (*(unsigned char *)c3 == 'G') {
00718 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
00719 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
00720 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
00721 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
00722 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
00723 ftnlen)2, (ftnlen)2) == 0) {
00724 nx = 128;
00725 }
00726 }
00727 } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) {
00728 if (*(unsigned char *)c3 == 'G') {
00729 if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
00730 (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, (
00731 ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) ==
00732 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(
00733 c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
00734 ftnlen)2, (ftnlen)2) == 0) {
00735 nx = 128;
00736 }
00737 }
00738 }
00739 ret_val = nx;
00740 return ret_val;
00741
00742 L400:
00743
00744 /* ISPEC = 4: number of shifts (used by xHSEQR) */
00745
00746 ret_val = 6;
00747 return ret_val;
00748
00749 L500:
00750
00751 /* ISPEC = 5: minimum column dimension (not used) */
00752
00753 ret_val = 2;
00754 return ret_val;
00755
00756 L600:
00757
00758 /* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */
00759
00760 ret_val = (integer) ((real) f2cmin(*n1,*n2) * 1.6f);
00761 return ret_val;
00762
00763 L700:
00764
00765 /* ISPEC = 7: number of processors (not used) */
00766
00767 ret_val = 1;
00768 return ret_val;
00769
00770 L800:
00771
00772 /* ISPEC = 8: crossover point for multishift (used by xHSEQR) */
00773
00774 ret_val = 50;
00775 return ret_val;
00776
00777 L900:
00778
00779 /* ISPEC = 9: maximum size of the subproblems at the bottom of the
00780 computation tree in the divide-and-conquer algorithm
00781 (used by xGELSD and xGESDD) */
00782
00783 ret_val = 25;
00784 return ret_val;
00785
00786 L1000:
00787
00788 /* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap
00789
00790 ILAENV = 0 */
00791 ret_val = 1;
00792 if (ret_val == 1) {
00793 ret_val = ieeeck_(&c__0, &c_b162, &c_b163);
00794 }
00795 return ret_val;
00796
00797 L1100:
00798
00799 /* ISPEC = 11: infinity arithmetic can be trusted not to trap
00800
00801 ILAENV = 0 */
00802 ret_val = 1;
00803 if (ret_val == 1) {
00804 ret_val = ieeeck_(&c__1, &c_b162, &c_b163);
00805 }
00806 return ret_val;
00807
00808 /* End of ILAENV */
00809
00810 } /* ilaenv_ */
|
|
||||||||||||||||
|
Definition at line 14323 of file lapackblas.cpp. References dabs, integer, and real. Referenced by slaed2_(), and slaed8_(). 14324 {
14325 /* System generated locals */
14326 integer ret_val, i__1;
14327 real r__1;
14328 /* Local variables */
14329 static real smax;
14330 static integer i__, ix;
14331 /* finds the index of element having max. absolute value.
14332 jack dongarra, linpack, 3/11/78.
14333 modified 3/93 to return if incx .le. 0.
14334 modified 12/3/93, array(1) declarations changed to array(*)
14335 Parameter adjustments */
14336 --sx;
14337 /* Function Body */
14338 ret_val = 0;
14339 if (*n < 1 || *incx <= 0) {
14340 return ret_val;
14341 }
14342 ret_val = 1;
14343 if (*n == 1) {
14344 return ret_val;
14345 }
14346 if (*incx == 1) {
14347 goto L20;
14348 }
14349 /* code for increment not equal to 1 */
14350 ix = 1;
14351 smax = dabs(sx[1]);
14352 ix += *incx;
14353 i__1 = *n;
14354 for (i__ = 2; i__ <= i__1; ++i__) {
14355 if ((r__1 = sx[ix], dabs(r__1)) <= smax) {
14356 goto L5;
14357 }
14358 ret_val = i__;
14359 smax = (r__1 = sx[ix], dabs(r__1));
14360 L5:
14361 ix += *incx;
14362 /* L10: */
14363 }
14364 return ret_val;
14365 /* code for increment equal to 1 */
14366 L20:
14367 smax = dabs(sx[1]);
14368 i__1 = *n;
14369 for (i__ = 2; i__ <= i__1; ++i__) {
14370 if ((r__1 = sx[i__], dabs(r__1)) <= smax) {
14371 goto L30;
14372 }
14373 ret_val = i__;
14374 smax = (r__1 = sx[i__], dabs(r__1));
14375 L30:
14376 ;
14377 }
14378 return ret_val;
14379 } /* isamax_ */
|
|
||||||||||||
|
Definition at line 814 of file lapackblas.cpp. References integer, and logical. Referenced by sbdsqr_(), sgemm_(), sgemv_(), sgesvd_(), slacpy_(), slamch_(), slange_(), slanst_(), slansy_(), slarf_(), slarfb_(), slarft_(), slascl_(), slaset_(), slasr_(), slasrt_(), slatrd_(), sorgbr_(), sorgtr_(), sorm2r_(), sormbr_(), sorml2_(), sormlq_(), sormqr_(), sstedc_(), ssteqr_(), sstevd_(), ssyev_(), ssymv_(), ssyr2_(), ssyr2k_(), ssytd2_(), ssytrd_(), strmm_(), and strmv_(). 00815 {
00816 /* -- LAPACK auxiliary routine (version 3.0) --
00817 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
00818 Courant Institute, Argonne National Lab, and Rice University
00819 September 30, 1994
00820
00821
00822 Purpose
00823 =======
00824
00825 LSAME returns .TRUE. if CA is the same letter as CB regardless of
00826 case.
00827
00828 Arguments
00829 =========
00830
00831 CA (input) CHARACTER*1
00832 CB (input) CHARACTER*1
00833 CA and CB specify the single characters to be compared.
00834
00835 =====================================================================
00836
00837
00838
00839 Test if the characters are equal */
00840 /* System generated locals */
00841 logical ret_val;
00842 /* Local variables */
00843 static integer inta, intb, zcode;
00844
00845
00846 ret_val = *(unsigned char *)ca == *(unsigned char *)cb;
00847 if (ret_val) {
00848 return ret_val;
00849 }
00850
00851 /* Now test for equivalence if both characters are alphabetic. */
00852
00853 zcode = 'Z';
00854
00855 /* Use 'Z' rather than 'A' so that ASCII can be detected on Prime
00856 machines, on which ICHAR returns a value with bit 8 set.
00857 ICHAR('A') on Prime machines returns 193 which is the same as
00858 ICHAR('A') on an EBCDIC machine. */
00859
00860 inta = *(unsigned char *)ca;
00861 intb = *(unsigned char *)cb;
00862
00863 if (zcode == 90 || zcode == 122) {
00864
00865 /* ASCII is assumed - ZCODE is the ASCII code of either lower o
00866 r
00867 upper case 'Z'. */
00868
00869 if (inta >= 97 && inta <= 122) {
00870 inta += -32;
00871 }
00872 if (intb >= 97 && intb <= 122) {
00873 intb += -32;
00874 }
00875
00876 } else if (zcode == 233 || zcode == 169) {
00877
00878 /* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower
00879 or
00880 upper case 'Z'. */
00881
00882 if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta
00883 >= 162 && inta <= 169) {
00884 inta += 64;
00885 }
00886 if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb
00887 >= 162 && intb <= 169) {
00888 intb += 64;
00889 }
00890
00891 } else if (zcode == 218 || zcode == 250) {
00892
00893 /* ASCII is assumed, on Prime machines - ZCODE is the ASCII cod
00894 e
00895 plus 128 of either lower or upper case 'Z'. */
00896
00897 if (inta >= 225 && inta <= 250) {
00898 inta += -32;
00899 }
00900 if (intb >= 225 && intb <= 250) {
00901 intb += -32;
00902 }
00903 }
00904 ret_val = inta == intb;
00905
00906 /* RETURN
00907
00908 End of LSAME */
00909
00910 return ret_val;
00911 } /* lsame_ */
|
|
||||||||||||
|
Definition at line 952 of file lapackblas.cpp. Referenced by slaed0_(), slaed7_(), slaeda_(), and sstedc_(). 00954 {
00955 integer pow, x, n;
00956 unsigned long u;
00957
00958 x = *ap;
00959 n = *bp;
00960
00961 if (n <= 0) {
00962 if (n == 0 || x == 1)
00963 return 1;
00964 if (x != -1)
00965 return x != 0 ? 1/x : 0;
00966 n = -n;
00967 }
00968 u = n;
00969 for(pow = 1; ; )
00970 {
00971 if(u & 01)
00972 pow *= x;
00973 if(u >>= 1)
00974 x *= x;
00975 else
00976 break;
00977 }
00978 return(pow);
00979 }
|
|
||||||||||||
|
Definition at line 918 of file lapackblas.cpp. Referenced by slaed6_(), slamc2_(), slamch_(), and slartg_(). 00920 {
00921 double pow, x;
00922 integer n;
00923 unsigned long u;
00924
00925 pow = 1;
00926 x = *ap;
00927 n = *bp;
00928
00929 if(n != 0)
00930 {
00931 if(n < 0)
00932 {
00933 n = -n;
00934 x = 1/x;
00935 }
00936 for(u = n; ; )
00937 {
00938 if(u & 01)
00939 pow *= x;
00940 if(u >>= 1)
00941 x *= x;
00942 else
00943 break;
00944 }
00945 }
00946 return(pow);
00947 }
|
|
||||||||||||
|
Definition at line 984 of file lapackblas.cpp. Referenced by sbdsqr_(), slaed3_(), slaed9_(), slarfg_(), slasv2_(), ssteqr_(), and ssterf_().
|
|
||||||||||||||||||||||||
|
Definition at line 37 of file lapackblas.cpp. References ftnlen. Referenced by sgesvd_(), sormbr_(), sormlq_(), and sormqr_(). 00039 {
00040 ftnlen i, n, nc;
00041 const char *f__rp;
00042
00043 n = (int)*np;
00044 for(i = 0 ; i < n ; ++i) {
00045 nc = ll;
00046 if(rnp[i] < nc) nc = rnp[i];
00047 ll -= nc;
00048 f__rp = rpp[i];
00049 while(--nc >= 0) *lp++ = *f__rp++;
00050 }
00051 while(--ll >= 0)
00052 *lp++ = ' ';
00053 return 0;
00054 }
|
|
||||||||||||||||||||
|
Definition at line 1071 of file lapackblas.cpp. 01073 {
01074 register unsigned char *a, *aend, *b, *bend;
01075 a = (unsigned char *)a0;
01076 b = (unsigned char *)b0;
01077 aend = a + la;
01078 bend = b + lb;
01079
01080 if(la <= lb)
01081 {
01082 while(a < aend)
01083 if(*a != *b)
01084 return( *a - *b );
01085 else
01086 { ++a; ++b; }
01087
01088 while(b < bend)
01089 if(*b != ' ')
01090 return( ' ' - *b );
01091 else ++b;
01092 }
01093
01094 else
01095 {
01096 while(b < bend)
01097 if(*a == *b)
01098 { ++a; ++b; }
01099 else
01100 return( *a - *b );
01101 while(a < aend)
01102 if(*a != ' ')
01103 return(*a - ' ');
01104 else ++a;
01105 }
01106 return(0);
01107 }
|
|
||||||||||||||||||||
|
Definition at line 1121 of file lapackblas.cpp. 01123 {
01124 register char *aend;
01125 const register char *bend;
01126
01127 aend = a + la;
01128
01129 if(la <= lb)
01130 #ifndef NO_OVERWRITE
01131 if (a <= b || a >= b + la)
01132 #endif
01133 while(a < aend)
01134 *a++ = *b++;
01135 #ifndef NO_OVERWRITE
01136 else
01137 for(b += la; a < aend; )
01138 *--aend = *--b;
01139 #endif
01140
01141 else {
01142 bend = b + lb;
01143 #ifndef NO_OVERWRITE
01144 if (a <= b || a >= bend)
01145 #endif
01146 while(b < bend)
01147 *a++ = *b++;
01148 #ifndef NO_OVERWRITE
01149 else {
01150 a += lb;
01151 while(b < bend)
01152 *--a = *--bend;
01153 a += lb;
01154 }
01155 #endif
01156 while(a < aend)
01157 *a++ = ' ';
01158 }
01159 }
|
|
||||||||||||||||||||||||||||
|
Definition at line 994 of file lapackblas.cpp. References integer. Referenced by EMAN::PCA::dopca_ooc(), EMAN::PCAlarge::Lanczos(), EMAN::PCA::Lanczos(), EMAN::PCA::Lanczos_ooc(), slatrd_(), and ssytd2_(). 00996 {
00997 /* System generated locals */
00998 integer i__1;
00999 /* Local variables */
01000 static integer i__, m, ix, iy, mp1;
01001 /* constant times a vector plus a vector.
01002 uses unrolled loop for increments equal to one.
01003 jack dongarra, linpack, 3/11/78.
01004 modified 12/3/93, array(1) declarations changed to array(*)
01005 Parameter adjustments */
01006 --sy;
01007 --sx;
01008 /* Function Body */
01009 if (*n <= 0) {
01010 return 0;
01011 }
01012 if (*sa == 0.f) {
01013 return 0;
01014 }
01015 if (*incx == 1 && *incy == 1) {
01016 goto L20;
01017 }
01018 /* code for unequal increments or equal increments
01019 not equal to 1 */
01020 ix = 1;
01021 iy = 1;
01022 if (*incx < 0) {
01023 ix = (-(*n) + 1) * *incx + 1;
01024 }
01025 if (*incy < 0) {
01026 iy = (-(*n) + 1) * *incy + 1;
01027 }
01028 i__1 = *n;
01029 for (i__ = 1; i__ <= i__1; ++i__) {
01030 sy[iy] += *sa * sx[ix];
01031 ix += *incx;
01032 iy += *incy;
01033 /* L10: */
01034 }
01035 return 0;
01036 /* code for both increments equal to 1
01037 clean-up loop */
01038 L20:
01039 m = *n % 4;
01040 if (m == 0) {
01041 goto L40;
01042 }
01043 i__1 = m;
01044 for (i__ = 1; i__ <= i__1; ++i__) {
01045 sy[i__] += *sa * sx[i__];
01046 /* L30: */
01047 }
01048 if (*n < 4) {
01049 return 0;
01050 }
01051 L40:
01052 mp1 = m + 1;
01053 i__1 = *n;
01054 for (i__ = mp1; i__ <= i__1; i__ += 4) {
01055 sy[i__] += *sa * sx[i__];
01056 sy[i__ + 1] += *sa * sx[i__ + 1];
01057 sy[i__ + 2] += *sa * sx[i__ + 2];
01058 sy[i__ + 3] += *sa * sx[i__ + 3];
01059 /* L50: */
01060 }
01061 return 0;
01062 } /* saxpy_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 22954 of file lapackblas.cpp. References c__1, c___ref, c_b15, c_b49, c_b72, dabs, df2cmax, df2cmin, doublereal, f2cmax, integer, logical, lsame_(), r_sign(), real, slamch_(), slartg_(), slas2_(), slasq1_(), slasr_(), slasv2_(), sqrt(), srot_(), sscal_(), sswap_(), u_ref, vt_ref, and xerbla_(). Referenced by sgesvd_(). 22957 {
22958 /* System generated locals */
22959 integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1,
22960 i__2;
22961 real r__1, r__2, r__3, r__4;
22962 doublereal d__1;
22963
22964 /* Builtin functions */
22965 // double pow_dd(doublereal *, doublereal *), sqrt(doublereal), r_sign(real *, real *);
22966
22967 /* Local variables */
22968 static real abse;
22969 static integer idir;
22970 static real abss;
22971 static integer oldm;
22972 static real cosl;
22973 static integer isub, iter;
22974 static real unfl, sinl, cosr, smin, smax, sinr;
22975 extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
22976 integer *, real *, real *), slas2_(real *, real *, real *, real *,
22977 real *);
22978 static real f, g, h__;
22979 static integer i__, j, m;
22980 static real r__;
22981 extern logical lsame_(const char *, const char *);
22982 static real oldcs;
22983 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
22984 static integer oldll;
22985 static real shift, sigmn, oldsn;
22986 static integer maxit;
22987 static real sminl;
22988 extern /* Subroutine */ int slasr_(const char *, const char *, const char *, integer *,
22989 integer *, real *, real *, real *, integer *);
22990 static real sigmx;
22991 static logical lower;
22992 extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *,
22993 integer *), slasq1_(integer *, real *, real *, real *, integer *),
22994 slasv2_(real *, real *, real *, real *, real *, real *, real *,
22995 real *, real *);
22996 static real cs;
22997 static integer ll;
22998 static real sn, mu;
22999 extern doublereal slamch_(const char *);
23000 extern /* Subroutine */ int xerbla_(const char *, integer *);
23001 static real sminoa;
23002 extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real *
23003 );
23004 static real thresh;
23005 static logical rotate;
23006 static real sminlo;
23007 static integer nm1;
23008 static real tolmul;
23009 static integer nm12, nm13, lll;
23010 static real eps, sll, tol;
23011
23012
23013 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
23014 #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1]
23015 #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1]
23016
23017
23018 /* -- LAPACK routine (version 3.0) --
23019 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
23020 Courant Institute, Argonne National Lab, and Rice University
23021 October 31, 1999
23022
23023
23024 Purpose
23025 =======
23026
23027 SBDSQR computes the singular value decomposition (SVD) of a real
23028 N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P'
23029 denotes the transpose of P), where S is a diagonal matrix with
23030 non-negative diagonal elements (the singular values of B), and Q
23031 and P are orthogonal matrices.
23032
23033 The routine computes S, and optionally computes U * Q, P' * VT,
23034 or Q' * C, for given real input matrices U, VT, and C.
23035
23036 See "Computing Small Singular Values of Bidiagonal Matrices With
23037 Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
23038 LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11,
23039 no. 5, pp. 873-912, Sept 1990) and
23040 "Accurate singular values and differential qd algorithms," by
23041 B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics
23042 Department, University of California at Berkeley, July 1992
23043 for a detailed description of the algorithm.
23044
23045 Arguments
23046 =========
23047
23048 UPLO (input) CHARACTER*1
23049 = 'U': B is upper bidiagonal;
23050 = 'L': B is lower bidiagonal.
23051
23052 N (input) INTEGER
23053 The order of the matrix B. N >= 0.
23054
23055 NCVT (input) INTEGER
23056 The number of columns of the matrix VT. NCVT >= 0.
23057
23058 NRU (input) INTEGER
23059 The number of rows of the matrix U. NRU >= 0.
23060
23061 NCC (input) INTEGER
23062 The number of columns of the matrix C. NCC >= 0.
23063
23064 D (input/output) REAL array, dimension (N)
23065 On entry, the n diagonal elements of the bidiagonal matrix B.
23066 On exit, if INFO=0, the singular values of B in decreasing
23067 order.
23068
23069 E (input/output) REAL array, dimension (N)
23070 On entry, the elements of E contain the
23071 offdiagonal elements of the bidiagonal matrix whose SVD
23072 is desired. On normal exit (INFO = 0), E is destroyed.
23073 If the algorithm does not converge (INFO > 0), D and E
23074 will contain the diagonal and superdiagonal elements of a
23075 bidiagonal matrix orthogonally equivalent to the one given
23076 as input. E(N) is used for workspace.
23077
23078 VT (input/output) REAL array, dimension (LDVT, NCVT)
23079 On entry, an N-by-NCVT matrix VT.
23080 On exit, VT is overwritten by P' * VT.
23081 VT is not referenced if NCVT = 0.
23082
23083 LDVT (input) INTEGER
23084 The leading dimension of the array VT.
23085 LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0.
23086
23087 U (input/output) REAL array, dimension (LDU, N)
23088 On entry, an NRU-by-N matrix U.
23089 On exit, U is overwritten by U * Q.
23090 U is not referenced if NRU = 0.
23091
23092 LDU (input) INTEGER
23093 The leading dimension of the array U. LDU >= max(1,NRU).
23094
23095 C (input/output) REAL array, dimension (LDC, NCC)
23096 On entry, an N-by-NCC matrix C.
23097 On exit, C is overwritten by Q' * C.
23098 C is not referenced if NCC = 0.
23099
23100 LDC (input) INTEGER
23101 The leading dimension of the array C.
23102 LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0.
23103
23104 WORK (workspace) REAL array, dimension (4*N)
23105
23106 INFO (output) INTEGER
23107 = 0: successful exit
23108 < 0: If INFO = -i, the i-th argument had an illegal value
23109 > 0: the algorithm did not converge; D and E contain the
23110 elements of a bidiagonal matrix which is orthogonally
23111 similar to the input matrix B; if INFO = i, i
23112 elements of E have not converged to zero.
23113
23114 Internal Parameters
23115 ===================
23116
23117 TOLMUL REAL, default = max(10,min(100,EPS**(-1/8)))
23118 TOLMUL controls the convergence criterion of the QR loop.
23119 If it is positive, TOLMUL*EPS is the desired relative
23120 precision in the computed singular values.
23121 If it is negative, abs(TOLMUL*EPS*sigma_max) is the
23122 desired absolute accuracy in the computed singular
23123 values (corresponds to relative accuracy
23124 abs(TOLMUL*EPS) in the largest singular value.
23125 abs(TOLMUL) should be between 1 and 1/EPS, and preferably
23126 between 10 (for fast convergence) and .1/EPS
23127 (for there to be some accuracy in the results).
23128 Default is to lose at either one eighth or 2 of the
23129 available decimal digits in each computed singular value
23130 (whichever is smaller).
23131
23132 MAXITR INTEGER, default = 6
23133 MAXITR controls the maximum number of passes of the
23134 algorithm through its inner loop. The algorithms stops
23135 (and so fails to converge) if the number of passes
23136 through the inner loop exceeds MAXITR*N**2.
23137
23138 =====================================================================
23139
23140
23141 Test the input parameters.
23142
23143 Parameter adjustments */
23144 --d__;
23145 --e;
23146 vt_dim1 = *ldvt;
23147 vt_offset = 1 + vt_dim1 * 1;
23148 vt -= vt_offset;
23149 u_dim1 = *ldu;
23150 u_offset = 1 + u_dim1 * 1;
23151 u -= u_offset;
23152 c_dim1 = *ldc;
23153 c_offset = 1 + c_dim1 * 1;
23154 c__ -= c_offset;
23155 --work;
23156
23157 /* Function Body */
23158 *info = 0;
23159 lower = lsame_(uplo, "L");
23160 if (! lsame_(uplo, "U") && ! lower) {
23161 *info = -1;
23162 } else if (*n < 0) {
23163 *info = -2;
23164 } else if (*ncvt < 0) {
23165 *info = -3;
23166 } else if (*nru < 0) {
23167 *info = -4;
23168 } else if (*ncc < 0) {
23169 *info = -5;
23170 } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < f2cmax(1,*n)) {
23171 *info = -9;
23172 } else if (*ldu < f2cmax(1,*nru)) {
23173 *info = -11;
23174 } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < f2cmax(1,*n)) {
23175 *info = -13;
23176 }
23177 if (*info != 0) {
23178 i__1 = -(*info);
23179 xerbla_("SBDSQR", &i__1);
23180 return 0;
23181 }
23182 if (*n == 0) {
23183 return 0;
23184 }
23185 if (*n == 1) {
23186 goto L160;
23187 }
23188
23189 /* ROTATE is true if any singular vectors desired, false otherwise */
23190
23191 rotate = *ncvt > 0 || *nru > 0 || *ncc > 0;
23192
23193 /* If no singular vectors desired, use qd algorithm */
23194
23195 if (! rotate) {
23196 slasq1_(n, &d__[1], &e[1], &work[1], info);
23197 return 0;
23198 }
23199
23200 nm1 = *n - 1;
23201 nm12 = nm1 + nm1;
23202 nm13 = nm12 + nm1;
23203 idir = 0;
23204
23205 /* Get machine constants */
23206
23207 eps = slamch_("Epsilon");
23208 unfl = slamch_("Safe minimum");
23209
23210 /* If matrix lower bidiagonal, rotate to be upper bidiagonal
23211 by applying Givens rotations on the left */
23212
23213 if (lower) {
23214 i__1 = *n - 1;
23215 for (i__ = 1; i__ <= i__1; ++i__) {
23216 slartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
23217 d__[i__] = r__;
23218 e[i__] = sn * d__[i__ + 1];
23219 d__[i__ + 1] = cs * d__[i__ + 1];
23220 work[i__] = cs;
23221 work[nm1 + i__] = sn;
23222 /* L10: */
23223 }
23224
23225 /* Update singular vectors if desired */
23226
23227 if (*nru > 0) {
23228 slasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset],
23229 ldu);
23230 }
23231 if (*ncc > 0) {
23232 slasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset],
23233 ldc);
23234 }
23235 }
23236
23237 /* Compute singular values to relative accuracy TOL
23238 (By setting TOL to be negative, algorithm will compute
23239 singular values to absolute accuracy ABS(TOL)*norm(input matrix))
23240
23241 Computing MAX
23242 Computing MIN */
23243 d__1 = (doublereal) eps;
23244 //chao changed pow_dd to pow
23245 r__3 = 100.f, r__4 = pow(d__1, c_b15);
23246 r__1 = 10.f, r__2 = df2cmin(r__3,r__4);
23247 tolmul = df2cmax(r__1,r__2);
23248 tol = tolmul * eps;
23249
23250 /* Compute approximate maximum, minimum singular values */
23251
23252 smax = 0.f;
23253 i__1 = *n;
23254 for (i__ = 1; i__ <= i__1; ++i__) {
23255 /* Computing MAX */
23256 r__2 = smax, r__3 = (r__1 = d__[i__], dabs(r__1));
23257 smax = df2cmax(r__2,r__3);
23258 /* L20: */
23259 }
23260 i__1 = *n - 1;
23261 for (i__ = 1; i__ <= i__1; ++i__) {
23262 /* Computing MAX */
23263 r__2 = smax, r__3 = (r__1 = e[i__], dabs(r__1));
23264 smax = df2cmax(r__2,r__3);
23265 /* L30: */
23266 }
23267 sminl = 0.f;
23268 if (tol >= 0.f) {
23269
23270 /* Relative accuracy desired */
23271
23272 sminoa = dabs(d__[1]);
23273 if (sminoa == 0.f) {
23274 goto L50;
23275 }
23276 mu = sminoa;
23277 i__1 = *n;
23278 for (i__ = 2; i__ <= i__1; ++i__) {
23279 mu = (r__2 = d__[i__], dabs(r__2)) * (mu / (mu + (r__1 = e[i__ -
23280 1], dabs(r__1))));
23281 sminoa = df2cmin(sminoa,mu);
23282 if (sminoa == 0.f) {
23283 goto L50;
23284 }
23285 /* L40: */
23286 }
23287 L50:
23288 sminoa /= sqrt((real) (*n));
23289 /* Computing MAX */
23290 r__1 = tol * sminoa, r__2 = *n * 6 * *n * unfl;
23291 thresh = df2cmax(r__1,r__2);
23292 } else {
23293
23294 /* Absolute accuracy desired
23295
23296 Computing MAX */
23297 r__1 = dabs(tol) * smax, r__2 = *n * 6 * *n * unfl;
23298 thresh = df2cmax(r__1,r__2);
23299 }
23300
23301 /* Prepare for main iteration loop for the singular values
23302 (MAXIT is the maximum number of passes through the inner
23303 loop permitted before nonconvergence signalled.) */
23304
23305 maxit = *n * 6 * *n;
23306 iter = 0;
23307 oldll = -1;
23308 oldm = -1;
23309
23310 /* M points to last element of unconverged part of matrix */
23311
23312 m = *n;
23313
23314 /* Begin main iteration loop */
23315
23316 L60:
23317
23318 /* Check for convergence or exceeding iteration count */
23319
23320 if (m <= 1) {
23321 goto L160;
23322 }
23323 if (iter > maxit) {
23324 goto L200;
23325 }
23326
23327 /* Find diagonal block of matrix to work on */
23328
23329 if (tol < 0.f && (r__1 = d__[m], dabs(r__1)) <= thresh) {
23330 d__[m] = 0.f;
23331 }
23332 smax = (r__1 = d__[m], dabs(r__1));
23333 smin = smax;
23334 i__1 = m - 1;
23335 for (lll = 1; lll <= i__1; ++lll) {
23336 ll = m - lll;
23337 abss = (r__1 = d__[ll], dabs(r__1));
23338 abse = (r__1 = e[ll], dabs(r__1));
23339 if (tol < 0.f && abss <= thresh) {
23340 d__[ll] = 0.f;
23341 }
23342 if (abse <= thresh) {
23343 goto L80;
23344 }
23345 smin = df2cmin(smin,abss);
23346 /* Computing MAX */
23347 r__1 = f2cmax(smax,abss);
23348 smax = df2cmax(r__1,abse);
23349 /* L70: */
23350 }
23351 ll = 0;
23352 goto L90;
23353 L80:
23354 e[ll] = 0.f;
23355
23356 /* Matrix splits since E(LL) = 0 */
23357
23358 if (ll == m - 1) {
23359
23360 /* Convergence of bottom singular value, return to top of loop */
23361
23362 --m;
23363 goto L60;
23364 }
23365 L90:
23366 ++ll;
23367
23368 /* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */
23369
23370 if (ll == m - 1) {
23371
23372 /* 2 by 2 block, handle separately */
23373
23374 slasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr,
23375 &sinl, &cosl);
23376 d__[m - 1] = sigmx;
23377 e[m - 1] = 0.f;
23378 d__[m] = sigmn;
23379
23380 /* Compute singular vectors, if desired */
23381
23382 if (*ncvt > 0) {
23383 srot_(ncvt, &vt_ref(m - 1, 1), ldvt, &vt_ref(m, 1), ldvt, &cosr, &
23384 sinr);
23385 }
23386 if (*nru > 0) {
23387 srot_(nru, &u_ref(1, m - 1), &c__1, &u_ref(1, m), &c__1, &cosl, &
23388 sinl);
23389 }
23390 if (*ncc > 0) {
23391 srot_(ncc, &c___ref(m - 1, 1), ldc, &c___ref(m, 1), ldc, &cosl, &
23392 sinl);
23393 }
23394 m += -2;
23395 goto L60;
23396 }
23397
23398 /* If working on new submatrix, choose shift direction
23399 (from larger end diagonal element towards smaller) */
23400
23401 if (ll > oldm || m < oldll) {
23402 if ((r__1 = d__[ll], dabs(r__1)) >= (r__2 = d__[m], dabs(r__2))) {
23403
23404 /* Chase bulge from top (big end) to bottom (small end) */
23405
23406 idir = 1;
23407 } else {
23408
23409 /* Chase bulge from bottom (big end) to top (small end) */
23410
23411 idir = 2;
23412 }
23413 }
23414
23415 /* Apply convergence tests */
23416
23417 if (idir == 1) {
23418
23419 /* Run convergence test in forward direction
23420 First apply standard test to bottom of matrix */
23421
23422 if ((r__2 = e[m - 1], dabs(r__2)) <= dabs(tol) * (r__1 = d__[m], dabs(
23423 r__1)) || tol < 0.f && (r__3 = e[m - 1], dabs(r__3)) <=
23424 thresh) {
23425 e[m - 1] = 0.f;
23426 goto L60;
23427 }
23428
23429 if (tol >= 0.f) {
23430
23431 /* If relative accuracy desired,
23432 apply convergence criterion forward */
23433
23434 mu = (r__1 = d__[ll], dabs(r__1));
23435 sminl = mu;
23436 i__1 = m - 1;
23437 for (lll = ll; lll <= i__1; ++lll) {
23438 if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) {
23439 e[lll] = 0.f;
23440 goto L60;
23441 }
23442 sminlo = sminl;
23443 mu = (r__2 = d__[lll + 1], dabs(r__2)) * (mu / (mu + (r__1 =
23444 e[lll], dabs(r__1))));
23445 sminl = df2cmin(sminl,mu);
23446 /* L100: */
23447 }
23448 }
23449
23450 } else {
23451
23452 /* Run convergence test in backward direction
23453 First apply standard test to top of matrix */
23454
23455 if ((r__2 = e[ll], dabs(r__2)) <= dabs(tol) * (r__1 = d__[ll], dabs(
23456 r__1)) || tol < 0.f && (r__3 = e[ll], dabs(r__3)) <= thresh) {
23457 e[ll] = 0.f;
23458 goto L60;
23459 }
23460
23461 if (tol >= 0.f) {
23462
23463 /* If relative accuracy desired,
23464 apply convergence criterion backward */
23465
23466 mu = (r__1 = d__[m], dabs(r__1));
23467 sminl = mu;
23468 i__1 = ll;
23469 for (lll = m - 1; lll >= i__1; --lll) {
23470 if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) {
23471 e[lll] = 0.f;
23472 goto L60;
23473 }
23474 sminlo = sminl;
23475 mu = (r__2 = d__[lll], dabs(r__2)) * (mu / (mu + (r__1 = e[
23476 lll], dabs(r__1))));
23477 sminl = df2cmin(sminl,mu);
23478 /* L110: */
23479 }
23480 }
23481 }
23482 oldll = ll;
23483 oldm = m;
23484
23485 /* Compute shift. First, test if shifting would ruin relative
23486 accuracy, and if so set the shift to zero.
23487
23488 Computing MAX */
23489 r__1 = eps, r__2 = tol * .01f;
23490 if (tol >= 0.f && *n * tol * (sminl / smax) <= df2cmax(r__1,r__2)) {
23491
23492 /* Use a zero shift to avoid loss of relative accuracy */
23493
23494 shift = 0.f;
23495 } else {
23496
23497 /* Compute the shift from 2-by-2 block at end of matrix */
23498
23499 if (idir == 1) {
23500 sll = (r__1 = d__[ll], dabs(r__1));
23501 slas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__);
23502 } else {
23503 sll = (r__1 = d__[m], dabs(r__1));
23504 slas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__);
23505 }
23506
23507 /* Test if shift negligible, and if so set to zero */
23508
23509 if (sll > 0.f) {
23510 /* Computing 2nd power */
23511 r__1 = shift / sll;
23512 if (r__1 * r__1 < eps) {
23513 shift = 0.f;
23514 }
23515 }
23516 }
23517
23518 /* Increment iteration count */
23519
23520 iter = iter + m - ll;
23521
23522 /* If SHIFT = 0, do simplified QR iteration */
23523
23524 if (shift == 0.f) {
23525 if (idir == 1) {
23526
23527 /* Chase bulge from top to bottom
23528 Save cosines and sines for later singular vector updates */
23529
23530 cs = 1.f;
23531 oldcs = 1.f;
23532 i__1 = m - 1;
23533 for (i__ = ll; i__ <= i__1; ++i__) {
23534 r__1 = d__[i__] * cs;
23535 slartg_(&r__1, &e[i__], &cs, &sn, &r__);
23536 if (i__ > ll) {
23537 e[i__ - 1] = oldsn * r__;
23538 }
23539 r__1 = oldcs * r__;
23540 r__2 = d__[i__ + 1] * sn;
23541 slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]);
23542 work[i__ - ll + 1] = cs;
23543 work[i__ - ll + 1 + nm1] = sn;
23544 work[i__ - ll + 1 + nm12] = oldcs;
23545 work[i__ - ll + 1 + nm13] = oldsn;
23546 /* L120: */
23547 }
23548 h__ = d__[m] * cs;
23549 d__[m] = h__ * oldcs;
23550 e[m - 1] = h__ * oldsn;
23551
23552 /* Update singular vectors */
23553
23554 if (*ncvt > 0) {
23555 i__1 = m - ll + 1;
23556 slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &
23557 vt_ref(ll, 1), ldvt);
23558 }
23559 if (*nru > 0) {
23560 i__1 = m - ll + 1;
23561 slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13
23562 + 1], &u_ref(1, ll), ldu);
23563 }
23564 if (*ncc > 0) {
23565 i__1 = m - ll + 1;
23566 slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13
23567 + 1], &c___ref(ll, 1), ldc);
23568 }
23569
23570 /* Test convergence */
23571
23572 if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) {
23573 e[m - 1] = 0.f;
23574 }
23575
23576 } else {
23577
23578 /* Chase bulge from bottom to top
23579 Save cosines and sines for later singular vector updates */
23580
23581 cs = 1.f;
23582 oldcs = 1.f;
23583 i__1 = ll + 1;
23584 for (i__ = m; i__ >= i__1; --i__) {
23585 r__1 = d__[i__] * cs;
23586 slartg_(&r__1, &e[i__ - 1], &cs, &sn, &r__);
23587 if (i__ < m) {
23588 e[i__] = oldsn * r__;
23589 }
23590 r__1 = oldcs * r__;
23591 r__2 = d__[i__ - 1] * sn;
23592 slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]);
23593 work[i__ - ll] = cs;
23594 work[i__ - ll + nm1] = -sn;
23595 work[i__ - ll + nm12] = oldcs;
23596 work[i__ - ll + nm13] = -oldsn;
23597 /* L130: */
23598 }
23599 h__ = d__[ll] * cs;
23600 d__[ll] = h__ * oldcs;
23601 e[ll] = h__ * oldsn;
23602
23603 /* Update singular vectors */
23604
23605 if (*ncvt > 0) {
23606 i__1 = m - ll + 1;
23607 slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[
23608 nm13 + 1], &vt_ref(ll, 1), ldvt);
23609 }
23610 if (*nru > 0) {
23611 i__1 = m - ll + 1;
23612 slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u_ref(
23613 1, ll), ldu);
23614 }
23615 if (*ncc > 0) {
23616 i__1 = m - ll + 1;
23617 slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &
23618 c___ref(ll, 1), ldc);
23619 }
23620
23621 /* Test convergence */
23622
23623 if ((r__1 = e[ll], dabs(r__1)) <= thresh) {
23624 e[ll] = 0.f;
23625 }
23626 }
23627 } else {
23628
23629 /* Use nonzero shift */
23630
23631 if (idir == 1) {
23632
23633 /* Chase bulge from top to bottom
23634 Save cosines and sines for later singular vector updates */
23635
23636 f = ((r__1 = d__[ll], dabs(r__1)) - shift) * (r_sign(&c_b49, &d__[
23637 ll]) + shift / d__[ll]);
23638 g = e[ll];
23639 i__1 = m - 1;
23640 for (i__ = ll; i__ <= i__1; ++i__) {
23641 slartg_(&f, &g, &cosr, &sinr, &r__);
23642 if (i__ > ll) {
23643 e[i__ - 1] = r__;
23644 }
23645 f = cosr * d__[i__] + sinr * e[i__];
23646 e[i__] = cosr * e[i__] - sinr * d__[i__];
23647 g = sinr * d__[i__ + 1];
23648 d__[i__ + 1] = cosr * d__[i__ + 1];
23649 slartg_(&f, &g, &cosl, &sinl, &r__);
23650 d__[i__] = r__;
23651 f = cosl * e[i__] + sinl * d__[i__ + 1];
23652 d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__];
23653 if (i__ < m - 1) {
23654 g = sinl * e[i__ + 1];
23655 e[i__ + 1] = cosl * e[i__ + 1];
23656 }
23657 work[i__ - ll + 1] = cosr;
23658 work[i__ - ll + 1 + nm1] = sinr;
23659 work[i__ - ll + 1 + nm12] = cosl;
23660 work[i__ - ll + 1 + nm13] = sinl;
23661 /* L140: */
23662 }
23663 e[m - 1] = f;
23664
23665 /* Update singular vectors */
23666
23667 if (*ncvt > 0) {
23668 i__1 = m - ll + 1;
23669 slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &
23670 vt_ref(ll, 1), ldvt);
23671 }
23672 if (*nru > 0) {
23673 i__1 = m - ll + 1;
23674 slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13
23675 + 1], &u_ref(1, ll), ldu);
23676 }
23677 if (*ncc > 0) {
23678 i__1 = m - ll + 1;
23679 slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13
23680 + 1], &c___ref(ll, 1), ldc);
23681 }
23682
23683 /* Test convergence */
23684
23685 if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) {
23686 e[m - 1] = 0.f;
23687 }
23688
23689 } else {
23690
23691 /* Chase bulge from bottom to top
23692 Save cosines and sines for later singular vector updates */
23693
23694 f = ((r__1 = d__[m], dabs(r__1)) - shift) * (r_sign(&c_b49, &d__[
23695 m]) + shift / d__[m]);
23696 g = e[m - 1];
23697 i__1 = ll + 1;
23698 for (i__ = m; i__ >= i__1; --i__) {
23699 slartg_(&f, &g, &cosr, &sinr, &r__);
23700 if (i__ < m) {
23701 e[i__] = r__;
23702 }
23703 f = cosr * d__[i__] + sinr * e[i__ - 1];
23704 e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__];
23705 g = sinr * d__[i__ - 1];
23706 d__[i__ - 1] = cosr * d__[i__ - 1];
23707 slartg_(&f, &g, &cosl, &sinl, &r__);
23708 d__[i__] = r__;
23709 f = cosl * e[i__ - 1] + sinl * d__[i__ - 1];
23710 d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1];
23711 if (i__ > ll + 1) {
23712 g = sinl * e[i__ - 2];
23713 e[i__ - 2] = cosl * e[i__ - 2];
23714 }
23715 work[i__ - ll] = cosr;
23716 work[i__ - ll + nm1] = -sinr;
23717 work[i__ - ll + nm12] = cosl;
23718 work[i__ - ll + nm13] = -sinl;
23719 /* L150: */
23720 }
23721 e[ll] = f;
23722
23723 /* Test convergence */
23724
23725 if ((r__1 = e[ll], dabs(r__1)) <= thresh) {
23726 e[ll] = 0.f;
23727 }
23728
23729 /* Update singular vectors if desired */
23730
23731 if (*ncvt > 0) {
23732 i__1 = m - ll + 1;
23733 slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[
23734 nm13 + 1], &vt_ref(ll, 1), ldvt);
23735 }
23736 if (*nru > 0) {
23737 i__1 = m - ll + 1;
23738 slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u_ref(
23739 1, ll), ldu);
23740 }
23741 if (*ncc > 0) {
23742 i__1 = m - ll + 1;
23743 slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &
23744 c___ref(ll, 1), ldc);
23745 }
23746 }
23747 }
23748
23749 /* QR iteration finished, go back and check convergence */
23750
23751 goto L60;
23752
23753 /* All singular values converged, so make them positive */
23754
23755 L160:
23756 i__1 = *n;
23757 for (i__ = 1; i__ <= i__1; ++i__) {
23758 if (d__[i__] < 0.f) {
23759 d__[i__] = -d__[i__];
23760
23761 /* Change sign of singular vectors, if desired */
23762
23763 if (*ncvt > 0) {
23764 sscal_(ncvt, &c_b72, &vt_ref(i__, 1), ldvt);
23765 }
23766 }
23767 /* L170: */
23768 }
23769
23770 /* Sort the singular values into decreasing order (insertion sort on
23771 singular values, but only one transposition per singular vector) */
23772
23773 i__1 = *n - 1;
23774 for (i__ = 1; i__ <= i__1; ++i__) {
23775
23776 /* Scan for smallest D(I) */
23777
23778 isub = 1;
23779 smin = d__[1];
23780 i__2 = *n + 1 - i__;
23781 for (j = 2; j <= i__2; ++j) {
23782 if (d__[j] <= smin) {
23783 isub = j;
23784 smin = d__[j];
23785 }
23786 /* L180: */
23787 }
23788 if (isub != *n + 1 - i__) {
23789
23790 /* Swap singular values and vectors */
23791
23792 d__[isub] = d__[*n + 1 - i__];
23793 d__[*n + 1 - i__] = smin;
23794 if (*ncvt > 0) {
23795 sswap_(ncvt, &vt_ref(isub, 1), ldvt, &vt_ref(*n + 1 - i__, 1),
23796 ldvt);
23797 }
23798 if (*nru > 0) {
23799 sswap_(nru, &u_ref(1, isub), &c__1, &u_ref(1, *n + 1 - i__), &
23800 c__1);
23801 }
23802 if (*ncc > 0) {
23803 sswap_(ncc, &c___ref(isub, 1), ldc, &c___ref(*n + 1 - i__, 1),
23804 ldc);
23805 }
23806 }
23807 /* L190: */
23808 }
23809 goto L220;
23810
23811 /* Maximum number of iterations exceeded, failure to converge */
23812
23813 L200:
23814 *info = 0;
23815 i__1 = *n - 1;
23816 for (i__ = 1; i__ <= i__1; ++i__) {
23817 if (e[i__] != 0.f) {
23818 ++(*info);
23819 }
23820 /* L210: */
23821 }
23822 L220:
23823 return 0;
23824
23825 /* End of SBDSQR */
23826
23827 } /* sbdsqr_ */
|
|
||||||||||||||||||||||||
|
Definition at line 1163 of file lapackblas.cpp. References integer. Referenced by EMAN::PCA::Lanczos_ooc(), slaed0_(), slaed1_(), slaed2_(), slaed3_(), slaed8_(), slaed9_(), slaeda_(), slarfb_(), and slasq1_(). 01165 {
01166 /* System generated locals */
01167 integer i__1;
01168 /* Local variables */
01169 static integer i__, m, ix, iy, mp1;
01170 /* copies a vector, x, to a vector, y.
01171 uses unrolled loops for increments equal to 1.
01172 jack dongarra, linpack, 3/11/78.
01173 modified 12/3/93, array(1) declarations changed to array(*)
01174 Parameter adjustments */
01175 --sy;
01176 --sx;
01177 /* Function Body */
01178 if (*n <= 0) {
01179 return 0;
01180 }
01181 if (*incx == 1 && *incy == 1) {
01182 goto L20;
01183 }
01184 /* code for unequal increments or equal increments
01185 not equal to 1 */
01186 ix = 1;
01187 iy = 1;
01188 if (*incx < 0) {
01189 ix = (-(*n) + 1) * *incx + 1;
01190 }
01191 if (*incy < 0) {
01192 iy = (-(*n) + 1) * *incy + 1;
01193 }
01194 i__1 = *n;
01195 for (i__ = 1; i__ <= i__1; ++i__) {
01196 sy[iy] = sx[ix];
01197 ix += *incx;
01198 iy += *incy;
01199 /* L10: */
01200 }
01201 return 0;
01202 /* code for both increments equal to 1
01203 clean-up loop */
01204 L20:
01205 m = *n % 7;
01206 if (m == 0) {
01207 goto L40;
01208 }
01209 i__1 = m;
01210 for (i__ = 1; i__ <= i__1; ++i__) {
01211 sy[i__] = sx[i__];
01212 /* L30: */
01213 }
01214 if (*n < 7) {
01215 return 0;
01216 }
01217 L40:
01218 mp1 = m + 1;
01219 i__1 = *n;
01220 for (i__ = mp1; i__ <= i__1; i__ += 7) {
01221 sy[i__] = sx[i__];
01222 sy[i__ + 1] = sx[i__ + 1];
01223 sy[i__ + 2] = sx[i__ + 2];
01224 sy[i__ + 3] = sx[i__ + 3];
01225 sy[i__ + 4] = sx[i__ + 4];
01226 sy[i__ + 5] = sx[i__ + 5];
01227 sy[i__ + 6] = sx[i__ + 6];
01228 /* L50: */
01229 }
01230 return 0;
01231 } /* scopy_ */
|
|
||||||||||||||||||||||||
|
Definition at line 1236 of file lapackblas.cpp. References doublereal, integer, and real. Referenced by EMAN::PCAlarge::Lanczos(), EMAN::PCA::Lanczos(), EMAN::PCA::Lanczos_ooc(), slatrd_(), and ssytd2_(). 01237 {
01238 /* System generated locals */
01239 integer i__1;
01240 real ret_val;
01241 /* Local variables */
01242 static integer i__, m;
01243 static real stemp;
01244 static integer ix, iy, mp1;
01245 /* forms the dot product of two vectors.
01246 uses unrolled loops for increments equal to one.
01247 jack dongarra, linpack, 3/11/78.
01248 modified 12/3/93, array(1) declarations changed to array(*)
01249 Parameter adjustments */
01250 --sy;
01251 --sx;
01252 /* Function Body */
01253 stemp = 0.f;
01254 ret_val = 0.f;
01255 if (*n <= 0) {
01256 return ret_val;
01257 }
01258 if (*incx == 1 && *incy == 1) {
01259 goto L20;
01260 }
01261 /* code for unequal increments or equal increments
01262 not equal to 1 */
01263 ix = 1;
01264 iy = 1;
01265 if (*incx < 0) {
01266 ix = (-(*n) + 1) * *incx + 1;
01267 }
01268 if (*incy < 0) {
01269 iy = (-(*n) + 1) * *incy + 1;
01270 }
01271 i__1 = *n;
01272 for (i__ = 1; i__ <= i__1; ++i__) {
01273 stemp += sx[ix] * sy[iy];
01274 ix += *incx;
01275 iy += *incy;
01276 /* L10: */
01277 }
01278 ret_val = stemp;
01279 return ret_val;
01280 /* code for both increments equal to 1
01281 clean-up loop */
01282 L20:
01283 m = *n % 5;
01284 if (m == 0) {
01285 goto L40;
01286 }
01287 i__1 = m;
01288 for (i__ = 1; i__ <= i__1; ++i__) {
01289 stemp += sx[i__] * sy[i__];
01290 /* L30: */
01291 }
01292 if (*n < 5) {
01293 goto L60;
01294 }
01295 L40:
01296 mp1 = m + 1;
01297 i__1 = *n;
01298 for (i__ = mp1; i__ <= i__1; i__ += 5) {
01299 stemp = stemp + sx[i__] * sy[i__] + sx[i__ + 1] * sy[i__ + 1] + sx[
01300 i__ + 2] * sy[i__ + 2] + sx[i__ + 3] * sy[i__ + 3] + sx[i__ +
01301 4] * sy[i__ + 4];
01302 /* L50: */
01303 }
01304 L60:
01305 ret_val = stemp;
01306 return ret_val;
01307 } /* sdot_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 21343 of file lapackblas.cpp. References a_ref, c__1, f2cmax, f2cmin, integer, real, slarf_(), slarfg_(), and xerbla_(). Referenced by sgebrd_(). 21345 {
21346 /* -- LAPACK routine (version 3.0) --
21347 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
21348 Courant Institute, Argonne National Lab, and Rice University
21349 February 29, 1992
21350
21351
21352 Purpose
21353 =======
21354
21355 SGEBD2 reduces a real general m by n matrix A to upper or lower
21356 bidiagonal form B by an orthogonal transformation: Q' * A * P = B.
21357
21358 If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
21359
21360 Arguments
21361 =========
21362
21363 M (input) INTEGER
21364 The number of rows in the matrix A. M >= 0.
21365
21366 N (input) INTEGER
21367 The number of columns in the matrix A. N >= 0.
21368
21369 A (input/output) REAL array, dimension (LDA,N)
21370 On entry, the m by n general matrix to be reduced.
21371 On exit,
21372 if m >= n, the diagonal and the first superdiagonal are
21373 overwritten with the upper bidiagonal matrix B; the
21374 elements below the diagonal, with the array TAUQ, represent
21375 the orthogonal matrix Q as a product of elementary
21376 reflectors, and the elements above the first superdiagonal,
21377 with the array TAUP, represent the orthogonal matrix P as
21378 a product of elementary reflectors;
21379 if m < n, the diagonal and the first subdiagonal are
21380 overwritten with the lower bidiagonal matrix B; the
21381 elements below the first subdiagonal, with the array TAUQ,
21382 represent the orthogonal matrix Q as a product of
21383 elementary reflectors, and the elements above the diagonal,
21384 with the array TAUP, represent the orthogonal matrix P as
21385 a product of elementary reflectors.
21386 See Further Details.
21387
21388 LDA (input) INTEGER
21389 The leading dimension of the array A. LDA >= max(1,M).
21390
21391 D (output) REAL array, dimension (min(M,N))
21392 The diagonal elements of the bidiagonal matrix B:
21393 D(i) = A(i,i).
21394
21395 E (output) REAL array, dimension (min(M,N)-1)
21396 The off-diagonal elements of the bidiagonal matrix B:
21397 if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
21398 if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
21399
21400 TAUQ (output) REAL array dimension (min(M,N))
21401 The scalar factors of the elementary reflectors which
21402 represent the orthogonal matrix Q. See Further Details.
21403
21404 TAUP (output) REAL array, dimension (min(M,N))
21405 The scalar factors of the elementary reflectors which
21406 represent the orthogonal matrix P. See Further Details.
21407
21408 WORK (workspace) REAL array, dimension (max(M,N))
21409
21410 INFO (output) INTEGER
21411 = 0: successful exit.
21412 < 0: if INFO = -i, the i-th argument had an illegal value.
21413
21414 Further Details
21415 ===============
21416
21417 The matrices Q and P are represented as products of elementary
21418 reflectors:
21419
21420 If m >= n,
21421
21422 Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
21423
21424 Each H(i) and G(i) has the form:
21425
21426 H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
21427
21428 where tauq and taup are real scalars, and v and u are real vectors;
21429 v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
21430 u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
21431 tauq is stored in TAUQ(i) and taup in TAUP(i).
21432
21433 If m < n,
21434
21435 Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
21436
21437 Each H(i) and G(i) has the form:
21438
21439 H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
21440
21441 where tauq and taup are real scalars, and v and u are real vectors;
21442 v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
21443 u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
21444 tauq is stored in TAUQ(i) and taup in TAUP(i).
21445
21446 The contents of A on exit are illustrated by the following examples:
21447
21448 m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
21449
21450 ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
21451 ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
21452 ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
21453 ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
21454 ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
21455 ( v1 v2 v3 v4 v5 )
21456
21457 where d and e denote diagonal and off-diagonal elements of B, vi
21458 denotes an element of the vector defining H(i), and ui an element of
21459 the vector defining G(i).
21460
21461 =====================================================================
21462
21463
21464 Test the input parameters
21465
21466 Parameter adjustments */
21467 /* Table of constant values */
21468 static integer c__1 = 1;
21469
21470 /* System generated locals */
21471 integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
21472 /* Local variables */
21473 static integer i__;
21474 extern /* Subroutine */ int slarf_(const char *, integer *, integer *, real *,
21475 integer *, real *, real *, integer *, real *), xerbla_(
21476 const char *, integer *), slarfg_(integer *, real *, real *,
21477 integer *, real *);
21478 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
21479
21480
21481 a_dim1 = *lda;
21482 a_offset = 1 + a_dim1 * 1;
21483 a -= a_offset;
21484 --d__;
21485 --e;
21486 --tauq;
21487 --taup;
21488 --work;
21489
21490 /* Function Body */
21491 *info = 0;
21492 if (*m < 0) {
21493 *info = -1;
21494 } else if (*n < 0) {
21495 *info = -2;
21496 } else if (*lda < f2cmax(1,*m)) {
21497 *info = -4;
21498 }
21499 if (*info < 0) {
21500 i__1 = -(*info);
21501 xerbla_("SGEBD2", &i__1);
21502 return 0;
21503 }
21504
21505 if (*m >= *n) {
21506
21507 /* Reduce to upper bidiagonal form */
21508
21509 i__1 = *n;
21510 for (i__ = 1; i__ <= i__1; ++i__) {
21511
21512 /* Generate elementary reflector H(i) to annihilate A(i+1:m,i)
21513
21514 Computing MIN */
21515 i__2 = i__ + 1;
21516 i__3 = *m - i__ + 1;
21517 slarfg_(&i__3, &a_ref(i__, i__), &a_ref(f2cmin(i__2,*m), i__), &c__1,
21518 &tauq[i__]);
21519 d__[i__] = a_ref(i__, i__);
21520 a_ref(i__, i__) = 1.f;
21521
21522 /* Apply H(i) to A(i:m,i+1:n) from the left */
21523
21524 i__2 = *m - i__ + 1;
21525 i__3 = *n - i__;
21526 slarf_("Left", &i__2, &i__3, &a_ref(i__, i__), &c__1, &tauq[i__],
21527 &a_ref(i__, i__ + 1), lda, &work[1]);
21528 a_ref(i__, i__) = d__[i__];
21529
21530 if (i__ < *n) {
21531
21532 /* Generate elementary reflector G(i) to annihilate
21533 A(i,i+2:n)
21534
21535 Computing MIN */
21536 i__2 = i__ + 2;
21537 i__3 = *n - i__;
21538 slarfg_(&i__3, &a_ref(i__, i__ + 1), &a_ref(i__, f2cmin(i__2,*n))
21539 , lda, &taup[i__]);
21540 e[i__] = a_ref(i__, i__ + 1);
21541 a_ref(i__, i__ + 1) = 1.f;
21542
21543 /* Apply G(i) to A(i+1:m,i+1:n) from the right */
21544
21545 i__2 = *m - i__;
21546 i__3 = *n - i__;
21547 slarf_("Right", &i__2, &i__3, &a_ref(i__, i__ + 1), lda, &
21548 taup[i__], &a_ref(i__ + 1, i__ + 1), lda, &work[1]);
21549 a_ref(i__, i__ + 1) = e[i__];
21550 } else {
21551 taup[i__] = 0.f;
21552 }
21553 /* L10: */
21554 }
21555 } else {
21556
21557 /* Reduce to lower bidiagonal form */
21558
21559 i__1 = *m;
21560 for (i__ = 1; i__ <= i__1; ++i__) {
21561
21562 /* Generate elementary reflector G(i) to annihilate A(i,i+1:n)
21563
21564 Computing MIN */
21565 i__2 = i__ + 1;
21566 i__3 = *n - i__ + 1;
21567 slarfg_(&i__3, &a_ref(i__, i__), &a_ref(i__, f2cmin(i__2,*n)), lda, &
21568 taup[i__]);
21569 d__[i__] = a_ref(i__, i__);
21570 a_ref(i__, i__) = 1.f;
21571
21572 /* Apply G(i) to A(i+1:m,i:n) from the right
21573
21574 Computing MIN */
21575 i__2 = i__ + 1;
21576 i__3 = *m - i__;
21577 i__4 = *n - i__ + 1;
21578 slarf_("Right", &i__3, &i__4, &a_ref(i__, i__), lda, &taup[i__], &
21579 a_ref(f2cmin(i__2,*m), i__), lda, &work[1]);
21580 a_ref(i__, i__) = d__[i__];
21581
21582 if (i__ < *m) {
21583
21584 /* Generate elementary reflector H(i) to annihilate
21585 A(i+2:m,i)
21586
21587 Computing MIN */
21588 i__2 = i__ + 2;
21589 i__3 = *m - i__;
21590 slarfg_(&i__3, &a_ref(i__ + 1, i__), &a_ref(f2cmin(i__2,*m), i__)
21591 , &c__1, &tauq[i__]);
21592 e[i__] = a_ref(i__ + 1, i__);
21593 a_ref(i__ + 1, i__) = 1.f;
21594
21595 /* Apply H(i) to A(i+1:m,i+1:n) from the left */
21596
21597 i__2 = *m - i__;
21598 i__3 = *n - i__;
21599 slarf_("Left", &i__2, &i__3, &a_ref(i__ + 1, i__), &c__1, &
21600 tauq[i__], &a_ref(i__ + 1, i__ + 1), lda, &work[1]);
21601 a_ref(i__ + 1, i__) = e[i__];
21602 } else {
21603 tauq[i__] = 0.f;
21604 }
21605 /* L20: */
21606 }
21607 }
21608 return 0;
21609
21610 /* End of SGEBD2 */
21611
21612 } /* sgebd2_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 21027 of file lapackblas.cpp. References a_ref, c__1, c__2, c__3, c_n1, f2cmax, f2cmin, ftnlen, ilaenv_(), integer, logical, nx, real, sgebd2_(), sgemm_(), slabrd_(), and xerbla_(). Referenced by sgesvd_(). 21030 {
21031 /* -- LAPACK routine (version 3.0) --
21032 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
21033 Courant Institute, Argonne National Lab, and Rice University
21034 June 30, 1999
21035
21036
21037 Purpose
21038 =======
21039
21040 SGEBRD reduces a general real M-by-N matrix A to upper or lower
21041 bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
21042
21043 If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
21044
21045 Arguments
21046 =========
21047
21048 M (input) INTEGER
21049 The number of rows in the matrix A. M >= 0.
21050
21051 N (input) INTEGER
21052 The number of columns in the matrix A. N >= 0.
21053
21054 A (input/output) REAL array, dimension (LDA,N)
21055 On entry, the M-by-N general matrix to be reduced.
21056 On exit,
21057 if m >= n, the diagonal and the first superdiagonal are
21058 overwritten with the upper bidiagonal matrix B; the
21059 elements below the diagonal, with the array TAUQ, represent
21060 the orthogonal matrix Q as a product of elementary
21061 reflectors, and the elements above the first superdiagonal,
21062 with the array TAUP, represent the orthogonal matrix P as
21063 a product of elementary reflectors;
21064 if m < n, the diagonal and the first subdiagonal are
21065 overwritten with the lower bidiagonal matrix B; the
21066 elements below the first subdiagonal, with the array TAUQ,
21067 represent the orthogonal matrix Q as a product of
21068 elementary reflectors, and the elements above the diagonal,
21069 with the array TAUP, represent the orthogonal matrix P as
21070 a product of elementary reflectors.
21071 See Further Details.
21072
21073 LDA (input) INTEGER
21074 The leading dimension of the array A. LDA >= max(1,M).
21075
21076 D (output) REAL array, dimension (min(M,N))
21077 The diagonal elements of the bidiagonal matrix B:
21078 D(i) = A(i,i).
21079
21080 E (output) REAL array, dimension (min(M,N)-1)
21081 The off-diagonal elements of the bidiagonal matrix B:
21082 if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
21083 if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
21084
21085 TAUQ (output) REAL array dimension (min(M,N))
21086 The scalar factors of the elementary reflectors which
21087 represent the orthogonal matrix Q. See Further Details.
21088
21089 TAUP (output) REAL array, dimension (min(M,N))
21090 The scalar factors of the elementary reflectors which
21091 represent the orthogonal matrix P. See Further Details.
21092
21093 WORK (workspace/output) REAL array, dimension (LWORK)
21094 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
21095
21096 LWORK (input) INTEGER
21097 The length of the array WORK. LWORK >= max(1,M,N).
21098 For optimum performance LWORK >= (M+N)*NB, where NB
21099 is the optimal blocksize.
21100
21101 If LWORK = -1, then a workspace query is assumed; the routine
21102 only calculates the optimal size of the WORK array, returns
21103 this value as the first entry of the WORK array, and no error
21104 message related to LWORK is issued by XERBLA.
21105
21106 INFO (output) INTEGER
21107 = 0: successful exit
21108 < 0: if INFO = -i, the i-th argument had an illegal value.
21109
21110 Further Details
21111 ===============
21112
21113 The matrices Q and P are represented as products of elementary
21114 reflectors:
21115
21116 If m >= n,
21117
21118 Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
21119
21120 Each H(i) and G(i) has the form:
21121
21122 H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
21123
21124 where tauq and taup are real scalars, and v and u are real vectors;
21125 v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
21126 u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
21127 tauq is stored in TAUQ(i) and taup in TAUP(i).
21128
21129 If m < n,
21130
21131 Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
21132
21133 Each H(i) and G(i) has the form:
21134
21135 H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
21136
21137 where tauq and taup are real scalars, and v and u are real vectors;
21138 v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
21139 u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
21140 tauq is stored in TAUQ(i) and taup in TAUP(i).
21141
21142 The contents of A on exit are illustrated by the following examples:
21143
21144 m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
21145
21146 ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
21147 ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
21148 ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
21149 ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
21150 ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
21151 ( v1 v2 v3 v4 v5 )
21152
21153 where d and e denote diagonal and off-diagonal elements of B, vi
21154 denotes an element of the vector defining H(i), and ui an element of
21155 the vector defining G(i).
21156
21157 =====================================================================
21158
21159
21160 Test the input parameters
21161
21162 Parameter adjustments */
21163 /* Table of constant values */
21164 static integer c__1 = 1;
21165 static integer c_n1 = -1;
21166 static integer c__3 = 3;
21167 static integer c__2 = 2;
21168 static real c_b21 = -1.f;
21169 static real c_b22 = 1.f;
21170
21171 /* System generated locals */
21172 integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
21173 /* Local variables */
21174 static integer i__, j, nbmin, iinfo;
21175 extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *,
21176 integer *, real *, real *, integer *, real *, integer *, real *,
21177 real *, integer *);
21178 static integer minmn;
21179 extern /* Subroutine */ int sgebd2_(integer *, integer *, real *, integer
21180 *, real *, real *, real *, real *, real *, integer *);
21181 static integer nb, nx;
21182 extern /* Subroutine */ int slabrd_(integer *, integer *, integer *, real
21183 *, integer *, real *, real *, real *, real *, real *, integer *,
21184 real *, integer *);
21185 static real ws;
21186 extern /* Subroutine */ int xerbla_(const char *, integer *);
21187 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *,
21188 integer *, integer *, ftnlen, ftnlen);
21189 static integer ldwrkx, ldwrky, lwkopt;
21190 static logical lquery;
21191 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
21192
21193
21194 a_dim1 = *lda;
21195 a_offset = 1 + a_dim1 * 1;
21196 a -= a_offset;
21197 --d__;
21198 --e;
21199 --tauq;
21200 --taup;
21201 --work;
21202
21203 /* Function Body */
21204 *info = 0;
21205 /* Computing MAX */
21206 i__1 = 1, i__2 = ilaenv_(&c__1, "SGEBRD", " ", m, n, &c_n1, &c_n1, (
21207 ftnlen)6, (ftnlen)1);
21208 nb = f2cmax(i__1,i__2);
21209 lwkopt = (*m + *n) * nb;
21210 work[1] = (real) lwkopt;
21211 lquery = *lwork == -1;
21212 if (*m < 0) {
21213 *info = -1;
21214 } else if (*n < 0) {
21215 *info = -2;
21216 } else if (*lda < f2cmax(1,*m)) {
21217 *info = -4;
21218 } else /* if(complicated condition) */ {
21219 /* Computing MAX */
21220 i__1 = f2cmax(1,*m);
21221 if (*lwork < f2cmax(i__1,*n) && ! lquery) {
21222 *info = -10;
21223 }
21224 }
21225 if (*info < 0) {
21226 i__1 = -(*info);
21227 xerbla_("SGEBRD", &i__1);
21228 return 0;
21229 } else if (lquery) {
21230 return 0;
21231 }
21232
21233 /* Quick return if possible */
21234
21235 minmn = f2cmin(*m,*n);
21236 if (minmn == 0) {
21237 work[1] = 1.f;
21238 return 0;
21239 }
21240
21241 ws = (real) f2cmax(*m,*n);
21242 ldwrkx = *m;
21243 ldwrky = *n;
21244
21245 if (nb > 1 && nb < minmn) {
21246
21247 /* Set the crossover point NX.
21248
21249 Computing MAX */
21250 i__1 = nb, i__2 = ilaenv_(&c__3, "SGEBRD", " ", m, n, &c_n1, &c_n1, (
21251 ftnlen)6, (ftnlen)1);
21252 nx = f2cmax(i__1,i__2);
21253
21254 /* Determine when to switch from blocked to unblocked code. */
21255
21256 if (nx < minmn) {
21257 ws = (real) ((*m + *n) * nb);
21258 if ((real) (*lwork) < ws) {
21259
21260 /* Not enough work space for the optimal NB, consider using
21261 a smaller block size. */
21262
21263 nbmin = ilaenv_(&c__2, "SGEBRD", " ", m, n, &c_n1, &c_n1, (
21264 ftnlen)6, (ftnlen)1);
21265 if (*lwork >= (*m + *n) * nbmin) {
21266 nb = *lwork / (*m + *n);
21267 } else {
21268 nb = 1;
21269 nx = minmn;
21270 }
21271 }
21272 }
21273 } else {
21274 nx = minmn;
21275 }
21276
21277 i__1 = minmn - nx;
21278 i__2 = nb;
21279 for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
21280
21281 /* Reduce rows and columns i:i+nb-1 to bidiagonal form and return
21282 the matrices X and Y which are needed to update the unreduced
21283 part of the matrix */
21284
21285 i__3 = *m - i__ + 1;
21286 i__4 = *n - i__ + 1;
21287 slabrd_(&i__3, &i__4, &nb, &a_ref(i__, i__), lda, &d__[i__], &e[i__],
21288 &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx * nb
21289 + 1], &ldwrky);
21290
21291 /* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
21292 of the form A := A - V*Y' - X*U' */
21293
21294 i__3 = *m - i__ - nb + 1;
21295 i__4 = *n - i__ - nb + 1;
21296 sgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b21, &a_ref(
21297 i__ + nb, i__), lda, &work[ldwrkx * nb + nb + 1], &ldwrky, &
21298 c_b22, &a_ref(i__ + nb, i__ + nb), lda)
21299 ;
21300 i__3 = *m - i__ - nb + 1;
21301 i__4 = *n - i__ - nb + 1;
21302 sgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b21, &
21303 work[nb + 1], &ldwrkx, &a_ref(i__, i__ + nb), lda, &c_b22, &
21304 a_ref(i__ + nb, i__ + nb), lda);
21305
21306 /* Copy diagonal and off-diagonal elements of B back into A */
21307
21308 if (*m >= *n) {
21309 i__3 = i__ + nb - 1;
21310 for (j = i__; j <= i__3; ++j) {
21311 a_ref(j, j) = d__[j];
21312 a_ref(j, j + 1) = e[j];
21313 /* L10: */
21314 }
21315 } else {
21316 i__3 = i__ + nb - 1;
21317 for (j = i__; j <= i__3; ++j) {
21318 a_ref(j, j) = d__[j];
21319 a_ref(j + 1, j) = e[j];
21320 /* L20: */
21321 }
21322 }
21323 /* L30: */
21324 }
21325
21326 /* Use unblocked code to reduce the remainder of the matrix */
21327
21328 i__2 = *m - i__ + 1;
21329 i__1 = *n - i__ + 1;
21330 sgebd2_(&i__2, &i__1, &a_ref(i__, i__), lda, &d__[i__], &e[i__], &tauq[
21331 i__], &taup[i__], &work[1], &iinfo);
21332 work[1] = ws;
21333 return 0;
21334
21335 /* End of SGEBRD */
21336
21337 } /* sgebrd_ */
|
|
||||||||||||||||||||||||||||||||
|
Definition at line 22808 of file lapackblas.cpp. References a_ref, f2cmax, f2cmin, integer, real, slarf_(), slarfg_(), and xerbla_(). Referenced by sgelqf_(). 22810 {
22811 /* -- LAPACK routine (version 3.0) --
22812 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
22813 Courant Institute, Argonne National Lab, and Rice University
22814 February 29, 1992
22815
22816
22817 Purpose
22818 =======
22819
22820 SGELQ2 computes an LQ factorization of a real m by n matrix A:
22821 A = L * Q.
22822
22823 Arguments
22824 =========
22825
22826 M (input) INTEGER
22827 The number of rows of the matrix A. M >= 0.
22828
22829 N (input) INTEGER
22830 The number of columns of the matrix A. N >= 0.
22831
22832 A (input/output) REAL array, dimension (LDA,N)
22833 On entry, the m by n matrix A.
22834 On exit, the elements on and below the diagonal of the array
22835 contain the m by min(m,n) lower trapezoidal matrix L (L is
22836 lower triangular if m <= n); the elements above the diagonal,
22837 with the array TAU, represent the orthogonal matrix Q as a
22838 product of elementary reflectors (see Further Details).
22839
22840 LDA (input) INTEGER
22841 The leading dimension of the array A. LDA >= max(1,M).
22842
22843 TAU (output) REAL array, dimension (min(M,N))
22844 The scalar factors of the elementary reflectors (see Further
22845 Details).
22846
22847 WORK (workspace) REAL array, dimension (M)
22848
22849 INFO (output) INTEGER
22850 = 0: successful exit
22851 < 0: if INFO = -i, the i-th argument had an illegal value
22852
22853 Further Details
22854 ===============
22855
22856 The matrix Q is represented as a product of elementary reflectors
22857
22858 Q = H(k) . . . H(2) H(1), where k = min(m,n).
22859
22860 Each H(i) has the form
22861
22862 H(i) = I - tau * v * v'
22863
22864 where tau is a real scalar, and v is a real vector with
22865 v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
22866 and tau in TAU(i).
22867
22868 =====================================================================
22869
22870
22871 Test the input arguments
22872
22873 Parameter adjustments */
22874 /* System generated locals */
22875 integer a_dim1, a_offset, i__1, i__2, i__3;
22876 /* Local variables */
22877 static integer i__, k;
22878 extern /* Subroutine */ int slarf_(const char *, integer *, integer *, real *,
22879 integer *, real *, real *, integer *, real *), xerbla_(
22880 const char *, integer *), slarfg_(integer *, real *, real *,
22881 integer *, real *);
22882 static real aii;
22883 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
22884
22885 a_dim1 = *lda;
22886 a_offset = 1 + a_dim1 * 1;
22887 a -= a_offset;
22888 --tau;
22889 --work;
22890
22891 /* Function Body */
22892 *info = 0;
22893 if (*m < 0) {
22894 *info = -1;
22895 } else if (*n < 0) {
22896 *info = -2;
22897 } else if (*lda < f2cmax(1,*m)) {
22898 *info = -4;
22899 }
22900 if (*info != 0) {
22901 i__1 = -(*info);
22902 xerbla_("SGELQ2", &i__1);
22903 return 0;
22904 }
22905
22906 k = f2cmin(*m,*n);
22907
22908 i__1 = k;
22909 for (i__ = 1; i__ <= i__1; ++i__) {
22910
22911 /* Generate elementary reflector H(i) to annihilate A(i,i+1:n)
22912
22913 Computing MIN */
22914 i__2 = i__ + 1;
22915 i__3 = *n - i__ + 1;
22916 slarfg_(&i__3, &a_ref(i__, i__), &a_ref(i__, f2cmin(i__2,*n)), lda, &tau[
22917 i__]);
22918 if (i__ < *m) {
22919
22920 /* Apply H(i) to A(i+1:m,i:n) from the right */
22921
22922 aii = a_ref(i__, i__);
22923 a_ref(i__, i__) = 1.f;
22924 i__2 = *m - i__;
22925 i__3 = *n - i__ + 1;
22926 slarf_("Right", &i__2, &i__3, &a_ref(i__, i__), lda, &tau[i__], &
22927 a_ref(i__ + 1, i__), lda, &work[1]);
22928 a_ref(i__, i__) = aii;
22929 }
22930 /* L10: */
22931 }
22932 return 0;
22933
22934 /* End of SGELQ2 */
22935
22936 } /* sgelq2_ */
|
|
||||||||||||||||||||||||||||||||||||
|
Definition at line 21958 of file lapackblas.cpp. References a_ref, c__1, c__2, c__3, c_n1, f2cmax, f2cmin, ftnlen, ilaenv_(), integer, logical, nx, real, sgelq2_(), slarfb_(), slarft_(), and xerbla_(). Referenced by sgesvd_(). 21960 {
21961 /* -- LAPACK routine (version 3.0) --
21962 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
21963 Courant Institute, Argonne National Lab, and Rice University
21964 June 30, 1999
21965
21966
21967 Purpose
21968 =======
21969
21970 SGELQF computes an LQ factorization of a real M-by-N matrix A:
21971 A = L * Q.
21972
21973 Arguments
21974 =========
21975
21976 M (input) INTEGER
21977 The number of rows of the matrix A. M >= 0.
21978
21979 N (input) INTEGER
21980 The number of columns of the matrix A. N >= 0.
21981
21982 A (input/output) REAL array, dimension (LDA,N)
21983 On entry, the M-by-N matrix A.
21984 On exit, the elements on and below the diagonal of the array
21985 contain the m-by-min(m,n) lower trapezoidal matrix L (L is
21986 lower triangular if m <= n); the elements above the diagonal,
21987 with the array TAU, represent the orthogonal matrix Q as a
21988 product of elementary reflectors (see Further Details).
21989
21990 LDA (input) INTEGER
21991 The leading dimension of the array A. LDA >= max(1,M).
21992
21993 TAU (output) REAL array, dimension (min(M,N))
21994 The scalar factors of the elementary reflectors (see Further
21995 Details).
21996
21997 WORK (workspace/output) REAL array, dimension (LWORK)
21998 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
21999
22000 LWORK (input) INTEGER
22001 The dimension of the array WORK. LWORK >= max(1,M).
22002 For optimum performance LWORK >= M*NB, where NB is the
22003 optimal blocksize.
22004
22005 If LWORK = -1, then a workspace query is assumed; the routine
22006 only calculates the optimal size of the WORK array, returns
22007 this value as the first entry of the WORK array, and no error
22008 message related to LWORK is issued by XERBLA.
22009
22010 INFO (output) INTEGER
22011 = 0: successful exit
22012 < 0: if INFO = -i, the i-th argument had an illegal value
22013
22014 Further Details
22015 ===============
22016
22017 The matrix Q is represented as a product of elementary reflectors
22018
22019 Q = H(k) . . . H(2) H(1), where k = min(m,n).
22020
22021 Each H(i) has the form
22022
22023 H(i) = I - tau * v * v'
22024
22025 where tau is a real scalar, and v is a real vector with
22026 v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
22027 and tau in TAU(i).
22028
22029 =====================================================================
22030
22031
22032 Test the input arguments
22033
22034 Parameter adjustments */
22035 /* Table of constant values */
22036 static integer c__1 = 1;
22037 static integer c_n1 = -1;
22038 static integer c__3 = 3;
22039 static integer c__2 = 2;
22040
22041 /* System generated locals */
22042 integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
22043 /* Local variables */
22044 static integer i__, k, nbmin, iinfo;
22045 extern /* Subroutine */ int sgelq2_(integer *, integer *, real *, integer
22046 *, real *, real *, integer *);
22047 static integer ib, nb, nx;
22048 extern /* Subroutine */ int slarfb_(const char *, const char *, const char *, const char *,
22049 integer *, integer *, integer *, real *, integer *, real *,
22050 integer *, real *, integer *, real *, integer *), xerbla_(const char *, integer *);
22051 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *,
22052 integer *, integer *, ftnlen, ftnlen);
22053 extern /* Subroutine */ int slarft_(const char *, const char *, integer *, integer *,
22054 real *, integer *, real *, real *, integer *);
22055 static integer ldwork, lwkopt;
22056 static logical lquery;
22057 static integer iws;
22058 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
22059
22060
22061 a_dim1 = *lda;
22062 a_offset = 1 + a_dim1 * 1;
22063 a -= a_offset;
22064 --tau;
22065 --work;
22066
22067 /* Function Body */
22068 *info = 0;
22069 nb = ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
22070 1);
22071 lwkopt = *m * nb;
22072 work[1] = (real) lwkopt;
22073 lquery = *lwork == -1;
22074 if (*m < 0) {
22075 *info = -1;
22076 } else if (*n < 0) {
22077 *info = -2;
22078 } else if (*lda < f2cmax(1,*m)) {
22079 *info = -4;
22080 } else if (*lwork < f2cmax(1,*m) && ! lquery) {
22081 *info = -7;
22082 }
22083 if (*info != 0) {
22084 i__1 = -(*info);
22085 xerbla_("SGELQF", &i__1);
22086 return 0;
22087 } else if (lquery) {
22088 return 0;
22089 }
22090
22091 /* Quick return if possible */
22092
22093 k = f2cmin(*m,*n);
22094 if (k == 0) {
22095 work[1] = 1.f;
22096 return 0;
22097 }
22098
22099 nbmin = 2;
22100 nx = 0;
22101 iws = *m;
22102 if (nb > 1 && nb < k) {
22103
22104 /* Determine when to cross over from blocked to unblocked code.
22105
22106 Computing MAX */
22107 i__1 = 0, i__2 = ilaenv_(&c__3, "SGELQF", " ", m, n, &c_n1, &c_n1, (
22108 ftnlen)6, (ftnlen)1);
22109 nx = f2cmax(i__1,i__2);
22110 if (nx < k) {
22111
22112 /* Determine if workspace is large enough for blocked code. */
22113
22114 ldwork = *m;
22115 iws = ldwork * nb;
22116 if (*lwork < iws) {
22117
22118 /* Not enough workspace to use optimal NB: reduce NB and
22119 determine the minimum value of NB. */
22120
22121 nb = *lwork / ldwork;
22122 /* Computing MAX */
22123 i__1 = 2, i__2 = ilaenv_(&c__2, "SGELQF", " ", m, n, &c_n1, &
22124 c_n1, (ftnlen)6, (ftnlen)1);
22125 nbmin = f2cmax(i__1,i__2);
22126 }
22127 }
22128 }
22129
22130 if (nb >= nbmin && nb < k && nx < k) {
22131
22132 /* Use blocked code initially */
22133
22134 i__1 = k - nx;
22135 i__2 = nb;
22136 for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
22137 /* Computing MIN */
22138 i__3 = k - i__ + 1;
22139 ib = f2cmin(i__3,nb);
22140
22141 /* Compute the LQ factorization of the current block
22142 A(i:i+ib-1,i:n) */
22143
22144 i__3 = *n - i__ + 1;
22145 sgelq2_(&ib, &i__3, &a_ref(i__, i__), lda, &tau[i__], &work[1], &
22146 iinfo);
22147 if (i__ + ib <= *m) {
22148
22149 /* Form the triangular factor of the block reflector
22150 H = H(i) H(i+1) . . . H(i+ib-1) */
22151
22152 i__3 = *n - i__ + 1;
22153 slarft_("Forward", "Rowwise", &i__3, &ib, &a_ref(i__, i__),
22154 lda, &tau[i__], &work[1], &ldwork);
22155
22156 /* Apply H to A(i+ib:m,i:n) from the right */
22157
22158 i__3 = *m - i__ - ib + 1;
22159 i__4 = *n - i__ + 1;
22160 slarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3,
22161 &i__4, &ib, &a_ref(i__, i__), lda, &work[1], &ldwork,
22162 &a_ref(i__ + ib, i__), lda, &work[ib + 1], &ldwork);
22163 }
22164 /* L10: */
22165 }
22166 } else {
22167 i__ = 1;
22168 }
22169
22170 /* Use unblocked code to factor the last or only block. */
22171
22172 if (i__ <= k) {
22173 i__2 = *m - i__ + 1;
22174 i__1 = *n - i__ + 1;
22175 sgelq2_(&i__2, &i__1, &a_ref(i__, i__), lda, &tau[i__], &work[1], &
22176 iinfo);
22177 }
22178
22179 work[1] = (real) iws;
22180 return 0;
22181
22182 /* End of SGELQF */
22183
22184 } /* sgelqf_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 1312 of file lapackblas.cpp. References a_ref, b, b_ref, c___ref, f2cmax, integer, logical, lsame_(), real, and xerbla_(). Referenced by sgebrd_(), sgesvd_(), slaed0_(), slaed3_(), slaed7_(), slarfb_(), and sstedc_(). 01315 {
01316 /* System generated locals */
01317 integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
01318 i__3;
01319 /* Local variables */
01320 static integer info;
01321 static logical nota, notb;
01322 static real temp;
01323 static integer i__, j, l, ncola;
01324 extern logical lsame_(const char *, const char *);
01325 static integer nrowa, nrowb;
01326 extern /* Subroutine */ int xerbla_(const char *, integer *);
01327 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
01328 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
01329 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
01330 /* Purpose
01331 =======
01332 SGEMM performs one of the matrix-matrix operations
01333 C := alpha*op( A )*op( B ) + beta*C,
01334 where op( X ) is one of
01335 op( X ) = X or op( X ) = X',
01336 alpha and beta are scalars, and A, B and C are matrices, with op( A )
01337 an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
01338 Parameters
01339 ==========
01340 TRANSA - CHARACTER*1.
01341 On entry, TRANSA specifies the form of op( A ) to be used in
01342 the matrix multiplication as follows:
01343 TRANSA = 'N' or 'n', op( A ) = A.
01344 TRANSA = 'T' or 't', op( A ) = A'.
01345 TRANSA = 'C' or 'c', op( A ) = A'.
01346 Unchanged on exit.
01347 TRANSB - CHARACTER*1.
01348 On entry, TRANSB specifies the form of op( B ) to be used in
01349 the matrix multiplication as follows:
01350 TRANSB = 'N' or 'n', op( B ) = B.
01351 TRANSB = 'T' or 't', op( B ) = B'.
01352 TRANSB = 'C' or 'c', op( B ) = B'.
01353 Unchanged on exit.
01354 M - INTEGER.
01355 On entry, M specifies the number of rows of the matrix
01356 op( A ) and of the matrix C. M must be at least zero.
01357 Unchanged on exit.
01358 N - INTEGER.
01359 On entry, N specifies the number of columns of the matrix
01360 op( B ) and the number of columns of the matrix C. N must be
01361 at least zero.
01362 Unchanged on exit.
01363 K - INTEGER.
01364 On entry, K specifies the number of columns of the matrix
01365 op( A ) and the number of rows of the matrix op( B ). K must
01366 be at least zero.
01367 Unchanged on exit.
01368 ALPHA - REAL .
01369 On entry, ALPHA specifies the scalar alpha.
01370 Unchanged on exit.
01371 A - REAL array of DIMENSION ( LDA, ka ), where ka is
01372 k when TRANSA = 'N' or 'n', and is m otherwise.
01373 Before entry with TRANSA = 'N' or 'n', the leading m by k
01374 part of the array A must contain the matrix A, otherwise
01375 the leading k by m part of the array A must contain the
01376 matrix A.
01377 Unchanged on exit.
01378 LDA - INTEGER.
01379 On entry, LDA specifies the first dimension of A as declared
01380 in the calling (sub) program. When TRANSA = 'N' or 'n' then
01381 LDA must be at least f2cmax( 1, m ), otherwise LDA must be at
01382 least f2cmax( 1, k ).
01383 Unchanged on exit.
01384 B - REAL array of DIMENSION ( LDB, kb ), where kb is
01385 n when TRANSB = 'N' or 'n', and is k otherwise.
01386 Before entry with TRANSB = 'N' or 'n', the leading k by n
01387 part of the array B must contain the matrix B, otherwise
01388 the leading n by k part of the array B must contain the
01389 matrix B.
01390 Unchanged on exit.
01391 LDB - INTEGER.
01392 On entry, LDB specifies the first dimension of B as declared
01393 in the calling (sub) program. When TRANSB = 'N' or 'n' then
01394 LDB must be at least f2cmax( 1, k ), otherwise LDB must be at
01395 least f2cmax( 1, n ).
01396 Unchanged on exit.
01397 BETA - REAL .
01398 On entry, BETA specifies the scalar beta. When BETA is
01399 supplied as zero then C need not be set on input.
01400 Unchanged on exit.
01401 C - REAL array of DIMENSION ( LDC, n ).
01402 Before entry, the leading m by n part of the array C must
01403 contain the matrix C, except when beta is zero, in which
01404 case C need not be set on entry.
01405 On exit, the array C is overwritten by the m by n matrix
01406 ( alpha*op( A )*op( B ) + beta*C ).
01407 LDC - INTEGER.
01408 On entry, LDC specifies the first dimension of C as declared
01409 in the calling (sub) program. LDC must be at least
01410 f2cmax( 1, m ).
01411 Unchanged on exit.
01412 Level 3 Blas routine.
01413 -- Written on 8-February-1989.
01414 Jack Dongarra, Argonne National Laboratory.
01415 Iain Duff, AERE Harwell.
01416 Jeremy Du Croz, Numerical Algorithms Group Ltd.
01417 Sven Hammarling, Numerical Algorithms Group Ltd.
01418 Set NOTA and NOTB as true if A and B respectively are not
01419 transposed and set NROWA, NCOLA and NROWB as the number of rows
01420 and columns of A and the number of rows of B respectively.
01421 Parameter adjustments */
01422 a_dim1 = *lda;
01423 a_offset = 1 + a_dim1 * 1;
01424 a -= a_offset;
01425 b_dim1 = *ldb;
01426 b_offset = 1 + b_dim1 * 1;
01427 b -= b_offset;
01428 c_dim1 = *ldc;
01429 c_offset = 1 + c_dim1 * 1;
01430 c__ -= c_offset;
01431 /* Function Body */
01432 nota = lsame_(transa, "N");
01433 notb = lsame_(transb, "N");
01434 if (nota) {
01435 nrowa = *m;
01436 ncola = *k;
01437 } else {
01438 nrowa = *k;
01439 ncola = *m;
01440 }
01441 if (notb) {
01442 nrowb = *k;
01443 } else {
01444 nrowb = *n;
01445 }
01446 /* Test the input parameters. */
01447 info = 0;
01448 if (! nota && ! lsame_(transa, "C") && ! lsame_(
01449 transa, "T")) {
01450 info = 1;
01451 } else if (! notb && ! lsame_(transb, "C") && !
01452 lsame_(transb, "T")) {
01453 info = 2;
01454 } else if (*m < 0) {
01455 info = 3;
01456 } else if (*n < 0) {
01457 info = 4;
01458 } else if (*k < 0) {
01459 info = 5;
01460 } else if (*lda < f2cmax(1,nrowa)) {
01461 info = 8;
01462 } else if (*ldb < f2cmax(1,nrowb)) {
01463 info = 10;
01464 } else if (*ldc < f2cmax(1,*m)) {
01465 info = 13;
01466 }
01467 if (info != 0) {
01468 xerbla_("SGEMM ", &info);
01469 return 0;
01470 }
01471 /* Quick return if possible. */
01472 if (*m == 0 || *n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) {
01473 return 0;
01474 }
01475 /* And if alpha.eq.zero. */
01476 if (*alpha == 0.f) {
01477 if (*beta == 0.f) {
01478 i__1 = *n;
01479 for (j = 1; j <= i__1; ++j) {
01480 i__2 = *m;
01481 for (i__ = 1; i__ <= i__2; ++i__) {
01482 c___ref(i__, j) = 0.f;
01483 /* L10: */
01484 }
01485 /* L20: */
01486 }
01487 } else {
01488 i__1 = *n;
01489 for (j = 1; j <= i__1; ++j) {
01490 i__2 = *m;
01491 for (i__ = 1; i__ <= i__2; ++i__) {
01492 c___ref(i__, j) = *beta * c___ref(i__, j);
01493 /* L30: */
01494 }
01495 /* L40: */
01496 }
01497 }
01498 return 0;
01499 }
01500 /* Start the operations. */
01501 if (notb) {
01502 if (nota) {
01503 /* Form C := alpha*A*B + beta*C. */
01504 i__1 = *n;
01505 for (j = 1; j <= i__1; ++j) {
01506 if (*beta == 0.f) {
01507 i__2 = *m;
01508 for (i__ = 1; i__ <= i__2; ++i__) {
01509 c___ref(i__, j) = 0.f;
01510 /* L50: */
01511 }
01512 } else if (*beta != 1.f) {
01513 i__2 = *m;
01514 for (i__ = 1; i__ <= i__2; ++i__) {
01515 c___ref(i__, j) = *beta * c___ref(i__, j);
01516 /* L60: */
01517 }
01518 }
01519 i__2 = *k;
01520 for (l = 1; l <= i__2; ++l) {
01521 if (b_ref(l, j) != 0.f) {
01522 temp = *alpha * b_ref(l, j);
01523 i__3 = *m;
01524 for (i__ = 1; i__ <= i__3; ++i__) {
01525 c___ref(i__, j) = c___ref(i__, j) + temp * a_ref(
01526 i__, l);
01527 /* L70: */
01528 }
01529 }
01530 /* L80: */
01531 }
01532 /* L90: */
01533 }
01534 } else {
01535 /* Form C := alpha*A'*B + beta*C */
01536 i__1 = *n;
01537 for (j = 1; j <= i__1; ++j) {
01538 i__2 = *m;
01539 for (i__ = 1; i__ <= i__2; ++i__) {
01540 temp = 0.f;
01541 i__3 = *k;
01542 for (l = 1; l <= i__3; ++l) {
01543 temp += a_ref(l, i__) * b_ref(l, j);
01544 /* L100: */
01545 }
01546 if (*beta == 0.f) {
01547 c___ref(i__, j) = *alpha * temp;
01548 } else {
01549 c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__,
01550 j);
01551 }
01552 /* L110: */
01553 }
01554 /* L120: */
01555 }
01556 }
01557 } else {
01558 if (nota) {
01559 /* Form C := alpha*A*B' + beta*C */
01560 i__1 = *n;
01561 for (j = 1; j <= i__1; ++j) {
01562 if (*beta == 0.f) {
01563 i__2 = *m;
01564 for (i__ = 1; i__ <= i__2; ++i__) {
01565 c___ref(i__, j) = 0.f;
01566 /* L130: */
01567 }
01568 } else if (*beta != 1.f) {
01569 i__2 = *m;
01570 for (i__ = 1; i__ <= i__2; ++i__) {
01571 c___ref(i__, j) = *beta * c___ref(i__, j);
01572 /* L140: */
01573 }
01574 }
01575 i__2 = *k;
01576 for (l = 1; l <= i__2; ++l) {
01577 if (b_ref(j, l) != 0.f) {
01578 temp = *alpha * b_ref(j, l);
01579 i__3 = *m;
01580 for (i__ = 1; i__ <= i__3; ++i__) {
01581 c___ref(i__, j) = c___ref(i__, j) + temp * a_ref(
01582 i__, l);
01583 /* L150: */
01584 }
01585 }
01586 /* L160: */
01587 }
01588 /* L170: */
01589 }
01590 } else {
01591 /* Form C := alpha*A'*B' + beta*C */
01592 i__1 = *n;
01593 for (j = 1; j <= i__1; ++j) {
01594 i__2 = *m;
01595 for (i__ = 1; i__ <= i__2; ++i__) {
01596 temp = 0.f;
01597 i__3 = *k;
01598 for (l = 1; l <= i__3; ++l) {
01599 temp += a_ref(l, i__) * b_ref(j, l);
01600 /* L180: */
01601 }
01602 if (*beta == 0.f) {
01603 c___ref(i__, j) = *alpha * temp;
01604 } else {
01605 c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__,
01606 j);
01607 }
01608 /* L190: */
01609 }
01610 /* L200: */
01611 }
01612 }
01613 }
01614 return 0;
01615 /* End of SGEMM . */
01616 } /* sgemm_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 1624 of file lapackblas.cpp. References a_ref, f2cmax, integer, logical, lsame_(), real, x, xerbla_(), and y. Referenced by EMAN::PCAlarge::analyze(), EMAN::PCA::dopca_lan(), EMAN::PCAlarge::Lanczos(), EMAN::PCA::Lanczos(), slabrd_(), slaeda_(), slarf_(), slarft_(), and slatrd_(). 01627 {
01628 /* System generated locals */
01629 integer a_dim1, a_offset, i__1, i__2;
01630 /* Local variables */
01631 static integer info;
01632 static real temp;
01633 static integer lenx, leny, i__, j;
01634 extern logical lsame_(const char *, const char *);
01635 static integer ix, iy, jx, jy, kx, ky;
01636 extern /* Subroutine */ int xerbla_(const char *, integer *);
01637 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
01638 /* Purpose
01639 =======
01640 SGEMV performs one of the matrix-vector operations
01641 y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
01642 where alpha and beta are scalars, x and y are vectors and A is an
01643 m by n matrix.
01644 Parameters
01645 ==========
01646 TRANS - CHARACTER*1.
01647 On entry, TRANS specifies the operation to be performed as
01648 follows:
01649 TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
01650 TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
01651 TRANS = 'C' or 'c' y := alpha*A'*x + beta*y.
01652 Unchanged on exit.
01653 M - INTEGER.
01654 On entry, M specifies the number of rows of the matrix A.
01655 M must be at least zero.
01656 Unchanged on exit.
01657 N - INTEGER.
01658 On entry, N specifies the number of columns of the matrix A.
01659 N must be at least zero.
01660 Unchanged on exit.
01661 ALPHA - REAL .
01662 On entry, ALPHA specifies the scalar alpha.
01663 Unchanged on exit.
01664 A - REAL array of DIMENSION ( LDA, n ).
01665 Before entry, the leading m by n part of the array A must
01666 contain the matrix of coefficients.
01667 Unchanged on exit.
01668 LDA - INTEGER.
01669 On entry, LDA specifies the first dimension of A as declared
01670 in the calling (sub) program. LDA must be at least
01671 f2cmax( 1, m ).
01672 Unchanged on exit.
01673 X - REAL array of DIMENSION at least
01674 ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
01675 and at least
01676 ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
01677 Before entry, the incremented array X must contain the
01678 vector x.
01679 Unchanged on exit.
01680 INCX - INTEGER.
01681 On entry, INCX specifies the increment for the elements of
01682 X. INCX must not be zero.
01683 Unchanged on exit.
01684 BETA - REAL .
01685 On entry, BETA specifies the scalar beta. When BETA is
01686 supplied as zero then Y need not be set on input.
01687 Unchanged on exit.
01688 Y - REAL array of DIMENSION at least
01689 ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
01690 and at least
01691 ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
01692 Before entry with BETA non-zero, the incremented array Y
01693 must contain the vector y. On exit, Y is overwritten by the
01694 updated vector y.
01695 INCY - INTEGER.
01696 On entry, INCY specifies the increment for the elements of
01697 Y. INCY must not be zero.
01698 Unchanged on exit.
01699 Level 2 Blas routine.
01700 -- Written on 22-October-1986.
01701 Jack Dongarra, Argonne National Lab.
01702 Jeremy Du Croz, Nag Central Office.
01703 Sven Hammarling, Nag Central Office.
01704 Richard Hanson, Sandia National Labs.
01705 Test the input parameters.
01706 Parameter adjustments */
01707 a_dim1 = *lda;
01708 a_offset = 1 + a_dim1 * 1;
01709 a -= a_offset;
01710 --x;
01711 --y;
01712 /* Function Body */
01713 info = 0;
01714 if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C")
01715 ) {
01716 info = 1;
01717 } else if (*m < 0) {
01718 info = 2;
01719 } else if (*n < 0) {
01720 info = 3;
01721 } else if (*lda < f2cmax(1,*m)) {
01722 info = 6;
01723 } else if (*incx == 0) {
01724 info = 8;
01725 } else if (*incy == 0) {
01726 info = 11;
01727 }
01728 if (info != 0) {
01729 xerbla_("SGEMV ", &info);
01730 return 0;
01731 }
01732 /* Quick return if possible. */
01733 if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) {
01734 return 0;
01735 }
01736 /* Set LENX and LENY, the lengths of the vectors x and y, and set
01737 up the start points in X and Y. */
01738 if (lsame_(trans, "N")) {
01739 lenx = *n;
01740 leny = *m;
01741 } else {
01742 lenx = *m;
01743 leny = *n;
01744 }
01745 if (*incx > 0) {
01746 kx = 1;
01747 } else {
01748 kx = 1 - (lenx - 1) * *incx;
01749 }
01750 if (*incy > 0) {
01751 ky = 1;
01752 } else {
01753 ky = 1 - (leny - 1) * *incy;
01754 }
01755 /* Start the operations. In this version the elements of A are
01756 accessed sequentially with one pass through A.
01757 First form y := beta*y. */
01758 if (*beta != 1.f) {
01759 if (*incy == 1) {
01760 if (*beta == 0.f) {
01761 i__1 = leny;
01762 for (i__ = 1; i__ <= i__1; ++i__) {
01763 y[i__] = 0.f;
01764 /* L10: */
01765 }
01766 } else {
01767 i__1 = leny;
01768 for (i__ = 1; i__ <= i__1; ++i__) {
01769 y[i__] = *beta * y[i__];
01770 /* L20: */
01771 }
01772 }
01773 } else {
01774 iy = ky;
01775 if (*beta == 0.f) {
01776 i__1 = leny;
01777 for (i__ = 1; i__ <= i__1; ++i__) {
01778 y[iy] = 0.f;
01779 iy += *incy;
01780 /* L30: */
01781 }
01782 } else {
01783 i__1 = leny;
01784 for (i__ = 1; i__ <= i__1; ++i__) {
01785 y[iy] = *beta * y[iy];
01786 iy += *incy;
01787 /* L40: */
01788 }
01789 }
01790 }
01791 }
01792 if (*alpha == 0.f) {
01793 return 0;
01794 }
01795 if (lsame_(trans, "N")) {
01796 /* Form y := alpha*A*x + y. */
01797 jx = kx;
01798 if (*incy == 1) {
01799 i__1 = *n;
01800 for (j = 1; j <= i__1; ++j) {
01801 if (x[jx] != 0.f) {
01802 temp = *alpha * x[jx];
01803 i__2 = *m;
01804 for (i__ = 1; i__ <= i__2; ++i__) {
01805 y[i__] += temp * a_ref(i__, j);
01806 /* L50: */
01807 }
01808 }
01809 jx += *incx;
01810 /* L60: */
01811 }
01812 } else {
01813 i__1 = *n;
01814 for (j = 1; j <= i__1; ++j) {
01815 if (x[jx] != 0.f) {
01816 temp = *alpha * x[jx];
01817 iy = ky;
01818 i__2 = *m;
01819 for (i__ = 1; i__ <= i__2; ++i__) {
01820 y[iy] += temp * a_ref(i__, j);
01821 iy += *incy;
01822 /* L70: */
01823 }
01824 }
01825 jx += *incx;
01826 /* L80: */
01827 }
01828 }
01829 } else {
01830 /* Form y := alpha*A'*x + y. */
01831 jy = ky;
01832 if (*incx == 1) {
01833 i__1 = *n;
01834 for (j = 1; j <= i__1; ++j) {
01835 temp = 0.f;
01836 i__2 = *m;
01837 for (i__ = 1; i__ <= i__2; ++i__) {
01838 temp += a_ref(i__, j) * x[i__];
01839 /* L90: */
01840 }
01841 y[jy] += *alpha * temp;
01842 jy += *incy;
01843 /* L100: */
01844 }
01845 } else {
01846 i__1 = *n;
01847 for (j = 1; j <= i__1; ++j) {
01848 temp = 0.f;
01849 ix = kx;
01850 i__2 = *m;
01851 for (i__ = 1; i__ <= i__2; ++i__) {
01852 temp += a_ref(i__, j) * x[ix];
01853 ix += *incx;
01854 /* L110: */
01855 }
01856 y[jy] += *alpha * temp;
01857 jy += *incy;
01858 /* L120: */
01859 }
01860 }
01861 }
01862 return 0;
01863 /* End of SGEMV . */
01864 } /* sgemv_ */
|
|
||||||||||||||||||||||||||||||||
|
Definition at line 24684 of file lapackblas.cpp. References a_ref, c__1, f2cmax, f2cmin, integer, real, slarf_(), slarfg_(), and xerbla_(). Referenced by sgeqrf_(). 24686 {
24687 /* -- LAPACK routine (version 3.0) --
24688 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
24689 Courant Institute, Argonne National Lab, and Rice University
24690 February 29, 1992
24691
24692
24693 Purpose
24694 =======
24695
24696 SGEQR2 computes a QR factorization of a real m by n matrix A:
24697 A = Q * R.
24698
24699 Arguments
24700 =========
24701
24702 M (input) INTEGER
24703 The number of rows of the matrix A. M >= 0.
24704
24705 N (input) INTEGER
24706 The number of columns of the matrix A. N >= 0.
24707
24708 A (input/output) REAL array, dimension (LDA,N)
24709 On entry, the m by n matrix A.
24710 On exit, the elements on and above the diagonal of the array
24711 contain the min(m,n) by n upper trapezoidal matrix R (R is
24712 upper triangular if m >= n); the elements below the diagonal,
24713 with the array TAU, represent the orthogonal matrix Q as a
24714 product of elementary reflectors (see Further Details).
24715
24716 LDA (input) INTEGER
24717 The leading dimension of the array A. LDA >= max(1,M).
24718
24719 TAU (output) REAL array, dimension (min(M,N))
24720 The scalar factors of the elementary reflectors (see Further
24721 Details).
24722
24723 WORK (workspace) REAL array, dimension (N)
24724
24725 INFO (output) INTEGER
24726 = 0: successful exit
24727 < 0: if INFO = -i, the i-th argument had an illegal value
24728
24729 Further Details
24730 ===============
24731
24732 The matrix Q is represented as a product of elementary reflectors
24733
24734 Q = H(1) H(2) . . . H(k), where k = min(m,n).
24735
24736 Each H(i) has the form
24737
24738 H(i) = I - tau * v * v'
24739
24740 where tau is a real scalar, and v is a real vector with
24741 v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
24742 and tau in TAU(i).
24743
24744 =====================================================================
24745
24746
24747 Test the input arguments
24748
24749 Parameter adjustments */
24750 /* Table of constant values */
24751 static integer c__1 = 1;
24752
24753 /* System generated locals */
24754 integer a_dim1, a_offset, i__1, i__2, i__3;
24755 /* Local variables */
24756 static integer i__, k;
24757 extern /* Subroutine */ int slarf_(const char *, integer *, integer *, real *,
24758 integer *, real *, real *, integer *, real *), xerbla_(
24759 const char *, integer *), slarfg_(integer *, real *, real *,
24760 integer *, real *);
24761 static real aii;
24762 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
24763
24764
24765 a_dim1 = *lda;
24766 a_offset = 1 + a_dim1 * 1;
24767 a -= a_offset;
24768 --tau;
24769 --work;
24770
24771 /* Function Body */
24772 *info = 0;
24773 if (*m < 0) {
24774 *info = -1;
24775 } else if (*n < 0) {
24776 *info = -2;
24777 } else if (*lda < f2cmax(1,*m)) {
24778 *info = -4;
24779 }
24780 if (*info != 0) {
24781 i__1 = -(*info);
24782 xerbla_("SGEQR2", &i__1);
24783 return 0;
24784 }
24785
24786 k = f2cmin(*m,*n);
24787
24788 i__1 = k;
24789 for (i__ = 1; i__ <= i__1; ++i__) {
24790
24791 /* Generate elementary reflector H(i) to annihilate A(i+1:m,i)
24792
24793 Computing MIN */
24794 i__2 = i__ + 1;
24795 i__3 = *m - i__ + 1;
24796 slarfg_(&i__3, &a_ref(i__, i__), &a_ref(f2cmin(i__2,*m), i__), &c__1, &
24797 tau[i__]);
24798 if (i__ < *n) {
24799
24800 /* Apply H(i) to A(i:m,i+1:n) from the left */
24801
24802 aii = a_ref(i__, i__);
24803 a_ref(i__, i__) = 1.f;
24804 i__2 = *m - i__ + 1;
24805 i__3 = *n - i__;
24806 slarf_("Left", &i__2, &i__3, &a_ref(i__, i__), &c__1, &tau[i__], &
24807 a_ref(i__, i__ + 1), lda, &work[1]);
24808 a_ref(i__, i__) = aii;
24809 }
24810 /* L10: */
24811 }
24812 return 0;
24813
24814 /* End of SGEQR2 */
24815
24816 } /* sgeqr2_ */
|
|
||||||||||||||||||||||||||||||||||||
|
Definition at line 23835 of file lapackblas.cpp. References a_ref, c__1, c__2, c__3, c_n1, f2cmax, f2cmin, ftnlen, ilaenv_(), integer, logical, nx, real, sgeqr2_(), slarfb_(), slarft_(), and xerbla_(). Referenced by sgesvd_(). 23837 {
23838 /* -- LAPACK routine (version 3.0) --
23839 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
23840 Courant Institute, Argonne National Lab, and Rice University
23841 June 30, 1999
23842
23843
23844 Purpose
23845 =======
23846
23847 SGEQRF computes a QR factorization of a real M-by-N matrix A:
23848 A = Q * R.
23849
23850 Arguments
23851 =========
23852
23853 M (input) INTEGER
23854 The number of rows of the matrix A. M >= 0.
23855
23856 N (input) INTEGER
23857 The number of columns of the matrix A. N >= 0.
23858
23859 A (input/output) REAL array, dimension (LDA,N)
23860 On entry, the M-by-N matrix A.
23861 On exit, the elements on and above the diagonal of the array
23862 contain the min(M,N)-by-N upper trapezoidal matrix R (R is
23863 upper triangular if m >= n); the elements below the diagonal,
23864 with the array TAU, represent the orthogonal matrix Q as a
23865 product of min(m,n) elementary reflectors (see Further
23866 Details).
23867
23868 LDA (input) INTEGER
23869 The leading dimension of the array A. LDA >= max(1,M).
23870
23871 TAU (output) REAL array, dimension (min(M,N))
23872 The scalar factors of the elementary reflectors (see Further
23873 Details).
23874
23875 WORK (workspace/output) REAL array, dimension (LWORK)
23876 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
23877
23878 LWORK (input) INTEGER
23879 The dimension of the array WORK. LWORK >= max(1,N).
23880 For optimum performance LWORK >= N*NB, where NB is
23881 the optimal blocksize.
23882
23883 If LWORK = -1, then a workspace query is assumed; the routine
23884 only calculates the optimal size of the WORK array, returns
23885 this value as the first entry of the WORK array, and no error
23886 message related to LWORK is issued by XERBLA.
23887
23888 INFO (output) INTEGER
23889 = 0: successful exit
23890 < 0: if INFO = -i, the i-th argument had an illegal value
23891
23892 Further Details
23893 ===============
23894
23895 The matrix Q is represented as a product of elementary reflectors
23896
23897 Q = H(1) H(2) . . . H(k), where k = min(m,n).
23898
23899 Each H(i) has the form
23900
23901 H(i) = I - tau * v * v'
23902
23903 where tau is a real scalar, and v is a real vector with
23904 v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
23905 and tau in TAU(i).
23906
23907 =====================================================================
23908
23909
23910 Test the input arguments
23911
23912 Parameter adjustments */
23913 /* Table of constant values */
23914 static integer c__1 = 1;
23915 static integer c_n1 = -1;
23916 static integer c__3 = 3;
23917 static integer c__2 = 2;
23918
23919 /* System generated locals */
23920 integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
23921 /* Local variables */
23922 static integer i__, k, nbmin, iinfo;
23923 extern /* Subroutine */ int sgeqr2_(integer *, integer *, real *, integer
23924 *, real *, real *, integer *);
23925 static integer ib, nb, nx;
23926 extern /* Subroutine */ int slarfb_(const char *, const char *, const char *, const char *,
23927 integer *, integer *, integer *, real *, integer *, real *,
23928 integer *, real *, integer *, real *, integer *), xerbla_(const char *, integer *);
23929 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *,
23930 integer *, integer *, ftnlen, ftnlen);
23931 extern /* Subroutine */ int slarft_(const char *, const char *, integer *, integer *,
23932 real *, integer *, real *, real *, integer *);
23933 static integer ldwork, lwkopt;
23934 static logical lquery;
23935 static integer iws;
23936 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
23937
23938
23939 a_dim1 = *lda;
23940 a_offset = 1 + a_dim1 * 1;
23941 a -= a_offset;
23942 --tau;
23943 --work;
23944
23945 /* Function Body */
23946 *info = 0;
23947 nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
23948 1);
23949 lwkopt = *n * nb;
23950 work[1] = (real) lwkopt;
23951 lquery = *lwork == -1;
23952 if (*m < 0) {
23953 *info = -1;
23954 } else if (*n < 0) {
23955 *info = -2;
23956 } else if (*lda < f2cmax(1,*m)) {
23957 *info = -4;
23958 } else if (*lwork < f2cmax(1,*n) && ! lquery) {
23959 *info = -7;
23960 }
23961 if (*info != 0) {
23962 i__1 = -(*info);
23963 xerbla_("SGEQRF", &i__1);
23964 return 0;
23965 } else if (lquery) {
23966 return 0;
23967 }
23968
23969 /* Quick return if possible */
23970
23971 k = f2cmin(*m,*n);
23972 if (k == 0) {
23973 work[1] = 1.f;
23974 return 0;
23975 }
23976
23977 nbmin = 2;
23978 nx = 0;
23979 iws = *n;
23980 if (nb > 1 && nb < k) {
23981
23982 /* Determine when to cross over from blocked to unblocked code.
23983
23984 Computing MAX */
23985 i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", m, n, &c_n1, &c_n1, (
23986 ftnlen)6, (ftnlen)1);
23987 nx = f2cmax(i__1,i__2);
23988 if (nx < k) {
23989
23990 /* Determine if workspace is large enough for blocked code. */
23991
23992 ldwork = *n;
23993 iws = ldwork * nb;
23994 if (*lwork < iws) {
23995
23996 /* Not enough workspace to use optimal NB: reduce NB and
23997 determine the minimum value of NB. */
23998
23999 nb = *lwork / ldwork;
24000 /* Computing MAX */
24001 i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", m, n, &c_n1, &
24002 c_n1, (ftnlen)6, (ftnlen)1);
24003 nbmin = f2cmax(i__1,i__2);
24004 }
24005 }
24006 }
24007
24008 if (nb >= nbmin && nb < k && nx < k) {
24009
24010 /* Use blocked code initially */
24011
24012 i__1 = k - nx;
24013 i__2 = nb;
24014 for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
24015 /* Computing MIN */
24016 i__3 = k - i__ + 1;
24017 ib = f2cmin(i__3,nb);
24018
24019 /* Compute the QR factorization of the current block
24020 A(i:m,i:i+ib-1) */
24021
24022 i__3 = *m - i__ + 1;
24023 sgeqr2_(&i__3, &ib, &a_ref(i__, i__), lda, &tau[i__], &work[1], &
24024 iinfo);
24025 if (i__ + ib <= *n) {
24026
24027 /* Form the triangular factor of the block reflector
24028 H = H(i) H(i+1) . . . H(i+ib-1) */
24029
24030 i__3 = *m - i__ + 1;
24031 slarft_("Forward", "Columnwise", &i__3, &ib, &a_ref(i__, i__),
24032 lda, &tau[i__], &work[1], &ldwork);
24033
24034 /* Apply H' to A(i:m,i+ib:n) from the left */
24035
24036 i__3 = *m - i__ + 1;
24037 i__4 = *n - i__ - ib + 1;
24038 slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
24039 i__4, &ib, &a_ref(i__, i__), lda, &work[1], &ldwork, &
24040 a_ref(i__, i__ + ib), lda, &work[ib + 1], &ldwork);
24041 }
24042 /* L10: */
24043 }
24044 } else {
24045 i__ = 1;
24046 }
24047
24048 /* Use unblocked code to factor the last or only block. */
24049
24050 if (i__ <= k) {
24051 i__2 = *m - i__ + 1;
24052 i__1 = *n - i__ + 1;
24053 sgeqr2_(&i__2, &i__1, &a_ref(i__, i__), lda, &tau[i__], &work[1], &
24054 iinfo);
24055 }
24056
24057 work[1] = (real) iws;
24058 return 0;
24059
24060 /* End of SGEQRF */
24061
24062 } /* sgeqrf_ */
|
|
||||||||||||||||||||||||||||||||||||||||
|
Definition at line 1870 of file lapackblas.cpp. References a_ref, f2cmax, integer, real, x, xerbla_(), and y. Referenced by slarf_(). 01872 {
01873 /* System generated locals */
01874 integer a_dim1, a_offset, i__1, i__2;
01875 /* Local variables */
01876 static integer info;
01877 static real temp;
01878 static integer i__, j, ix, jy, kx;
01879 extern /* Subroutine */ int xerbla_(const char *, integer *);
01880 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
01881 /* Purpose
01882 =======
01883 SGER performs the rank 1 operation
01884 A := alpha*x*y' + A,
01885 where alpha is a scalar, x is an m element vector, y is an n element
01886 vector and A is an m by n matrix.
01887 Parameters
01888 ==========
01889 M - INTEGER.
01890 On entry, M specifies the number of rows of the matrix A.
01891 M must be at least zero.
01892 Unchanged on exit.
01893 N - INTEGER.
01894 On entry, N specifies the number of columns of the matrix A.
01895 N must be at least zero.
01896 Unchanged on exit.
01897 ALPHA - REAL .
01898 On entry, ALPHA specifies the scalar alpha.
01899 Unchanged on exit.
01900 X - REAL array of dimension at least
01901 ( 1 + ( m - 1 )*abs( INCX ) ).
01902 Before entry, the incremented array X must contain the m
01903 element vector x.
01904 Unchanged on exit.
01905 INCX - INTEGER.
01906 On entry, INCX specifies the increment for the elements of
01907 X. INCX must not be zero.
01908 Unchanged on exit.
01909 Y - REAL array of dimension at least
01910 ( 1 + ( n - 1 )*abs( INCY ) ).
01911 Before entry, the incremented array Y must contain the n
01912 element vector y.
01913 Unchanged on exit.
01914 INCY - INTEGER.
01915 On entry, INCY specifies the increment for the elements of
01916 Y. INCY must not be zero.
01917 Unchanged on exit.
01918 A - REAL array of DIMENSION ( LDA, n ).
01919 Before entry, the leading m by n part of the array A must
01920 contain the matrix of coefficients. On exit, A is
01921 overwritten by the updated matrix.
01922 LDA - INTEGER.
01923 On entry, LDA specifies the first dimension of A as declared
01924 in the calling (sub) program. LDA must be at least
01925 f2cmax( 1, m ).
01926 Unchanged on exit.
01927 Level 2 Blas routine.
01928 -- Written on 22-October-1986.
01929 Jack Dongarra, Argonne National Lab.
01930 Jeremy Du Croz, Nag Central Office.
01931 Sven Hammarling, Nag Central Office.
01932 Richard Hanson, Sandia National Labs.
01933 Test the input parameters.
01934 Parameter adjustments */
01935 --x;
01936 --y;
01937 a_dim1 = *lda;
01938 a_offset = 1 + a_dim1 * 1;
01939 a -= a_offset;
01940 /* Function Body */
01941 info = 0;
01942 if (*m < 0) {
01943 info = 1;
01944 } else if (*n < 0) {
01945 info = 2;
01946 } else if (*incx == 0) {
01947 info = 5;
01948 } else if (*incy == 0) {
01949 info = 7;
01950 } else if (*lda < f2cmax(1,*m)) {
01951 info = 9;
01952 }
01953 if (info != 0) {
01954 xerbla_("SGER ", &info);
01955 return 0;
01956 }
01957 /* Quick return if possible. */
01958 if (*m == 0 || *n == 0 || *alpha == 0.f) {
01959 return 0;
01960 }
01961 /* Start the operations. In this version the elements of A are
01962 accessed sequentially with one pass through A. */
01963 if (*incy > 0) {
01964 jy = 1;
01965 } else {
01966 jy = 1 - (*n - 1) * *incy;
01967 }
01968 if (*incx == 1) {
01969 i__1 = *n;
01970 for (j = 1; j <= i__1; ++j) {
01971 if (y[jy] != 0.f) {
01972 temp = *alpha * y[jy];
01973 i__2 = *m;
01974 for (i__ = 1; i__ <= i__2; ++i__) {
01975 a_ref(i__, j) = a_ref(i__, j) + x[i__] * temp;
01976 /* L10: */
01977 }
01978 }
01979 jy += *incy;
01980 /* L20: */
01981 }
01982 } else {
01983 if (*incx > 0) {
01984 kx = 1;
01985 } else {
01986 kx = 1 - (*m - 1) * *incx;
01987 }
01988 i__1 = *n;
01989 for (j = 1; j <= i__1; ++j) {
01990 if (y[jy] != 0.f) {
01991 temp = *alpha * y[jy];
01992 ix = kx;
01993 i__2 = *m;
01994 for (i__ = 1; i__ <= i__2; ++i__) {
01995 a_ref(i__, j) = a_ref(i__, j) + x[ix] * temp;
01996 ix += *incx;
01997 /* L30: */
01998 }
01999 }
02000 jy += *incy;
02001 /* L40: */
02002 }
02003 }
02004 return 0;
02005 /* End of SGER . */
02006 } /* sger_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 16378 of file lapackblas.cpp. References a_ref, c__0, c__1, c__2, c__6, c_b416, c_b438, c_n1, doublereal, f2cmax, f2cmin, ftnlen, ierr, ilaenv_(), integer, logical, lsame_(), real, s_cat(), sbdsqr_(), sgebrd_(), sgelqf_(), sgemm_(), sgeqrf_(), slacpy_(), slamch_(), slange_(), slascl_(), slaset_(), sorgbr_(), sorglq_(), sorgqr_(), sormbr_(), sqrt(), u_ref, vt_ref, and xerbla_(). Referenced by svd(). 16381 {
16382 /* System generated locals */
16383 typedef const char *address;
16384
16385 address a__1[2];
16386 integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2],
16387 i__2, i__3, i__4;
16388 char ch__1[2];
16389
16390 /* Builtin functions
16391 Subroutine */ int s_cat(char *, const char **, integer *, integer *, ftnlen);
16392 //double sqrt(doublereal);
16393
16394 /* Local variables */
16395 static integer iscl;
16396 static real anrm;
16397 static integer ierr, itau, ncvt, nrvt, i__;
16398 extern logical lsame_(const char *, const char *);
16399 static integer chunk;
16400 extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *,
16401 integer *, real *, real *, integer *, real *, integer *, real *,
16402 real *, integer *);
16403 static integer minmn, wrkbl, itaup, itauq, mnthr, iwork;
16404 static logical wntua, wntva, wntun, wntuo, wntvn, wntvo, wntus, wntvs;
16405 static integer ie, ir, bdspac, iu;
16406 extern /* Subroutine */ int sgebrd_(integer *, integer *, real *, integer
16407 *, real *, real *, real *, real *, real *, integer *, integer *);
16408 extern doublereal slamch_(const char *), slange_(const char *, integer *,
16409 integer *, real *, integer *, real *);
16410 extern /* Subroutine */ int xerbla_(const char *, integer *);
16411 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *,
16412 integer *, integer *, ftnlen, ftnlen);
16413 static real bignum;
16414 extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer
16415 *, real *, real *, integer *, integer *), slascl_(const char *, integer
16416 *, integer *, real *, real *, integer *, integer *, real *,
16417 integer *, integer *), sgeqrf_(integer *, integer *, real
16418 *, integer *, real *, real *, integer *, integer *), slacpy_(const char
16419 *, integer *, integer *, real *, integer *, real *, integer *), slaset_(const char *, integer *, integer *, real *, real *,
16420 real *, integer *), sbdsqr_(const char *, integer *, integer *,
16421 integer *, integer *, real *, real *, real *, integer *, real *,
16422 integer *, real *, integer *, real *, integer *), sorgbr_(
16423 const char *, integer *, integer *, integer *, real *, integer *, real *
16424 , real *, integer *, integer *), sormbr_(const char *, const char *,
16425 const char *, integer *, integer *, integer *, real *, integer *, real *
16426 , real *, integer *, real *, integer *, integer *);
16427 static integer ldwrkr, minwrk, ldwrku, maxwrk;
16428 extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real
16429 *, integer *, real *, real *, integer *, integer *);
16430 static real smlnum;
16431 extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real
16432 *, integer *, real *, real *, integer *, integer *);
16433 static logical lquery, wntuas, wntvas;
16434 static integer blk, ncu;
16435 static real dum[1], eps;
16436 static integer nru;
16437
16438
16439 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
16440 #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1]
16441 #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1]
16442
16443
16444 /* -- LAPACK driver routine (version 3.0) --
16445 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
16446 Courant Institute, Argonne National Lab, and Rice University
16447 October 31, 1999
16448
16449
16450 Purpose
16451 =======
16452
16453 SGESVD computes the singular value decomposition (SVD) of a real
16454 M-by-N matrix A, optionally computing the left and/or right singular
16455 vectors. The SVD is written
16456
16457 A = U * SIGMA * transpose(V)
16458
16459 where SIGMA is an M-by-N matrix which is zero except for its
16460 min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
16461 V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
16462 are the singular values of A; they are real and non-negative, and
16463 are returned in descending order. The first min(m,n) columns of
16464 U and V are the left and right singular vectors of A.
16465
16466 Note that the routine returns V**T, not V.
16467
16468 Arguments
16469 =========
16470
16471 JOBU (input) CHARACTER*1
16472 Specifies options for computing all or part of the matrix U:
16473 = 'A': all M columns of U are returned in array U:
16474 = 'S': the first min(m,n) columns of U (the left singular
16475 vectors) are returned in the array U;
16476 = 'O': the first min(m,n) columns of U (the left singular
16477 vectors) are overwritten on the array A;
16478 = 'N': no columns of U (no left singular vectors) are
16479 computed.
16480
16481 JOBVT (input) CHARACTER*1
16482 Specifies options for computing all or part of the matrix
16483 V**T:
16484 = 'A': all N rows of V**T are returned in the array VT;
16485 = 'S': the first min(m,n) rows of V**T (the right singular
16486 vectors) are returned in the array VT;
16487 = 'O': the first min(m,n) rows of V**T (the right singular
16488 vectors) are overwritten on the array A;
16489 = 'N': no rows of V**T (no right singular vectors) are
16490 computed.
16491
16492 JOBVT and JOBU cannot both be 'O'.
16493
16494 M (input) INTEGER
16495 The number of rows of the input matrix A. M >= 0.
16496
16497 N (input) INTEGER
16498 The number of columns of the input matrix A. N >= 0.
16499
16500 A (input/output) REAL array, dimension (LDA,N)
16501 On entry, the M-by-N matrix A.
16502 On exit,
16503 if JOBU = 'O', A is overwritten with the first min(m,n)
16504 columns of U (the left singular vectors,
16505 stored columnwise);
16506 if JOBVT = 'O', A is overwritten with the first min(m,n)
16507 rows of V**T (the right singular vectors,
16508 stored rowwise);
16509 if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
16510 are destroyed.
16511
16512 LDA (input) INTEGER
16513 The leading dimension of the array A. LDA >= max(1,M).
16514
16515 S (output) REAL array, dimension (min(M,N))
16516 The singular values of A, sorted so that S(i) >= S(i+1).
16517
16518 U (output) REAL array, dimension (LDU,UCOL)
16519 (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
16520 If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
16521 if JOBU = 'S', U contains the first min(m,n) columns of U
16522 (the left singular vectors, stored columnwise);
16523 if JOBU = 'N' or 'O', U is not referenced.
16524
16525 LDU (input) INTEGER
16526 The leading dimension of the array U. LDU >= 1; if
16527 JOBU = 'S' or 'A', LDU >= M.
16528
16529 VT (output) REAL array, dimension (LDVT,N)
16530 If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
16531 V**T;
16532 if JOBVT = 'S', VT contains the first min(m,n) rows of
16533 V**T (the right singular vectors, stored rowwise);
16534 if JOBVT = 'N' or 'O', VT is not referenced.
16535
16536 LDVT (input) INTEGER
16537 The leading dimension of the array VT. LDVT >= 1; if
16538 JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
16539
16540 WORK (workspace/output) REAL array, dimension (LWORK)
16541 On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
16542 if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
16543 superdiagonal elements of an upper bidiagonal matrix B
16544 whose diagonal is in S (not necessarily sorted). B
16545 satisfies A = U * B * VT, so it has the same singular values
16546 as A, and singular vectors related by U and VT.
16547
16548 LWORK (input) INTEGER
16549 The dimension of the array WORK. LWORK >= 1.
16550 LWORK >= MAX(3*MIN(M,N)+MAX(M,N),5*MIN(M,N)).
16551 For good performance, LWORK should generally be larger.
16552
16553 If LWORK = -1, then a workspace query is assumed; the routine
16554 only calculates the optimal size of the WORK array, returns
16555 this value as the first entry of the WORK array, and no error
16556 message related to LWORK is issued by XERBLA.
16557
16558 INFO (output) INTEGER
16559 = 0: successful exit.
16560 < 0: if INFO = -i, the i-th argument had an illegal value.
16561 > 0: if SBDSQR did not converge, INFO specifies how many
16562 superdiagonals of an intermediate bidiagonal form B
16563 did not converge to zero. See the description of WORK
16564 above for details.
16565
16566 =====================================================================
16567
16568
16569 Test the input arguments
16570
16571 Parameter adjustments */
16572 a_dim1 = *lda;
16573 a_offset = 1 + a_dim1 * 1;
16574 a -= a_offset;
16575 --s;
16576 u_dim1 = *ldu;
16577 u_offset = 1 + u_dim1 * 1;
16578 u -= u_offset;
16579 vt_dim1 = *ldvt;
16580 vt_offset = 1 + vt_dim1 * 1;
16581 vt -= vt_offset;
16582 --work;
16583
16584 /* Function Body */
16585 *info = 0;
16586 minmn = f2cmin(*m,*n);
16587 /* Writing concatenation */
16588 i__1[0] = 1, a__1[0] = jobu;
16589 i__1[1] = 1, a__1[1] = jobvt;
16590 s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
16591 mnthr = ilaenv_(&c__6, "SGESVD", ch__1, m, n, &c__0, &c__0, (ftnlen)6, (
16592 ftnlen)2);
16593 wntua = lsame_(jobu, "A");
16594 wntus = lsame_(jobu, "S");
16595 wntuas = wntua || wntus;
16596 wntuo = lsame_(jobu, "O");
16597 wntun = lsame_(jobu, "N");
16598 wntva = lsame_(jobvt, "A");
16599 wntvs = lsame_(jobvt, "S");
16600 wntvas = wntva || wntvs;
16601 wntvo = lsame_(jobvt, "O");
16602 wntvn = lsame_(jobvt, "N");
16603 minwrk = 1;
16604 lquery = *lwork == -1;
16605
16606 if (! (wntua || wntus || wntuo || wntun)) {
16607 *info = -1;
16608 } else if (! (wntva || wntvs || wntvo || wntvn) || wntvo && wntuo) {
16609 *info = -2;
16610 } else if (*m < 0) {
16611 *info = -3;
16612 } else if (*n < 0) {
16613 *info = -4;
16614 } else if (*lda < f2cmax(1,*m)) {
16615 *info = -6;
16616 } else if (*ldu < 1 || wntuas && *ldu < *m) {
16617 *info = -9;
16618 } else if (*ldvt < 1 || wntva && *ldvt < *n || wntvs && *ldvt < minmn) {
16619 *info = -11;
16620 }
16621
16622 /* Compute workspace
16623 (Note: Comments in the code beginning "Workspace:" describe the
16624 minimal amount of workspace needed at that point in the code,
16625 as well as the preferred amount for good performance.
16626 NB refers to the optimal block size for the immediately
16627 following subroutine, as returned by ILAENV.) */
16628
16629 if (*info == 0 && (*lwork >= 1 || lquery) && *m > 0 && *n > 0) {
16630 if (*m >= *n) {
16631
16632 /* Compute space needed for SBDSQR */
16633
16634 bdspac = *n * 5;
16635 if (*m >= mnthr) {
16636 if (wntun) {
16637
16638 /* Path 1 (M much larger than N, JOBU='N') */
16639
16640 maxwrk = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
16641 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
16642 /* Computing MAX */
16643 i__2 = maxwrk, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1,
16644 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
16645 ftnlen)1);
16646 maxwrk = f2cmax(i__2,i__3);
16647 if (wntvo || wntvas) {
16648 /* Computing MAX */
16649 i__2 = maxwrk, i__3 = *n * 3 + (*n - 1) * ilaenv_(&
16650 c__1, "SORGBR", "P", n, n, n, &c_n1, (ftnlen)
16651 6, (ftnlen)1);
16652 maxwrk = f2cmax(i__2,i__3);
16653 }
16654 maxwrk = f2cmax(maxwrk,bdspac);
16655 /* Computing MAX */
16656 i__2 = *n << 2;
16657 minwrk = f2cmax(i__2,bdspac);
16658 maxwrk = f2cmax(maxwrk,minwrk);
16659 } else if (wntuo && wntvn) {
16660
16661 /* Path 2 (M much larger than N, JOBU='O', JOBVT='N') */
16662
16663 wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
16664 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
16665 /* Computing MAX */
16666 i__2 = wrkbl, i__3 = *n + *n * ilaenv_(&c__1, "SORGQR",
16667 " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
16668 wrkbl = f2cmax(i__2,i__3);
16669 /* Computing MAX */
16670 i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1,
16671 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
16672 ftnlen)1);
16673 wrkbl = f2cmax(i__2,i__3);
16674 /* Computing MAX */
16675 i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR"
16676 , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
16677 wrkbl = f2cmax(i__2,i__3);
16678 wrkbl = f2cmax(wrkbl,bdspac);
16679 /* Computing MAX */
16680 i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n;
16681 maxwrk = f2cmax(i__2,i__3);
16682 /* Computing MAX */
16683 i__2 = *n * 3 + *m;
16684 minwrk = f2cmax(i__2,bdspac);
16685 maxwrk = f2cmax(maxwrk,minwrk);
16686 } else if (wntuo && wntvas) {
16687
16688 /* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
16689 'A') */
16690
16691 wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
16692 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
16693 /* Computing MAX */
16694 i__2 = wrkbl, i__3 = *n + *n * ilaenv_(&c__1, "SORGQR",
16695 " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
16696 wrkbl = f2cmax(i__2,i__3);
16697 /* Computing MAX */
16698 i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1,
16699 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
16700 ftnlen)1);
16701 wrkbl = f2cmax(i__2,i__3);
16702 /* Computing MAX */
16703 i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR"
16704 , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
16705 wrkbl = f2cmax(i__2,i__3);
16706 /* Computing MAX */
16707 i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1,
16708 "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
16709 1);
16710 wrkbl = f2cmax(i__2,i__3);
16711 wrkbl = f2cmax(wrkbl,bdspac);
16712 /* Computing MAX */
16713 i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n;
16714 maxwrk = f2cmax(i__2,i__3);
16715 /* Computing MAX */
16716 i__2 = *n * 3 + *m;
16717 minwrk = f2cmax(i__2,bdspac);
16718 maxwrk = f2cmax(maxwrk,minwrk);
16719 } else if (wntus && wntvn) {
16720
16721 /* Path 4 (M much larger than N, JOBU='S', JOBVT='N') */
16722
16723 wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
16724 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
16725 /* Computing MAX */
16726 i__2 = wrkbl, i__3 = *n + *n * ilaenv_(&c__1, "SORGQR",
16727 " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
16728 wrkbl = f2cmax(i__2,i__3);
16729 /* Computing MAX */
16730 i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1,
16731 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
16732 ftnlen)1);
16733 wrkbl = f2cmax(i__2,i__3);
16734 /* Computing MAX */
16735 i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR"
16736 , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
16737 wrkbl = f2cmax(i__2,i__3);
16738 wrkbl = f2cmax(wrkbl,bdspac);
16739 maxwrk = *n * *n + wrkbl;
16740 /* Computing MAX */
16741 i__2 = *n * 3 + *m;
16742 minwrk = f2cmax(i__2,bdspac);
16743 maxwrk = f2cmax(maxwrk,minwrk);
16744 } else if (wntus && wntvo) {
16745
16746 /* Path 5 (M much larger than N, JOBU='S', JOBVT='O') */
16747
16748 wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
16749 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
16750 /* Computing MAX */
16751 i__2 = wrkbl, i__3 = *n + *n * ilaenv_(&c__1, "SORGQR",
16752 " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
16753 wrkbl = f2cmax(i__2,i__3);
16754 /* Computing MAX */
16755 i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1,
16756 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
16757 ftnlen)1);
16758 wrkbl = f2cmax(i__2,i__3);
16759 /* Computing MAX */
16760 i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR"
16761 , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
16762 wrkbl = f2cmax(i__2,i__3);
16763 /* Computing MAX */
16764 i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1,
16765 "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
16766 1);
16767 wrkbl = f2cmax(i__2,i__3);
16768 wrkbl = f2cmax(wrkbl,bdspac);
16769 maxwrk = (*n << 1) * *n + wrkbl;
16770 /* Computing MAX */
16771 i__2 = *n * 3 + *m;
16772 minwrk = f2cmax(i__2,bdspac);
16773 maxwrk = f2cmax(maxwrk,minwrk);
16774 } else if (wntus && wntvas) {
16775
16776 /* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
16777 'A') */
16778
16779 wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
16780 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
16781 /* Computing MAX */
16782 i__2 = wrkbl, i__3 = *n + *n * ilaenv_(&c__1, "SORGQR",
16783 " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
16784 wrkbl = f2cmax(i__2,i__3);
16785 /* Computing MAX */
16786 i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1,
16787 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
16788 ftnlen)1);
16789 wrkbl = f2cmax(i__2,i__3);
16790 /* Computing MAX */
16791 i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR"
16792 , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
16793 wrkbl = f2cmax(i__2,i__3);
16794 /* Computing MAX */
16795 i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1,
16796 "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
16797 1);
16798 wrkbl = f2cmax(i__2,i__3);
16799 wrkbl = f2cmax(wrkbl,bdspac);
16800 maxwrk = *n * *n + wrkbl;
16801 /* Computing MAX */
16802 i__2 = *n * 3 + *m;
16803 minwrk = f2cmax(i__2,bdspac);
16804 maxwrk = f2cmax(maxwrk,minwrk);
16805 } else if (wntua && wntvn) {
16806
16807 /* Path 7 (M much larger than N, JOBU='A', JOBVT='N') */
16808
16809 wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
16810 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
16811 /* Computing MAX */
16812 i__2 = wrkbl, i__3 = *n + *m * ilaenv_(&c__1, "SORGQR",
16813 " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1);
16814 wrkbl = f2cmax(i__2,i__3);
16815 /* Computing MAX */
16816 i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1,
16817 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
16818 ftnlen)1);
16819 wrkbl = f2cmax(i__2,i__3);
16820 /* Computing MAX */
16821 i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR"
16822 , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
16823 wrkbl = f2cmax(i__2,i__3);
16824 wrkbl = f2cmax(wrkbl,bdspac);
16825 maxwrk = *n * *n + wrkbl;
16826 /* Computing MAX */
16827 i__2 = *n * 3 + *m;
16828 minwrk = f2cmax(i__2,bdspac);
16829 maxwrk = f2cmax(maxwrk,minwrk);
16830 } else if (wntua && wntvo) {
16831
16832 /* Path 8 (M much larger than N, JOBU='A', JOBVT='O') */
16833
16834 wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
16835 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
16836 /* Computing MAX */
16837 i__2 = wrkbl, i__3 = *n + *m * ilaenv_(&c__1, "SORGQR",
16838 " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1);
16839 wrkbl = f2cmax(i__2,i__3);
16840 /* Computing MAX */
16841 i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1,
16842 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
16843 ftnlen)1);
16844 wrkbl = f2cmax(i__2,i__3);
16845 /* Computing MAX */
16846 i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR"
16847 , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
16848 wrkbl = f2cmax(i__2,i__3);
16849 /* Computing MAX */
16850 i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1,
16851 "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
16852 1);
16853 wrkbl = f2cmax(i__2,i__3);
16854 wrkbl = f2cmax(wrkbl,bdspac);
16855 maxwrk = (*n << 1) * *n + wrkbl;
16856 /* Computing MAX */
16857 i__2 = *n * 3 + *m;
16858 minwrk = f2cmax(i__2,bdspac);
16859 maxwrk = f2cmax(maxwrk,minwrk);
16860 } else if (wntua && wntvas) {
16861
16862 /* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
16863 'A') */
16864
16865 wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
16866 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
16867 /* Computing MAX */
16868 i__2 = wrkbl, i__3 = *n + *m * ilaenv_(&c__1, "SORGQR",
16869 " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1);
16870 wrkbl = f2cmax(i__2,i__3);
16871 /* Computing MAX */
16872 i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * ilaenv_(&c__1,
16873 "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
16874 ftnlen)1);
16875 wrkbl = f2cmax(i__2,i__3);
16876 /* Computing MAX */
16877 i__2 = wrkbl, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORGBR"
16878 , "Q", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
16879 wrkbl = f2cmax(i__2,i__3);
16880 /* Computing MAX */
16881 i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1,
16882 "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
16883 1);
16884 wrkbl = f2cmax(i__2,i__3);
16885 wrkbl = f2cmax(wrkbl,bdspac);
16886 maxwrk = *n * *n + wrkbl;
16887 /* Computing MAX */
16888 i__2 = *n * 3 + *m;
16889 minwrk = f2cmax(i__2,bdspac);
16890 maxwrk = f2cmax(maxwrk,minwrk);
16891 }
16892 } else {
16893
16894 /* Path 10 (M at least N, but not much larger) */
16895
16896 maxwrk = *n * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m,
16897 n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
16898 if (wntus || wntuo) {
16899 /* Computing MAX */
16900 i__2 = maxwrk, i__3 = *n * 3 + *n * ilaenv_(&c__1, "SORG"
16901 "BR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
16902 maxwrk = f2cmax(i__2,i__3);
16903 }
16904 if (wntua) {
16905 /* Computing MAX */
16906 i__2 = maxwrk, i__3 = *n * 3 + *m * ilaenv_(&c__1, "SORG"
16907 "BR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1);
16908 maxwrk = f2cmax(i__2,i__3);
16909 }
16910 if (! wntvn) {
16911 /* Computing MAX */
16912 i__2 = maxwrk, i__3 = *n * 3 + (*n - 1) * ilaenv_(&c__1,
16913 "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
16914 1);
16915 maxwrk = f2cmax(i__2,i__3);
16916 }
16917 maxwrk = f2cmax(maxwrk,bdspac);
16918 /* Computing MAX */
16919 i__2 = *n * 3 + *m;
16920 minwrk = f2cmax(i__2,bdspac);
16921 maxwrk = f2cmax(maxwrk,minwrk);
16922 }
16923 } else {
16924
16925 /* Compute space needed for SBDSQR */
16926
16927 bdspac = *m * 5;
16928 if (*n >= mnthr) {
16929 if (wntvn) {
16930
16931 /* Path 1t(N much larger than M, JOBVT='N') */
16932
16933 maxwrk = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
16934 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
16935 /* Computing MAX */
16936 i__2 = maxwrk, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1,
16937 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
16938 ftnlen)1);
16939 maxwrk = f2cmax(i__2,i__3);
16940 if (wntuo || wntuas) {
16941 /* Computing MAX */
16942 i__2 = maxwrk, i__3 = *m * 3 + *m * ilaenv_(&c__1,
16943 "SORGBR", "Q", m, m, m, &c_n1, (ftnlen)6, (
16944 ftnlen)1);
16945 maxwrk = f2cmax(i__2,i__3);
16946 }
16947 maxwrk = f2cmax(maxwrk,bdspac);
16948 /* Computing MAX */
16949 i__2 = *m << 2;
16950 minwrk = f2cmax(i__2,bdspac);
16951 maxwrk = f2cmax(maxwrk,minwrk);
16952 } else if (wntvo && wntun) {
16953
16954 /* Path 2t(N much larger than M, JOBU='N', JOBVT='O') */
16955
16956 wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
16957 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
16958 /* Computing MAX */
16959 i__2 = wrkbl, i__3 = *m + *m * ilaenv_(&c__1, "SORGLQ",
16960 " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
16961 wrkbl = f2cmax(i__2,i__3);
16962 /* Computing MAX */
16963 i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1,
16964 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
16965 ftnlen)1);
16966 wrkbl = f2cmax(i__2,i__3);
16967 /* Computing MAX */
16968 i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1,
16969 "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
16970 1);
16971 wrkbl = f2cmax(i__2,i__3);
16972 wrkbl = f2cmax(wrkbl,bdspac);
16973 /* Computing MAX */
16974 i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m;
16975 maxwrk = f2cmax(i__2,i__3);
16976 /* Computing MAX */
16977 i__2 = *m * 3 + *n;
16978 minwrk = f2cmax(i__2,bdspac);
16979 maxwrk = f2cmax(maxwrk,minwrk);
16980 } else if (wntvo && wntuas) {
16981
16982 /* Path 3t(N much larger than M, JOBU='S' or 'A',
16983 JOBVT='O') */
16984
16985 wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
16986 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
16987 /* Computing MAX */
16988 i__2 = wrkbl, i__3 = *m + *m * ilaenv_(&c__1, "SORGLQ",
16989 " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
16990 wrkbl = f2cmax(i__2,i__3);
16991 /* Computing MAX */
16992 i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1,
16993 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
16994 ftnlen)1);
16995 wrkbl = f2cmax(i__2,i__3);
16996 /* Computing MAX */
16997 i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1,
16998 "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
16999 1);
17000 wrkbl = f2cmax(i__2,i__3);
17001 /* Computing MAX */
17002 i__2 = wrkbl, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORGBR"
17003 , "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen)1);
17004 wrkbl = f2cmax(i__2,i__3);
17005 wrkbl = f2cmax(wrkbl,bdspac);
17006 /* Computing MAX */
17007 i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m;
17008 maxwrk = f2cmax(i__2,i__3);
17009 /* Computing MAX */
17010 i__2 = *m * 3 + *n;
17011 minwrk = f2cmax(i__2,bdspac);
17012 maxwrk = f2cmax(maxwrk,minwrk);
17013 } else if (wntvs && wntun) {
17014
17015 /* Path 4t(N much larger than M, JOBU='N', JOBVT='S') */
17016
17017 wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
17018 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
17019 /* Computing MAX */
17020 i__2 = wrkbl, i__3 = *m + *m * ilaenv_(&c__1, "SORGLQ",
17021 " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
17022 wrkbl = f2cmax(i__2,i__3);
17023 /* Computing MAX */
17024 i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1,
17025 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
17026 ftnlen)1);
17027 wrkbl = f2cmax(i__2,i__3);
17028 /* Computing MAX */
17029 i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1,
17030 "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
17031 1);
17032 wrkbl = f2cmax(i__2,i__3);
17033 wrkbl = f2cmax(wrkbl,bdspac);
17034 maxwrk = *m * *m + wrkbl;
17035 /* Computing MAX */
17036 i__2 = *m * 3 + *n;
17037 minwrk = f2cmax(i__2,bdspac);
17038 maxwrk = f2cmax(maxwrk,minwrk);
17039 } else if (wntvs && wntuo) {
17040
17041 /* Path 5t(N much larger than M, JOBU='O', JOBVT='S') */
17042
17043 wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
17044 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
17045 /* Computing MAX */
17046 i__2 = wrkbl, i__3 = *m + *m * ilaenv_(&c__1, "SORGLQ",
17047 " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
17048 wrkbl = f2cmax(i__2,i__3);
17049 /* Computing MAX */
17050 i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1,
17051 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
17052 ftnlen)1);
17053 wrkbl = f2cmax(i__2,i__3);
17054 /* Computing MAX */
17055 i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1,
17056 "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
17057 1);
17058 wrkbl = f2cmax(i__2,i__3);
17059 /* Computing MAX */
17060 i__2 = wrkbl, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORGBR"
17061 , "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen)1);
17062 wrkbl = f2cmax(i__2,i__3);
17063 wrkbl = f2cmax(wrkbl,bdspac);
17064 maxwrk = (*m << 1) * *m + wrkbl;
17065 /* Computing MAX */
17066 i__2 = *m * 3 + *n;
17067 minwrk = f2cmax(i__2,bdspac);
17068 maxwrk = f2cmax(maxwrk,minwrk);
17069 } else if (wntvs && wntuas) {
17070
17071 /* Path 6t(N much larger than M, JOBU='S' or 'A',
17072 JOBVT='S') */
17073
17074 wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
17075 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
17076 /* Computing MAX */
17077 i__2 = wrkbl, i__3 = *m + *m * ilaenv_(&c__1, "SORGLQ",
17078 " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
17079 wrkbl = f2cmax(i__2,i__3);
17080 /* Computing MAX */
17081 i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1,
17082 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
17083 ftnlen)1);
17084 wrkbl = f2cmax(i__2,i__3);
17085 /* Computing MAX */
17086 i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1,
17087 "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
17088 1);
17089 wrkbl = f2cmax(i__2,i__3);
17090 /* Computing MAX */
17091 i__2 = wrkbl, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORGBR"
17092 , "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen)1);
17093 wrkbl = f2cmax(i__2,i__3);
17094 wrkbl = f2cmax(wrkbl,bdspac);
17095 maxwrk = *m * *m + wrkbl;
17096 /* Computing MAX */
17097 i__2 = *m * 3 + *n;
17098 minwrk = f2cmax(i__2,bdspac);
17099 maxwrk = f2cmax(maxwrk,minwrk);
17100 } else if (wntva && wntun) {
17101
17102 /* Path 7t(N much larger than M, JOBU='N', JOBVT='A') */
17103
17104 wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
17105 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
17106 /* Computing MAX */
17107 i__2 = wrkbl, i__3 = *m + *n * ilaenv_(&c__1, "SORGLQ",
17108 " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
17109 wrkbl = f2cmax(i__2,i__3);
17110 /* Computing MAX */
17111 i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1,
17112 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
17113 ftnlen)1);
17114 wrkbl = f2cmax(i__2,i__3);
17115 /* Computing MAX */
17116 i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1,
17117 "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
17118 1);
17119 wrkbl = f2cmax(i__2,i__3);
17120 wrkbl = f2cmax(wrkbl,bdspac);
17121 maxwrk = *m * *m + wrkbl;
17122 /* Computing MAX */
17123 i__2 = *m * 3 + *n;
17124 minwrk = f2cmax(i__2,bdspac);
17125 maxwrk = f2cmax(maxwrk,minwrk);
17126 } else if (wntva && wntuo) {
17127
17128 /* Path 8t(N much larger than M, JOBU='O', JOBVT='A') */
17129
17130 wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
17131 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
17132 /* Computing MAX */
17133 i__2 = wrkbl, i__3 = *m + *n * ilaenv_(&c__1, "SORGLQ",
17134 " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
17135 wrkbl = f2cmax(i__2,i__3);
17136 /* Computing MAX */
17137 i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1,
17138 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
17139 ftnlen)1);
17140 wrkbl = f2cmax(i__2,i__3);
17141 /* Computing MAX */
17142 i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1,
17143 "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
17144 1);
17145 wrkbl = f2cmax(i__2,i__3);
17146 /* Computing MAX */
17147 i__2 = wrkbl, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORGBR"
17148 , "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen)1);
17149 wrkbl = f2cmax(i__2,i__3);
17150 wrkbl = f2cmax(wrkbl,bdspac);
17151 maxwrk = (*m << 1) * *m + wrkbl;
17152 /* Computing MAX */
17153 i__2 = *m * 3 + *n;
17154 minwrk = f2cmax(i__2,bdspac);
17155 maxwrk = f2cmax(maxwrk,minwrk);
17156 } else if (wntva && wntuas) {
17157
17158 /* Path 9t(N much larger than M, JOBU='S' or 'A',
17159 JOBVT='A') */
17160
17161 wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
17162 c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
17163 /* Computing MAX */
17164 i__2 = wrkbl, i__3 = *m + *n * ilaenv_(&c__1, "SORGLQ",
17165 " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
17166 wrkbl = f2cmax(i__2,i__3);
17167 /* Computing MAX */
17168 i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * ilaenv_(&c__1,
17169 "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, (
17170 ftnlen)1);
17171 wrkbl = f2cmax(i__2,i__3);
17172 /* Computing MAX */
17173 i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1,
17174 "SORGBR", "P", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
17175 1);
17176 wrkbl = f2cmax(i__2,i__3);
17177 /* Computing MAX */
17178 i__2 = wrkbl, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORGBR"
17179 , "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen)1);
17180 wrkbl = f2cmax(i__2,i__3);
17181 wrkbl = f2cmax(wrkbl,bdspac);
17182 maxwrk = *m * *m + wrkbl;
17183 /* Computing MAX */
17184 i__2 = *m * 3 + *n;
17185 minwrk = f2cmax(i__2,bdspac);
17186 maxwrk = f2cmax(maxwrk,minwrk);
17187 }
17188 } else {
17189
17190 /* Path 10t(N greater than M, but not much larger) */
17191
17192 maxwrk = *m * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m,
17193 n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
17194 if (wntvs || wntvo) {
17195 /* Computing MAX */
17196 i__2 = maxwrk, i__3 = *m * 3 + *m * ilaenv_(&c__1, "SORG"
17197 "BR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
17198 maxwrk = f2cmax(i__2,i__3);
17199 }
17200 if (wntva) {
17201 /* Computing MAX */
17202 i__2 = maxwrk, i__3 = *m * 3 + *n * ilaenv_(&c__1, "SORG"
17203 "BR", "P", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
17204 maxwrk = f2cmax(i__2,i__3);
17205 }
17206 if (! wntun) {
17207 /* Computing MAX */
17208 i__2 = maxwrk, i__3 = *m * 3 + (*m - 1) * ilaenv_(&c__1,
17209 "SORGBR", "Q", m, m, m, &c_n1, (ftnlen)6, (ftnlen)
17210 1);
17211 maxwrk = f2cmax(i__2,i__3);
17212 }
17213 maxwrk = f2cmax(maxwrk,bdspac);
17214 /* Computing MAX */
17215 i__2 = *m * 3 + *n;
17216 minwrk = f2cmax(i__2,bdspac);
17217 maxwrk = f2cmax(maxwrk,minwrk);
17218 }
17219 }
17220 work[1] = (real) maxwrk;
17221 }
17222
17223 if (*lwork < minwrk && ! lquery) {
17224 *info = -13;
17225 }
17226 if (*info != 0) {
17227 i__2 = -(*info);
17228 xerbla_("SGESVD", &i__2);
17229 return 0;
17230 } else if (lquery) {
17231 return 0;
17232 }
17233
17234 /* Quick return if possible */
17235
17236 if (*m == 0 || *n == 0) {
17237 if (*lwork >= 1) {
17238 work[1] = 1.f;
17239 }
17240 return 0;
17241 }
17242
17243 /* Get machine constants */
17244
17245 eps = slamch_("P");
17246 smlnum = sqrt(slamch_("S")) / eps;
17247 bignum = 1.f / smlnum;
17248
17249 /* Scale A if max element outside range [SMLNUM,BIGNUM] */
17250
17251 anrm = slange_("M", m, n, &a[a_offset], lda, dum);
17252 iscl = 0;
17253 if (anrm > 0.f && anrm < smlnum) {
17254 iscl = 1;
17255 slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, &
17256 ierr);
17257 } else if (anrm > bignum) {
17258 iscl = 1;
17259 slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, &
17260 ierr);
17261 }
17262
17263 if (*m >= *n) {
17264
17265 /* A has at least as many rows as columns. If A has sufficiently
17266 more rows than columns, first reduce using the QR
17267 decomposition (if sufficient workspace available) */
17268
17269 if (*m >= mnthr) {
17270
17271 if (wntun) {
17272
17273 /* Path 1 (M much larger than N, JOBU='N')
17274 No left singular vectors to be computed */
17275
17276 itau = 1;
17277 iwork = itau + *n;
17278
17279 /* Compute A=Q*R
17280 (Workspace: need 2*N, prefer N+N*NB) */
17281
17282 i__2 = *lwork - iwork + 1;
17283 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &
17284 i__2, &ierr);
17285
17286 /* Zero out below R */
17287
17288 i__2 = *n - 1;
17289 i__3 = *n - 1;
17290 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &a_ref(2, 1),
17291 lda);
17292 ie = 1;
17293 itauq = ie + *n;
17294 itaup = itauq + *n;
17295 iwork = itaup + *n;
17296
17297 /* Bidiagonalize R in A
17298 (Workspace: need 4*N, prefer 3*N+2*N*NB) */
17299
17300 i__2 = *lwork - iwork + 1;
17301 sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[
17302 itauq], &work[itaup], &work[iwork], &i__2, &ierr);
17303 ncvt = 0;
17304 if (wntvo || wntvas) {
17305
17306 /* If right singular vectors desired, generate P'.
17307 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
17308
17309 i__2 = *lwork - iwork + 1;
17310 sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &
17311 work[iwork], &i__2, &ierr);
17312 ncvt = *n;
17313 }
17314 iwork = ie + *n;
17315
17316 /* Perform bidiagonal QR iteration, computing right
17317 singular vectors of A in A if desired
17318 (Workspace: need BDSPAC) */
17319
17320 sbdsqr_("U", n, &ncvt, &c__0, &c__0, &s[1], &work[ie], &a[
17321 a_offset], lda, dum, &c__1, dum, &c__1, &work[iwork],
17322 info);
17323
17324 /* If right singular vectors desired in VT, copy them there */
17325
17326 if (wntvas) {
17327 slacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset],
17328 ldvt);
17329 }
17330
17331 } else if (wntuo && wntvn) {
17332
17333 /* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
17334 N left singular vectors to be overwritten on A and
17335 no right singular vectors to be computed
17336
17337 Computing MAX */
17338 i__2 = *n << 2;
17339 if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
17340
17341 /* Sufficient workspace for a fast algorithm */
17342
17343 ir = 1;
17344 /* Computing MAX */
17345 i__2 = wrkbl, i__3 = *lda * *n + *n;
17346 if (*lwork >= f2cmax(i__2,i__3) + *lda * *n) {
17347
17348 /* WORK(IU) is LDA by N, WORK(IR) is LDA by N */
17349
17350 ldwrku = *lda;
17351 ldwrkr = *lda;
17352 } else /* if(complicated condition) */ {
17353 /* Computing MAX */
17354 i__2 = wrkbl, i__3 = *lda * *n + *n;
17355 if (*lwork >= f2cmax(i__2,i__3) + *n * *n) {
17356
17357 /* WORK(IU) is LDA by N, WORK(IR) is N by N */
17358
17359 ldwrku = *lda;
17360 ldwrkr = *n;
17361 } else {
17362
17363 /* WORK(IU) is LDWRKU by N, WORK(IR) is N by N */
17364
17365 ldwrku = (*lwork - *n * *n - *n) / *n;
17366 ldwrkr = *n;
17367 }
17368 }
17369 itau = ir + ldwrkr * *n;
17370 iwork = itau + *n;
17371
17372 /* Compute A=Q*R
17373 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
17374
17375 i__2 = *lwork - iwork + 1;
17376 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
17377 , &i__2, &ierr);
17378
17379 /* Copy R to WORK(IR) and zero out below it */
17380
17381 slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
17382 i__2 = *n - 1;
17383 i__3 = *n - 1;
17384 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[ir + 1]
17385 , &ldwrkr);
17386
17387 /* Generate Q in A
17388 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
17389
17390 i__2 = *lwork - iwork + 1;
17391 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
17392 iwork], &i__2, &ierr);
17393 ie = itau;
17394 itauq = ie + *n;
17395 itaup = itauq + *n;
17396 iwork = itaup + *n;
17397
17398 /* Bidiagonalize R in WORK(IR)
17399 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
17400
17401 i__2 = *lwork - iwork + 1;
17402 sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
17403 itauq], &work[itaup], &work[iwork], &i__2, &ierr);
17404
17405 /* Generate left vectors bidiagonalizing R
17406 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
17407
17408 i__2 = *lwork - iwork + 1;
17409 sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], &
17410 work[iwork], &i__2, &ierr);
17411 iwork = ie + *n;
17412
17413 /* Perform bidiagonal QR iteration, computing left
17414 singular vectors of R in WORK(IR)
17415 (Workspace: need N*N+BDSPAC) */
17416
17417 sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], dum, &
17418 c__1, &work[ir], &ldwrkr, dum, &c__1, &work[iwork]
17419 , info);
17420 iu = ie + *n;
17421
17422 /* Multiply Q in A by left singular vectors of R in
17423 WORK(IR), storing result in WORK(IU) and copying to A
17424 (Workspace: need N*N+2*N, prefer N*N+M*N+N) */
17425
17426 i__2 = *m;
17427 i__3 = ldwrku;
17428 for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
17429 i__3) {
17430 /* Computing MIN */
17431 i__4 = *m - i__ + 1;
17432 chunk = f2cmin(i__4,ldwrku);
17433 sgemm_("N", "N", &chunk, n, n, &c_b438, &a_ref(i__, 1)
17434 , lda, &work[ir], &ldwrkr, &c_b416, &work[iu],
17435 &ldwrku);
17436 slacpy_("F", &chunk, n, &work[iu], &ldwrku, &a_ref(
17437 i__, 1), lda);
17438 /* L10: */
17439 }
17440
17441 } else {
17442
17443 /* Insufficient workspace for a fast algorithm */
17444
17445 ie = 1;
17446 itauq = ie + *n;
17447 itaup = itauq + *n;
17448 iwork = itaup + *n;
17449
17450 /* Bidiagonalize A
17451 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */
17452
17453 i__3 = *lwork - iwork + 1;
17454 sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[
17455 itauq], &work[itaup], &work[iwork], &i__3, &ierr);
17456
17457 /* Generate left vectors bidiagonalizing A
17458 (Workspace: need 4*N, prefer 3*N+N*NB) */
17459
17460 i__3 = *lwork - iwork + 1;
17461 sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &
17462 work[iwork], &i__3, &ierr);
17463 iwork = ie + *n;
17464
17465 /* Perform bidiagonal QR iteration, computing left
17466 singular vectors of A in A
17467 (Workspace: need BDSPAC) */
17468
17469 sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], dum, &
17470 c__1, &a[a_offset], lda, dum, &c__1, &work[iwork],
17471 info);
17472
17473 }
17474
17475 } else if (wntuo && wntvas) {
17476
17477 /* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
17478 N left singular vectors to be overwritten on A and
17479 N right singular vectors to be computed in VT
17480
17481 Computing MAX */
17482 i__3 = *n << 2;
17483 if (*lwork >= *n * *n + f2cmax(i__3,bdspac)) {
17484
17485 /* Sufficient workspace for a fast algorithm */
17486
17487 ir = 1;
17488 /* Computing MAX */
17489 i__3 = wrkbl, i__2 = *lda * *n + *n;
17490 if (*lwork >= f2cmax(i__3,i__2) + *lda * *n) {
17491
17492 /* WORK(IU) is LDA by N and WORK(IR) is LDA by N */
17493
17494 ldwrku = *lda;
17495 ldwrkr = *lda;
17496 } else /* if(complicated condition) */ {
17497 /* Computing MAX */
17498 i__3 = wrkbl, i__2 = *lda * *n + *n;
17499 if (*lwork >= f2cmax(i__3,i__2) + *n * *n) {
17500
17501 /* WORK(IU) is LDA by N and WORK(IR) is N by N */
17502
17503 ldwrku = *lda;
17504 ldwrkr = *n;
17505 } else {
17506
17507 /* WORK(IU) is LDWRKU by N and WORK(IR) is N by N */
17508
17509 ldwrku = (*lwork - *n * *n - *n) / *n;
17510 ldwrkr = *n;
17511 }
17512 }
17513 itau = ir + ldwrkr * *n;
17514 iwork = itau + *n;
17515
17516 /* Compute A=Q*R
17517 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
17518
17519 i__3 = *lwork - iwork + 1;
17520 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
17521 , &i__3, &ierr);
17522
17523 /* Copy R to VT, zeroing out below it */
17524
17525 slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset],
17526 ldvt);
17527 i__3 = *n - 1;
17528 i__2 = *n - 1;
17529 slaset_("L", &i__3, &i__2, &c_b416, &c_b416, &vt_ref(2, 1)
17530 , ldvt);
17531
17532 /* Generate Q in A
17533 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
17534
17535 i__3 = *lwork - iwork + 1;
17536 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
17537 iwork], &i__3, &ierr);
17538 ie = itau;
17539 itauq = ie + *n;
17540 itaup = itauq + *n;
17541 iwork = itaup + *n;
17542
17543 /* Bidiagonalize R in VT, copying result to WORK(IR)
17544 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
17545
17546 i__3 = *lwork - iwork + 1;
17547 sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], &
17548 work[itauq], &work[itaup], &work[iwork], &i__3, &
17549 ierr);
17550 slacpy_("L", n, n, &vt[vt_offset], ldvt, &work[ir], &
17551 ldwrkr);
17552
17553 /* Generate left vectors bidiagonalizing R in WORK(IR)
17554 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
17555
17556 i__3 = *lwork - iwork + 1;
17557 sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], &
17558 work[iwork], &i__3, &ierr);
17559
17560 /* Generate right vectors bidiagonalizing R in VT
17561 (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB) */
17562
17563 i__3 = *lwork - iwork + 1;
17564 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup],
17565 &work[iwork], &i__3, &ierr);
17566 iwork = ie + *n;
17567
17568 /* Perform bidiagonal QR iteration, computing left
17569 singular vectors of R in WORK(IR) and computing right
17570 singular vectors of R in VT
17571 (Workspace: need N*N+BDSPAC) */
17572
17573 sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
17574 vt_offset], ldvt, &work[ir], &ldwrkr, dum, &c__1,
17575 &work[iwork], info);
17576 iu = ie + *n;
17577
17578 /* Multiply Q in A by left singular vectors of R in
17579 WORK(IR), storing result in WORK(IU) and copying to A
17580 (Workspace: need N*N+2*N, prefer N*N+M*N+N) */
17581
17582 i__3 = *m;
17583 i__2 = ldwrku;
17584 for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ +=
17585 i__2) {
17586 /* Computing MIN */
17587 i__4 = *m - i__ + 1;
17588 chunk = f2cmin(i__4,ldwrku);
17589 sgemm_("N", "N", &chunk, n, n, &c_b438, &a_ref(i__, 1)
17590 , lda, &work[ir], &ldwrkr, &c_b416, &work[iu],
17591 &ldwrku);
17592 slacpy_("F", &chunk, n, &work[iu], &ldwrku, &a_ref(
17593 i__, 1), lda);
17594 /* L20: */
17595 }
17596
17597 } else {
17598
17599 /* Insufficient workspace for a fast algorithm */
17600
17601 itau = 1;
17602 iwork = itau + *n;
17603
17604 /* Compute A=Q*R
17605 (Workspace: need 2*N, prefer N+N*NB) */
17606
17607 i__2 = *lwork - iwork + 1;
17608 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
17609 , &i__2, &ierr);
17610
17611 /* Copy R to VT, zeroing out below it */
17612
17613 slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset],
17614 ldvt);
17615 i__2 = *n - 1;
17616 i__3 = *n - 1;
17617 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &vt_ref(2, 1)
17618 , ldvt);
17619
17620 /* Generate Q in A
17621 (Workspace: need 2*N, prefer N+N*NB) */
17622
17623 i__2 = *lwork - iwork + 1;
17624 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
17625 iwork], &i__2, &ierr);
17626 ie = itau;
17627 itauq = ie + *n;
17628 itaup = itauq + *n;
17629 iwork = itaup + *n;
17630
17631 /* Bidiagonalize R in VT
17632 (Workspace: need 4*N, prefer 3*N+2*N*NB) */
17633
17634 i__2 = *lwork - iwork + 1;
17635 sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], &
17636 work[itauq], &work[itaup], &work[iwork], &i__2, &
17637 ierr);
17638
17639 /* Multiply Q in A by left vectors bidiagonalizing R
17640 (Workspace: need 3*N+M, prefer 3*N+M*NB) */
17641
17642 i__2 = *lwork - iwork + 1;
17643 sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, &
17644 work[itauq], &a[a_offset], lda, &work[iwork], &
17645 i__2, &ierr);
17646
17647 /* Generate right vectors bidiagonalizing R in VT
17648 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
17649
17650 i__2 = *lwork - iwork + 1;
17651 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup],
17652 &work[iwork], &i__2, &ierr);
17653 iwork = ie + *n;
17654
17655 /* Perform bidiagonal QR iteration, computing left
17656 singular vectors of A in A and computing right
17657 singular vectors of A in VT
17658 (Workspace: need BDSPAC) */
17659
17660 sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
17661 vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
17662 work[iwork], info);
17663
17664 }
17665
17666 } else if (wntus) {
17667
17668 if (wntvn) {
17669
17670 /* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
17671 N left singular vectors to be computed in U and
17672 no right singular vectors to be computed
17673
17674 Computing MAX */
17675 i__2 = *n << 2;
17676 if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
17677
17678 /* Sufficient workspace for a fast algorithm */
17679
17680 ir = 1;
17681 if (*lwork >= wrkbl + *lda * *n) {
17682
17683 /* WORK(IR) is LDA by N */
17684
17685 ldwrkr = *lda;
17686 } else {
17687
17688 /* WORK(IR) is N by N */
17689
17690 ldwrkr = *n;
17691 }
17692 itau = ir + ldwrkr * *n;
17693 iwork = itau + *n;
17694
17695 /* Compute A=Q*R
17696 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
17697
17698 i__2 = *lwork - iwork + 1;
17699 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
17700 iwork], &i__2, &ierr);
17701
17702 /* Copy R to WORK(IR), zeroing out below it */
17703
17704 slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &
17705 ldwrkr);
17706 i__2 = *n - 1;
17707 i__3 = *n - 1;
17708 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[ir
17709 + 1], &ldwrkr);
17710
17711 /* Generate Q in A
17712 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
17713
17714 i__2 = *lwork - iwork + 1;
17715 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
17716 work[iwork], &i__2, &ierr);
17717 ie = itau;
17718 itauq = ie + *n;
17719 itaup = itauq + *n;
17720 iwork = itaup + *n;
17721
17722 /* Bidiagonalize R in WORK(IR)
17723 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
17724
17725 i__2 = *lwork - iwork + 1;
17726 sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &
17727 work[itauq], &work[itaup], &work[iwork], &
17728 i__2, &ierr);
17729
17730 /* Generate left vectors bidiagonalizing R in WORK(IR)
17731 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
17732
17733 i__2 = *lwork - iwork + 1;
17734 sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq]
17735 , &work[iwork], &i__2, &ierr);
17736 iwork = ie + *n;
17737
17738 /* Perform bidiagonal QR iteration, computing left
17739 singular vectors of R in WORK(IR)
17740 (Workspace: need N*N+BDSPAC) */
17741
17742 sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie],
17743 dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, &
17744 work[iwork], info);
17745
17746 /* Multiply Q in A by left singular vectors of R in
17747 WORK(IR), storing result in U
17748 (Workspace: need N*N) */
17749
17750 sgemm_("N", "N", m, n, n, &c_b438, &a[a_offset], lda,
17751 &work[ir], &ldwrkr, &c_b416, &u[u_offset],
17752 ldu);
17753
17754 } else {
17755
17756 /* Insufficient workspace for a fast algorithm */
17757
17758 itau = 1;
17759 iwork = itau + *n;
17760
17761 /* Compute A=Q*R, copying result to U
17762 (Workspace: need 2*N, prefer N+N*NB) */
17763
17764 i__2 = *lwork - iwork + 1;
17765 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
17766 iwork], &i__2, &ierr);
17767 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
17768 ldu);
17769
17770 /* Generate Q in U
17771 (Workspace: need 2*N, prefer N+N*NB) */
17772
17773 i__2 = *lwork - iwork + 1;
17774 sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
17775 work[iwork], &i__2, &ierr);
17776 ie = itau;
17777 itauq = ie + *n;
17778 itaup = itauq + *n;
17779 iwork = itaup + *n;
17780
17781 /* Zero out below R in A */
17782
17783 i__2 = *n - 1;
17784 i__3 = *n - 1;
17785 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &a_ref(2,
17786 1), lda);
17787
17788 /* Bidiagonalize R in A
17789 (Workspace: need 4*N, prefer 3*N+2*N*NB) */
17790
17791 i__2 = *lwork - iwork + 1;
17792 sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
17793 work[itauq], &work[itaup], &work[iwork], &
17794 i__2, &ierr);
17795
17796 /* Multiply Q in U by left vectors bidiagonalizing R
17797 (Workspace: need 3*N+M, prefer 3*N+M*NB) */
17798
17799 i__2 = *lwork - iwork + 1;
17800 sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
17801 work[itauq], &u[u_offset], ldu, &work[iwork],
17802 &i__2, &ierr)
17803 ;
17804 iwork = ie + *n;
17805
17806 /* Perform bidiagonal QR iteration, computing left
17807 singular vectors of A in U
17808 (Workspace: need BDSPAC) */
17809
17810 sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie],
17811 dum, &c__1, &u[u_offset], ldu, dum, &c__1, &
17812 work[iwork], info);
17813
17814 }
17815
17816 } else if (wntvo) {
17817
17818 /* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
17819 N left singular vectors to be computed in U and
17820 N right singular vectors to be overwritten on A
17821
17822 Computing MAX */
17823 i__2 = *n << 2;
17824 if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) {
17825
17826 /* Sufficient workspace for a fast algorithm */
17827
17828 iu = 1;
17829 if (*lwork >= wrkbl + (*lda << 1) * *n) {
17830
17831 /* WORK(IU) is LDA by N and WORK(IR) is LDA by N */
17832
17833 ldwrku = *lda;
17834 ir = iu + ldwrku * *n;
17835 ldwrkr = *lda;
17836 } else if (*lwork >= wrkbl + (*lda + *n) * *n) {
17837
17838 /* WORK(IU) is LDA by N and WORK(IR) is N by N */
17839
17840 ldwrku = *lda;
17841 ir = iu + ldwrku * *n;
17842 ldwrkr = *n;
17843 } else {
17844
17845 /* WORK(IU) is N by N and WORK(IR) is N by N */
17846
17847 ldwrku = *n;
17848 ir = iu + ldwrku * *n;
17849 ldwrkr = *n;
17850 }
17851 itau = ir + ldwrkr * *n;
17852 iwork = itau + *n;
17853
17854 /* Compute A=Q*R
17855 (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */
17856
17857 i__2 = *lwork - iwork + 1;
17858 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
17859 iwork], &i__2, &ierr);
17860
17861 /* Copy R to WORK(IU), zeroing out below it */
17862
17863 slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
17864 ldwrku);
17865 i__2 = *n - 1;
17866 i__3 = *n - 1;
17867 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[iu
17868 + 1], &ldwrku);
17869
17870 /* Generate Q in A
17871 (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */
17872
17873 i__2 = *lwork - iwork + 1;
17874 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
17875 work[iwork], &i__2, &ierr);
17876 ie = itau;
17877 itauq = ie + *n;
17878 itaup = itauq + *n;
17879 iwork = itaup + *n;
17880
17881 /* Bidiagonalize R in WORK(IU), copying result to
17882 WORK(IR)
17883 (Workspace: need 2*N*N+4*N,
17884 prefer 2*N*N+3*N+2*N*NB) */
17885
17886 i__2 = *lwork - iwork + 1;
17887 sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
17888 work[itauq], &work[itaup], &work[iwork], &
17889 i__2, &ierr);
17890 slacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], &
17891 ldwrkr);
17892
17893 /* Generate left bidiagonalizing vectors in WORK(IU)
17894 (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */
17895
17896 i__2 = *lwork - iwork + 1;
17897 sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
17898 , &work[iwork], &i__2, &ierr);
17899
17900 /* Generate right bidiagonalizing vectors in WORK(IR)
17901 (Workspace: need 2*N*N+4*N-1,
17902 prefer 2*N*N+3*N+(N-1)*NB) */
17903
17904 i__2 = *lwork - iwork + 1;
17905 sorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup]
17906 , &work[iwork], &i__2, &ierr);
17907 iwork = ie + *n;
17908
17909 /* Perform bidiagonal QR iteration, computing left
17910 singular vectors of R in WORK(IU) and computing
17911 right singular vectors of R in WORK(IR)
17912 (Workspace: need 2*N*N+BDSPAC) */
17913
17914 sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[
17915 ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1,
17916 &work[iwork], info);
17917
17918 /* Multiply Q in A by left singular vectors of R in
17919 WORK(IU), storing result in U
17920 (Workspace: need N*N) */
17921
17922 sgemm_("N", "N", m, n, n, &c_b438, &a[a_offset], lda,
17923 &work[iu], &ldwrku, &c_b416, &u[u_offset],
17924 ldu);
17925
17926 /* Copy right singular vectors of R to A
17927 (Workspace: need N*N) */
17928
17929 slacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset],
17930 lda);
17931
17932 } else {
17933
17934 /* Insufficient workspace for a fast algorithm */
17935
17936 itau = 1;
17937 iwork = itau + *n;
17938
17939 /* Compute A=Q*R, copying result to U
17940 (Workspace: need 2*N, prefer N+N*NB) */
17941
17942 i__2 = *lwork - iwork + 1;
17943 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
17944 iwork], &i__2, &ierr);
17945 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
17946 ldu);
17947
17948 /* Generate Q in U
17949 (Workspace: need 2*N, prefer N+N*NB) */
17950
17951 i__2 = *lwork - iwork + 1;
17952 sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
17953 work[iwork], &i__2, &ierr);
17954 ie = itau;
17955 itauq = ie + *n;
17956 itaup = itauq + *n;
17957 iwork = itaup + *n;
17958
17959 /* Zero out below R in A */
17960
17961 i__2 = *n - 1;
17962 i__3 = *n - 1;
17963 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &a_ref(2,
17964 1), lda);
17965
17966 /* Bidiagonalize R in A
17967 (Workspace: need 4*N, prefer 3*N+2*N*NB) */
17968
17969 i__2 = *lwork - iwork + 1;
17970 sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
17971 work[itauq], &work[itaup], &work[iwork], &
17972 i__2, &ierr);
17973
17974 /* Multiply Q in U by left vectors bidiagonalizing R
17975 (Workspace: need 3*N+M, prefer 3*N+M*NB) */
17976
17977 i__2 = *lwork - iwork + 1;
17978 sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
17979 work[itauq], &u[u_offset], ldu, &work[iwork],
17980 &i__2, &ierr)
17981 ;
17982
17983 /* Generate right vectors bidiagonalizing R in A
17984 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
17985
17986 i__2 = *lwork - iwork + 1;
17987 sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup],
17988 &work[iwork], &i__2, &ierr);
17989 iwork = ie + *n;
17990
17991 /* Perform bidiagonal QR iteration, computing left
17992 singular vectors of A in U and computing right
17993 singular vectors of A in A
17994 (Workspace: need BDSPAC) */
17995
17996 sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[
17997 a_offset], lda, &u[u_offset], ldu, dum, &c__1,
17998 &work[iwork], info);
17999
18000 }
18001
18002 } else if (wntvas) {
18003
18004 /* Path 6 (M much larger than N, JOBU='S', JOBVT='S'
18005 or 'A')
18006 N left singular vectors to be computed in U and
18007 N right singular vectors to be computed in VT
18008
18009 Computing MAX */
18010 i__2 = *n << 2;
18011 if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
18012
18013 /* Sufficient workspace for a fast algorithm */
18014
18015 iu = 1;
18016 if (*lwork >= wrkbl + *lda * *n) {
18017
18018 /* WORK(IU) is LDA by N */
18019
18020 ldwrku = *lda;
18021 } else {
18022
18023 /* WORK(IU) is N by N */
18024
18025 ldwrku = *n;
18026 }
18027 itau = iu + ldwrku * *n;
18028 iwork = itau + *n;
18029
18030 /* Compute A=Q*R
18031 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
18032
18033 i__2 = *lwork - iwork + 1;
18034 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
18035 iwork], &i__2, &ierr);
18036
18037 /* Copy R to WORK(IU), zeroing out below it */
18038
18039 slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
18040 ldwrku);
18041 i__2 = *n - 1;
18042 i__3 = *n - 1;
18043 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[iu
18044 + 1], &ldwrku);
18045
18046 /* Generate Q in A
18047 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
18048
18049 i__2 = *lwork - iwork + 1;
18050 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
18051 work[iwork], &i__2, &ierr);
18052 ie = itau;
18053 itauq = ie + *n;
18054 itaup = itauq + *n;
18055 iwork = itaup + *n;
18056
18057 /* Bidiagonalize R in WORK(IU), copying result to VT
18058 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
18059
18060 i__2 = *lwork - iwork + 1;
18061 sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
18062 work[itauq], &work[itaup], &work[iwork], &
18063 i__2, &ierr);
18064 slacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset],
18065 ldvt);
18066
18067 /* Generate left bidiagonalizing vectors in WORK(IU)
18068 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
18069
18070 i__2 = *lwork - iwork + 1;
18071 sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
18072 , &work[iwork], &i__2, &ierr);
18073
18074 /* Generate right bidiagonalizing vectors in VT
18075 (Workspace: need N*N+4*N-1,
18076 prefer N*N+3*N+(N-1)*NB) */
18077
18078 i__2 = *lwork - iwork + 1;
18079 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
18080 itaup], &work[iwork], &i__2, &ierr)
18081 ;
18082 iwork = ie + *n;
18083
18084 /* Perform bidiagonal QR iteration, computing left
18085 singular vectors of R in WORK(IU) and computing
18086 right singular vectors of R in VT
18087 (Workspace: need N*N+BDSPAC) */
18088
18089 sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
18090 vt_offset], ldvt, &work[iu], &ldwrku, dum, &
18091 c__1, &work[iwork], info);
18092
18093 /* Multiply Q in A by left singular vectors of R in
18094 WORK(IU), storing result in U
18095 (Workspace: need N*N) */
18096
18097 sgemm_("N", "N", m, n, n, &c_b438, &a[a_offset], lda,
18098 &work[iu], &ldwrku, &c_b416, &u[u_offset],
18099 ldu);
18100
18101 } else {
18102
18103 /* Insufficient workspace for a fast algorithm */
18104
18105 itau = 1;
18106 iwork = itau + *n;
18107
18108 /* Compute A=Q*R, copying result to U
18109 (Workspace: need 2*N, prefer N+N*NB) */
18110
18111 i__2 = *lwork - iwork + 1;
18112 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
18113 iwork], &i__2, &ierr);
18114 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
18115 ldu);
18116
18117 /* Generate Q in U
18118 (Workspace: need 2*N, prefer N+N*NB) */
18119
18120 i__2 = *lwork - iwork + 1;
18121 sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
18122 work[iwork], &i__2, &ierr);
18123
18124 /* Copy R to VT, zeroing out below it */
18125
18126 slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset],
18127 ldvt);
18128 i__2 = *n - 1;
18129 i__3 = *n - 1;
18130 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &vt_ref(
18131 2, 1), ldvt);
18132 ie = itau;
18133 itauq = ie + *n;
18134 itaup = itauq + *n;
18135 iwork = itaup + *n;
18136
18137 /* Bidiagonalize R in VT
18138 (Workspace: need 4*N, prefer 3*N+2*N*NB) */
18139
18140 i__2 = *lwork - iwork + 1;
18141 sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie],
18142 &work[itauq], &work[itaup], &work[iwork], &
18143 i__2, &ierr);
18144
18145 /* Multiply Q in U by left bidiagonalizing vectors
18146 in VT
18147 (Workspace: need 3*N+M, prefer 3*N+M*NB) */
18148
18149 i__2 = *lwork - iwork + 1;
18150 sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt,
18151 &work[itauq], &u[u_offset], ldu, &work[iwork],
18152 &i__2, &ierr);
18153
18154 /* Generate right bidiagonalizing vectors in VT
18155 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
18156
18157 i__2 = *lwork - iwork + 1;
18158 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
18159 itaup], &work[iwork], &i__2, &ierr)
18160 ;
18161 iwork = ie + *n;
18162
18163 /* Perform bidiagonal QR iteration, computing left
18164 singular vectors of A in U and computing right
18165 singular vectors of A in VT
18166 (Workspace: need BDSPAC) */
18167
18168 sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
18169 vt_offset], ldvt, &u[u_offset], ldu, dum, &
18170 c__1, &work[iwork], info);
18171
18172 }
18173
18174 }
18175
18176 } else if (wntua) {
18177
18178 if (wntvn) {
18179
18180 /* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
18181 M left singular vectors to be computed in U and
18182 no right singular vectors to be computed
18183
18184 Computing MAX */
18185 i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
18186 if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
18187
18188 /* Sufficient workspace for a fast algorithm */
18189
18190 ir = 1;
18191 if (*lwork >= wrkbl + *lda * *n) {
18192
18193 /* WORK(IR) is LDA by N */
18194
18195 ldwrkr = *lda;
18196 } else {
18197
18198 /* WORK(IR) is N by N */
18199
18200 ldwrkr = *n;
18201 }
18202 itau = ir + ldwrkr * *n;
18203 iwork = itau + *n;
18204
18205 /* Compute A=Q*R, copying result to U
18206 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
18207
18208 i__2 = *lwork - iwork + 1;
18209 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
18210 iwork], &i__2, &ierr);
18211 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
18212 ldu);
18213
18214 /* Copy R to WORK(IR), zeroing out below it */
18215
18216 slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &
18217 ldwrkr);
18218 i__2 = *n - 1;
18219 i__3 = *n - 1;
18220 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[ir
18221 + 1], &ldwrkr);
18222
18223 /* Generate Q in U
18224 (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */
18225
18226 i__2 = *lwork - iwork + 1;
18227 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
18228 work[iwork], &i__2, &ierr);
18229 ie = itau;
18230 itauq = ie + *n;
18231 itaup = itauq + *n;
18232 iwork = itaup + *n;
18233
18234 /* Bidiagonalize R in WORK(IR)
18235 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
18236
18237 i__2 = *lwork - iwork + 1;
18238 sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &
18239 work[itauq], &work[itaup], &work[iwork], &
18240 i__2, &ierr);
18241
18242 /* Generate left bidiagonalizing vectors in WORK(IR)
18243 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
18244
18245 i__2 = *lwork - iwork + 1;
18246 sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq]
18247 , &work[iwork], &i__2, &ierr);
18248 iwork = ie + *n;
18249
18250 /* Perform bidiagonal QR iteration, computing left
18251 singular vectors of R in WORK(IR)
18252 (Workspace: need N*N+BDSPAC) */
18253
18254 sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie],
18255 dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, &
18256 work[iwork], info);
18257
18258 /* Multiply Q in U by left singular vectors of R in
18259 WORK(IR), storing result in A
18260 (Workspace: need N*N) */
18261
18262 sgemm_("N", "N", m, n, n, &c_b438, &u[u_offset], ldu,
18263 &work[ir], &ldwrkr, &c_b416, &a[a_offset],
18264 lda);
18265
18266 /* Copy left singular vectors of A from A to U */
18267
18268 slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset],
18269 ldu);
18270
18271 } else {
18272
18273 /* Insufficient workspace for a fast algorithm */
18274
18275 itau = 1;
18276 iwork = itau + *n;
18277
18278 /* Compute A=Q*R, copying result to U
18279 (Workspace: need 2*N, prefer N+N*NB) */
18280
18281 i__2 = *lwork - iwork + 1;
18282 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
18283 iwork], &i__2, &ierr);
18284 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
18285 ldu);
18286
18287 /* Generate Q in U
18288 (Workspace: need N+M, prefer N+M*NB) */
18289
18290 i__2 = *lwork - iwork + 1;
18291 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
18292 work[iwork], &i__2, &ierr);
18293 ie = itau;
18294 itauq = ie + *n;
18295 itaup = itauq + *n;
18296 iwork = itaup + *n;
18297
18298 /* Zero out below R in A */
18299
18300 i__2 = *n - 1;
18301 i__3 = *n - 1;
18302 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &a_ref(2,
18303 1), lda);
18304
18305 /* Bidiagonalize R in A
18306 (Workspace: need 4*N, prefer 3*N+2*N*NB) */
18307
18308 i__2 = *lwork - iwork + 1;
18309 sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
18310 work[itauq], &work[itaup], &work[iwork], &
18311 i__2, &ierr);
18312
18313 /* Multiply Q in U by left bidiagonalizing vectors
18314 in A
18315 (Workspace: need 3*N+M, prefer 3*N+M*NB) */
18316
18317 i__2 = *lwork - iwork + 1;
18318 sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
18319 work[itauq], &u[u_offset], ldu, &work[iwork],
18320 &i__2, &ierr)
18321 ;
18322 iwork = ie + *n;
18323
18324 /* Perform bidiagonal QR iteration, computing left
18325 singular vectors of A in U
18326 (Workspace: need BDSPAC) */
18327
18328 sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie],
18329 dum, &c__1, &u[u_offset], ldu, dum, &c__1, &
18330 work[iwork], info);
18331
18332 }
18333
18334 } else if (wntvo) {
18335
18336 /* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
18337 M left singular vectors to be computed in U and
18338 N right singular vectors to be overwritten on A
18339
18340 Computing MAX */
18341 i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
18342 if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) {
18343
18344 /* Sufficient workspace for a fast algorithm */
18345
18346 iu = 1;
18347 if (*lwork >= wrkbl + (*lda << 1) * *n) {
18348
18349 /* WORK(IU) is LDA by N and WORK(IR) is LDA by N */
18350
18351 ldwrku = *lda;
18352 ir = iu + ldwrku * *n;
18353 ldwrkr = *lda;
18354 } else if (*lwork >= wrkbl + (*lda + *n) * *n) {
18355
18356 /* WORK(IU) is LDA by N and WORK(IR) is N by N */
18357
18358 ldwrku = *lda;
18359 ir = iu + ldwrku * *n;
18360 ldwrkr = *n;
18361 } else {
18362
18363 /* WORK(IU) is N by N and WORK(IR) is N by N */
18364
18365 ldwrku = *n;
18366 ir = iu + ldwrku * *n;
18367 ldwrkr = *n;
18368 }
18369 itau = ir + ldwrkr * *n;
18370 iwork = itau + *n;
18371
18372 /* Compute A=Q*R, copying result to U
18373 (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */
18374
18375 i__2 = *lwork - iwork + 1;
18376 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
18377 iwork], &i__2, &ierr);
18378 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
18379 ldu);
18380
18381 /* Generate Q in U
18382 (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) */
18383
18384 i__2 = *lwork - iwork + 1;
18385 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
18386 work[iwork], &i__2, &ierr);
18387
18388 /* Copy R to WORK(IU), zeroing out below it */
18389
18390 slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
18391 ldwrku);
18392 i__2 = *n - 1;
18393 i__3 = *n - 1;
18394 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[iu
18395 + 1], &ldwrku);
18396 ie = itau;
18397 itauq = ie + *n;
18398 itaup = itauq + *n;
18399 iwork = itaup + *n;
18400
18401 /* Bidiagonalize R in WORK(IU), copying result to
18402 WORK(IR)
18403 (Workspace: need 2*N*N+4*N,
18404 prefer 2*N*N+3*N+2*N*NB) */
18405
18406 i__2 = *lwork - iwork + 1;
18407 sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
18408 work[itauq], &work[itaup], &work[iwork], &
18409 i__2, &ierr);
18410 slacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], &
18411 ldwrkr);
18412
18413 /* Generate left bidiagonalizing vectors in WORK(IU)
18414 (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */
18415
18416 i__2 = *lwork - iwork + 1;
18417 sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
18418 , &work[iwork], &i__2, &ierr);
18419
18420 /* Generate right bidiagonalizing vectors in WORK(IR)
18421 (Workspace: need 2*N*N+4*N-1,
18422 prefer 2*N*N+3*N+(N-1)*NB) */
18423
18424 i__2 = *lwork - iwork + 1;
18425 sorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup]
18426 , &work[iwork], &i__2, &ierr);
18427 iwork = ie + *n;
18428
18429 /* Perform bidiagonal QR iteration, computing left
18430 singular vectors of R in WORK(IU) and computing
18431 right singular vectors of R in WORK(IR)
18432 (Workspace: need 2*N*N+BDSPAC) */
18433
18434 sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[
18435 ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1,
18436 &work[iwork], info);
18437
18438 /* Multiply Q in U by left singular vectors of R in
18439 WORK(IU), storing result in A
18440 (Workspace: need N*N) */
18441
18442 sgemm_("N", "N", m, n, n, &c_b438, &u[u_offset], ldu,
18443 &work[iu], &ldwrku, &c_b416, &a[a_offset],
18444 lda);
18445
18446 /* Copy left singular vectors of A from A to U */
18447
18448 slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset],
18449 ldu);
18450
18451 /* Copy right singular vectors of R from WORK(IR) to A */
18452
18453 slacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset],
18454 lda);
18455
18456 } else {
18457
18458 /* Insufficient workspace for a fast algorithm */
18459
18460 itau = 1;
18461 iwork = itau + *n;
18462
18463 /* Compute A=Q*R, copying result to U
18464 (Workspace: need 2*N, prefer N+N*NB) */
18465
18466 i__2 = *lwork - iwork + 1;
18467 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
18468 iwork], &i__2, &ierr);
18469 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
18470 ldu);
18471
18472 /* Generate Q in U
18473 (Workspace: need N+M, prefer N+M*NB) */
18474
18475 i__2 = *lwork - iwork + 1;
18476 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
18477 work[iwork], &i__2, &ierr);
18478 ie = itau;
18479 itauq = ie + *n;
18480 itaup = itauq + *n;
18481 iwork = itaup + *n;
18482
18483 /* Zero out below R in A */
18484
18485 i__2 = *n - 1;
18486 i__3 = *n - 1;
18487 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &a_ref(2,
18488 1), lda);
18489
18490 /* Bidiagonalize R in A
18491 (Workspace: need 4*N, prefer 3*N+2*N*NB) */
18492
18493 i__2 = *lwork - iwork + 1;
18494 sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
18495 work[itauq], &work[itaup], &work[iwork], &
18496 i__2, &ierr);
18497
18498 /* Multiply Q in U by left bidiagonalizing vectors
18499 in A
18500 (Workspace: need 3*N+M, prefer 3*N+M*NB) */
18501
18502 i__2 = *lwork - iwork + 1;
18503 sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
18504 work[itauq], &u[u_offset], ldu, &work[iwork],
18505 &i__2, &ierr)
18506 ;
18507
18508 /* Generate right bidiagonalizing vectors in A
18509 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
18510
18511 i__2 = *lwork - iwork + 1;
18512 sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup],
18513 &work[iwork], &i__2, &ierr);
18514 iwork = ie + *n;
18515
18516 /* Perform bidiagonal QR iteration, computing left
18517 singular vectors of A in U and computing right
18518 singular vectors of A in A
18519 (Workspace: need BDSPAC) */
18520
18521 sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[
18522 a_offset], lda, &u[u_offset], ldu, dum, &c__1,
18523 &work[iwork], info);
18524
18525 }
18526
18527 } else if (wntvas) {
18528
18529 /* Path 9 (M much larger than N, JOBU='A', JOBVT='S'
18530 or 'A')
18531 M left singular vectors to be computed in U and
18532 N right singular vectors to be computed in VT
18533
18534 Computing MAX */
18535 i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
18536 if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
18537
18538 /* Sufficient workspace for a fast algorithm */
18539
18540 iu = 1;
18541 if (*lwork >= wrkbl + *lda * *n) {
18542
18543 /* WORK(IU) is LDA by N */
18544
18545 ldwrku = *lda;
18546 } else {
18547
18548 /* WORK(IU) is N by N */
18549
18550 ldwrku = *n;
18551 }
18552 itau = iu + ldwrku * *n;
18553 iwork = itau + *n;
18554
18555 /* Compute A=Q*R, copying result to U
18556 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
18557
18558 i__2 = *lwork - iwork + 1;
18559 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
18560 iwork], &i__2, &ierr);
18561 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
18562 ldu);
18563
18564 /* Generate Q in U
18565 (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */
18566
18567 i__2 = *lwork - iwork + 1;
18568 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
18569 work[iwork], &i__2, &ierr);
18570
18571 /* Copy R to WORK(IU), zeroing out below it */
18572
18573 slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
18574 ldwrku);
18575 i__2 = *n - 1;
18576 i__3 = *n - 1;
18577 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &work[iu
18578 + 1], &ldwrku);
18579 ie = itau;
18580 itauq = ie + *n;
18581 itaup = itauq + *n;
18582 iwork = itaup + *n;
18583
18584 /* Bidiagonalize R in WORK(IU), copying result to VT
18585 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
18586
18587 i__2 = *lwork - iwork + 1;
18588 sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
18589 work[itauq], &work[itaup], &work[iwork], &
18590 i__2, &ierr);
18591 slacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset],
18592 ldvt);
18593
18594 /* Generate left bidiagonalizing vectors in WORK(IU)
18595 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
18596
18597 i__2 = *lwork - iwork + 1;
18598 sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
18599 , &work[iwork], &i__2, &ierr);
18600
18601 /* Generate right bidiagonalizing vectors in VT
18602 (Workspace: need N*N+4*N-1,
18603 prefer N*N+3*N+(N-1)*NB) */
18604
18605 i__2 = *lwork - iwork + 1;
18606 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
18607 itaup], &work[iwork], &i__2, &ierr)
18608 ;
18609 iwork = ie + *n;
18610
18611 /* Perform bidiagonal QR iteration, computing left
18612 singular vectors of R in WORK(IU) and computing
18613 right singular vectors of R in VT
18614 (Workspace: need N*N+BDSPAC) */
18615
18616 sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
18617 vt_offset], ldvt, &work[iu], &ldwrku, dum, &
18618 c__1, &work[iwork], info);
18619
18620 /* Multiply Q in U by left singular vectors of R in
18621 WORK(IU), storing result in A
18622 (Workspace: need N*N) */
18623
18624 sgemm_("N", "N", m, n, n, &c_b438, &u[u_offset], ldu,
18625 &work[iu], &ldwrku, &c_b416, &a[a_offset],
18626 lda);
18627
18628 /* Copy left singular vectors of A from A to U */
18629
18630 slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset],
18631 ldu);
18632
18633 } else {
18634
18635 /* Insufficient workspace for a fast algorithm */
18636
18637 itau = 1;
18638 iwork = itau + *n;
18639
18640 /* Compute A=Q*R, copying result to U
18641 (Workspace: need 2*N, prefer N+N*NB) */
18642
18643 i__2 = *lwork - iwork + 1;
18644 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
18645 iwork], &i__2, &ierr);
18646 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
18647 ldu);
18648
18649 /* Generate Q in U
18650 (Workspace: need N+M, prefer N+M*NB) */
18651
18652 i__2 = *lwork - iwork + 1;
18653 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
18654 work[iwork], &i__2, &ierr);
18655
18656 /* Copy R from A to VT, zeroing out below it */
18657
18658 slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset],
18659 ldvt);
18660 i__2 = *n - 1;
18661 i__3 = *n - 1;
18662 slaset_("L", &i__2, &i__3, &c_b416, &c_b416, &vt_ref(
18663 2, 1), ldvt);
18664 ie = itau;
18665 itauq = ie + *n;
18666 itaup = itauq + *n;
18667 iwork = itaup + *n;
18668
18669 /* Bidiagonalize R in VT
18670 (Workspace: need 4*N, prefer 3*N+2*N*NB) */
18671
18672 i__2 = *lwork - iwork + 1;
18673 sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie],
18674 &work[itauq], &work[itaup], &work[iwork], &
18675 i__2, &ierr);
18676
18677 /* Multiply Q in U by left bidiagonalizing vectors
18678 in VT
18679 (Workspace: need 3*N+M, prefer 3*N+M*NB) */
18680
18681 i__2 = *lwork - iwork + 1;
18682 sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt,
18683 &work[itauq], &u[u_offset], ldu, &work[iwork],
18684 &i__2, &ierr);
18685
18686 /* Generate right bidiagonalizing vectors in VT
18687 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
18688
18689 i__2 = *lwork - iwork + 1;
18690 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
18691 itaup], &work[iwork], &i__2, &ierr)
18692 ;
18693 iwork = ie + *n;
18694
18695 /* Perform bidiagonal QR iteration, computing left
18696 singular vectors of A in U and computing right
18697 singular vectors of A in VT
18698 (Workspace: need BDSPAC) */
18699
18700 sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
18701 vt_offset], ldvt, &u[u_offset], ldu, dum, &
18702 c__1, &work[iwork], info);
18703
18704 }
18705
18706 }
18707
18708 }
18709
18710 } else {
18711
18712 /* M .LT. MNTHR
18713
18714 Path 10 (M at least N, but not much larger)
18715 Reduce to bidiagonal form without QR decomposition */
18716
18717 ie = 1;
18718 itauq = ie + *n;
18719 itaup = itauq + *n;
18720 iwork = itaup + *n;
18721
18722 /* Bidiagonalize A
18723 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */
18724
18725 i__2 = *lwork - iwork + 1;
18726 sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
18727 work[itaup], &work[iwork], &i__2, &ierr);
18728 if (wntuas) {
18729
18730 /* If left singular vectors desired in U, copy result to U
18731 and generate left bidiagonalizing vectors in U
18732 (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB) */
18733
18734 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
18735 if (wntus) {
18736 ncu = *n;
18737 }
18738 if (wntua) {
18739 ncu = *m;
18740 }
18741 i__2 = *lwork - iwork + 1;
18742 sorgbr_("Q", m, &ncu, n, &u[u_offset], ldu, &work[itauq], &
18743 work[iwork], &i__2, &ierr);
18744 }
18745 if (wntvas) {
18746
18747 /* If right singular vectors desired in VT, copy result to
18748 VT and generate right bidiagonalizing vectors in VT
18749 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
18750
18751 slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
18752 i__2 = *lwork - iwork + 1;
18753 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], &
18754 work[iwork], &i__2, &ierr);
18755 }
18756 if (wntuo) {
18757
18758 /* If left singular vectors desired in A, generate left
18759 bidiagonalizing vectors in A
18760 (Workspace: need 4*N, prefer 3*N+N*NB) */
18761
18762 i__2 = *lwork - iwork + 1;
18763 sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[
18764 iwork], &i__2, &ierr);
18765 }
18766 if (wntvo) {
18767
18768 /* If right singular vectors desired in A, generate right
18769 bidiagonalizing vectors in A
18770 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
18771
18772 i__2 = *lwork - iwork + 1;
18773 sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[
18774 iwork], &i__2, &ierr);
18775 }
18776 iwork = ie + *n;
18777 if (wntuas || wntuo) {
18778 nru = *m;
18779 }
18780 if (wntun) {
18781 nru = 0;
18782 }
18783 if (wntvas || wntvo) {
18784 ncvt = *n;
18785 }
18786 if (wntvn) {
18787 ncvt = 0;
18788 }
18789 if (! wntuo && ! wntvo) {
18790
18791 /* Perform bidiagonal QR iteration, if desired, computing
18792 left singular vectors in U and computing right singular
18793 vectors in VT
18794 (Workspace: need BDSPAC) */
18795
18796 sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
18797 vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, &
18798 work[iwork], info);
18799 } else if (! wntuo && wntvo) {
18800
18801 /* Perform bidiagonal QR iteration, if desired, computing
18802 left singular vectors in U and computing right singular
18803 vectors in A
18804 (Workspace: need BDSPAC) */
18805
18806 sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[
18807 a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[
18808 iwork], info);
18809 } else {
18810
18811 /* Perform bidiagonal QR iteration, if desired, computing
18812 left singular vectors in A and computing right singular
18813 vectors in VT
18814 (Workspace: need BDSPAC) */
18815
18816 sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
18817 vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
18818 work[iwork], info);
18819 }
18820
18821 }
18822
18823 } else {
18824
18825 /* A has more columns than rows. If A has sufficiently more
18826 columns than rows, first reduce using the LQ decomposition (if
18827 sufficient workspace available) */
18828
18829 if (*n >= mnthr) {
18830
18831 if (wntvn) {
18832
18833 /* Path 1t(N much larger than M, JOBVT='N')
18834 No right singular vectors to be computed */
18835
18836 itau = 1;
18837 iwork = itau + *m;
18838
18839 /* Compute A=L*Q
18840 (Workspace: need 2*M, prefer M+M*NB) */
18841
18842 i__2 = *lwork - iwork + 1;
18843 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &
18844 i__2, &ierr);
18845
18846 /* Zero out above L */
18847
18848 i__2 = *m - 1;
18849 i__3 = *m - 1;
18850 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &a_ref(1, 2),
18851 lda);
18852 ie = 1;
18853 itauq = ie + *m;
18854 itaup = itauq + *m;
18855 iwork = itaup + *m;
18856
18857 /* Bidiagonalize L in A
18858 (Workspace: need 4*M, prefer 3*M+2*M*NB) */
18859
18860 i__2 = *lwork - iwork + 1;
18861 sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[
18862 itauq], &work[itaup], &work[iwork], &i__2, &ierr);
18863 if (wntuo || wntuas) {
18864
18865 /* If left singular vectors desired, generate Q
18866 (Workspace: need 4*M, prefer 3*M+M*NB) */
18867
18868 i__2 = *lwork - iwork + 1;
18869 sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], &
18870 work[iwork], &i__2, &ierr);
18871 }
18872 iwork = ie + *m;
18873 nru = 0;
18874 if (wntuo || wntuas) {
18875 nru = *m;
18876 }
18877
18878 /* Perform bidiagonal QR iteration, computing left singular
18879 vectors of A in A if desired
18880 (Workspace: need BDSPAC) */
18881
18882 sbdsqr_("U", m, &c__0, &nru, &c__0, &s[1], &work[ie], dum, &
18883 c__1, &a[a_offset], lda, dum, &c__1, &work[iwork],
18884 info);
18885
18886 /* If left singular vectors desired in U, copy them there */
18887
18888 if (wntuas) {
18889 slacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu);
18890 }
18891
18892 } else if (wntvo && wntun) {
18893
18894 /* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
18895 M right singular vectors to be overwritten on A and
18896 no left singular vectors to be computed
18897
18898 Computing MAX */
18899 i__2 = *m << 2;
18900 if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
18901
18902 /* Sufficient workspace for a fast algorithm */
18903
18904 ir = 1;
18905 /* Computing MAX */
18906 i__2 = wrkbl, i__3 = *lda * *n + *m;
18907 if (*lwork >= f2cmax(i__2,i__3) + *lda * *m) {
18908
18909 /* WORK(IU) is LDA by N and WORK(IR) is LDA by M */
18910
18911 ldwrku = *lda;
18912 chunk = *n;
18913 ldwrkr = *lda;
18914 } else /* if(complicated condition) */ {
18915 /* Computing MAX */
18916 i__2 = wrkbl, i__3 = *lda * *n + *m;
18917 if (*lwork >= f2cmax(i__2,i__3) + *m * *m) {
18918
18919 /* WORK(IU) is LDA by N and WORK(IR) is M by M */
18920
18921 ldwrku = *lda;
18922 chunk = *n;
18923 ldwrkr = *m;
18924 } else {
18925
18926 /* WORK(IU) is M by CHUNK and WORK(IR) is M by M */
18927
18928 ldwrku = *m;
18929 chunk = (*lwork - *m * *m - *m) / *m;
18930 ldwrkr = *m;
18931 }
18932 }
18933 itau = ir + ldwrkr * *m;
18934 iwork = itau + *m;
18935
18936 /* Compute A=L*Q
18937 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
18938
18939 i__2 = *lwork - iwork + 1;
18940 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
18941 , &i__2, &ierr);
18942
18943 /* Copy L to WORK(IR) and zero out above it */
18944
18945 slacpy_("L", m, m, &a[a_offset], lda, &work[ir], &ldwrkr);
18946 i__2 = *m - 1;
18947 i__3 = *m - 1;
18948 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[ir +
18949 ldwrkr], &ldwrkr);
18950
18951 /* Generate Q in A
18952 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
18953
18954 i__2 = *lwork - iwork + 1;
18955 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
18956 iwork], &i__2, &ierr);
18957 ie = itau;
18958 itauq = ie + *m;
18959 itaup = itauq + *m;
18960 iwork = itaup + *m;
18961
18962 /* Bidiagonalize L in WORK(IR)
18963 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
18964
18965 i__2 = *lwork - iwork + 1;
18966 sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
18967 itauq], &work[itaup], &work[iwork], &i__2, &ierr);
18968
18969 /* Generate right vectors bidiagonalizing L
18970 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */
18971
18972 i__2 = *lwork - iwork + 1;
18973 sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], &
18974 work[iwork], &i__2, &ierr);
18975 iwork = ie + *m;
18976
18977 /* Perform bidiagonal QR iteration, computing right
18978 singular vectors of L in WORK(IR)
18979 (Workspace: need M*M+BDSPAC) */
18980
18981 sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &work[
18982 ir], &ldwrkr, dum, &c__1, dum, &c__1, &work[iwork]
18983 , info);
18984 iu = ie + *m;
18985
18986 /* Multiply right singular vectors of L in WORK(IR) by Q
18987 in A, storing result in WORK(IU) and copying to A
18988 (Workspace: need M*M+2*M, prefer M*M+M*N+M) */
18989
18990 i__2 = *n;
18991 i__3 = chunk;
18992 for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
18993 i__3) {
18994 /* Computing MIN */
18995 i__4 = *n - i__ + 1;
18996 blk = f2cmin(i__4,chunk);
18997 sgemm_("N", "N", m, &blk, m, &c_b438, &work[ir], &
18998 ldwrkr, &a_ref(1, i__), lda, &c_b416, &work[
18999 iu], &ldwrku);
19000 slacpy_("F", m, &blk, &work[iu], &ldwrku, &a_ref(1,
19001 i__), lda);
19002 /* L30: */
19003 }
19004
19005 } else {
19006
19007 /* Insufficient workspace for a fast algorithm */
19008
19009 ie = 1;
19010 itauq = ie + *m;
19011 itaup = itauq + *m;
19012 iwork = itaup + *m;
19013
19014 /* Bidiagonalize A
19015 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */
19016
19017 i__3 = *lwork - iwork + 1;
19018 sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[
19019 itauq], &work[itaup], &work[iwork], &i__3, &ierr);
19020
19021 /* Generate right vectors bidiagonalizing A
19022 (Workspace: need 4*M, prefer 3*M+M*NB) */
19023
19024 i__3 = *lwork - iwork + 1;
19025 sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &
19026 work[iwork], &i__3, &ierr);
19027 iwork = ie + *m;
19028
19029 /* Perform bidiagonal QR iteration, computing right
19030 singular vectors of A in A
19031 (Workspace: need BDSPAC) */
19032
19033 sbdsqr_("L", m, n, &c__0, &c__0, &s[1], &work[ie], &a[
19034 a_offset], lda, dum, &c__1, dum, &c__1, &work[
19035 iwork], info);
19036
19037 }
19038
19039 } else if (wntvo && wntuas) {
19040
19041 /* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
19042 M right singular vectors to be overwritten on A and
19043 M left singular vectors to be computed in U
19044
19045 Computing MAX */
19046 i__3 = *m << 2;
19047 if (*lwork >= *m * *m + f2cmax(i__3,bdspac)) {
19048
19049 /* Sufficient workspace for a fast algorithm */
19050
19051 ir = 1;
19052 /* Computing MAX */
19053 i__3 = wrkbl, i__2 = *lda * *n + *m;
19054 if (*lwork >= f2cmax(i__3,i__2) + *lda * *m) {
19055
19056 /* WORK(IU) is LDA by N and WORK(IR) is LDA by M */
19057
19058 ldwrku = *lda;
19059 chunk = *n;
19060 ldwrkr = *lda;
19061 } else /* if(complicated condition) */ {
19062 /* Computing MAX */
19063 i__3 = wrkbl, i__2 = *lda * *n + *m;
19064 if (*lwork >= f2cmax(i__3,i__2) + *m * *m) {
19065
19066 /* WORK(IU) is LDA by N and WORK(IR) is M by M */
19067
19068 ldwrku = *lda;
19069 chunk = *n;
19070 ldwrkr = *m;
19071 } else {
19072
19073 /* WORK(IU) is M by CHUNK and WORK(IR) is M by M */
19074
19075 ldwrku = *m;
19076 chunk = (*lwork - *m * *m - *m) / *m;
19077 ldwrkr = *m;
19078 }
19079 }
19080 itau = ir + ldwrkr * *m;
19081 iwork = itau + *m;
19082
19083 /* Compute A=L*Q
19084 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
19085
19086 i__3 = *lwork - iwork + 1;
19087 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
19088 , &i__3, &ierr);
19089
19090 /* Copy L to U, zeroing about above it */
19091
19092 slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
19093 i__3 = *m - 1;
19094 i__2 = *m - 1;
19095 slaset_("U", &i__3, &i__2, &c_b416, &c_b416, &u_ref(1, 2),
19096 ldu);
19097
19098 /* Generate Q in A
19099 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
19100
19101 i__3 = *lwork - iwork + 1;
19102 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
19103 iwork], &i__3, &ierr);
19104 ie = itau;
19105 itauq = ie + *m;
19106 itaup = itauq + *m;
19107 iwork = itaup + *m;
19108
19109 /* Bidiagonalize L in U, copying result to WORK(IR)
19110 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
19111
19112 i__3 = *lwork - iwork + 1;
19113 sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[
19114 itauq], &work[itaup], &work[iwork], &i__3, &ierr);
19115 slacpy_("U", m, m, &u[u_offset], ldu, &work[ir], &ldwrkr);
19116
19117 /* Generate right vectors bidiagonalizing L in WORK(IR)
19118 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */
19119
19120 i__3 = *lwork - iwork + 1;
19121 sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], &
19122 work[iwork], &i__3, &ierr);
19123
19124 /* Generate left vectors bidiagonalizing L in U
19125 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */
19126
19127 i__3 = *lwork - iwork + 1;
19128 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &
19129 work[iwork], &i__3, &ierr);
19130 iwork = ie + *m;
19131
19132 /* Perform bidiagonal QR iteration, computing left
19133 singular vectors of L in U, and computing right
19134 singular vectors of L in WORK(IR)
19135 (Workspace: need M*M+BDSPAC) */
19136
19137 sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ir],
19138 &ldwrkr, &u[u_offset], ldu, dum, &c__1, &work[
19139 iwork], info);
19140 iu = ie + *m;
19141
19142 /* Multiply right singular vectors of L in WORK(IR) by Q
19143 in A, storing result in WORK(IU) and copying to A
19144 (Workspace: need M*M+2*M, prefer M*M+M*N+M)) */
19145
19146 i__3 = *n;
19147 i__2 = chunk;
19148 for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ +=
19149 i__2) {
19150 /* Computing MIN */
19151 i__4 = *n - i__ + 1;
19152 blk = f2cmin(i__4,chunk);
19153 sgemm_("N", "N", m, &blk, m, &c_b438, &work[ir], &
19154 ldwrkr, &a_ref(1, i__), lda, &c_b416, &work[
19155 iu], &ldwrku);
19156 slacpy_("F", m, &blk, &work[iu], &ldwrku, &a_ref(1,
19157 i__), lda);
19158 /* L40: */
19159 }
19160
19161 } else {
19162
19163 /* Insufficient workspace for a fast algorithm */
19164
19165 itau = 1;
19166 iwork = itau + *m;
19167
19168 /* Compute A=L*Q
19169 (Workspace: need 2*M, prefer M+M*NB) */
19170
19171 i__2 = *lwork - iwork + 1;
19172 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
19173 , &i__2, &ierr);
19174
19175 /* Copy L to U, zeroing out above it */
19176
19177 slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
19178 i__2 = *m - 1;
19179 i__3 = *m - 1;
19180 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &u_ref(1, 2),
19181 ldu);
19182
19183 /* Generate Q in A
19184 (Workspace: need 2*M, prefer M+M*NB) */
19185
19186 i__2 = *lwork - iwork + 1;
19187 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
19188 iwork], &i__2, &ierr);
19189 ie = itau;
19190 itauq = ie + *m;
19191 itaup = itauq + *m;
19192 iwork = itaup + *m;
19193
19194 /* Bidiagonalize L in U
19195 (Workspace: need 4*M, prefer 3*M+2*M*NB) */
19196
19197 i__2 = *lwork - iwork + 1;
19198 sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[
19199 itauq], &work[itaup], &work[iwork], &i__2, &ierr);
19200
19201 /* Multiply right vectors bidiagonalizing L by Q in A
19202 (Workspace: need 3*M+N, prefer 3*M+N*NB) */
19203
19204 i__2 = *lwork - iwork + 1;
19205 sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &work[
19206 itaup], &a[a_offset], lda, &work[iwork], &i__2, &
19207 ierr);
19208
19209 /* Generate left vectors bidiagonalizing L in U
19210 (Workspace: need 4*M, prefer 3*M+M*NB) */
19211
19212 i__2 = *lwork - iwork + 1;
19213 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &
19214 work[iwork], &i__2, &ierr);
19215 iwork = ie + *m;
19216
19217 /* Perform bidiagonal QR iteration, computing left
19218 singular vectors of A in U and computing right
19219 singular vectors of A in A
19220 (Workspace: need BDSPAC) */
19221
19222 sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &a[
19223 a_offset], lda, &u[u_offset], ldu, dum, &c__1, &
19224 work[iwork], info);
19225
19226 }
19227
19228 } else if (wntvs) {
19229
19230 if (wntun) {
19231
19232 /* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
19233 M right singular vectors to be computed in VT and
19234 no left singular vectors to be computed
19235
19236 Computing MAX */
19237 i__2 = *m << 2;
19238 if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
19239
19240 /* Sufficient workspace for a fast algorithm */
19241
19242 ir = 1;
19243 if (*lwork >= wrkbl + *lda * *m) {
19244
19245 /* WORK(IR) is LDA by M */
19246
19247 ldwrkr = *lda;
19248 } else {
19249
19250 /* WORK(IR) is M by M */
19251
19252 ldwrkr = *m;
19253 }
19254 itau = ir + ldwrkr * *m;
19255 iwork = itau + *m;
19256
19257 /* Compute A=L*Q
19258 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
19259
19260 i__2 = *lwork - iwork + 1;
19261 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
19262 iwork], &i__2, &ierr);
19263
19264 /* Copy L to WORK(IR), zeroing out above it */
19265
19266 slacpy_("L", m, m, &a[a_offset], lda, &work[ir], &
19267 ldwrkr);
19268 i__2 = *m - 1;
19269 i__3 = *m - 1;
19270 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[ir
19271 + ldwrkr], &ldwrkr);
19272
19273 /* Generate Q in A
19274 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
19275
19276 i__2 = *lwork - iwork + 1;
19277 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
19278 work[iwork], &i__2, &ierr);
19279 ie = itau;
19280 itauq = ie + *m;
19281 itaup = itauq + *m;
19282 iwork = itaup + *m;
19283
19284 /* Bidiagonalize L in WORK(IR)
19285 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
19286
19287 i__2 = *lwork - iwork + 1;
19288 sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &
19289 work[itauq], &work[itaup], &work[iwork], &
19290 i__2, &ierr);
19291
19292 /* Generate right vectors bidiagonalizing L in
19293 WORK(IR)
19294 (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) */
19295
19296 i__2 = *lwork - iwork + 1;
19297 sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup]
19298 , &work[iwork], &i__2, &ierr);
19299 iwork = ie + *m;
19300
19301 /* Perform bidiagonal QR iteration, computing right
19302 singular vectors of L in WORK(IR)
19303 (Workspace: need M*M+BDSPAC) */
19304
19305 sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &
19306 work[ir], &ldwrkr, dum, &c__1, dum, &c__1, &
19307 work[iwork], info);
19308
19309 /* Multiply right singular vectors of L in WORK(IR) by
19310 Q in A, storing result in VT
19311 (Workspace: need M*M) */
19312
19313 sgemm_("N", "N", m, n, m, &c_b438, &work[ir], &ldwrkr,
19314 &a[a_offset], lda, &c_b416, &vt[vt_offset],
19315 ldvt);
19316
19317 } else {
19318
19319 /* Insufficient workspace for a fast algorithm */
19320
19321 itau = 1;
19322 iwork = itau + *m;
19323
19324 /* Compute A=L*Q
19325 (Workspace: need 2*M, prefer M+M*NB) */
19326
19327 i__2 = *lwork - iwork + 1;
19328 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
19329 iwork], &i__2, &ierr);
19330
19331 /* Copy result to VT */
19332
19333 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
19334 ldvt);
19335
19336 /* Generate Q in VT
19337 (Workspace: need 2*M, prefer M+M*NB) */
19338
19339 i__2 = *lwork - iwork + 1;
19340 sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
19341 work[iwork], &i__2, &ierr);
19342 ie = itau;
19343 itauq = ie + *m;
19344 itaup = itauq + *m;
19345 iwork = itaup + *m;
19346
19347 /* Zero out above L in A */
19348
19349 i__2 = *m - 1;
19350 i__3 = *m - 1;
19351 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &a_ref(1,
19352 2), lda);
19353
19354 /* Bidiagonalize L in A
19355 (Workspace: need 4*M, prefer 3*M+2*M*NB) */
19356
19357 i__2 = *lwork - iwork + 1;
19358 sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
19359 work[itauq], &work[itaup], &work[iwork], &
19360 i__2, &ierr);
19361
19362 /* Multiply right vectors bidiagonalizing L by Q in VT
19363 (Workspace: need 3*M+N, prefer 3*M+N*NB) */
19364
19365 i__2 = *lwork - iwork + 1;
19366 sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
19367 work[itaup], &vt[vt_offset], ldvt, &work[
19368 iwork], &i__2, &ierr);
19369 iwork = ie + *m;
19370
19371 /* Perform bidiagonal QR iteration, computing right
19372 singular vectors of A in VT
19373 (Workspace: need BDSPAC) */
19374
19375 sbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], &
19376 vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, &
19377 work[iwork], info);
19378
19379 }
19380
19381 } else if (wntuo) {
19382
19383 /* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
19384 M right singular vectors to be computed in VT and
19385 M left singular vectors to be overwritten on A
19386
19387 Computing MAX */
19388 i__2 = *m << 2;
19389 if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) {
19390
19391 /* Sufficient workspace for a fast algorithm */
19392
19393 iu = 1;
19394 if (*lwork >= wrkbl + (*lda << 1) * *m) {
19395
19396 /* WORK(IU) is LDA by M and WORK(IR) is LDA by M */
19397
19398 ldwrku = *lda;
19399 ir = iu + ldwrku * *m;
19400 ldwrkr = *lda;
19401 } else if (*lwork >= wrkbl + (*lda + *m) * *m) {
19402
19403 /* WORK(IU) is LDA by M and WORK(IR) is M by M */
19404
19405 ldwrku = *lda;
19406 ir = iu + ldwrku * *m;
19407 ldwrkr = *m;
19408 } else {
19409
19410 /* WORK(IU) is M by M and WORK(IR) is M by M */
19411
19412 ldwrku = *m;
19413 ir = iu + ldwrku * *m;
19414 ldwrkr = *m;
19415 }
19416 itau = ir + ldwrkr * *m;
19417 iwork = itau + *m;
19418
19419 /* Compute A=L*Q
19420 (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */
19421
19422 i__2 = *lwork - iwork + 1;
19423 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
19424 iwork], &i__2, &ierr);
19425
19426 /* Copy L to WORK(IU), zeroing out below it */
19427
19428 slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
19429 ldwrku);
19430 i__2 = *m - 1;
19431 i__3 = *m - 1;
19432 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[iu
19433 + ldwrku], &ldwrku);
19434
19435 /* Generate Q in A
19436 (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */
19437
19438 i__2 = *lwork - iwork + 1;
19439 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
19440 work[iwork], &i__2, &ierr);
19441 ie = itau;
19442 itauq = ie + *m;
19443 itaup = itauq + *m;
19444 iwork = itaup + *m;
19445
19446 /* Bidiagonalize L in WORK(IU), copying result to
19447 WORK(IR)
19448 (Workspace: need 2*M*M+4*M,
19449 prefer 2*M*M+3*M+2*M*NB) */
19450
19451 i__2 = *lwork - iwork + 1;
19452 sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
19453 work[itauq], &work[itaup], &work[iwork], &
19454 i__2, &ierr);
19455 slacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], &
19456 ldwrkr);
19457
19458 /* Generate right bidiagonalizing vectors in WORK(IU)
19459 (Workspace: need 2*M*M+4*M-1,
19460 prefer 2*M*M+3*M+(M-1)*NB) */
19461
19462 i__2 = *lwork - iwork + 1;
19463 sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
19464 , &work[iwork], &i__2, &ierr);
19465
19466 /* Generate left bidiagonalizing vectors in WORK(IR)
19467 (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */
19468
19469 i__2 = *lwork - iwork + 1;
19470 sorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq]
19471 , &work[iwork], &i__2, &ierr);
19472 iwork = ie + *m;
19473
19474 /* Perform bidiagonal QR iteration, computing left
19475 singular vectors of L in WORK(IR) and computing
19476 right singular vectors of L in WORK(IU)
19477 (Workspace: need 2*M*M+BDSPAC) */
19478
19479 sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
19480 iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1,
19481 &work[iwork], info);
19482
19483 /* Multiply right singular vectors of L in WORK(IU) by
19484 Q in A, storing result in VT
19485 (Workspace: need M*M) */
19486
19487 sgemm_("N", "N", m, n, m, &c_b438, &work[iu], &ldwrku,
19488 &a[a_offset], lda, &c_b416, &vt[vt_offset],
19489 ldvt);
19490
19491 /* Copy left singular vectors of L to A
19492 (Workspace: need M*M) */
19493
19494 slacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset],
19495 lda);
19496
19497 } else {
19498
19499 /* Insufficient workspace for a fast algorithm */
19500
19501 itau = 1;
19502 iwork = itau + *m;
19503
19504 /* Compute A=L*Q, copying result to VT
19505 (Workspace: need 2*M, prefer M+M*NB) */
19506
19507 i__2 = *lwork - iwork + 1;
19508 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
19509 iwork], &i__2, &ierr);
19510 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
19511 ldvt);
19512
19513 /* Generate Q in VT
19514 (Workspace: need 2*M, prefer M+M*NB) */
19515
19516 i__2 = *lwork - iwork + 1;
19517 sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
19518 work[iwork], &i__2, &ierr);
19519 ie = itau;
19520 itauq = ie + *m;
19521 itaup = itauq + *m;
19522 iwork = itaup + *m;
19523
19524 /* Zero out above L in A */
19525
19526 i__2 = *m - 1;
19527 i__3 = *m - 1;
19528 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &a_ref(1,
19529 2), lda);
19530
19531 /* Bidiagonalize L in A
19532 (Workspace: need 4*M, prefer 3*M+2*M*NB) */
19533
19534 i__2 = *lwork - iwork + 1;
19535 sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
19536 work[itauq], &work[itaup], &work[iwork], &
19537 i__2, &ierr);
19538
19539 /* Multiply right vectors bidiagonalizing L by Q in VT
19540 (Workspace: need 3*M+N, prefer 3*M+N*NB) */
19541
19542 i__2 = *lwork - iwork + 1;
19543 sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
19544 work[itaup], &vt[vt_offset], ldvt, &work[
19545 iwork], &i__2, &ierr);
19546
19547 /* Generate left bidiagonalizing vectors of L in A
19548 (Workspace: need 4*M, prefer 3*M+M*NB) */
19549
19550 i__2 = *lwork - iwork + 1;
19551 sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq],
19552 &work[iwork], &i__2, &ierr);
19553 iwork = ie + *m;
19554
19555 /* Perform bidiagonal QR iteration, compute left
19556 singular vectors of A in A and compute right
19557 singular vectors of A in VT
19558 (Workspace: need BDSPAC) */
19559
19560 sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
19561 vt_offset], ldvt, &a[a_offset], lda, dum, &
19562 c__1, &work[iwork], info);
19563
19564 }
19565
19566 } else if (wntuas) {
19567
19568 /* Path 6t(N much larger than M, JOBU='S' or 'A',
19569 JOBVT='S')
19570 M right singular vectors to be computed in VT and
19571 M left singular vectors to be computed in U
19572
19573 Computing MAX */
19574 i__2 = *m << 2;
19575 if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
19576
19577 /* Sufficient workspace for a fast algorithm */
19578
19579 iu = 1;
19580 if (*lwork >= wrkbl + *lda * *m) {
19581
19582 /* WORK(IU) is LDA by N */
19583
19584 ldwrku = *lda;
19585 } else {
19586
19587 /* WORK(IU) is LDA by M */
19588
19589 ldwrku = *m;
19590 }
19591 itau = iu + ldwrku * *m;
19592 iwork = itau + *m;
19593
19594 /* Compute A=L*Q
19595 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
19596
19597 i__2 = *lwork - iwork + 1;
19598 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
19599 iwork], &i__2, &ierr);
19600
19601 /* Copy L to WORK(IU), zeroing out above it */
19602
19603 slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
19604 ldwrku);
19605 i__2 = *m - 1;
19606 i__3 = *m - 1;
19607 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[iu
19608 + ldwrku], &ldwrku);
19609
19610 /* Generate Q in A
19611 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
19612
19613 i__2 = *lwork - iwork + 1;
19614 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
19615 work[iwork], &i__2, &ierr);
19616 ie = itau;
19617 itauq = ie + *m;
19618 itaup = itauq + *m;
19619 iwork = itaup + *m;
19620
19621 /* Bidiagonalize L in WORK(IU), copying result to U
19622 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
19623
19624 i__2 = *lwork - iwork + 1;
19625 sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
19626 work[itauq], &work[itaup], &work[iwork], &
19627 i__2, &ierr);
19628 slacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset],
19629 ldu);
19630
19631 /* Generate right bidiagonalizing vectors in WORK(IU)
19632 (Workspace: need M*M+4*M-1,
19633 prefer M*M+3*M+(M-1)*NB) */
19634
19635 i__2 = *lwork - iwork + 1;
19636 sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
19637 , &work[iwork], &i__2, &ierr);
19638
19639 /* Generate left bidiagonalizing vectors in U
19640 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */
19641
19642 i__2 = *lwork - iwork + 1;
19643 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
19644 &work[iwork], &i__2, &ierr);
19645 iwork = ie + *m;
19646
19647 /* Perform bidiagonal QR iteration, computing left
19648 singular vectors of L in U and computing right
19649 singular vectors of L in WORK(IU)
19650 (Workspace: need M*M+BDSPAC) */
19651
19652 sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
19653 iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, &
19654 work[iwork], info);
19655
19656 /* Multiply right singular vectors of L in WORK(IU) by
19657 Q in A, storing result in VT
19658 (Workspace: need M*M) */
19659
19660 sgemm_("N", "N", m, n, m, &c_b438, &work[iu], &ldwrku,
19661 &a[a_offset], lda, &c_b416, &vt[vt_offset],
19662 ldvt);
19663
19664 } else {
19665
19666 /* Insufficient workspace for a fast algorithm */
19667
19668 itau = 1;
19669 iwork = itau + *m;
19670
19671 /* Compute A=L*Q, copying result to VT
19672 (Workspace: need 2*M, prefer M+M*NB) */
19673
19674 i__2 = *lwork - iwork + 1;
19675 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
19676 iwork], &i__2, &ierr);
19677 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
19678 ldvt);
19679
19680 /* Generate Q in VT
19681 (Workspace: need 2*M, prefer M+M*NB) */
19682
19683 i__2 = *lwork - iwork + 1;
19684 sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
19685 work[iwork], &i__2, &ierr);
19686
19687 /* Copy L to U, zeroing out above it */
19688
19689 slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset],
19690 ldu);
19691 i__2 = *m - 1;
19692 i__3 = *m - 1;
19693 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &u_ref(1,
19694 2), ldu);
19695 ie = itau;
19696 itauq = ie + *m;
19697 itaup = itauq + *m;
19698 iwork = itaup + *m;
19699
19700 /* Bidiagonalize L in U
19701 (Workspace: need 4*M, prefer 3*M+2*M*NB) */
19702
19703 i__2 = *lwork - iwork + 1;
19704 sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &
19705 work[itauq], &work[itaup], &work[iwork], &
19706 i__2, &ierr);
19707
19708 /* Multiply right bidiagonalizing vectors in U by Q
19709 in VT
19710 (Workspace: need 3*M+N, prefer 3*M+N*NB) */
19711
19712 i__2 = *lwork - iwork + 1;
19713 sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &
19714 work[itaup], &vt[vt_offset], ldvt, &work[
19715 iwork], &i__2, &ierr);
19716
19717 /* Generate left bidiagonalizing vectors in U
19718 (Workspace: need 4*M, prefer 3*M+M*NB) */
19719
19720 i__2 = *lwork - iwork + 1;
19721 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
19722 &work[iwork], &i__2, &ierr);
19723 iwork = ie + *m;
19724
19725 /* Perform bidiagonal QR iteration, computing left
19726 singular vectors of A in U and computing right
19727 singular vectors of A in VT
19728 (Workspace: need BDSPAC) */
19729
19730 sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
19731 vt_offset], ldvt, &u[u_offset], ldu, dum, &
19732 c__1, &work[iwork], info);
19733
19734 }
19735
19736 }
19737
19738 } else if (wntva) {
19739
19740 if (wntun) {
19741
19742 /* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
19743 N right singular vectors to be computed in VT and
19744 no left singular vectors to be computed
19745
19746 Computing MAX */
19747 i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
19748 if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
19749
19750 /* Sufficient workspace for a fast algorithm */
19751
19752 ir = 1;
19753 if (*lwork >= wrkbl + *lda * *m) {
19754
19755 /* WORK(IR) is LDA by M */
19756
19757 ldwrkr = *lda;
19758 } else {
19759
19760 /* WORK(IR) is M by M */
19761
19762 ldwrkr = *m;
19763 }
19764 itau = ir + ldwrkr * *m;
19765 iwork = itau + *m;
19766
19767 /* Compute A=L*Q, copying result to VT
19768 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
19769
19770 i__2 = *lwork - iwork + 1;
19771 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
19772 iwork], &i__2, &ierr);
19773 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
19774 ldvt);
19775
19776 /* Copy L to WORK(IR), zeroing out above it */
19777
19778 slacpy_("L", m, m, &a[a_offset], lda, &work[ir], &
19779 ldwrkr);
19780 i__2 = *m - 1;
19781 i__3 = *m - 1;
19782 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[ir
19783 + ldwrkr], &ldwrkr);
19784
19785 /* Generate Q in VT
19786 (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */
19787
19788 i__2 = *lwork - iwork + 1;
19789 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
19790 work[iwork], &i__2, &ierr);
19791 ie = itau;
19792 itauq = ie + *m;
19793 itaup = itauq + *m;
19794 iwork = itaup + *m;
19795
19796 /* Bidiagonalize L in WORK(IR)
19797 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
19798
19799 i__2 = *lwork - iwork + 1;
19800 sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &
19801 work[itauq], &work[itaup], &work[iwork], &
19802 i__2, &ierr);
19803
19804 /* Generate right bidiagonalizing vectors in WORK(IR)
19805 (Workspace: need M*M+4*M-1,
19806 prefer M*M+3*M+(M-1)*NB) */
19807
19808 i__2 = *lwork - iwork + 1;
19809 sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup]
19810 , &work[iwork], &i__2, &ierr);
19811 iwork = ie + *m;
19812
19813 /* Perform bidiagonal QR iteration, computing right
19814 singular vectors of L in WORK(IR)
19815 (Workspace: need M*M+BDSPAC) */
19816
19817 sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &
19818 work[ir], &ldwrkr, dum, &c__1, dum, &c__1, &
19819 work[iwork], info);
19820
19821 /* Multiply right singular vectors of L in WORK(IR) by
19822 Q in VT, storing result in A
19823 (Workspace: need M*M) */
19824
19825 sgemm_("N", "N", m, n, m, &c_b438, &work[ir], &ldwrkr,
19826 &vt[vt_offset], ldvt, &c_b416, &a[a_offset],
19827 lda);
19828
19829 /* Copy right singular vectors of A from A to VT */
19830
19831 slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset],
19832 ldvt);
19833
19834 } else {
19835
19836 /* Insufficient workspace for a fast algorithm */
19837
19838 itau = 1;
19839 iwork = itau + *m;
19840
19841 /* Compute A=L*Q, copying result to VT
19842 (Workspace: need 2*M, prefer M+M*NB) */
19843
19844 i__2 = *lwork - iwork + 1;
19845 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
19846 iwork], &i__2, &ierr);
19847 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
19848 ldvt);
19849
19850 /* Generate Q in VT
19851 (Workspace: need M+N, prefer M+N*NB) */
19852
19853 i__2 = *lwork - iwork + 1;
19854 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
19855 work[iwork], &i__2, &ierr);
19856 ie = itau;
19857 itauq = ie + *m;
19858 itaup = itauq + *m;
19859 iwork = itaup + *m;
19860
19861 /* Zero out above L in A */
19862
19863 i__2 = *m - 1;
19864 i__3 = *m - 1;
19865 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &a_ref(1,
19866 2), lda);
19867
19868 /* Bidiagonalize L in A
19869 (Workspace: need 4*M, prefer 3*M+2*M*NB) */
19870
19871 i__2 = *lwork - iwork + 1;
19872 sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
19873 work[itauq], &work[itaup], &work[iwork], &
19874 i__2, &ierr);
19875
19876 /* Multiply right bidiagonalizing vectors in A by Q
19877 in VT
19878 (Workspace: need 3*M+N, prefer 3*M+N*NB) */
19879
19880 i__2 = *lwork - iwork + 1;
19881 sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
19882 work[itaup], &vt[vt_offset], ldvt, &work[
19883 iwork], &i__2, &ierr);
19884 iwork = ie + *m;
19885
19886 /* Perform bidiagonal QR iteration, computing right
19887 singular vectors of A in VT
19888 (Workspace: need BDSPAC) */
19889
19890 sbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], &
19891 vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, &
19892 work[iwork], info);
19893
19894 }
19895
19896 } else if (wntuo) {
19897
19898 /* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
19899 N right singular vectors to be computed in VT and
19900 M left singular vectors to be overwritten on A
19901
19902 Computing MAX */
19903 i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
19904 if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) {
19905
19906 /* Sufficient workspace for a fast algorithm */
19907
19908 iu = 1;
19909 if (*lwork >= wrkbl + (*lda << 1) * *m) {
19910
19911 /* WORK(IU) is LDA by M and WORK(IR) is LDA by M */
19912
19913 ldwrku = *lda;
19914 ir = iu + ldwrku * *m;
19915 ldwrkr = *lda;
19916 } else if (*lwork >= wrkbl + (*lda + *m) * *m) {
19917
19918 /* WORK(IU) is LDA by M and WORK(IR) is M by M */
19919
19920 ldwrku = *lda;
19921 ir = iu + ldwrku * *m;
19922 ldwrkr = *m;
19923 } else {
19924
19925 /* WORK(IU) is M by M and WORK(IR) is M by M */
19926
19927 ldwrku = *m;
19928 ir = iu + ldwrku * *m;
19929 ldwrkr = *m;
19930 }
19931 itau = ir + ldwrkr * *m;
19932 iwork = itau + *m;
19933
19934 /* Compute A=L*Q, copying result to VT
19935 (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */
19936
19937 i__2 = *lwork - iwork + 1;
19938 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
19939 iwork], &i__2, &ierr);
19940 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
19941 ldvt);
19942
19943 /* Generate Q in VT
19944 (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) */
19945
19946 i__2 = *lwork - iwork + 1;
19947 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
19948 work[iwork], &i__2, &ierr);
19949
19950 /* Copy L to WORK(IU), zeroing out above it */
19951
19952 slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
19953 ldwrku);
19954 i__2 = *m - 1;
19955 i__3 = *m - 1;
19956 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[iu
19957 + ldwrku], &ldwrku);
19958 ie = itau;
19959 itauq = ie + *m;
19960 itaup = itauq + *m;
19961 iwork = itaup + *m;
19962
19963 /* Bidiagonalize L in WORK(IU), copying result to
19964 WORK(IR)
19965 (Workspace: need 2*M*M+4*M,
19966 prefer 2*M*M+3*M+2*M*NB) */
19967
19968 i__2 = *lwork - iwork + 1;
19969 sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
19970 work[itauq], &work[itaup], &work[iwork], &
19971 i__2, &ierr);
19972 slacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], &
19973 ldwrkr);
19974
19975 /* Generate right bidiagonalizing vectors in WORK(IU)
19976 (Workspace: need 2*M*M+4*M-1,
19977 prefer 2*M*M+3*M+(M-1)*NB) */
19978
19979 i__2 = *lwork - iwork + 1;
19980 sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
19981 , &work[iwork], &i__2, &ierr);
19982
19983 /* Generate left bidiagonalizing vectors in WORK(IR)
19984 (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */
19985
19986 i__2 = *lwork - iwork + 1;
19987 sorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq]
19988 , &work[iwork], &i__2, &ierr);
19989 iwork = ie + *m;
19990
19991 /* Perform bidiagonal QR iteration, computing left
19992 singular vectors of L in WORK(IR) and computing
19993 right singular vectors of L in WORK(IU)
19994 (Workspace: need 2*M*M+BDSPAC) */
19995
19996 sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
19997 iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1,
19998 &work[iwork], info);
19999
20000 /* Multiply right singular vectors of L in WORK(IU) by
20001 Q in VT, storing result in A
20002 (Workspace: need M*M) */
20003
20004 sgemm_("N", "N", m, n, m, &c_b438, &work[iu], &ldwrku,
20005 &vt[vt_offset], ldvt, &c_b416, &a[a_offset],
20006 lda);
20007
20008 /* Copy right singular vectors of A from A to VT */
20009
20010 slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset],
20011 ldvt);
20012
20013 /* Copy left singular vectors of A from WORK(IR) to A */
20014
20015 slacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset],
20016 lda);
20017
20018 } else {
20019
20020 /* Insufficient workspace for a fast algorithm */
20021
20022 itau = 1;
20023 iwork = itau + *m;
20024
20025 /* Compute A=L*Q, copying result to VT
20026 (Workspace: need 2*M, prefer M+M*NB) */
20027
20028 i__2 = *lwork - iwork + 1;
20029 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
20030 iwork], &i__2, &ierr);
20031 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
20032 ldvt);
20033
20034 /* Generate Q in VT
20035 (Workspace: need M+N, prefer M+N*NB) */
20036
20037 i__2 = *lwork - iwork + 1;
20038 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
20039 work[iwork], &i__2, &ierr);
20040 ie = itau;
20041 itauq = ie + *m;
20042 itaup = itauq + *m;
20043 iwork = itaup + *m;
20044
20045 /* Zero out above L in A */
20046
20047 i__2 = *m - 1;
20048 i__3 = *m - 1;
20049 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &a_ref(1,
20050 2), lda);
20051
20052 /* Bidiagonalize L in A
20053 (Workspace: need 4*M, prefer 3*M+2*M*NB) */
20054
20055 i__2 = *lwork - iwork + 1;
20056 sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
20057 work[itauq], &work[itaup], &work[iwork], &
20058 i__2, &ierr);
20059
20060 /* Multiply right bidiagonalizing vectors in A by Q
20061 in VT
20062 (Workspace: need 3*M+N, prefer 3*M+N*NB) */
20063
20064 i__2 = *lwork - iwork + 1;
20065 sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
20066 work[itaup], &vt[vt_offset], ldvt, &work[
20067 iwork], &i__2, &ierr);
20068
20069 /* Generate left bidiagonalizing vectors in A
20070 (Workspace: need 4*M, prefer 3*M+M*NB) */
20071
20072 i__2 = *lwork - iwork + 1;
20073 sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq],
20074 &work[iwork], &i__2, &ierr);
20075 iwork = ie + *m;
20076
20077 /* Perform bidiagonal QR iteration, computing left
20078 singular vectors of A in A and computing right
20079 singular vectors of A in VT
20080 (Workspace: need BDSPAC) */
20081
20082 sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
20083 vt_offset], ldvt, &a[a_offset], lda, dum, &
20084 c__1, &work[iwork], info);
20085
20086 }
20087
20088 } else if (wntuas) {
20089
20090 /* Path 9t(N much larger than M, JOBU='S' or 'A',
20091 JOBVT='A')
20092 N right singular vectors to be computed in VT and
20093 M left singular vectors to be computed in U
20094
20095 Computing MAX */
20096 i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
20097 if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
20098
20099 /* Sufficient workspace for a fast algorithm */
20100
20101 iu = 1;
20102 if (*lwork >= wrkbl + *lda * *m) {
20103
20104 /* WORK(IU) is LDA by M */
20105
20106 ldwrku = *lda;
20107 } else {
20108
20109 /* WORK(IU) is M by M */
20110
20111 ldwrku = *m;
20112 }
20113 itau = iu + ldwrku * *m;
20114 iwork = itau + *m;
20115
20116 /* Compute A=L*Q, copying result to VT
20117 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
20118
20119 i__2 = *lwork - iwork + 1;
20120 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
20121 iwork], &i__2, &ierr);
20122 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
20123 ldvt);
20124
20125 /* Generate Q in VT
20126 (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */
20127
20128 i__2 = *lwork - iwork + 1;
20129 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
20130 work[iwork], &i__2, &ierr);
20131
20132 /* Copy L to WORK(IU), zeroing out above it */
20133
20134 slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
20135 ldwrku);
20136 i__2 = *m - 1;
20137 i__3 = *m - 1;
20138 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &work[iu
20139 + ldwrku], &ldwrku);
20140 ie = itau;
20141 itauq = ie + *m;
20142 itaup = itauq + *m;
20143 iwork = itaup + *m;
20144
20145 /* Bidiagonalize L in WORK(IU), copying result to U
20146 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
20147
20148 i__2 = *lwork - iwork + 1;
20149 sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
20150 work[itauq], &work[itaup], &work[iwork], &
20151 i__2, &ierr);
20152 slacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset],
20153 ldu);
20154
20155 /* Generate right bidiagonalizing vectors in WORK(IU)
20156 (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) */
20157
20158 i__2 = *lwork - iwork + 1;
20159 sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
20160 , &work[iwork], &i__2, &ierr);
20161
20162 /* Generate left bidiagonalizing vectors in U
20163 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */
20164
20165 i__2 = *lwork - iwork + 1;
20166 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
20167 &work[iwork], &i__2, &ierr);
20168 iwork = ie + *m;
20169
20170 /* Perform bidiagonal QR iteration, computing left
20171 singular vectors of L in U and computing right
20172 singular vectors of L in WORK(IU)
20173 (Workspace: need M*M+BDSPAC) */
20174
20175 sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
20176 iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, &
20177 work[iwork], info);
20178
20179 /* Multiply right singular vectors of L in WORK(IU) by
20180 Q in VT, storing result in A
20181 (Workspace: need M*M) */
20182
20183 sgemm_("N", "N", m, n, m, &c_b438, &work[iu], &ldwrku,
20184 &vt[vt_offset], ldvt, &c_b416, &a[a_offset],
20185 lda);
20186
20187 /* Copy right singular vectors of A from A to VT */
20188
20189 slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset],
20190 ldvt);
20191
20192 } else {
20193
20194 /* Insufficient workspace for a fast algorithm */
20195
20196 itau = 1;
20197 iwork = itau + *m;
20198
20199 /* Compute A=L*Q, copying result to VT
20200 (Workspace: need 2*M, prefer M+M*NB) */
20201
20202 i__2 = *lwork - iwork + 1;
20203 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
20204 iwork], &i__2, &ierr);
20205 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
20206 ldvt);
20207
20208 /* Generate Q in VT
20209 (Workspace: need M+N, prefer M+N*NB) */
20210
20211 i__2 = *lwork - iwork + 1;
20212 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
20213 work[iwork], &i__2, &ierr);
20214
20215 /* Copy L to U, zeroing out above it */
20216
20217 slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset],
20218 ldu);
20219 i__2 = *m - 1;
20220 i__3 = *m - 1;
20221 slaset_("U", &i__2, &i__3, &c_b416, &c_b416, &u_ref(1,
20222 2), ldu);
20223 ie = itau;
20224 itauq = ie + *m;
20225 itaup = itauq + *m;
20226 iwork = itaup + *m;
20227
20228 /* Bidiagonalize L in U
20229 (Workspace: need 4*M, prefer 3*M+2*M*NB) */
20230
20231 i__2 = *lwork - iwork + 1;
20232 sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &
20233 work[itauq], &work[itaup], &work[iwork], &
20234 i__2, &ierr);
20235
20236 /* Multiply right bidiagonalizing vectors in U by Q
20237 in VT
20238 (Workspace: need 3*M+N, prefer 3*M+N*NB) */
20239
20240 i__2 = *lwork - iwork + 1;
20241 sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &
20242 work[itaup], &vt[vt_offset], ldvt, &work[
20243 iwork], &i__2, &ierr);
20244
20245 /* Generate left bidiagonalizing vectors in U
20246 (Workspace: need 4*M, prefer 3*M+M*NB) */
20247
20248 i__2 = *lwork - iwork + 1;
20249 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
20250 &work[iwork], &i__2, &ierr);
20251 iwork = ie + *m;
20252
20253 /* Perform bidiagonal QR iteration, computing left
20254 singular vectors of A in U and computing right
20255 singular vectors of A in VT
20256 (Workspace: need BDSPAC) */
20257
20258 sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
20259 vt_offset], ldvt, &u[u_offset], ldu, dum, &
20260 c__1, &work[iwork], info);
20261
20262 }
20263
20264 }
20265
20266 }
20267
20268 } else {
20269
20270 /* N .LT. MNTHR
20271
20272 Path 10t(N greater than M, but not much larger)
20273 Reduce to bidiagonal form without LQ decomposition */
20274
20275 ie = 1;
20276 itauq = ie + *m;
20277 itaup = itauq + *m;
20278 iwork = itaup + *m;
20279
20280 /* Bidiagonalize A
20281 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */
20282
20283 i__2 = *lwork - iwork + 1;
20284 sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
20285 work[itaup], &work[iwork], &i__2, &ierr);
20286 if (wntuas) {
20287
20288 /* If left singular vectors desired in U, copy result to U
20289 and generate left bidiagonalizing vectors in U
20290 (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */
20291
20292 slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
20293 i__2 = *lwork - iwork + 1;
20294 sorgbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[
20295 iwork], &i__2, &ierr);
20296 }
20297 if (wntvas) {
20298
20299 /* If right singular vectors desired in VT, copy result to
20300 VT and generate right bidiagonalizing vectors in VT
20301 (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB) */
20302
20303 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
20304 if (wntva) {
20305 nrvt = *n;
20306 }
20307 if (wntvs) {
20308 nrvt = *m;
20309 }
20310 i__2 = *lwork - iwork + 1;
20311 sorgbr_("P", &nrvt, n, m, &vt[vt_offset], ldvt, &work[itaup],
20312 &work[iwork], &i__2, &ierr);
20313 }
20314 if (wntuo) {
20315
20316 /* If left singular vectors desired in A, generate left
20317 bidiagonalizing vectors in A
20318 (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */
20319
20320 i__2 = *lwork - iwork + 1;
20321 sorgbr_("Q", m, m, n, &a[a_offset], lda, &work[itauq], &work[
20322 iwork], &i__2, &ierr);
20323 }
20324 if (wntvo) {
20325
20326 /* If right singular vectors desired in A, generate right
20327 bidiagonalizing vectors in A
20328 (Workspace: need 4*M, prefer 3*M+M*NB) */
20329
20330 i__2 = *lwork - iwork + 1;
20331 sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[
20332 iwork], &i__2, &ierr);
20333 }
20334 iwork = ie + *m;
20335 if (wntuas || wntuo) {
20336 nru = *m;
20337 }
20338 if (wntun) {
20339 nru = 0;
20340 }
20341 if (wntvas || wntvo) {
20342 ncvt = *n;
20343 }
20344 if (wntvn) {
20345 ncvt = 0;
20346 }
20347 if (! wntuo && ! wntvo) {
20348
20349 /* Perform bidiagonal QR iteration, if desired, computing
20350 left singular vectors in U and computing right singular
20351 vectors in VT
20352 (Workspace: need BDSPAC) */
20353
20354 sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
20355 vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, &
20356 work[iwork], info);
20357 } else if (! wntuo && wntvo) {
20358
20359 /* Perform bidiagonal QR iteration, if desired, computing
20360 left singular vectors in U and computing right singular
20361 vectors in A
20362 (Workspace: need BDSPAC) */
20363
20364 sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[
20365 a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[
20366 iwork], info);
20367 } else {
20368
20369 /* Perform bidiagonal QR iteration, if desired, computing
20370 left singular vectors in A and computing right singular
20371 vectors in VT
20372 (Workspace: need BDSPAC) */
20373
20374 sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
20375 vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
20376 work[iwork], info);
20377 }
20378
20379 }
20380
20381 }
20382
20383 /* If SBDSQR failed to converge, copy unconverged superdiagonals
20384 to WORK( 2:MINMN ) */
20385
20386 if (*info != 0) {
20387 if (ie > 2) {
20388 i__2 = minmn - 1;
20389 for (i__ = 1; i__ <= i__2; ++i__) {
20390 work[i__ + 1] = work[i__ + ie - 1];
20391 /* L50: */
20392 }
20393 }
20394 if (ie < 2) {
20395 for (i__ = minmn - 1; i__ >= 1; --i__) {
20396 work[i__ + 1] = work[i__ + ie - 1];
20397 /* L60: */
20398 }
20399 }
20400 }
20401
20402 /* Undo scaling if necessary */
20403
20404 if (iscl == 1) {
20405 if (anrm > bignum) {
20406 slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
20407 minmn, &ierr);
20408 }
20409 if (*info != 0 && anrm > bignum) {
20410 i__2 = minmn - 1;
20411 slascl_("G", &c__0, &c__0, &bignum, &anrm, &i__2, &c__1, &work[2],
20412 &minmn, &ierr);
20413 }
20414 if (anrm < smlnum) {
20415 slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
20416 minmn, &ierr);
20417 }
20418 if (*info != 0 && anrm < smlnum) {
20419 i__2 = minmn - 1;
20420 slascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__2, &c__1, &work[2],
20421 &minmn, &ierr);
20422 }
20423 }
20424
20425 /* Return optimal workspace in WORK(1) */
20426
20427 work[1] = (real) maxwrk;
20428
20429 return 0;
20430
20431 /* End of SGESVD */
20432
20433 } /* sgesvd_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 24278 of file lapackblas.cpp. References a_ref, c__1, f2cmin, integer, real, sgemv_(), slarfg_(), sscal_(), x, x_ref, y, and y_ref. Referenced by sgebrd_(). 24281 {
24282 /* -- LAPACK auxiliary routine (version 3.0) --
24283 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
24284 Courant Institute, Argonne National Lab, and Rice University
24285 February 29, 1992
24286
24287
24288 Purpose
24289 =======
24290
24291 SLABRD reduces the first NB rows and columns of a real general
24292 m by n matrix A to upper or lower bidiagonal form by an orthogonal
24293 transformation Q' * A * P, and returns the matrices X and Y which
24294 are needed to apply the transformation to the unreduced part of A.
24295
24296 If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
24297 bidiagonal form.
24298
24299 This is an auxiliary routine called by SGEBRD
24300
24301 Arguments
24302 =========
24303
24304 M (input) INTEGER
24305 The number of rows in the matrix A.
24306
24307 N (input) INTEGER
24308 The number of columns in the matrix A.
24309
24310 NB (input) INTEGER
24311 The number of leading rows and columns of A to be reduced.
24312
24313 A (input/output) REAL array, dimension (LDA,N)
24314 On entry, the m by n general matrix to be reduced.
24315 On exit, the first NB rows and columns of the matrix are
24316 overwritten; the rest of the array is unchanged.
24317 If m >= n, elements on and below the diagonal in the first NB
24318 columns, with the array TAUQ, represent the orthogonal
24319 matrix Q as a product of elementary reflectors; and
24320 elements above the diagonal in the first NB rows, with the
24321 array TAUP, represent the orthogonal matrix P as a product
24322 of elementary reflectors.
24323 If m < n, elements below the diagonal in the first NB
24324 columns, with the array TAUQ, represent the orthogonal
24325 matrix Q as a product of elementary reflectors, and
24326 elements on and above the diagonal in the first NB rows,
24327 with the array TAUP, represent the orthogonal matrix P as
24328 a product of elementary reflectors.
24329 See Further Details.
24330
24331 LDA (input) INTEGER
24332 The leading dimension of the array A. LDA >= max(1,M).
24333
24334 D (output) REAL array, dimension (NB)
24335 The diagonal elements of the first NB rows and columns of
24336 the reduced matrix. D(i) = A(i,i).
24337
24338 E (output) REAL array, dimension (NB)
24339 The off-diagonal elements of the first NB rows and columns of
24340 the reduced matrix.
24341
24342 TAUQ (output) REAL array dimension (NB)
24343 The scalar factors of the elementary reflectors which
24344 represent the orthogonal matrix Q. See Further Details.
24345
24346 TAUP (output) REAL array, dimension (NB)
24347 The scalar factors of the elementary reflectors which
24348 represent the orthogonal matrix P. See Further Details.
24349
24350 X (output) REAL array, dimension (LDX,NB)
24351 The m-by-nb matrix X required to update the unreduced part
24352 of A.
24353
24354 LDX (input) INTEGER
24355 The leading dimension of the array X. LDX >= M.
24356
24357 Y (output) REAL array, dimension (LDY,NB)
24358 The n-by-nb matrix Y required to update the unreduced part
24359 of A.
24360
24361 LDY (output) INTEGER
24362 The leading dimension of the array Y. LDY >= N.
24363
24364 Further Details
24365 ===============
24366
24367 The matrices Q and P are represented as products of elementary
24368 reflectors:
24369
24370 Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb)
24371
24372 Each H(i) and G(i) has the form:
24373
24374 H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
24375
24376 where tauq and taup are real scalars, and v and u are real vectors.
24377
24378 If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
24379 A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
24380 A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
24381
24382 If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
24383 A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
24384 A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
24385
24386 The elements of the vectors v and u together form the m-by-nb matrix
24387 V and the nb-by-n matrix U' which are needed, with X and Y, to apply
24388 the transformation to the unreduced part of the matrix, using a block
24389 update of the form: A := A - V*Y' - X*U'.
24390
24391 The contents of A on exit are illustrated by the following examples
24392 with nb = 2:
24393
24394 m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
24395
24396 ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 )
24397 ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 )
24398 ( v1 v2 a a a ) ( v1 1 a a a a )
24399 ( v1 v2 a a a ) ( v1 v2 a a a a )
24400 ( v1 v2 a a a ) ( v1 v2 a a a a )
24401 ( v1 v2 a a a )
24402
24403 where a denotes an element of the original matrix which is unchanged,
24404 vi denotes an element of the vector defining H(i), and ui an element
24405 of the vector defining G(i).
24406
24407 =====================================================================
24408
24409
24410 Quick return if possible
24411
24412 Parameter adjustments */
24413 /* Table of constant values */
24414 static real c_b4 = -1.f;
24415 static real c_b5 = 1.f;
24416 static integer c__1 = 1;
24417 static real c_b16 = 0.f;
24418
24419 /* System generated locals */
24420 integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2,
24421 i__3;
24422 /* Local variables */
24423 static integer i__;
24424 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
24425 sgemv_(const char *, integer *, integer *, real *, real *, integer *,
24426 real *, integer *, real *, real *, integer *), slarfg_(
24427 integer *, real *, real *, integer *, real *);
24428 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
24429 #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1]
24430 #define y_ref(a_1,a_2) y[(a_2)*y_dim1 + a_1]
24431
24432
24433 a_dim1 = *lda;
24434 a_offset = 1 + a_dim1 * 1;
24435 a -= a_offset;
24436 --d__;
24437 --e;
24438 --tauq;
24439 --taup;
24440 x_dim1 = *ldx;
24441 x_offset = 1 + x_dim1 * 1;
24442 x -= x_offset;
24443 y_dim1 = *ldy;
24444 y_offset = 1 + y_dim1 * 1;
24445 y -= y_offset;
24446
24447 /* Function Body */
24448 if (*m <= 0 || *n <= 0) {
24449 return 0;
24450 }
24451
24452 if (*m >= *n) {
24453
24454 /* Reduce to upper bidiagonal form */
24455
24456 i__1 = *nb;
24457 for (i__ = 1; i__ <= i__1; ++i__) {
24458
24459 /* Update A(i:m,i) */
24460
24461 i__2 = *m - i__ + 1;
24462 i__3 = i__ - 1;
24463 sgemv_("No transpose", &i__2, &i__3, &c_b4, &a_ref(i__, 1), lda, &
24464 y_ref(i__, 1), ldy, &c_b5, &a_ref(i__, i__), &c__1);
24465 i__2 = *m - i__ + 1;
24466 i__3 = i__ - 1;
24467 sgemv_("No transpose", &i__2, &i__3, &c_b4, &x_ref(i__, 1), ldx, &
24468 a_ref(1, i__), &c__1, &c_b5, &a_ref(i__, i__), &c__1);
24469
24470 /* Generate reflection Q(i) to annihilate A(i+1:m,i)
24471
24472 Computing MIN */
24473 i__2 = i__ + 1;
24474 i__3 = *m - i__ + 1;
24475 slarfg_(&i__3, &a_ref(i__, i__), &a_ref(f2cmin(i__2,*m), i__), &c__1,
24476 &tauq[i__]);
24477 d__[i__] = a_ref(i__, i__);
24478 if (i__ < *n) {
24479 a_ref(i__, i__) = 1.f;
24480
24481 /* Compute Y(i+1:n,i) */
24482
24483 i__2 = *m - i__ + 1;
24484 i__3 = *n - i__;
24485 sgemv_("Transpose", &i__2, &i__3, &c_b5, &a_ref(i__, i__ + 1),
24486 lda, &a_ref(i__, i__), &c__1, &c_b16, &y_ref(i__ + 1,
24487 i__), &c__1);
24488 i__2 = *m - i__ + 1;
24489 i__3 = i__ - 1;
24490 sgemv_("Transpose", &i__2, &i__3, &c_b5, &a_ref(i__, 1), lda,
24491 &a_ref(i__, i__), &c__1, &c_b16, &y_ref(1, i__), &
24492 c__1);
24493 i__2 = *n - i__;
24494 i__3 = i__ - 1;
24495 sgemv_("No transpose", &i__2, &i__3, &c_b4, &y_ref(i__ + 1, 1)
24496 , ldy, &y_ref(1, i__), &c__1, &c_b5, &y_ref(i__ + 1,
24497 i__), &c__1);
24498 i__2 = *m - i__ + 1;
24499 i__3 = i__ - 1;
24500 sgemv_("Transpose", &i__2, &i__3, &c_b5, &x_ref(i__, 1), ldx,
24501 &a_ref(i__, i__), &c__1, &c_b16, &y_ref(1, i__), &
24502 c__1);
24503 i__2 = i__ - 1;
24504 i__3 = *n - i__;
24505 sgemv_("Transpose", &i__2, &i__3, &c_b4, &a_ref(1, i__ + 1),
24506 lda, &y_ref(1, i__), &c__1, &c_b5, &y_ref(i__ + 1,
24507 i__), &c__1);
24508 i__2 = *n - i__;
24509 sscal_(&i__2, &tauq[i__], &y_ref(i__ + 1, i__), &c__1);
24510
24511 /* Update A(i,i+1:n) */
24512
24513 i__2 = *n - i__;
24514 sgemv_("No transpose", &i__2, &i__, &c_b4, &y_ref(i__ + 1, 1),
24515 ldy, &a_ref(i__, 1), lda, &c_b5, &a_ref(i__, i__ + 1)
24516 , lda);
24517 i__2 = i__ - 1;
24518 i__3 = *n - i__;
24519 sgemv_("Transpose", &i__2, &i__3, &c_b4, &a_ref(1, i__ + 1),
24520 lda, &x_ref(i__, 1), ldx, &c_b5, &a_ref(i__, i__ + 1),
24521 lda);
24522
24523 /* Generate reflection P(i) to annihilate A(i,i+2:n)
24524
24525 Computing MIN */
24526 i__2 = i__ + 2;
24527 i__3 = *n - i__;
24528 slarfg_(&i__3, &a_ref(i__, i__ + 1), &a_ref(i__, f2cmin(i__2,*n))
24529 , lda, &taup[i__]);
24530 e[i__] = a_ref(i__, i__ + 1);
24531 a_ref(i__, i__ + 1) = 1.f;
24532
24533 /* Compute X(i+1:m,i) */
24534
24535 i__2 = *m - i__;
24536 i__3 = *n - i__;
24537 sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1,
24538 i__ + 1), lda, &a_ref(i__, i__ + 1), lda, &c_b16, &
24539 x_ref(i__ + 1, i__), &c__1);
24540 i__2 = *n - i__;
24541 sgemv_("Transpose", &i__2, &i__, &c_b5, &y_ref(i__ + 1, 1),
24542 ldy, &a_ref(i__, i__ + 1), lda, &c_b16, &x_ref(1, i__)
24543 , &c__1);
24544 i__2 = *m - i__;
24545 sgemv_("No transpose", &i__2, &i__, &c_b4, &a_ref(i__ + 1, 1),
24546 lda, &x_ref(1, i__), &c__1, &c_b5, &x_ref(i__ + 1,
24547 i__), &c__1);
24548 i__2 = i__ - 1;
24549 i__3 = *n - i__;
24550 sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(1, i__ + 1)
24551 , lda, &a_ref(i__, i__ + 1), lda, &c_b16, &x_ref(1,
24552 i__), &c__1);
24553 i__2 = *m - i__;
24554 i__3 = i__ - 1;
24555 sgemv_("No transpose", &i__2, &i__3, &c_b4, &x_ref(i__ + 1, 1)
24556 , ldx, &x_ref(1, i__), &c__1, &c_b5, &x_ref(i__ + 1,
24557 i__), &c__1);
24558 i__2 = *m - i__;
24559 sscal_(&i__2, &taup[i__], &x_ref(i__ + 1, i__), &c__1);
24560 }
24561 /* L10: */
24562 }
24563 } else {
24564
24565 /* Reduce to lower bidiagonal form */
24566
24567 i__1 = *nb;
24568 for (i__ = 1; i__ <= i__1; ++i__) {
24569
24570 /* Update A(i,i:n) */
24571
24572 i__2 = *n - i__ + 1;
24573 i__3 = i__ - 1;
24574 sgemv_("No transpose", &i__2, &i__3, &c_b4, &y_ref(i__, 1), ldy, &
24575 a_ref(i__, 1), lda, &c_b5, &a_ref(i__, i__), lda);
24576 i__2 = i__ - 1;
24577 i__3 = *n - i__ + 1;
24578 sgemv_("Transpose", &i__2, &i__3, &c_b4, &a_ref(1, i__), lda, &
24579 x_ref(i__, 1), ldx, &c_b5, &a_ref(i__, i__), lda);
24580
24581 /* Generate reflection P(i) to annihilate A(i,i+1:n)
24582
24583 Computing MIN */
24584 i__2 = i__ + 1;
24585 i__3 = *n - i__ + 1;
24586 slarfg_(&i__3, &a_ref(i__, i__), &a_ref(i__, f2cmin(i__2,*n)), lda, &
24587 taup[i__]);
24588 d__[i__] = a_ref(i__, i__);
24589 if (i__ < *m) {
24590 a_ref(i__, i__) = 1.f;
24591
24592 /* Compute X(i+1:m,i) */
24593
24594 i__2 = *m - i__;
24595 i__3 = *n - i__ + 1;
24596 sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1,
24597 i__), lda, &a_ref(i__, i__), lda, &c_b16, &x_ref(i__
24598 + 1, i__), &c__1);
24599 i__2 = *n - i__ + 1;
24600 i__3 = i__ - 1;
24601 sgemv_("Transpose", &i__2, &i__3, &c_b5, &y_ref(i__, 1), ldy,
24602 &a_ref(i__, i__), lda, &c_b16, &x_ref(1, i__), &c__1);
24603 i__2 = *m - i__;
24604 i__3 = i__ - 1;
24605 sgemv_("No transpose", &i__2, &i__3, &c_b4, &a_ref(i__ + 1, 1)
24606 , lda, &x_ref(1, i__), &c__1, &c_b5, &x_ref(i__ + 1,
24607 i__), &c__1);
24608 i__2 = i__ - 1;
24609 i__3 = *n - i__ + 1;
24610 sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(1, i__),
24611 lda, &a_ref(i__, i__), lda, &c_b16, &x_ref(1, i__), &
24612 c__1);
24613 i__2 = *m - i__;
24614 i__3 = i__ - 1;
24615 sgemv_("No transpose", &i__2, &i__3, &c_b4, &x_ref(i__ + 1, 1)
24616 , ldx, &x_ref(1, i__), &c__1, &c_b5, &x_ref(i__ + 1,
24617 i__), &c__1);
24618 i__2 = *m - i__;
24619 sscal_(&i__2, &taup[i__], &x_ref(i__ + 1, i__), &c__1);
24620
24621 /* Update A(i+1:m,i) */
24622
24623 i__2 = *m - i__;
24624 i__3 = i__ - 1;
24625 sgemv_("No transpose", &i__2, &i__3, &c_b4, &a_ref(i__ + 1, 1)
24626 , lda, &y_ref(i__, 1), ldy, &c_b5, &a_ref(i__ + 1,
24627 i__), &c__1);
24628 i__2 = *m - i__;
24629 sgemv_("No transpose", &i__2, &i__, &c_b4, &x_ref(i__ + 1, 1),
24630 ldx, &a_ref(1, i__), &c__1, &c_b5, &a_ref(i__ + 1,
24631 i__), &c__1);
24632
24633 /* Generate reflection Q(i) to annihilate A(i+2:m,i)
24634
24635 Computing MIN */
24636 i__2 = i__ + 2;
24637 i__3 = *m - i__;
24638 slarfg_(&i__3, &a_ref(i__ + 1, i__), &a_ref(f2cmin(i__2,*m), i__)
24639 , &c__1, &tauq[i__]);
24640 e[i__] = a_ref(i__ + 1, i__);
24641 a_ref(i__ + 1, i__) = 1.f;
24642
24643 /* Compute Y(i+1:n,i) */
24644
24645 i__2 = *m - i__;
24646 i__3 = *n - i__;
24647 sgemv_("Transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1, i__
24648 + 1), lda, &a_ref(i__ + 1, i__), &c__1, &c_b16, &
24649 y_ref(i__ + 1, i__), &c__1);
24650 i__2 = *m - i__;
24651 i__3 = i__ - 1;
24652 sgemv_("Transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1, 1),
24653 lda, &a_ref(i__ + 1, i__), &c__1, &c_b16, &y_ref(1,
24654 i__), &c__1);
24655 i__2 = *n - i__;
24656 i__3 = i__ - 1;
24657 sgemv_("No transpose", &i__2, &i__3, &c_b4, &y_ref(i__ + 1, 1)
24658 , ldy, &y_ref(1, i__), &c__1, &c_b5, &y_ref(i__ + 1,
24659 i__), &c__1);
24660 i__2 = *m - i__;
24661 sgemv_("Transpose", &i__2, &i__, &c_b5, &x_ref(i__ + 1, 1),
24662 ldx, &a_ref(i__ + 1, i__), &c__1, &c_b16, &y_ref(1,
24663 i__), &c__1);
24664 i__2 = *n - i__;
24665 sgemv_("Transpose", &i__, &i__2, &c_b4, &a_ref(1, i__ + 1),
24666 lda, &y_ref(1, i__), &c__1, &c_b5, &y_ref(i__ + 1,
24667 i__), &c__1);
24668 i__2 = *n - i__;
24669 sscal_(&i__2, &tauq[i__], &y_ref(i__ + 1, i__), &c__1);
24670 }
24671 /* L20: */
24672 }
24673 }
24674 return 0;
24675
24676 /* End of SLABRD */
24677
24678 } /* slabrd_ */
|
|
||||||||||||||||||||||||||||||||
|
Definition at line 12889 of file lapackblas.cpp. References a_ref, b, b_ref, f2cmin, integer, logical, and lsame_(). Referenced by sgesvd_(), slaed0_(), slaed2_(), slaed3_(), slaed8_(), and sstedc_(). 12891 {
12892 /* -- LAPACK auxiliary routine (version 3.0) --
12893 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
12894 Courant Institute, Argonne National Lab, and Rice University
12895 February 29, 1992
12896
12897
12898 Purpose
12899 =======
12900
12901 SLACPY copies all or part of a two-dimensional matrix A to another
12902 matrix B.
12903
12904 Arguments
12905 =========
12906
12907 UPLO (input) CHARACTER*1
12908 Specifies the part of the matrix A to be copied to B.
12909 = 'U': Upper triangular part
12910 = 'L': Lower triangular part
12911 Otherwise: All of the matrix A
12912
12913 M (input) INTEGER
12914 The number of rows of the matrix A. M >= 0.
12915
12916 N (input) INTEGER
12917 The number of columns of the matrix A. N >= 0.
12918
12919 A (input) REAL array, dimension (LDA,N)
12920 The m by n matrix A. If UPLO = 'U', only the upper triangle
12921 or trapezoid is accessed; if UPLO = 'L', only the lower
12922 triangle or trapezoid is accessed.
12923
12924 LDA (input) INTEGER
12925 The leading dimension of the array A. LDA >= max(1,M).
12926
12927 B (output) REAL array, dimension (LDB,N)
12928 On exit, B = A in the locations specified by UPLO.
12929
12930 LDB (input) INTEGER
12931 The leading dimension of the array B. LDB >= max(1,M).
12932
12933 =====================================================================
12934
12935
12936 Parameter adjustments */
12937 /* System generated locals */
12938 integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
12939 /* Local variables */
12940 static integer i__, j;
12941 extern logical lsame_(const char *, const char *);
12942 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
12943 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
12944
12945 a_dim1 = *lda;
12946 a_offset = 1 + a_dim1 * 1;
12947 a -= a_offset;
12948 b_dim1 = *ldb;
12949 b_offset = 1 + b_dim1 * 1;
12950 b -= b_offset;
12951
12952 /* Function Body */
12953 if (lsame_(uplo, "U")) {
12954 i__1 = *n;
12955 for (j = 1; j <= i__1; ++j) {
12956 i__2 = f2cmin(j,*m);
12957 for (i__ = 1; i__ <= i__2; ++i__) {
12958 b_ref(i__, j) = a_ref(i__, j);
12959 /* L10: */
12960 }
12961 /* L20: */
12962 }
12963 } else if (lsame_(uplo, "L")) {
12964 i__1 = *n;
12965 for (j = 1; j <= i__1; ++j) {
12966 i__2 = *m;
12967 for (i__ = j; i__ <= i__2; ++i__) {
12968 b_ref(i__, j) = a_ref(i__, j);
12969 /* L30: */
12970 }
12971 /* L40: */
12972 }
12973 } else {
12974 i__1 = *n;
12975 for (j = 1; j <= i__1; ++j) {
12976 i__2 = *m;
12977 for (i__ = 1; i__ <= i__2; ++i__) {
12978 b_ref(i__, j) = a_ref(i__, j);
12979 /* L50: */
12980 }
12981 /* L60: */
12982 }
12983 }
12984 return 0;
12985
12986 /* End of SLACPY */
12987
12988 } /* slacpy_ */
|
|
||||||||||||||||||||||||
|
Definition at line 2012 of file lapackblas.cpp. References b, dabs, real, and sqrt(). Referenced by ssteqr_(), and ssterf_(). 02013 {
02014 /* -- LAPACK auxiliary routine (version 3.0) --
02015 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
02016 Courant Institute, Argonne National Lab, and Rice University
02017 October 31, 1992
02018
02019
02020 Purpose
02021 =======
02022
02023 SLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix
02024 [ A B ]
02025 [ B C ].
02026 On return, RT1 is the eigenvalue of larger absolute value, and RT2
02027 is the eigenvalue of smaller absolute value.
02028
02029 Arguments
02030 =========
02031
02032 A (input) REAL
02033 The (1,1) element of the 2-by-2 matrix.
02034
02035 B (input) REAL
02036 The (1,2) and (2,1) elements of the 2-by-2 matrix.
02037
02038 C (input) REAL
02039 The (2,2) element of the 2-by-2 matrix.
02040
02041 RT1 (output) REAL
02042 The eigenvalue of larger absolute value.
02043
02044 RT2 (output) REAL
02045 The eigenvalue of smaller absolute value.
02046
02047 Further Details
02048 ===============
02049
02050 RT1 is accurate to a few ulps barring over/underflow.
02051
02052 RT2 may be inaccurate if there is massive cancellation in the
02053 determinant A*C-B*B; higher precision or correctly rounded or
02054 correctly truncated arithmetic would be needed to compute RT2
02055 accurately in all cases.
02056
02057 Overflow is possible only if RT1 is within a factor of 5 of overflow.
02058 Underflow is harmless if the input data is 0 or exceeds
02059 underflow_threshold / macheps.
02060
02061 =====================================================================
02062
02063
02064 Compute the eigenvalues */
02065 /* System generated locals */
02066 real r__1;
02067 /* Builtin functions */
02068 // double sqrt(doublereal);
02069 /* Local variables */
02070 static real acmn, acmx, ab, df, tb, sm, rt, adf;
02071
02072
02073 sm = *a + *c__;
02074 df = *a - *c__;
02075 adf = dabs(df);
02076 tb = *b + *b;
02077 ab = dabs(tb);
02078 if (dabs(*a) > dabs(*c__)) {
02079 acmx = *a;
02080 acmn = *c__;
02081 } else {
02082 acmx = *c__;
02083 acmn = *a;
02084 }
02085 if (adf > ab) {
02086 /* Computing 2nd power */
02087 r__1 = ab / adf;
02088 rt = adf * sqrt(r__1 * r__1 + 1.f);
02089 } else if (adf < ab) {
02090 /* Computing 2nd power */
02091 r__1 = adf / ab;
02092 rt = ab * sqrt(r__1 * r__1 + 1.f);
02093 } else {
02094
02095 /* Includes case AB=ADF=0 */
02096
02097 rt = ab * sqrt(2.f);
02098 }
02099 if (sm < 0.f) {
02100 *rt1 = (sm - rt) * .5f;
02101
02102 /* Order of execution important.
02103 To get fully accurate smaller eigenvalue,
02104 next line needs to be executed in higher precision. */
02105
02106 *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
02107 } else if (sm > 0.f) {
02108 *rt1 = (sm + rt) * .5f;
02109
02110 /* Order of execution important.
02111 To get fully accurate smaller eigenvalue,
02112 next line needs to be executed in higher precision. */
02113
02114 *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
02115 } else {
02116
02117 /* Includes case RT1 = RT2 = 0 */
02118
02119 *rt1 = rt * .5f;
02120 *rt2 = rt * -.5f;
02121 }
02122 return 0;
02123
02124 /* End of SLAE2 */
02125
02126 } /* slae2_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 11924 of file lapackblas.cpp. References c__0, c__1, c__2, dabs, f2cmax, ftnlen, ilaenv_(), integer, log(), pow_ii(), q, q_ref, qstore_ref, real, scopy_(), sgemm_(), slacpy_(), slaed1_(), slaed7_(), ssteqr_(), and xerbla_(). Referenced by sstedc_(). 11927 {
11928 /* -- LAPACK routine (version 3.0) --
11929 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
11930 Courant Institute, Argonne National Lab, and Rice University
11931 June 30, 1999
11932
11933
11934 Purpose
11935 =======
11936
11937 SLAED0 computes all eigenvalues and corresponding eigenvectors of a
11938 symmetric tridiagonal matrix using the divide and conquer method.
11939
11940 Arguments
11941 =========
11942
11943 ICOMPQ (input) INTEGER
11944 = 0: Compute eigenvalues only.
11945 = 1: Compute eigenvectors of original dense symmetric matrix
11946 also. On entry, Q contains the orthogonal matrix used
11947 to reduce the original matrix to tridiagonal form.
11948 = 2: Compute eigenvalues and eigenvectors of tridiagonal
11949 matrix.
11950
11951 QSIZ (input) INTEGER
11952 The dimension of the orthogonal matrix used to reduce
11953 the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1.
11954
11955 N (input) INTEGER
11956 The dimension of the symmetric tridiagonal matrix. N >= 0.
11957
11958 D (input/output) REAL array, dimension (N)
11959 On entry, the main diagonal of the tridiagonal matrix.
11960 On exit, its eigenvalues.
11961
11962 E (input) REAL array, dimension (N-1)
11963 The off-diagonal elements of the tridiagonal matrix.
11964 On exit, E has been destroyed.
11965
11966 Q (input/output) REAL array, dimension (LDQ, N)
11967 On entry, Q must contain an N-by-N orthogonal matrix.
11968 If ICOMPQ = 0 Q is not referenced.
11969 If ICOMPQ = 1 On entry, Q is a subset of the columns of the
11970 orthogonal matrix used to reduce the full
11971 matrix to tridiagonal form corresponding to
11972 the subset of the full matrix which is being
11973 decomposed at this time.
11974 If ICOMPQ = 2 On entry, Q will be the identity matrix.
11975 On exit, Q contains the eigenvectors of the
11976 tridiagonal matrix.
11977
11978 LDQ (input) INTEGER
11979 The leading dimension of the array Q. If eigenvectors are
11980 desired, then LDQ >= max(1,N). In any case, LDQ >= 1.
11981
11982 QSTORE (workspace) REAL array, dimension (LDQS, N)
11983 Referenced only when ICOMPQ = 1. Used to store parts of
11984 the eigenvector matrix when the updating matrix multiplies
11985 take place.
11986
11987 LDQS (input) INTEGER
11988 The leading dimension of the array QSTORE. If ICOMPQ = 1,
11989 then LDQS >= max(1,N). In any case, LDQS >= 1.
11990
11991 WORK (workspace) REAL array,
11992 If ICOMPQ = 0 or 1, the dimension of WORK must be at least
11993 1 + 3*N + 2*N*lg N + 2*N**2
11994 ( lg( N ) = smallest integer k
11995 such that 2^k >= N )
11996 If ICOMPQ = 2, the dimension of WORK must be at least
11997 4*N + N**2.
11998
11999 IWORK (workspace) INTEGER array,
12000 If ICOMPQ = 0 or 1, the dimension of IWORK must be at least
12001 6 + 6*N + 5*N*lg N.
12002 ( lg( N ) = smallest integer k
12003 such that 2^k >= N )
12004 If ICOMPQ = 2, the dimension of IWORK must be at least
12005 3 + 5*N.
12006
12007 INFO (output) INTEGER
12008 = 0: successful exit.
12009 < 0: if INFO = -i, the i-th argument had an illegal value.
12010 > 0: The algorithm failed to compute an eigenvalue while
12011 working on the submatrix lying in rows and columns
12012 INFO/(N+1) through mod(INFO,N+1).
12013
12014 Further Details
12015 ===============
12016
12017 Based on contributions by
12018 Jeff Rutter, Computer Science Division, University of California
12019 at Berkeley, USA
12020
12021 =====================================================================
12022
12023
12024 Test the input parameters.
12025
12026 Parameter adjustments */
12027 /* Table of constant values */
12028 static integer c__9 = 9;
12029 static integer c__0 = 0;
12030 static integer c__2 = 2;
12031 static real c_b23 = 1.f;
12032 static real c_b24 = 0.f;
12033 static integer c__1 = 1;
12034
12035 /* System generated locals */
12036 integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2;
12037 real r__1;
12038 /* Builtin functions */
12039 // double log(doublereal);
12040 integer pow_ii(integer *, integer *);
12041 /* Local variables */
12042 static real temp;
12043 static integer curr, i__, j, k;
12044 extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *,
12045 integer *, real *, real *, integer *, real *, integer *, real *,
12046 real *, integer *);
12047 static integer iperm, indxq, iwrem;
12048 extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
12049 integer *);
12050 static integer iqptr, tlvls;
12051 extern /* Subroutine */ int slaed1_(integer *, real *, real *, integer *,
12052 integer *, real *, integer *, real *, integer *, integer *),
12053 slaed7_(integer *, integer *, integer *, integer *, integer *,
12054 integer *, real *, real *, integer *, integer *, real *, integer *
12055 , real *, integer *, integer *, integer *, integer *, integer *,
12056 real *, real *, integer *, integer *);
12057 static integer iq, igivcl;
12058 extern /* Subroutine */ int xerbla_(const char *, integer *);
12059 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *,
12060 integer *, integer *, ftnlen, ftnlen);
12061 static integer igivnm, submat;
12062 extern /* Subroutine */ int slacpy_(const char *, integer *, integer *, real *,
12063 integer *, real *, integer *);
12064 static integer curprb, subpbs, igivpt, curlvl, matsiz, iprmpt, smlsiz;
12065 extern /* Subroutine */ int ssteqr_(const char *, integer *, real *, real *,
12066 real *, integer *, real *, integer *);
12067 static integer lgn, msd2, smm1, spm1, spm2;
12068 #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
12069 #define qstore_ref(a_1,a_2) qstore[(a_2)*qstore_dim1 + a_1]
12070
12071
12072 --d__;
12073 --e;
12074 q_dim1 = *ldq;
12075 q_offset = 1 + q_dim1 * 1;
12076 q -= q_offset;
12077 qstore_dim1 = *ldqs;
12078 qstore_offset = 1 + qstore_dim1 * 1;
12079 qstore -= qstore_offset;
12080 --work;
12081 --iwork;
12082
12083 /* Function Body */
12084 *info = 0;
12085
12086 if (*icompq < 0 || *icompq > 2) {
12087 *info = -1;
12088 } else if (*icompq == 1 && *qsiz < f2cmax(0,*n)) {
12089 *info = -2;
12090 } else if (*n < 0) {
12091 *info = -3;
12092 } else if (*ldq < f2cmax(1,*n)) {
12093 *info = -7;
12094 } else if (*ldqs < f2cmax(1,*n)) {
12095 *info = -9;
12096 }
12097 if (*info != 0) {
12098 i__1 = -(*info);
12099 xerbla_("SLAED0", &i__1);
12100 return 0;
12101 }
12102
12103 /* Quick return if possible */
12104
12105 if (*n == 0) {
12106 return 0;
12107 }
12108
12109 smlsiz = ilaenv_(&c__9, "SLAED0", " ", &c__0, &c__0, &c__0, &c__0, (
12110 ftnlen)6, (ftnlen)1);
12111
12112 /* Determine the size and placement of the submatrices, and save in
12113 the leading elements of IWORK. */
12114
12115 iwork[1] = *n;
12116 subpbs = 1;
12117 tlvls = 0;
12118 L10:
12119 if (iwork[subpbs] > smlsiz) {
12120 for (j = subpbs; j >= 1; --j) {
12121 iwork[j * 2] = (iwork[j] + 1) / 2;
12122 iwork[(j << 1) - 1] = iwork[j] / 2;
12123 /* L20: */
12124 }
12125 ++tlvls;
12126 subpbs <<= 1;
12127 goto L10;
12128 }
12129 i__1 = subpbs;
12130 for (j = 2; j <= i__1; ++j) {
12131 iwork[j] += iwork[j - 1];
12132 /* L30: */
12133 }
12134
12135 /* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1
12136 using rank-1 modifications (cuts). */
12137
12138 spm1 = subpbs - 1;
12139 i__1 = spm1;
12140 for (i__ = 1; i__ <= i__1; ++i__) {
12141 submat = iwork[i__] + 1;
12142 smm1 = submat - 1;
12143 d__[smm1] -= (r__1 = e[smm1], dabs(r__1));
12144 d__[submat] -= (r__1 = e[smm1], dabs(r__1));
12145 /* L40: */
12146 }
12147
12148 indxq = (*n << 2) + 3;
12149 if (*icompq != 2) {
12150
12151 /* Set up workspaces for eigenvalues only/accumulate new vectors
12152 routine */
12153
12154 temp = log((real) (*n)) / log(2.f);
12155 lgn = (integer) temp;
12156 if (pow_ii(&c__2, &lgn) < *n) {
12157 ++lgn;
12158 }
12159 if (pow_ii(&c__2, &lgn) < *n) {
12160 ++lgn;
12161 }
12162 iprmpt = indxq + *n + 1;
12163 iperm = iprmpt + *n * lgn;
12164 iqptr = iperm + *n * lgn;
12165 igivpt = iqptr + *n + 2;
12166 igivcl = igivpt + *n * lgn;
12167
12168 igivnm = 1;
12169 iq = igivnm + (*n << 1) * lgn;
12170 /* Computing 2nd power */
12171 i__1 = *n;
12172 iwrem = iq + i__1 * i__1 + 1;
12173
12174 /* Initialize pointers */
12175
12176 i__1 = subpbs;
12177 for (i__ = 0; i__ <= i__1; ++i__) {
12178 iwork[iprmpt + i__] = 1;
12179 iwork[igivpt + i__] = 1;
12180 /* L50: */
12181 }
12182 iwork[iqptr] = 1;
12183 }
12184
12185 /* Solve each submatrix eigenproblem at the bottom of the divide and
12186 conquer tree. */
12187
12188 curr = 0;
12189 i__1 = spm1;
12190 for (i__ = 0; i__ <= i__1; ++i__) {
12191 if (i__ == 0) {
12192 submat = 1;
12193 matsiz = iwork[1];
12194 } else {
12195 submat = iwork[i__] + 1;
12196 matsiz = iwork[i__ + 1] - iwork[i__];
12197 }
12198 if (*icompq == 2) {
12199 ssteqr_("I", &matsiz, &d__[submat], &e[submat], &q_ref(submat,
12200 submat), ldq, &work[1], info);
12201 if (*info != 0) {
12202 goto L130;
12203 }
12204 } else {
12205 ssteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 +
12206 iwork[iqptr + curr]], &matsiz, &work[1], info);
12207 if (*info != 0) {
12208 goto L130;
12209 }
12210 if (*icompq == 1) {
12211 sgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b23, &q_ref(1,
12212 submat), ldq, &work[iq - 1 + iwork[iqptr + curr]], &
12213 matsiz, &c_b24, &qstore_ref(1, submat), ldqs);
12214 }
12215 /* Computing 2nd power */
12216 i__2 = matsiz;
12217 iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2;
12218 ++curr;
12219 }
12220 k = 1;
12221 i__2 = iwork[i__ + 1];
12222 for (j = submat; j <= i__2; ++j) {
12223 iwork[indxq + j] = k;
12224 ++k;
12225 /* L60: */
12226 }
12227 /* L70: */
12228 }
12229
12230 /* Successively merge eigensystems of adjacent submatrices
12231 into eigensystem for the corresponding larger matrix.
12232
12233 while ( SUBPBS > 1 ) */
12234
12235 curlvl = 1;
12236 L80:
12237 if (subpbs > 1) {
12238 spm2 = subpbs - 2;
12239 i__1 = spm2;
12240 for (i__ = 0; i__ <= i__1; i__ += 2) {
12241 if (i__ == 0) {
12242 submat = 1;
12243 matsiz = iwork[2];
12244 msd2 = iwork[1];
12245 curprb = 0;
12246 } else {
12247 submat = iwork[i__] + 1;
12248 matsiz = iwork[i__ + 2] - iwork[i__];
12249 msd2 = matsiz / 2;
12250 ++curprb;
12251 }
12252
12253 /* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2)
12254 into an eigensystem of size MATSIZ.
12255 SLAED1 is used only for the full eigensystem of a tridiagonal
12256 matrix.
12257 SLAED7 handles the cases in which eigenvalues only or eigenvalues
12258 and eigenvectors of a full symmetric matrix (which was reduced to
12259 tridiagonal form) are desired. */
12260
12261 if (*icompq == 2) {
12262 slaed1_(&matsiz, &d__[submat], &q_ref(submat, submat), ldq, &
12263 iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, &
12264 work[1], &iwork[subpbs + 1], info);
12265 } else {
12266 slaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[
12267 submat], &qstore_ref(1, submat), ldqs, &iwork[indxq +
12268 submat], &e[submat + msd2 - 1], &msd2, &work[iq], &
12269 iwork[iqptr], &iwork[iprmpt], &iwork[iperm], &iwork[
12270 igivpt], &iwork[igivcl], &work[igivnm], &work[iwrem],
12271 &iwork[subpbs + 1], info);
12272 }
12273 if (*info != 0) {
12274 goto L130;
12275 }
12276 iwork[i__ / 2 + 1] = iwork[i__ + 2];
12277 /* L90: */
12278 }
12279 subpbs /= 2;
12280 ++curlvl;
12281 goto L80;
12282 }
12283
12284 /* end while
12285
12286 Re-merge the eigenvalues/vectors which were deflated at the final
12287 merge step. */
12288
12289 if (*icompq == 1) {
12290 i__1 = *n;
12291 for (i__ = 1; i__ <= i__1; ++i__) {
12292 j = iwork[indxq + i__];
12293 work[i__] = d__[j];
12294 scopy_(qsiz, &qstore_ref(1, j), &c__1, &q_ref(1, i__), &c__1);
12295 /* L100: */
12296 }
12297 scopy_(n, &work[1], &c__1, &d__[1], &c__1);
12298 } else if (*icompq == 2) {
12299 i__1 = *n;
12300 for (i__ = 1; i__ <= i__1; ++i__) {
12301 j = iwork[indxq + i__];
12302 work[i__] = d__[j];
12303 scopy_(n, &q_ref(1, j), &c__1, &work[*n * i__ + 1], &c__1);
12304 /* L110: */
12305 }
12306 scopy_(n, &work[1], &c__1, &d__[1], &c__1);
12307 slacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq);
12308 } else {
12309 i__1 = *n;
12310 for (i__ = 1; i__ <= i__1; ++i__) {
12311 j = iwork[indxq + i__];
12312 work[i__] = d__[j];
12313 /* L120: */
12314 }
12315 scopy_(n, &work[1], &c__1, &d__[1], &c__1);
12316 }
12317 goto L140;
12318
12319 L130:
12320 *info = submat * (*n + 1) + submat + matsiz - 1;
12321
12322 L140:
12323 return 0;
12324
12325 /* End of SLAED0 */
12326
12327 } /* slaed0_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 12663 of file lapackblas.cpp. References c__1, c_n1, f2cmax, f2cmin, integer, q, q_ref, real, scopy_(), slaed2_(), slaed3_(), slamrg_(), and xerbla_(). Referenced by slaed0_(). 12666 {
12667 /* -- LAPACK routine (version 3.0) --
12668 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
12669 Courant Institute, Argonne National Lab, and Rice University
12670 June 30, 1999
12671
12672
12673 Purpose
12674 =======
12675
12676 SLAED1 computes the updated eigensystem of a diagonal
12677 matrix after modification by a rank-one symmetric matrix. This
12678 routine is used only for the eigenproblem which requires all
12679 eigenvalues and eigenvectors of a tridiagonal matrix. SLAED7 handles
12680 the case in which eigenvalues only or eigenvalues and eigenvectors
12681 of a full symmetric matrix (which was reduced to tridiagonal form)
12682 are desired.
12683
12684 T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out)
12685
12686 where Z = Q'u, u is a vector of length N with ones in the
12687 CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
12688
12689 The eigenvectors of the original matrix are stored in Q, and the
12690 eigenvalues are in D. The algorithm consists of three stages:
12691
12692 The first stage consists of deflating the size of the problem
12693 when there are multiple eigenvalues or if there is a zero in
12694 the Z vector. For each such occurence the dimension of the
12695 secular equation problem is reduced by one. This stage is
12696 performed by the routine SLAED2.
12697
12698 The second stage consists of calculating the updated
12699 eigenvalues. This is done by finding the roots of the secular
12700 equation via the routine SLAED4 (as called by SLAED3).
12701 This routine also calculates the eigenvectors of the current
12702 problem.
12703
12704 The final stage consists of computing the updated eigenvectors
12705 directly using the updated eigenvalues. The eigenvectors for
12706 the current problem are multiplied with the eigenvectors from
12707 the overall problem.
12708
12709 Arguments
12710 =========
12711
12712 N (input) INTEGER
12713 The dimension of the symmetric tridiagonal matrix. N >= 0.
12714
12715 D (input/output) REAL array, dimension (N)
12716 On entry, the eigenvalues of the rank-1-perturbed matrix.
12717 On exit, the eigenvalues of the repaired matrix.
12718
12719 Q (input/output) REAL array, dimension (LDQ,N)
12720 On entry, the eigenvectors of the rank-1-perturbed matrix.
12721 On exit, the eigenvectors of the repaired tridiagonal matrix.
12722
12723 LDQ (input) INTEGER
12724 The leading dimension of the array Q. LDQ >= max(1,N).
12725
12726 INDXQ (input/output) INTEGER array, dimension (N)
12727 On entry, the permutation which separately sorts the two
12728 subproblems in D into ascending order.
12729 On exit, the permutation which will reintegrate the
12730 subproblems back into sorted order,
12731 i.e. D( INDXQ( I = 1, N ) ) will be in ascending order.
12732
12733 RHO (input) REAL
12734 The subdiagonal entry used to create the rank-1 modification.
12735
12736 CUTPNT (input) INTEGER
12737 The location of the last eigenvalue in the leading sub-matrix.
12738 min(1,N) <= CUTPNT <= N/2.
12739
12740 WORK (workspace) REAL array, dimension (4*N + N**2)
12741
12742 IWORK (workspace) INTEGER array, dimension (4*N)
12743
12744 INFO (output) INTEGER
12745 = 0: successful exit.
12746 < 0: if INFO = -i, the i-th argument had an illegal value.
12747 > 0: if INFO = 1, an eigenvalue did not converge
12748
12749 Further Details
12750 ===============
12751
12752 Based on contributions by
12753 Jeff Rutter, Computer Science Division, University of California
12754 at Berkeley, USA
12755 Modified by Francoise Tisseur, University of Tennessee.
12756
12757 =====================================================================
12758
12759
12760 Test the input parameters.
12761
12762 Parameter adjustments */
12763 /* Table of constant values */
12764 static integer c__1 = 1;
12765 static integer c_n1 = -1;
12766
12767 /* System generated locals */
12768 integer q_dim1, q_offset, i__1, i__2;
12769 /* Local variables */
12770 static integer indx, i__, k, indxc, indxp;
12771 extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
12772 integer *);
12773 static integer n1, n2;
12774 extern /* Subroutine */ int slaed2_(integer *, integer *, integer *, real
12775 *, real *, integer *, integer *, real *, real *, real *, real *,
12776 real *, integer *, integer *, integer *, integer *, integer *),
12777 slaed3_(integer *, integer *, integer *, real *, real *, integer *
12778 , real *, real *, real *, integer *, integer *, real *, real *,
12779 integer *);
12780 static integer idlmda, is, iw, iz;
12781 extern /* Subroutine */ int xerbla_(const char *, integer *), slamrg_(
12782 integer *, integer *, real *, integer *, integer *, integer *);
12783 static integer coltyp, iq2, cpp1;
12784 #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
12785
12786
12787 --d__;
12788 q_dim1 = *ldq;
12789 q_offset = 1 + q_dim1 * 1;
12790 q -= q_offset;
12791 --indxq;
12792 --work;
12793 --iwork;
12794
12795 /* Function Body */
12796 *info = 0;
12797
12798 if (*n < 0) {
12799 *info = -1;
12800 } else if (*ldq < f2cmax(1,*n)) {
12801 *info = -4;
12802 } else /* if(complicated condition) */ {
12803 /* Computing MIN */
12804 i__1 = 1, i__2 = *n / 2;
12805 if (f2cmin(i__1,i__2) > *cutpnt || *n / 2 < *cutpnt) {
12806 *info = -7;
12807 }
12808 }
12809 if (*info != 0) {
12810 i__1 = -(*info);
12811 xerbla_("SLAED1", &i__1);
12812 return 0;
12813 }
12814
12815 /* Quick return if possible */
12816
12817 if (*n == 0) {
12818 return 0;
12819 }
12820
12821 /* The following values are integer pointers which indicate
12822 the portion of the workspace
12823 used by a particular array in SLAED2 and SLAED3. */
12824
12825 iz = 1;
12826 idlmda = iz + *n;
12827 iw = idlmda + *n;
12828 iq2 = iw + *n;
12829
12830 indx = 1;
12831 indxc = indx + *n;
12832 coltyp = indxc + *n;
12833 indxp = coltyp + *n;
12834
12835
12836 /* Form the z-vector which consists of the last row of Q_1 and the
12837 first row of Q_2. */
12838
12839 scopy_(cutpnt, &q_ref(*cutpnt, 1), ldq, &work[iz], &c__1);
12840 cpp1 = *cutpnt + 1;
12841 i__1 = *n - *cutpnt;
12842 scopy_(&i__1, &q_ref(cpp1, cpp1), ldq, &work[iz + *cutpnt], &c__1);
12843
12844 /* Deflate eigenvalues. */
12845
12846 slaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[
12847 iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[
12848 indxc], &iwork[indxp], &iwork[coltyp], info);
12849
12850 if (*info != 0) {
12851 goto L20;
12852 }
12853
12854 /* Solve Secular Equation. */
12855
12856 if (k != 0) {
12857 is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp +
12858 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2;
12859 slaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda],
12860 &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[
12861 is], info);
12862 if (*info != 0) {
12863 goto L20;
12864 }
12865
12866 /* Prepare the INDXQ sorting permutation. */
12867
12868 n1 = k;
12869 n2 = *n - k;
12870 slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
12871 } else {
12872 i__1 = *n;
12873 for (i__ = 1; i__ <= i__1; ++i__) {
12874 indxq[i__] = i__;
12875 /* L10: */
12876 }
12877 }
12878
12879 L20:
12880 return 0;
12881
12882 /* End of SLAED1 */
12883
12884 } /* slaed1_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 13569 of file lapackblas.cpp. References c__1, c_b3, dabs, df2cmax, doublereal, f2cmax, f2cmin, integer, isamax_(), q, q_ref, real, scopy_(), slacpy_(), slamch_(), slamrg_(), slapy2_(), sqrt(), srot_(), sscal_(), t, and xerbla_(). Referenced by slaed1_(). 13573 {
13574 /* System generated locals */
13575 integer q_dim1, q_offset, i__1, i__2;
13576 real r__1, r__2, r__3, r__4;
13577
13578 /* Builtin functions */
13579 // double sqrt(doublereal);
13580
13581 /* Local variables */
13582 static integer imax, jmax, ctot[4];
13583 extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
13584 integer *, real *, real *);
13585 static real c__;
13586 static integer i__, j;
13587 static real s, t;
13588 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
13589 static integer k2;
13590 extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
13591 integer *);
13592 static integer n2;
13593 extern doublereal slapy2_(real *, real *);
13594 static integer ct, nj, pj, js;
13595 extern doublereal slamch_(const char *);
13596 extern /* Subroutine */ int xerbla_(const char *, integer *);
13597 extern integer isamax_(integer *, real *, integer *);
13598 extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer
13599 *, integer *, integer *), slacpy_(const char *, integer *, integer *,
13600 real *, integer *, real *, integer *);
13601 static integer iq1, iq2, n1p1;
13602 static real eps, tau, tol;
13603 static integer psm[4];
13604
13605
13606 #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
13607
13608
13609 /* -- LAPACK routine (version 3.0) --
13610 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
13611 Courant Institute, Argonne National Lab, and Rice University
13612 October 31, 1999
13613
13614
13615 Purpose
13616 =======
13617
13618 SLAED2 merges the two sets of eigenvalues together into a single
13619 sorted set. Then it tries to deflate the size of the problem.
13620 There are two ways in which deflation can occur: when two or more
13621 eigenvalues are close together or if there is a tiny entry in the
13622 Z vector. For each such occurrence the order of the related secular
13623 equation problem is reduced by one.
13624
13625 Arguments
13626 =========
13627
13628 K (output) INTEGER
13629 The number of non-deflated eigenvalues, and the order of the
13630 related secular equation. 0 <= K <=N.
13631
13632 N (input) INTEGER
13633 The dimension of the symmetric tridiagonal matrix. N >= 0.
13634
13635 N1 (input) INTEGER
13636 The location of the last eigenvalue in the leading sub-matrix.
13637 f2cmin(1,N) <= N1 <= N/2.
13638
13639 D (input/output) REAL array, dimension (N)
13640 On entry, D contains the eigenvalues of the two submatrices to
13641 be combined.
13642 On exit, D contains the trailing (N-K) updated eigenvalues
13643 (those which were deflated) sorted into increasing order.
13644
13645 Q (input/output) REAL array, dimension (LDQ, N)
13646 On entry, Q contains the eigenvectors of two submatrices in
13647 the two square blocks with corners at (1,1), (N1,N1)
13648 and (N1+1, N1+1), (N,N).
13649 On exit, Q contains the trailing (N-K) updated eigenvectors
13650 (those which were deflated) in its last N-K columns.
13651
13652 LDQ (input) INTEGER
13653 The leading dimension of the array Q. LDQ >= max(1,N).
13654
13655 INDXQ (input/output) INTEGER array, dimension (N)
13656 The permutation which separately sorts the two sub-problems
13657 in D into ascending order. Note that elements in the second
13658 half of this permutation must first have N1 added to their
13659 values. Destroyed on exit.
13660
13661 RHO (input/output) REAL
13662 On entry, the off-diagonal element associated with the rank-1
13663 cut which originally split the two submatrices which are now
13664 being recombined.
13665 On exit, RHO has been modified to the value required by
13666 SLAED3.
13667
13668 Z (input) REAL array, dimension (N)
13669 On entry, Z contains the updating vector (the last
13670 row of the first sub-eigenvector matrix and the first row of
13671 the second sub-eigenvector matrix).
13672 On exit, the contents of Z have been destroyed by the updating
13673 process.
13674
13675 DLAMDA (output) REAL array, dimension (N)
13676 A copy of the first K eigenvalues which will be used by
13677 SLAED3 to form the secular equation.
13678
13679 W (output) REAL array, dimension (N)
13680 The first k values of the final deflation-altered z-vector
13681 which will be passed to SLAED3.
13682
13683 Q2 (output) REAL array, dimension (N1**2+(N-N1)**2)
13684 A copy of the first K eigenvectors which will be used by
13685 SLAED3 in a matrix multiply (SGEMM) to solve for the new
13686 eigenvectors.
13687
13688 INDX (workspace) INTEGER array, dimension (N)
13689 The permutation used to sort the contents of DLAMDA into
13690 ascending order.
13691
13692 INDXC (output) INTEGER array, dimension (N)
13693 The permutation used to arrange the columns of the deflated
13694 Q matrix into three groups: the first group contains non-zero
13695 elements only at and above N1, the second contains
13696 non-zero elements only below N1, and the third is dense.
13697
13698 INDXP (workspace) INTEGER array, dimension (N)
13699 The permutation used to place deflated values of D at the end
13700 of the array. INDXP(1:K) points to the nondeflated D-values
13701 and INDXP(K+1:N) points to the deflated eigenvalues.
13702
13703 COLTYP (workspace/output) INTEGER array, dimension (N)
13704 During execution, a label which will indicate which of the
13705 following types a column in the Q2 matrix is:
13706 1 : non-zero in the upper half only;
13707 2 : dense;
13708 3 : non-zero in the lower half only;
13709 4 : deflated.
13710 On exit, COLTYP(i) is the number of columns of type i,
13711 for i=1 to 4 only.
13712
13713 INFO (output) INTEGER
13714 = 0: successful exit.
13715 < 0: if INFO = -i, the i-th argument had an illegal value.
13716
13717 Further Details
13718 ===============
13719
13720 Based on contributions by
13721 Jeff Rutter, Computer Science Division, University of California
13722 at Berkeley, USA
13723 Modified by Francoise Tisseur, University of Tennessee.
13724
13725 =====================================================================
13726
13727
13728 Test the input parameters.
13729
13730 Parameter adjustments */
13731 --d__;
13732 q_dim1 = *ldq;
13733 q_offset = 1 + q_dim1 * 1;
13734 q -= q_offset;
13735 --indxq;
13736 --z__;
13737 --dlamda;
13738 --w;
13739 --q2;
13740 --indx;
13741 --indxc;
13742 --indxp;
13743 --coltyp;
13744
13745 /* Function Body */
13746 *info = 0;
13747
13748 if (*n < 0) {
13749 *info = -2;
13750 } else if (*ldq < f2cmax(1,*n)) {
13751 *info = -6;
13752 } else /* if(complicated condition) */ {
13753 /* Computing F2CMIN */
13754 i__1 = 1, i__2 = *n / 2;
13755 if (f2cmin(i__1,i__2) > *n1 || *n / 2 < *n1) {
13756 *info = -3;
13757 }
13758 }
13759 if (*info != 0) {
13760 i__1 = -(*info);
13761 xerbla_("SLAED2", &i__1);
13762 return 0;
13763 }
13764
13765 /* Quick return if possible */
13766
13767 if (*n == 0) {
13768 return 0;
13769 }
13770
13771 n2 = *n - *n1;
13772 n1p1 = *n1 + 1;
13773
13774 if (*rho < 0.f) {
13775 sscal_(&n2, &c_b3, &z__[n1p1], &c__1);
13776 }
13777
13778 /* Normalize z so that norm(z) = 1. Since z is the concatenation of
13779 two normalized vectors, norm2(z) = sqrt(2). */
13780
13781 t = 1.f / sqrt(2.f);
13782 sscal_(n, &t, &z__[1], &c__1);
13783
13784 /* RHO = ABS( norm(z)**2 * RHO ) */
13785
13786 *rho = (r__1 = *rho * 2.f, dabs(r__1));
13787
13788 /* Sort the eigenvalues into increasing order */
13789
13790 i__1 = *n;
13791 for (i__ = n1p1; i__ <= i__1; ++i__) {
13792 indxq[i__] += *n1;
13793 /* L10: */
13794 }
13795
13796 /* re-integrate the deflated parts from the last pass */
13797
13798 i__1 = *n;
13799 for (i__ = 1; i__ <= i__1; ++i__) {
13800 dlamda[i__] = d__[indxq[i__]];
13801 /* L20: */
13802 }
13803 slamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]);
13804 i__1 = *n;
13805 for (i__ = 1; i__ <= i__1; ++i__) {
13806 indx[i__] = indxq[indxc[i__]];
13807 /* L30: */
13808 }
13809
13810 /* Calculate the allowable deflation tolerance */
13811
13812 imax = isamax_(n, &z__[1], &c__1);
13813 jmax = isamax_(n, &d__[1], &c__1);
13814 eps = slamch_("Epsilon");
13815 /* Computing MAX */
13816 r__3 = (r__1 = d__[jmax], dabs(r__1)), r__4 = (r__2 = z__[imax], dabs(
13817 r__2));
13818 tol = eps * 8.f * df2cmax(r__3,r__4);
13819
13820 /* If the rank-1 modifier is small enough, no more needs to be done
13821 except to reorganize Q so that its columns correspond with the
13822 elements in D. */
13823
13824 if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) {
13825 *k = 0;
13826 iq2 = 1;
13827 i__1 = *n;
13828 for (j = 1; j <= i__1; ++j) {
13829 i__ = indx[j];
13830 scopy_(n, &q_ref(1, i__), &c__1, &q2[iq2], &c__1);
13831 dlamda[j] = d__[i__];
13832 iq2 += *n;
13833 /* L40: */
13834 }
13835 slacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq);
13836 scopy_(n, &dlamda[1], &c__1, &d__[1], &c__1);
13837 goto L190;
13838 }
13839
13840 /* If there are multiple eigenvalues then the problem deflates. Here
13841 the number of equal eigenvalues are found. As each equal
13842 eigenvalue is found, an elementary reflector is computed to rotate
13843 the corresponding eigensubspace so that the corresponding
13844 components of Z are zero in this new basis. */
13845
13846 i__1 = *n1;
13847 for (i__ = 1; i__ <= i__1; ++i__) {
13848 coltyp[i__] = 1;
13849 /* L50: */
13850 }
13851 i__1 = *n;
13852 for (i__ = n1p1; i__ <= i__1; ++i__) {
13853 coltyp[i__] = 3;
13854 /* L60: */
13855 }
13856
13857
13858 *k = 0;
13859 k2 = *n + 1;
13860 i__1 = *n;
13861 for (j = 1; j <= i__1; ++j) {
13862 nj = indx[j];
13863 if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) {
13864
13865 /* Deflate due to small z component. */
13866
13867 --k2;
13868 coltyp[nj] = 4;
13869 indxp[k2] = nj;
13870 if (j == *n) {
13871 goto L100;
13872 }
13873 } else {
13874 pj = nj;
13875 goto L80;
13876 }
13877 /* L70: */
13878 }
13879 L80:
13880 ++j;
13881 nj = indx[j];
13882 if (j > *n) {
13883 goto L100;
13884 }
13885 if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) {
13886
13887 /* Deflate due to small z component. */
13888
13889 --k2;
13890 coltyp[nj] = 4;
13891 indxp[k2] = nj;
13892 } else {
13893
13894 /* Check if eigenvalues are close enough to allow deflation. */
13895
13896 s = z__[pj];
13897 c__ = z__[nj];
13898
13899 /* Find sqrt(a**2+b**2) without overflow or
13900 destructive underflow. */
13901
13902 tau = slapy2_(&c__, &s);
13903 t = d__[nj] - d__[pj];
13904 c__ /= tau;
13905 s = -s / tau;
13906 if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) {
13907
13908 /* Deflation is possible. */
13909
13910 z__[nj] = tau;
13911 z__[pj] = 0.f;
13912 if (coltyp[nj] != coltyp[pj]) {
13913 coltyp[nj] = 2;
13914 }
13915 coltyp[pj] = 4;
13916 srot_(n, &q_ref(1, pj), &c__1, &q_ref(1, nj), &c__1, &c__, &s);
13917 /* Computing 2nd power */
13918 r__1 = c__;
13919 /* Computing 2nd power */
13920 r__2 = s;
13921 t = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2);
13922 /* Computing 2nd power */
13923 r__1 = s;
13924 /* Computing 2nd power */
13925 r__2 = c__;
13926 d__[nj] = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2);
13927 d__[pj] = t;
13928 --k2;
13929 i__ = 1;
13930 L90:
13931 if (k2 + i__ <= *n) {
13932 if (d__[pj] < d__[indxp[k2 + i__]]) {
13933 indxp[k2 + i__ - 1] = indxp[k2 + i__];
13934 indxp[k2 + i__] = pj;
13935 ++i__;
13936 goto L90;
13937 } else {
13938 indxp[k2 + i__ - 1] = pj;
13939 }
13940 } else {
13941 indxp[k2 + i__ - 1] = pj;
13942 }
13943 pj = nj;
13944 } else {
13945 ++(*k);
13946 dlamda[*k] = d__[pj];
13947 w[*k] = z__[pj];
13948 indxp[*k] = pj;
13949 pj = nj;
13950 }
13951 }
13952 goto L80;
13953 L100:
13954
13955 /* Record the last eigenvalue. */
13956
13957 ++(*k);
13958 dlamda[*k] = d__[pj];
13959 w[*k] = z__[pj];
13960 indxp[*k] = pj;
13961
13962 /* Count up the total number of the various types of columns, then
13963 form a permutation which positions the four column types into
13964 four uniform groups (although one or more of these groups may be
13965 empty). */
13966
13967 for (j = 1; j <= 4; ++j) {
13968 ctot[j - 1] = 0;
13969 /* L110: */
13970 }
13971 i__1 = *n;
13972 for (j = 1; j <= i__1; ++j) {
13973 ct = coltyp[j];
13974 ++ctot[ct - 1];
13975 /* L120: */
13976 }
13977
13978 /* PSM(*) = Position in SubMatrix (of types 1 through 4) */
13979
13980 psm[0] = 1;
13981 psm[1] = ctot[0] + 1;
13982 psm[2] = psm[1] + ctot[1];
13983 psm[3] = psm[2] + ctot[2];
13984 *k = *n - ctot[3];
13985
13986 /* Fill out the INDXC array so that the permutation which it induces
13987 will place all type-1 columns first, all type-2 columns next,
13988 then all type-3's, and finally all type-4's. */
13989
13990 i__1 = *n;
13991 for (j = 1; j <= i__1; ++j) {
13992 js = indxp[j];
13993 ct = coltyp[js];
13994 indx[psm[ct - 1]] = js;
13995 indxc[psm[ct - 1]] = j;
13996 ++psm[ct - 1];
13997 /* L130: */
13998 }
13999
14000 /* Sort the eigenvalues and corresponding eigenvectors into DLAMDA
14001 and Q2 respectively. The eigenvalues/vectors which were not
14002 deflated go into the first K slots of DLAMDA and Q2 respectively,
14003 while those which were deflated go into the last N - K slots. */
14004
14005 i__ = 1;
14006 iq1 = 1;
14007 iq2 = (ctot[0] + ctot[1]) * *n1 + 1;
14008 i__1 = ctot[0];
14009 for (j = 1; j <= i__1; ++j) {
14010 js = indx[i__];
14011 scopy_(n1, &q_ref(1, js), &c__1, &q2[iq1], &c__1);
14012 z__[i__] = d__[js];
14013 ++i__;
14014 iq1 += *n1;
14015 /* L140: */
14016 }
14017
14018 i__1 = ctot[1];
14019 for (j = 1; j <= i__1; ++j) {
14020 js = indx[i__];
14021 scopy_(n1, &q_ref(1, js), &c__1, &q2[iq1], &c__1);
14022 scopy_(&n2, &q_ref(*n1 + 1, js), &c__1, &q2[iq2], &c__1);
14023 z__[i__] = d__[js];
14024 ++i__;
14025 iq1 += *n1;
14026 iq2 += n2;
14027 /* L150: */
14028 }
14029
14030 i__1 = ctot[2];
14031 for (j = 1; j <= i__1; ++j) {
14032 js = indx[i__];
14033 scopy_(&n2, &q_ref(*n1 + 1, js), &c__1, &q2[iq2], &c__1);
14034 z__[i__] = d__[js];
14035 ++i__;
14036 iq2 += n2;
14037 /* L160: */
14038 }
14039
14040 iq1 = iq2;
14041 i__1 = ctot[3];
14042 for (j = 1; j <= i__1; ++j) {
14043 js = indx[i__];
14044 scopy_(n, &q_ref(1, js), &c__1, &q2[iq2], &c__1);
14045 iq2 += *n;
14046 z__[i__] = d__[js];
14047 ++i__;
14048 /* L170: */
14049 }
14050
14051 /* The deflated eigenvalues and their corresponding vectors go back
14052 into the last N - K slots of D and Q respectively. */
14053
14054 slacpy_("A", n, &ctot[3], &q2[iq1], n, &q_ref(1, *k + 1), ldq);
14055 i__1 = *n - *k;
14056 scopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1);
14057
14058 /* Copy CTOT into COLTYP for referencing in SLAED3. */
14059
14060 for (j = 1; j <= 4; ++j) {
14061 coltyp[j] = ctot[j - 1];
14062 /* L180: */
14063 }
14064
14065 L190:
14066 return 0;
14067
14068 /* End of SLAED2 */
14069
14070 } /* slaed2_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 15616 of file lapackblas.cpp. References c__1, doublereal, f2cmax, integer, q, q_ref, r_sign(), real, scopy_(), sgemm_(), slacpy_(), slaed4_(), slamc3_(), slaset_(), snrm2_(), sqrt(), and xerbla_(). Referenced by slaed1_(). 15619 {
15620 /* -- LAPACK routine (version 3.0) --
15621 Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
15622 Courant Institute, NAG Ltd., and Rice University
15623 June 30, 1999
15624
15625
15626 Purpose
15627 =======
15628
15629 SLAED3 finds the roots of the secular equation, as defined by the
15630 values in D, W, and RHO, between 1 and K. It makes the
15631 appropriate calls to SLAED4 and then updates the eigenvectors by
15632 multiplying the matrix of eigenvectors of the pair of eigensystems
15633 being combined by the matrix of eigenvectors of the K-by-K system
15634 which is solved here.
15635
15636 This code makes very mild assumptions about floating point
15637 arithmetic. It will work on machines with a guard digit in
15638 add/subtract, or on those binary machines without guard digits
15639 which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
15640 It could conceivably fail on hexadecimal or decimal machines
15641 without guard digits, but we know of none.
15642
15643 Arguments
15644 =========
15645
15646 K (input) INTEGER
15647 The number of terms in the rational function to be solved by
15648 SLAED4. K >= 0.
15649
15650 N (input) INTEGER
15651 The number of rows and columns in the Q matrix.
15652 N >= K (deflation may result in N>K).
15653
15654 N1 (input) INTEGER
15655 The location of the last eigenvalue in the leading submatrix.
15656 min(1,N) <= N1 <= N/2.
15657
15658 D (output) REAL array, dimension (N)
15659 D(I) contains the updated eigenvalues for
15660 1 <= I <= K.
15661
15662 Q (output) REAL array, dimension (LDQ,N)
15663 Initially the first K columns are used as workspace.
15664 On output the columns 1 to K contain
15665 the updated eigenvectors.
15666
15667 LDQ (input) INTEGER
15668 The leading dimension of the array Q. LDQ >= max(1,N).
15669
15670 RHO (input) REAL
15671 The value of the parameter in the rank one update equation.
15672 RHO >= 0 required.
15673
15674 DLAMDA (input/output) REAL array, dimension (K)
15675 The first K elements of this array contain the old roots
15676 of the deflated updating problem. These are the poles
15677 of the secular equation. May be changed on output by
15678 having lowest order bit set to zero on Cray X-MP, Cray Y-MP,
15679 Cray-2, or Cray C-90, as described above.
15680
15681 Q2 (input) REAL array, dimension (LDQ2, N)
15682 The first K columns of this matrix contain the non-deflated
15683 eigenvectors for the split problem.
15684
15685 INDX (input) INTEGER array, dimension (N)
15686 The permutation used to arrange the columns of the deflated
15687 Q matrix into three groups (see SLAED2).
15688 The rows of the eigenvectors found by SLAED4 must be likewise
15689 permuted before the matrix multiply can take place.
15690
15691 CTOT (input) INTEGER array, dimension (4)
15692 A count of the total number of the various types of columns
15693 in Q, as described in INDX. The fourth column type is any
15694 column which has been deflated.
15695
15696 W (input/output) REAL array, dimension (K)
15697 The first K elements of this array contain the components
15698 of the deflation-adjusted updating vector. Destroyed on
15699 output.
15700
15701 S (workspace) REAL array, dimension (N1 + 1)*K
15702 Will contain the eigenvectors of the repaired matrix which
15703 will be multiplied by the previously accumulated eigenvectors
15704 to update the system.
15705
15706 LDS (input) INTEGER
15707 The leading dimension of S. LDS >= max(1,K).
15708
15709 INFO (output) INTEGER
15710 = 0: successful exit.
15711 < 0: if INFO = -i, the i-th argument had an illegal value.
15712 > 0: if INFO = 1, an eigenvalue did not converge
15713
15714 Further Details
15715 ===============
15716
15717 Based on contributions by
15718 Jeff Rutter, Computer Science Division, University of California
15719 at Berkeley, USA
15720 Modified by Francoise Tisseur, University of Tennessee.
15721
15722 =====================================================================
15723
15724
15725 Test the input parameters.
15726
15727 Parameter adjustments */
15728 /* Table of constant values */
15729 static integer c__1 = 1;
15730 static real c_b22 = 1.f;
15731 static real c_b23 = 0.f;
15732
15733 /* System generated locals */
15734 integer q_dim1, q_offset, i__1, i__2;
15735 real r__1;
15736 /* Builtin functions */
15737 // double sqrt(doublereal), r_sign(real *, real *);
15738 /* Local variables */
15739 static real temp;
15740 extern doublereal snrm2_(integer *, real *, integer *);
15741 static integer i__, j;
15742 extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *,
15743 integer *, real *, real *, integer *, real *, integer *, real *,
15744 real *, integer *), scopy_(integer *, real *,
15745 integer *, real *, integer *);
15746 static integer n2;
15747 extern /* Subroutine */ int slaed4_(integer *, integer *, real *, real *,
15748 real *, real *, real *, integer *);
15749 extern doublereal slamc3_(real *, real *);
15750 static integer n12, ii, n23;
15751 extern /* Subroutine */ int xerbla_(const char *, integer *), slacpy_(
15752 const char *, integer *, integer *, real *, integer *, real *, integer *
15753 ), slaset_(const char *, integer *, integer *, real *, real *,
15754 real *, integer *);
15755 static integer iq2;
15756 #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
15757
15758
15759 --d__;
15760 q_dim1 = *ldq;
15761 q_offset = 1 + q_dim1 * 1;
15762 q -= q_offset;
15763 --dlamda;
15764 --q2;
15765 --indx;
15766 --ctot;
15767 --w;
15768 --s;
15769
15770 /* Function Body */
15771 *info = 0;
15772
15773 if (*k < 0) {
15774 *info = -1;
15775 } else if (*n < *k) {
15776 *info = -2;
15777 } else if (*ldq < f2cmax(1,*n)) {
15778 *info = -6;
15779 }
15780 if (*info != 0) {
15781 i__1 = -(*info);
15782 xerbla_("SLAED3", &i__1);
15783 return 0;
15784 }
15785
15786 /* Quick return if possible */
15787
15788 if (*k == 0) {
15789 return 0;
15790 }
15791
15792 /* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
15793 be computed with high relative accuracy (barring over/underflow).
15794 This is a problem on machines without a guard digit in
15795 add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
15796 The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
15797 which on any of these machines zeros out the bottommost
15798 bit of DLAMDA(I) if it is 1; this makes the subsequent
15799 subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
15800 occurs. On binary machines with a guard digit (almost all
15801 machines) it does not change DLAMDA(I) at all. On hexadecimal
15802 and decimal machines with a guard digit, it slightly
15803 changes the bottommost bits of DLAMDA(I). It does not account
15804 for hexadecimal or decimal machines without guard digits
15805 (we know of none). We use a subroutine call to compute
15806 2*DLAMBDA(I) to prevent optimizing compilers from eliminating
15807 this code. */
15808
15809 i__1 = *k;
15810 for (i__ = 1; i__ <= i__1; ++i__) {
15811 dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__];
15812 /* L10: */
15813 }
15814
15815 i__1 = *k;
15816 for (j = 1; j <= i__1; ++j) {
15817 slaed4_(k, &j, &dlamda[1], &w[1], &q_ref(1, j), rho, &d__[j], info);
15818
15819 /* If the zero finder fails, the computation is terminated. */
15820
15821 if (*info != 0) {
15822 goto L120;
15823 }
15824 /* L20: */
15825 }
15826
15827 if (*k == 1) {
15828 goto L110;
15829 }
15830 if (*k == 2) {
15831 i__1 = *k;
15832 for (j = 1; j <= i__1; ++j) {
15833 w[1] = q_ref(1, j);
15834 w[2] = q_ref(2, j);
15835 ii = indx[1];
15836 q_ref(1, j) = w[ii];
15837 ii = indx[2];
15838 q_ref(2, j) = w[ii];
15839 /* L30: */
15840 }
15841 goto L110;
15842 }
15843
15844 /* Compute updated W. */
15845
15846 scopy_(k, &w[1], &c__1, &s[1], &c__1);
15847
15848 /* Initialize W(I) = Q(I,I) */
15849
15850 i__1 = *ldq + 1;
15851 scopy_(k, &q[q_offset], &i__1, &w[1], &c__1);
15852 i__1 = *k;
15853 for (j = 1; j <= i__1; ++j) {
15854 i__2 = j - 1;
15855 for (i__ = 1; i__ <= i__2; ++i__) {
15856 w[i__] *= q_ref(i__, j) / (dlamda[i__] - dlamda[j]);
15857 /* L40: */
15858 }
15859 i__2 = *k;
15860 for (i__ = j + 1; i__ <= i__2; ++i__) {
15861 w[i__] *= q_ref(i__, j) / (dlamda[i__] - dlamda[j]);
15862 /* L50: */
15863 }
15864 /* L60: */
15865 }
15866 i__1 = *k;
15867 for (i__ = 1; i__ <= i__1; ++i__) {
15868 r__1 = sqrt(-w[i__]);
15869 w[i__] = r_sign(&r__1, &s[i__]);
15870 /* L70: */
15871 }
15872
15873 /* Compute eigenvectors of the modified rank-1 modification. */
15874
15875 i__1 = *k;
15876 for (j = 1; j <= i__1; ++j) {
15877 i__2 = *k;
15878 for (i__ = 1; i__ <= i__2; ++i__) {
15879 s[i__] = w[i__] / q_ref(i__, j);
15880 /* L80: */
15881 }
15882 temp = snrm2_(k, &s[1], &c__1);
15883 i__2 = *k;
15884 for (i__ = 1; i__ <= i__2; ++i__) {
15885 ii = indx[i__];
15886 q_ref(i__, j) = s[ii] / temp;
15887 /* L90: */
15888 }
15889 /* L100: */
15890 }
15891
15892 /* Compute the updated eigenvectors. */
15893
15894 L110:
15895
15896 n2 = *n - *n1;
15897 n12 = ctot[1] + ctot[2];
15898 n23 = ctot[2] + ctot[3];
15899
15900 slacpy_("A", &n23, k, &q_ref(ctot[1] + 1, 1), ldq, &s[1], &n23)
15901 ;
15902 iq2 = *n1 * n12 + 1;
15903 if (n23 != 0) {
15904 sgemm_("N", "N", &n2, k, &n23, &c_b22, &q2[iq2], &n2, &s[1], &n23, &
15905 c_b23, &q_ref(*n1 + 1, 1), ldq);
15906 } else {
15907 slaset_("A", &n2, k, &c_b23, &c_b23, &q_ref(*n1 + 1, 1), ldq);
15908 }
15909
15910 slacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12);
15911 if (n12 != 0) {
15912 sgemm_("N", "N", n1, k, &n12, &c_b22, &q2[1], n1, &s[1], &n12, &c_b23,
15913 &q[q_offset], ldq);
15914 } else {
15915 slaset_("A", n1, k, &c_b23, &c_b23, &q_ref(1, 1), ldq);
15916 }
15917
15918
15919 L120:
15920 return 0;
15921
15922 /* End of SLAED3 */
15923
15924 } /* slaed3_ */
|
|
||||||||||||||||||||||||||||||||||||
|
Definition at line 14435 of file lapackblas.cpp. References b, dabs, df2cmax, df2cmin, doublereal, integer, logical, phi, real, slaed5_(), slaed6_(), slamch_(), and sqrt(). Referenced by slaed3_(), and slaed9_(). 14437 {
14438 /* -- LAPACK routine (version 3.0) --
14439 Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
14440 Courant Institute, NAG Ltd., and Rice University
14441 December 23, 1999
14442
14443
14444 Purpose
14445 =======
14446
14447 This subroutine computes the I-th updated eigenvalue of a symmetric
14448 rank-one modification to a diagonal matrix whose elements are
14449 given in the array d, and that
14450
14451 D(i) < D(j) for i < j
14452
14453 and that RHO > 0. This is arranged by the calling routine, and is
14454 no loss in generality. The rank-one modified system is thus
14455
14456 diag( D ) + RHO * Z * Z_transpose.
14457
14458 where we assume the Euclidean norm of Z is 1.
14459
14460 The method consists of approximating the rational functions in the
14461 secular equation by simpler interpolating rational functions.
14462
14463 Arguments
14464 =========
14465
14466 N (input) INTEGER
14467 The length of all arrays.
14468
14469 I (input) INTEGER
14470 The index of the eigenvalue to be computed. 1 <= I <= N.
14471
14472 D (input) REAL array, dimension (N)
14473 The original eigenvalues. It is assumed that they are in
14474 order, D(I) < D(J) for I < J.
14475
14476 Z (input) REAL array, dimension (N)
14477 The components of the updating vector.
14478
14479 DELTA (output) REAL array, dimension (N)
14480 If N .ne. 1, DELTA contains (D(j) - lambda_I) in its j-th
14481 component. If N = 1, then DELTA(1) = 1. The vector DELTA
14482 contains the information necessary to construct the
14483 eigenvectors.
14484
14485 RHO (input) REAL
14486 The scalar in the symmetric updating formula.
14487
14488 DLAM (output) REAL
14489 The computed lambda_I, the I-th updated eigenvalue.
14490
14491 INFO (output) INTEGER
14492 = 0: successful exit
14493 > 0: if INFO = 1, the updating process failed.
14494
14495 Internal Parameters
14496 ===================
14497
14498 Logical variable ORGATI (origin-at-i?) is used for distinguishing
14499 whether D(i) or D(i+1) is treated as the origin.
14500
14501 ORGATI = .true. origin at i
14502 ORGATI = .false. origin at i+1
14503
14504 Logical variable SWTCH3 (switch-for-3-poles?) is for noting
14505 if we are working with THREE poles!
14506
14507 MAXIT is the maximum number of iterations allowed for each
14508 eigenvalue.
14509
14510 Further Details
14511 ===============
14512
14513 Based on contributions by
14514 Ren-Cang Li, Computer Science Division, University of California
14515 at Berkeley, USA
14516
14517 =====================================================================
14518
14519
14520 Since this routine is called in an inner loop, we do no argument
14521 checking.
14522
14523 Quick return for N=1 and 2.
14524
14525 Parameter adjustments */
14526 /* System generated locals */
14527 integer i__1;
14528 real r__1;
14529 /* Builtin functions */
14530 // double sqrt(doublereal);
14531 /* Local variables */
14532 static real dphi, dpsi;
14533 static integer iter;
14534 static real temp, prew, temp1, a, b, c__;
14535 static integer j;
14536 static real w, dltlb, dltub, midpt;
14537 static integer niter;
14538 static logical swtch;
14539 extern /* Subroutine */ int slaed5_(integer *, real *, real *, real *,
14540 real *, real *), slaed6_(integer *, logical *, real *, real *,
14541 real *, real *, real *, integer *);
14542 static logical swtch3;
14543 static integer ii;
14544 static real dw;
14545 extern doublereal slamch_(const char *);
14546 static real zz[3];
14547 static logical orgati;
14548 static real erretm, rhoinv;
14549 static integer ip1;
14550 static real del, eta, phi, eps, tau, psi;
14551 static integer iim1, iip1;
14552
14553 --delta;
14554 --z__;
14555 --d__;
14556
14557 /* Function Body */
14558 *info = 0;
14559 if (*n == 1) {
14560
14561 /* Presumably, I=1 upon entry */
14562
14563 *dlam = d__[1] + *rho * z__[1] * z__[1];
14564 delta[1] = 1.f;
14565 return 0;
14566 }
14567 if (*n == 2) {
14568 slaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam);
14569 return 0;
14570 }
14571
14572 /* Compute machine epsilon */
14573
14574 eps = slamch_("Epsilon");
14575 rhoinv = 1.f / *rho;
14576
14577 /* The case I = N */
14578
14579 if (*i__ == *n) {
14580
14581 /* Initialize some basic variables */
14582
14583 ii = *n - 1;
14584 niter = 1;
14585
14586 /* Calculate initial guess */
14587
14588 midpt = *rho / 2.f;
14589
14590 /* If ||Z||_2 is not one, then TEMP should be set to
14591 RHO * ||Z||_2^2 / TWO */
14592
14593 i__1 = *n;
14594 for (j = 1; j <= i__1; ++j) {
14595 delta[j] = d__[j] - d__[*i__] - midpt;
14596 /* L10: */
14597 }
14598
14599 psi = 0.f;
14600 i__1 = *n - 2;
14601 for (j = 1; j <= i__1; ++j) {
14602 psi += z__[j] * z__[j] / delta[j];
14603 /* L20: */
14604 }
14605
14606 c__ = rhoinv + psi;
14607 w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[*
14608 n];
14609
14610 if (w <= 0.f) {
14611 temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho)
14612 + z__[*n] * z__[*n] / *rho;
14613 if (c__ <= temp) {
14614 tau = *rho;
14615 } else {
14616 del = d__[*n] - d__[*n - 1];
14617 a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]
14618 ;
14619 b = z__[*n] * z__[*n] * del;
14620 if (a < 0.f) {
14621 tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a);
14622 } else {
14623 tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f);
14624 }
14625 }
14626
14627 /* It can be proved that
14628 D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO */
14629
14630 dltlb = midpt;
14631 dltub = *rho;
14632 } else {
14633 del = d__[*n] - d__[*n - 1];
14634 a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n];
14635 b = z__[*n] * z__[*n] * del;
14636 if (a < 0.f) {
14637 tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a);
14638 } else {
14639 tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f);
14640 }
14641
14642 /* It can be proved that
14643 D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 */
14644
14645 dltlb = 0.f;
14646 dltub = midpt;
14647 }
14648
14649 i__1 = *n;
14650 for (j = 1; j <= i__1; ++j) {
14651 delta[j] = d__[j] - d__[*i__] - tau;
14652 /* L30: */
14653 }
14654
14655 /* Evaluate PSI and the derivative DPSI */
14656
14657 dpsi = 0.f;
14658 psi = 0.f;
14659 erretm = 0.f;
14660 i__1 = ii;
14661 for (j = 1; j <= i__1; ++j) {
14662 temp = z__[j] / delta[j];
14663 psi += z__[j] * temp;
14664 dpsi += temp * temp;
14665 erretm += psi;
14666 /* L40: */
14667 }
14668 erretm = dabs(erretm);
14669
14670 /* Evaluate PHI and the derivative DPHI */
14671
14672 temp = z__[*n] / delta[*n];
14673 phi = z__[*n] * temp;
14674 dphi = temp * temp;
14675 erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * (
14676 dpsi + dphi);
14677
14678 w = rhoinv + phi + psi;
14679
14680 /* Test for convergence */
14681
14682 if (dabs(w) <= eps * erretm) {
14683 *dlam = d__[*i__] + tau;
14684 goto L250;
14685 }
14686
14687 if (w <= 0.f) {
14688 dltlb = df2cmax(dltlb,tau);
14689 } else {
14690 dltub = df2cmin(dltub,tau);
14691 }
14692
14693 /* Calculate the new step */
14694
14695 ++niter;
14696 c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi;
14697 a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * (
14698 dpsi + dphi);
14699 b = delta[*n - 1] * delta[*n] * w;
14700 if (c__ < 0.f) {
14701 c__ = dabs(c__);
14702 }
14703 if (c__ == 0.f) {
14704 /* ETA = B/A
14705 ETA = RHO - TAU */
14706 eta = dltub - tau;
14707 } else if (a >= 0.f) {
14708 eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / (
14709 c__ * 2.f);
14710 } else {
14711 eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(
14712 r__1))));
14713 }
14714
14715 /* Note, eta should be positive if w is negative, and
14716 eta should be negative otherwise. However,
14717 if for some reason caused by roundoff, eta*w > 0,
14718 we simply use one Newton step instead. This way
14719 will guarantee eta*w < 0. */
14720
14721 if (w * eta > 0.f) {
14722 eta = -w / (dpsi + dphi);
14723 }
14724 temp = tau + eta;
14725 if (temp > dltub || temp < dltlb) {
14726 if (w < 0.f) {
14727 eta = (dltub - tau) / 2.f;
14728 } else {
14729 eta = (dltlb - tau) / 2.f;
14730 }
14731 }
14732 i__1 = *n;
14733 for (j = 1; j <= i__1; ++j) {
14734 delta[j] -= eta;
14735 /* L50: */
14736 }
14737
14738 tau += eta;
14739
14740 /* Evaluate PSI and the derivative DPSI */
14741
14742 dpsi = 0.f;
14743 psi = 0.f;
14744 erretm = 0.f;
14745 i__1 = ii;
14746 for (j = 1; j <= i__1; ++j) {
14747 temp = z__[j] / delta[j];
14748 psi += z__[j] * temp;
14749 dpsi += temp * temp;
14750 erretm += psi;
14751 /* L60: */
14752 }
14753 erretm = dabs(erretm);
14754
14755 /* Evaluate PHI and the derivative DPHI */
14756
14757 temp = z__[*n] / delta[*n];
14758 phi = z__[*n] * temp;
14759 dphi = temp * temp;
14760 erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * (
14761 dpsi + dphi);
14762
14763 w = rhoinv + phi + psi;
14764
14765 /* Main loop to update the values of the array DELTA */
14766
14767 iter = niter + 1;
14768
14769 for (niter = iter; niter <= 30; ++niter) {
14770
14771 /* Test for convergence */
14772
14773 if (dabs(w) <= eps * erretm) {
14774 *dlam = d__[*i__] + tau;
14775 goto L250;
14776 }
14777
14778 if (w <= 0.f) {
14779 dltlb = df2cmax(dltlb,tau);
14780 } else {
14781 dltub = df2cmin(dltub,tau);
14782 }
14783
14784 /* Calculate the new step */
14785
14786 c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi;
14787 a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] *
14788 (dpsi + dphi);
14789 b = delta[*n - 1] * delta[*n] * w;
14790 if (a >= 0.f) {
14791 eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) /
14792 (c__ * 2.f);
14793 } else {
14794 eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(
14795 r__1))));
14796 }
14797
14798 /* Note, eta should be positive if w is negative, and
14799 eta should be negative otherwise. However,
14800 if for some reason caused by roundoff, eta*w > 0,
14801 we simply use one Newton step instead. This way
14802 will guarantee eta*w < 0. */
14803
14804 if (w * eta > 0.f) {
14805 eta = -w / (dpsi + dphi);
14806 }
14807 temp = tau + eta;
14808 if (temp > dltub || temp < dltlb) {
14809 if (w < 0.f) {
14810 eta = (dltub - tau) / 2.f;
14811 } else {
14812 eta = (dltlb - tau) / 2.f;
14813 }
14814 }
14815 i__1 = *n;
14816 for (j = 1; j <= i__1; ++j) {
14817 delta[j] -= eta;
14818 /* L70: */
14819 }
14820
14821 tau += eta;
14822
14823 /* Evaluate PSI and the derivative DPSI */
14824
14825 dpsi = 0.f;
14826 psi = 0.f;
14827 erretm = 0.f;
14828 i__1 = ii;
14829 for (j = 1; j <= i__1; ++j) {
14830 temp = z__[j] / delta[j];
14831 psi += z__[j] * temp;
14832 dpsi += temp * temp;
14833 erretm += psi;
14834 /* L80: */
14835 }
14836 erretm = dabs(erretm);
14837
14838 /* Evaluate PHI and the derivative DPHI */
14839
14840 temp = z__[*n] / delta[*n];
14841 phi = z__[*n] * temp;
14842 dphi = temp * temp;
14843 erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) *
14844 (dpsi + dphi);
14845
14846 w = rhoinv + phi + psi;
14847 /* L90: */
14848 }
14849
14850 /* Return with INFO = 1, NITER = MAXIT and not converged */
14851
14852 *info = 1;
14853 *dlam = d__[*i__] + tau;
14854 goto L250;
14855
14856 /* End for the case I = N */
14857
14858 } else {
14859
14860 /* The case for I < N */
14861
14862 niter = 1;
14863 ip1 = *i__ + 1;
14864
14865 /* Calculate initial guess */
14866
14867 del = d__[ip1] - d__[*i__];
14868 midpt = del / 2.f;
14869 i__1 = *n;
14870 for (j = 1; j <= i__1; ++j) {
14871 delta[j] = d__[j] - d__[*i__] - midpt;
14872 /* L100: */
14873 }
14874
14875 psi = 0.f;
14876 i__1 = *i__ - 1;
14877 for (j = 1; j <= i__1; ++j) {
14878 psi += z__[j] * z__[j] / delta[j];
14879 /* L110: */
14880 }
14881
14882 phi = 0.f;
14883 i__1 = *i__ + 2;
14884 for (j = *n; j >= i__1; --j) {
14885 phi += z__[j] * z__[j] / delta[j];
14886 /* L120: */
14887 }
14888 c__ = rhoinv + psi + phi;
14889 w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] /
14890 delta[ip1];
14891
14892 if (w > 0.f) {
14893
14894 /* d(i)< the ith eigenvalue < (d(i)+d(i+1))/2
14895
14896 We choose d(i) as origin. */
14897
14898 orgati = TRUE_;
14899 a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1];
14900 b = z__[*i__] * z__[*i__] * del;
14901 if (a > 0.f) {
14902 tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(
14903 r__1))));
14904 } else {
14905 tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) /
14906 (c__ * 2.f);
14907 }
14908 dltlb = 0.f;
14909 dltub = midpt;
14910 } else {
14911
14912 /* (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1)
14913
14914 We choose d(i+1) as origin. */
14915
14916 orgati = FALSE_;
14917 a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1];
14918 b = z__[ip1] * z__[ip1] * del;
14919 if (a < 0.f) {
14920 tau = b * 2.f / (a - sqrt((r__1 = a * a + b * 4.f * c__, dabs(
14921 r__1))));
14922 } else {
14923 tau = -(a + sqrt((r__1 = a * a + b * 4.f * c__, dabs(r__1))))
14924 / (c__ * 2.f);
14925 }
14926 dltlb = -midpt;
14927 dltub = 0.f;
14928 }
14929
14930 if (orgati) {
14931 i__1 = *n;
14932 for (j = 1; j <= i__1; ++j) {
14933 delta[j] = d__[j] - d__[*i__] - tau;
14934 /* L130: */
14935 }
14936 } else {
14937 i__1 = *n;
14938 for (j = 1; j <= i__1; ++j) {
14939 delta[j] = d__[j] - d__[ip1] - tau;
14940 /* L140: */
14941 }
14942 }
14943 if (orgati) {
14944 ii = *i__;
14945 } else {
14946 ii = *i__ + 1;
14947 }
14948 iim1 = ii - 1;
14949 iip1 = ii + 1;
14950
14951 /* Evaluate PSI and the derivative DPSI */
14952
14953 dpsi = 0.f;
14954 psi = 0.f;
14955 erretm = 0.f;
14956 i__1 = iim1;
14957 for (j = 1; j <= i__1; ++j) {
14958 temp = z__[j] / delta[j];
14959 psi += z__[j] * temp;
14960 dpsi += temp * temp;
14961 erretm += psi;
14962 /* L150: */
14963 }
14964 erretm = dabs(erretm);
14965
14966 /* Evaluate PHI and the derivative DPHI */
14967
14968 dphi = 0.f;
14969 phi = 0.f;
14970 i__1 = iip1;
14971 for (j = *n; j >= i__1; --j) {
14972 temp = z__[j] / delta[j];
14973 phi += z__[j] * temp;
14974 dphi += temp * temp;
14975 erretm += phi;
14976 /* L160: */
14977 }
14978
14979 w = rhoinv + phi + psi;
14980
14981 /* W is the value of the secular function with
14982 its ii-th element removed. */
14983
14984 swtch3 = FALSE_;
14985 if (orgati) {
14986 if (w < 0.f) {
14987 swtch3 = TRUE_;
14988 }
14989 } else {
14990 if (w > 0.f) {
14991 swtch3 = TRUE_;
14992 }
14993 }
14994 if (ii == 1 || ii == *n) {
14995 swtch3 = FALSE_;
14996 }
14997
14998 temp = z__[ii] / delta[ii];
14999 dw = dpsi + dphi + temp * temp;
15000 temp = z__[ii] * temp;
15001 w += temp;
15002 erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f
15003 + dabs(tau) * dw;
15004
15005 /* Test for convergence */
15006
15007 if (dabs(w) <= eps * erretm) {
15008 if (orgati) {
15009 *dlam = d__[*i__] + tau;
15010 } else {
15011 *dlam = d__[ip1] + tau;
15012 }
15013 goto L250;
15014 }
15015
15016 if (w <= 0.f) {
15017 dltlb = df2cmax(dltlb,tau);
15018 } else {
15019 dltub = df2cmin(dltub,tau);
15020 }
15021
15022 /* Calculate the new step */
15023
15024 ++niter;
15025 if (! swtch3) {
15026 if (orgati) {
15027 /* Computing 2nd power */
15028 r__1 = z__[*i__] / delta[*i__];
15029 c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (r__1 *
15030 r__1);
15031 } else {
15032 /* Computing 2nd power */
15033 r__1 = z__[ip1] / delta[ip1];
15034 c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (r__1 *
15035 r__1);
15036 }
15037 a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] *
15038 dw;
15039 b = delta[*i__] * delta[ip1] * w;
15040 if (c__ == 0.f) {
15041 if (a == 0.f) {
15042 if (orgati) {
15043 a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] *
15044 (dpsi + dphi);
15045 } else {
15046 a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] *
15047 (dpsi + dphi);
15048 }
15049 }
15050 eta = b / a;
15051 } else if (a <= 0.f) {
15052 eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) /
15053 (c__ * 2.f);
15054 } else {
15055 eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(
15056 r__1))));
15057 }
15058 } else {
15059
15060 /* Interpolation using THREE most relevant poles */
15061
15062 temp = rhoinv + psi + phi;
15063 if (orgati) {
15064 temp1 = z__[iim1] / delta[iim1];
15065 temp1 *= temp1;
15066 c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[
15067 iip1]) * temp1;
15068 zz[0] = z__[iim1] * z__[iim1];
15069 zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi);
15070 } else {
15071 temp1 = z__[iip1] / delta[iip1];
15072 temp1 *= temp1;
15073 c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[
15074 iim1]) * temp1;
15075 zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1));
15076 zz[2] = z__[iip1] * z__[iip1];
15077 }
15078 zz[1] = z__[ii] * z__[ii];
15079 slaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info);
15080 if (*info != 0) {
15081 goto L250;
15082 }
15083 }
15084
15085 /* Note, eta should be positive if w is negative, and
15086 eta should be negative otherwise. However,
15087 if for some reason caused by roundoff, eta*w > 0,
15088 we simply use one Newton step instead. This way
15089 will guarantee eta*w < 0. */
15090
15091 if (w * eta >= 0.f) {
15092 eta = -w / dw;
15093 }
15094 temp = tau + eta;
15095 if (temp > dltub || temp < dltlb) {
15096 if (w < 0.f) {
15097 eta = (dltub - tau) / 2.f;
15098 } else {
15099 eta = (dltlb - tau) / 2.f;
15100 }
15101 }
15102
15103 prew = w;
15104
15105 /* L170: */
15106 i__1 = *n;
15107 for (j = 1; j <= i__1; ++j) {
15108 delta[j] -= eta;
15109 /* L180: */
15110 }
15111
15112 /* Evaluate PSI and the derivative DPSI */
15113
15114 dpsi = 0.f;
15115 psi = 0.f;
15116 erretm = 0.f;
15117 i__1 = iim1;
15118 for (j = 1; j <= i__1; ++j) {
15119 temp = z__[j] / delta[j];
15120 psi += z__[j] * temp;
15121 dpsi += temp * temp;
15122 erretm += psi;
15123 /* L190: */
15124 }
15125 erretm = dabs(erretm);
15126
15127 /* Evaluate PHI and the derivative DPHI */
15128
15129 dphi = 0.f;
15130 phi = 0.f;
15131 i__1 = iip1;
15132 for (j = *n; j >= i__1; --j) {
15133 temp = z__[j] / delta[j];
15134 phi += z__[j] * temp;
15135 dphi += temp * temp;
15136 erretm += phi;
15137 /* L200: */
15138 }
15139
15140 temp = z__[ii] / delta[ii];
15141 dw = dpsi + dphi + temp * temp;
15142 temp = z__[ii] * temp;
15143 w = rhoinv + phi + psi + temp;
15144 erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f
15145 + (r__1 = tau + eta, dabs(r__1)) * dw;
15146
15147 swtch = FALSE_;
15148 if (orgati) {
15149 if (-w > dabs(prew) / 10.f) {
15150 swtch = TRUE_;
15151 }
15152 } else {
15153 if (w > dabs(prew) / 10.f) {
15154 swtch = TRUE_;
15155 }
15156 }
15157
15158 tau += eta;
15159
15160 /* Main loop to update the values of the array DELTA */
15161
15162 iter = niter + 1;
15163
15164 for (niter = iter; niter <= 30; ++niter) {
15165
15166 /* Test for convergence */
15167
15168 if (dabs(w) <= eps * erretm) {
15169 if (orgati) {
15170 *dlam = d__[*i__] + tau;
15171 } else {
15172 *dlam = d__[ip1] + tau;
15173 }
15174 goto L250;
15175 }
15176
15177 if (w <= 0.f) {
15178 dltlb = df2cmax(dltlb,tau);
15179 } else {
15180 dltub = df2cmin(dltub,tau);
15181 }
15182
15183 /* Calculate the new step */
15184
15185 if (! swtch3) {
15186 if (! swtch) {
15187 if (orgati) {
15188 /* Computing 2nd power */
15189 r__1 = z__[*i__] / delta[*i__];
15190 c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (
15191 r__1 * r__1);
15192 } else {
15193 /* Computing 2nd power */
15194 r__1 = z__[ip1] / delta[ip1];
15195 c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) *
15196 (r__1 * r__1);
15197 }
15198 } else {
15199 temp = z__[ii] / delta[ii];
15200 if (orgati) {
15201 dpsi += temp * temp;
15202 } else {
15203 dphi += temp * temp;
15204 }
15205 c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi;
15206 }
15207 a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1]
15208 * dw;
15209 b = delta[*i__] * delta[ip1] * w;
15210 if (c__ == 0.f) {
15211 if (a == 0.f) {
15212 if (! swtch) {
15213 if (orgati) {
15214 a = z__[*i__] * z__[*i__] + delta[ip1] *
15215 delta[ip1] * (dpsi + dphi);
15216 } else {
15217 a = z__[ip1] * z__[ip1] + delta[*i__] * delta[
15218 *i__] * (dpsi + dphi);
15219 }
15220 } else {
15221 a = delta[*i__] * delta[*i__] * dpsi + delta[ip1]
15222 * delta[ip1] * dphi;
15223 }
15224 }
15225 eta = b / a;
15226 } else if (a <= 0.f) {
15227 eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1))
15228 )) / (c__ * 2.f);
15229 } else {
15230 eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__,
15231 dabs(r__1))));
15232 }
15233 } else {
15234
15235 /* Interpolation using THREE most relevant poles */
15236
15237 temp = rhoinv + psi + phi;
15238 if (swtch) {
15239 c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi;
15240 zz[0] = delta[iim1] * delta[iim1] * dpsi;
15241 zz[2] = delta[iip1] * delta[iip1] * dphi;
15242 } else {
15243 if (orgati) {
15244 temp1 = z__[iim1] / delta[iim1];
15245 temp1 *= temp1;
15246 c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1]
15247 - d__[iip1]) * temp1;
15248 zz[0] = z__[iim1] * z__[iim1];
15249 zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 +
15250 dphi);
15251 } else {
15252 temp1 = z__[iip1] / delta[iip1];
15253 temp1 *= temp1;
15254 c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1]
15255 - d__[iim1]) * temp1;
15256 zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi -
15257 temp1));
15258 zz[2] = z__[iip1] * z__[iip1];
15259 }
15260 }
15261 slaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta,
15262 info);
15263 if (*info != 0) {
15264 goto L250;
15265 }
15266 }
15267
15268 /* Note, eta should be positive if w is negative, and
15269 eta should be negative otherwise. However,
15270 if for some reason caused by roundoff, eta*w > 0,
15271 we simply use one Newton step instead. This way
15272 will guarantee eta*w < 0. */
15273
15274 if (w * eta >= 0.f) {
15275 eta = -w / dw;
15276 }
15277 temp = tau + eta;
15278 if (temp > dltub || temp < dltlb) {
15279 if (w < 0.f) {
15280 eta = (dltub - tau) / 2.f;
15281 } else {
15282 eta = (dltlb - tau) / 2.f;
15283 }
15284 }
15285
15286 i__1 = *n;
15287 for (j = 1; j <= i__1; ++j) {
15288 delta[j] -= eta;
15289 /* L210: */
15290 }
15291
15292 tau += eta;
15293 prew = w;
15294
15295 /* Evaluate PSI and the derivative DPSI */
15296
15297 dpsi = 0.f;
15298 psi = 0.f;
15299 erretm = 0.f;
15300 i__1 = iim1;
15301 for (j = 1; j <= i__1; ++j) {
15302 temp = z__[j] / delta[j];
15303 psi += z__[j] * temp;
15304 dpsi += temp * temp;
15305 erretm += psi;
15306 /* L220: */
15307 }
15308 erretm = dabs(erretm);
15309
15310 /* Evaluate PHI and the derivative DPHI */
15311
15312 dphi = 0.f;
15313 phi = 0.f;
15314 i__1 = iip1;
15315 for (j = *n; j >= i__1; --j) {
15316 temp = z__[j] / delta[j];
15317 phi += z__[j] * temp;
15318 dphi += temp * temp;
15319 erretm += phi;
15320 /* L230: */
15321 }
15322
15323 temp = z__[ii] / delta[ii];
15324 dw = dpsi + dphi + temp * temp;
15325 temp = z__[ii] * temp;
15326 w = rhoinv + phi + psi + temp;
15327 erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) *
15328 3.f + dabs(tau) * dw;
15329 if (w * prew > 0.f && dabs(w) > dabs(prew) / 10.f) {
15330 swtch = ! swtch;
15331 }
15332
15333 /* L240: */
15334 }
15335
15336 /* Return with INFO = 1, NITER = MAXIT and not converged */
15337
15338 *info = 1;
15339 if (orgati) {
15340 *dlam = d__[*i__] + tau;
15341 } else {
15342 *dlam = d__[ip1] + tau;
15343 }
15344
15345 }
15346
15347 L250:
15348
15349 return 0;
15350
15351 /* End of SLAED4 */
15352
15353 } /* slaed4_ */
|
|
||||||||||||||||||||||||||||
|
Definition at line 16243 of file lapackblas.cpp. References b, dabs, real, and sqrt(). Referenced by slaed4_(). 16245 {
16246 /* -- LAPACK routine (version 3.0) --
16247 Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
16248 Courant Institute, NAG Ltd., and Rice University
16249 September 30, 1994
16250
16251
16252 Purpose
16253 =======
16254
16255 This subroutine computes the I-th eigenvalue of a symmetric rank-one
16256 modification of a 2-by-2 diagonal matrix
16257
16258 diag( D ) + RHO * Z * transpose(Z) .
16259
16260 The diagonal elements in the array D are assumed to satisfy
16261
16262 D(i) < D(j) for i < j .
16263
16264 We also assume RHO > 0 and that the Euclidean norm of the vector
16265 Z is one.
16266
16267 Arguments
16268 =========
16269
16270 I (input) INTEGER
16271 The index of the eigenvalue to be computed. I = 1 or I = 2.
16272
16273 D (input) REAL array, dimension (2)
16274 The original eigenvalues. We assume D(1) < D(2).
16275
16276 Z (input) REAL array, dimension (2)
16277 The components of the updating vector.
16278
16279 DELTA (output) REAL array, dimension (2)
16280 The vector DELTA contains the information necessary
16281 to construct the eigenvectors.
16282
16283 RHO (input) REAL
16284 The scalar in the symmetric updating formula.
16285
16286 DLAM (output) REAL
16287 The computed lambda_I, the I-th updated eigenvalue.
16288
16289 Further Details
16290 ===============
16291
16292 Based on contributions by
16293 Ren-Cang Li, Computer Science Division, University of California
16294 at Berkeley, USA
16295
16296 =====================================================================
16297
16298
16299 Parameter adjustments */
16300 /* System generated locals */
16301 real r__1;
16302 /* Builtin functions */
16303 // double sqrt(doublereal);
16304 /* Local variables */
16305 static real temp, b, c__, w, del, tau;
16306
16307 --delta;
16308 --z__;
16309 --d__;
16310
16311 /* Function Body */
16312 del = d__[2] - d__[1];
16313 if (*i__ == 1) {
16314 w = *rho * 2.f * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.f;
16315 if (w > 0.f) {
16316 b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
16317 c__ = *rho * z__[1] * z__[1] * del;
16318
16319 /* B > ZERO, always */
16320
16321 tau = c__ * 2.f / (b + sqrt((r__1 = b * b - c__ * 4.f, dabs(r__1))
16322 ));
16323 *dlam = d__[1] + tau;
16324 delta[1] = -z__[1] / tau;
16325 delta[2] = z__[2] / (del - tau);
16326 } else {
16327 b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
16328 c__ = *rho * z__[2] * z__[2] * del;
16329 if (b > 0.f) {
16330 tau = c__ * -2.f / (b + sqrt(b * b + c__ * 4.f));
16331 } else {
16332 tau = (b - sqrt(b * b + c__ * 4.f)) / 2.f;
16333 }
16334 *dlam = d__[2] + tau;
16335 delta[1] = -z__[1] / (del + tau);
16336 delta[2] = -z__[2] / tau;
16337 }
16338 temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]);
16339 delta[1] /= temp;
16340 delta[2] /= temp;
16341 } else {
16342
16343 /* Now I=2 */
16344
16345 b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
16346 c__ = *rho * z__[2] * z__[2] * del;
16347 if (b > 0.f) {
16348 tau = (b + sqrt(b * b + c__ * 4.f)) / 2.f;
16349 } else {
16350 tau = c__ * 2.f / (-b + sqrt(b * b + c__ * 4.f));
16351 }
16352 *dlam = d__[2] + tau;
16353 delta[1] = -z__[1] / (del + tau);
16354 delta[2] = -z__[2] / tau;
16355 temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]);
16356 delta[1] /= temp;
16357 delta[2] /= temp;
16358 }
16359 return 0;
16360
16361 /* End OF SLAED5 */
16362
16363 } /* slaed5_ */
|
|
||||||||||||||||||||||||||||||||||||
|
Definition at line 15929 of file lapackblas.cpp. References b, dabs, df2cmax, df2cmin, doublereal, f2cmax, integer, log(), logical, pow_ri(), real, slamch_(), and sqrt(). Referenced by slaed4_(). 15931 {
15932 /* -- LAPACK routine (version 3.0) --
15933 Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
15934 Courant Institute, NAG Ltd., and Rice University
15935 June 30, 1999
15936
15937
15938 Purpose
15939 =======
15940
15941 SLAED6 computes the positive or negative root (closest to the origin)
15942 of
15943 z(1) z(2) z(3)
15944 f(x) = rho + --------- + ---------- + ---------
15945 d(1)-x d(2)-x d(3)-x
15946
15947 It is assumed that
15948
15949 if ORGATI = .true. the root is between d(2) and d(3);
15950 otherwise it is between d(1) and d(2)
15951
15952 This routine will be called by SLAED4 when necessary. In most cases,
15953 the root sought is the smallest in magnitude, though it might not be
15954 in some extremely rare situations.
15955
15956 Arguments
15957 =========
15958
15959 KNITER (input) INTEGER
15960 Refer to SLAED4 for its significance.
15961
15962 ORGATI (input) LOGICAL
15963 If ORGATI is true, the needed root is between d(2) and
15964 d(3); otherwise it is between d(1) and d(2). See
15965 SLAED4 for further details.
15966
15967 RHO (input) REAL
15968 Refer to the equation f(x) above.
15969
15970 D (input) REAL array, dimension (3)
15971 D satisfies d(1) < d(2) < d(3).
15972
15973 Z (input) REAL array, dimension (3)
15974 Each of the elements in z must be positive.
15975
15976 FINIT (input) REAL
15977 The value of f at 0. It is more accurate than the one
15978 evaluated inside this routine (if someone wants to do
15979 so).
15980
15981 TAU (output) REAL
15982 The root of the equation f(x).
15983
15984 INFO (output) INTEGER
15985 = 0: successful exit
15986 > 0: if INFO = 1, failure to converge
15987
15988 Further Details
15989 ===============
15990
15991 Based on contributions by
15992 Ren-Cang Li, Computer Science Division, University of California
15993 at Berkeley, USA
15994
15995 =====================================================================
15996
15997 Parameter adjustments */
15998 /* Initialized data */
15999 static logical first = TRUE_;
16000 /* System generated locals */
16001 integer i__1;
16002 real r__1, r__2, r__3, r__4;
16003 /* Builtin functions */
16004 // double sqrt(doublereal), log(doublereal), pow_ri(real *, integer *);
16005 /* Local variables */
16006 static real base;
16007 static integer iter;
16008 static real temp, temp1, temp2, temp3, temp4, a, b, c__, f;
16009 static integer i__;
16010 static logical scale;
16011 static integer niter;
16012 static real small1, small2, fc, df, sminv1, sminv2, dscale[3], sclfac;
16013 extern doublereal slamch_(const char *);
16014 static real zscale[3], erretm, sclinv, ddf, eta, eps;
16015
16016 --z__;
16017 --d__;
16018
16019 /* Function Body */
16020
16021 *info = 0;
16022
16023 niter = 1;
16024 *tau = 0.f;
16025 if (*kniter == 2) {
16026 if (*orgati) {
16027 temp = (d__[3] - d__[2]) / 2.f;
16028 c__ = *rho + z__[1] / (d__[1] - d__[2] - temp);
16029 a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3];
16030 b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2];
16031 } else {
16032 temp = (d__[1] - d__[2]) / 2.f;
16033 c__ = *rho + z__[3] / (d__[3] - d__[2] - temp);
16034 a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2];
16035 b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1];
16036 }
16037 /* Computing MAX */
16038 r__1 = dabs(a), r__2 = dabs(b), r__1 = f2cmax(r__1,r__2), r__2 = dabs(
16039 c__);
16040 temp = df2cmax(r__1,r__2);
16041 a /= temp;
16042 b /= temp;
16043 c__ /= temp;
16044 if (c__ == 0.f) {
16045 *tau = b / a;
16046 } else if (a <= 0.f) {
16047 *tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / (
16048 c__ * 2.f);
16049 } else {
16050 *tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(
16051 r__1))));
16052 }
16053 temp = *rho + z__[1] / (d__[1] - *tau) + z__[2] / (d__[2] - *tau) +
16054 z__[3] / (d__[3] - *tau);
16055 if (dabs(*finit) <= dabs(temp)) {
16056 *tau = 0.f;
16057 }
16058 }
16059
16060 /* On first call to routine, get machine parameters for
16061 possible scaling to avoid overflow */
16062
16063 if (first) {
16064 eps = slamch_("Epsilon");
16065 base = slamch_("Base");
16066 i__1 = (integer) (log(slamch_("SafMin")) / log(base) / 3.f)
16067 ;
16068 small1 = pow_ri(&base, &i__1);
16069 sminv1 = 1.f / small1;
16070 small2 = small1 * small1;
16071 sminv2 = sminv1 * sminv1;
16072 first = FALSE_;
16073 }
16074
16075 /* Determine if scaling of inputs necessary to avoid overflow
16076 when computing 1/TEMP**3 */
16077
16078 if (*orgati) {
16079 /* Computing MIN */
16080 r__3 = (r__1 = d__[2] - *tau, dabs(r__1)), r__4 = (r__2 = d__[3] - *
16081 tau, dabs(r__2));
16082 temp = df2cmin(r__3,r__4);
16083 } else {
16084 /* Computing MIN */
16085 r__3 = (r__1 = d__[1] - *tau, dabs(r__1)), r__4 = (r__2 = d__[2] - *
16086 tau, dabs(r__2));
16087 temp = df2cmin(r__3,r__4);
16088 }
16089 scale = FALSE_;
16090 if (temp <= small1) {
16091 scale = TRUE_;
16092 if (temp <= small2) {
16093
16094 /* Scale up by power of radix nearest 1/SAFMIN**(2/3) */
16095
16096 sclfac = sminv2;
16097 sclinv = small2;
16098 } else {
16099
16100 /* Scale up by power of radix nearest 1/SAFMIN**(1/3) */
16101
16102 sclfac = sminv1;
16103 sclinv = small1;
16104 }
16105
16106 /* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */
16107
16108 for (i__ = 1; i__ <= 3; ++i__) {
16109 dscale[i__ - 1] = d__[i__] * sclfac;
16110 zscale[i__ - 1] = z__[i__] * sclfac;
16111 /* L10: */
16112 }
16113 *tau *= sclfac;
16114 } else {
16115
16116 /* Copy D and Z to DSCALE and ZSCALE */
16117
16118 for (i__ = 1; i__ <= 3; ++i__) {
16119 dscale[i__ - 1] = d__[i__];
16120 zscale[i__ - 1] = z__[i__];
16121 /* L20: */
16122 }
16123 }
16124
16125 fc = 0.f;
16126 df = 0.f;
16127 ddf = 0.f;
16128 for (i__ = 1; i__ <= 3; ++i__) {
16129 temp = 1.f / (dscale[i__ - 1] - *tau);
16130 temp1 = zscale[i__ - 1] * temp;
16131 temp2 = temp1 * temp;
16132 temp3 = temp2 * temp;
16133 fc += temp1 / dscale[i__ - 1];
16134 df += temp2;
16135 ddf += temp3;
16136 /* L30: */
16137 }
16138 f = *finit + *tau * fc;
16139
16140 if (dabs(f) <= 0.f) {
16141 goto L60;
16142 }
16143
16144 /* Iteration begins
16145
16146 It is not hard to see that
16147
16148 1) Iterations will go up monotonically
16149 if FINIT < 0;
16150
16151 2) Iterations will go down monotonically
16152 if FINIT > 0. */
16153
16154 iter = niter + 1;
16155
16156 for (niter = iter; niter <= 20; ++niter) {
16157
16158 if (*orgati) {
16159 temp1 = dscale[1] - *tau;
16160 temp2 = dscale[2] - *tau;
16161 } else {
16162 temp1 = dscale[0] - *tau;
16163 temp2 = dscale[1] - *tau;
16164 }
16165 a = (temp1 + temp2) * f - temp1 * temp2 * df;
16166 b = temp1 * temp2 * f;
16167 c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf;
16168 /* Computing MAX */
16169 r__1 = dabs(a), r__2 = dabs(b), r__1 = f2cmax(r__1,r__2), r__2 = dabs(
16170 c__);
16171 temp = df2cmax(r__1,r__2);
16172 a /= temp;
16173 b /= temp;
16174 c__ /= temp;
16175 if (c__ == 0.f) {
16176 eta = b / a;
16177 } else if (a <= 0.f) {
16178 eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / (
16179 c__ * 2.f);
16180 } else {
16181 eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(
16182 r__1))));
16183 }
16184 if (f * eta >= 0.f) {
16185 eta = -f / df;
16186 }
16187
16188 temp = eta + *tau;
16189 if (*orgati) {
16190 if (eta > 0.f && temp >= dscale[2]) {
16191 eta = (dscale[2] - *tau) / 2.f;
16192 }
16193 if (eta < 0.f && temp <= dscale[1]) {
16194 eta = (dscale[1] - *tau) / 2.f;
16195 }
16196 } else {
16197 if (eta > 0.f && temp >= dscale[1]) {
16198 eta = (dscale[1] - *tau) / 2.f;
16199 }
16200 if (eta < 0.f && temp <= dscale[0]) {
16201 eta = (dscale[0] - *tau) / 2.f;
16202 }
16203 }
16204 *tau += eta;
16205
16206 fc = 0.f;
16207 erretm = 0.f;
16208 df = 0.f;
16209 ddf = 0.f;
16210 for (i__ = 1; i__ <= 3; ++i__) {
16211 temp = 1.f / (dscale[i__ - 1] - *tau);
16212 temp1 = zscale[i__ - 1] * temp;
16213 temp2 = temp1 * temp;
16214 temp3 = temp2 * temp;
16215 temp4 = temp1 / dscale[i__ - 1];
16216 fc += temp4;
16217 erretm += dabs(temp4);
16218 df += temp2;
16219 ddf += temp3;
16220 /* L40: */
16221 }
16222 f = *finit + *tau * fc;
16223 erretm = (dabs(*finit) + dabs(*tau) * erretm) * 8.f + dabs(*tau) * df;
16224 if (dabs(f) <= eps * erretm) {
16225 goto L60;
16226 }
16227 /* L50: */
16228 }
16229 *info = 1;
16230 L60:
16231
16232 /* Undo scaling */
16233
16234 if (scale) {
16235 *tau *= sclinv;
16236 }
16237 return 0;
16238
16239 /* End of SLAED6 */
16240
16241 } /* slaed6_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 12334 of file lapackblas.cpp. References c__1, c__2, c_n1, f2cmax, f2cmin, givcol_ref, givnum_ref, integer, pow_ii(), q, real, sgemm_(), slaed8_(), slaed9_(), slaeda_(), slamrg_(), and xerbla_(). Referenced by slaed0_(). 12340 {
12341 /* -- LAPACK routine (version 3.0) --
12342 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
12343 Courant Institute, Argonne National Lab, and Rice University
12344 September 30, 1994
12345
12346
12347 Purpose
12348 =======
12349
12350 SLAED7 computes the updated eigensystem of a diagonal
12351 matrix after modification by a rank-one symmetric matrix. This
12352 routine is used only for the eigenproblem which requires all
12353 eigenvalues and optionally eigenvectors of a dense symmetric matrix
12354 that has been reduced to tridiagonal form. SLAED1 handles
12355 the case in which all eigenvalues and eigenvectors of a symmetric
12356 tridiagonal matrix are desired.
12357
12358 T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out)
12359
12360 where Z = Q'u, u is a vector of length N with ones in the
12361 CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
12362
12363 The eigenvectors of the original matrix are stored in Q, and the
12364 eigenvalues are in D. The algorithm consists of three stages:
12365
12366 The first stage consists of deflating the size of the problem
12367 when there are multiple eigenvalues or if there is a zero in
12368 the Z vector. For each such occurence the dimension of the
12369 secular equation problem is reduced by one. This stage is
12370 performed by the routine SLAED8.
12371
12372 The second stage consists of calculating the updated
12373 eigenvalues. This is done by finding the roots of the secular
12374 equation via the routine SLAED4 (as called by SLAED9).
12375 This routine also calculates the eigenvectors of the current
12376 problem.
12377
12378 The final stage consists of computing the updated eigenvectors
12379 directly using the updated eigenvalues. The eigenvectors for
12380 the current problem are multiplied with the eigenvectors from
12381 the overall problem.
12382
12383 Arguments
12384 =========
12385
12386 ICOMPQ (input) INTEGER
12387 = 0: Compute eigenvalues only.
12388 = 1: Compute eigenvectors of original dense symmetric matrix
12389 also. On entry, Q contains the orthogonal matrix used
12390 to reduce the original matrix to tridiagonal form.
12391
12392 N (input) INTEGER
12393 The dimension of the symmetric tridiagonal matrix. N >= 0.
12394
12395 QSIZ (input) INTEGER
12396 The dimension of the orthogonal matrix used to reduce
12397 the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1.
12398
12399 TLVLS (input) INTEGER
12400 The total number of merging levels in the overall divide and
12401 conquer tree.
12402
12403 CURLVL (input) INTEGER
12404 The current level in the overall merge routine,
12405 0 <= CURLVL <= TLVLS.
12406
12407 CURPBM (input) INTEGER
12408 The current problem in the current level in the overall
12409 merge routine (counting from upper left to lower right).
12410
12411 D (input/output) REAL array, dimension (N)
12412 On entry, the eigenvalues of the rank-1-perturbed matrix.
12413 On exit, the eigenvalues of the repaired matrix.
12414
12415 Q (input/output) REAL array, dimension (LDQ, N)
12416 On entry, the eigenvectors of the rank-1-perturbed matrix.
12417 On exit, the eigenvectors of the repaired tridiagonal matrix.
12418
12419 LDQ (input) INTEGER
12420 The leading dimension of the array Q. LDQ >= max(1,N).
12421
12422 INDXQ (output) INTEGER array, dimension (N)
12423 The permutation which will reintegrate the subproblem just
12424 solved back into sorted order, i.e., D( INDXQ( I = 1, N ) )
12425 will be in ascending order.
12426
12427 RHO (input) REAL
12428 The subdiagonal element used to create the rank-1
12429 modification.
12430
12431 CUTPNT (input) INTEGER
12432 Contains the location of the last eigenvalue in the leading
12433 sub-matrix. min(1,N) <= CUTPNT <= N.
12434
12435 QSTORE (input/output) REAL array, dimension (N**2+1)
12436 Stores eigenvectors of submatrices encountered during
12437 divide and conquer, packed together. QPTR points to
12438 beginning of the submatrices.
12439
12440 QPTR (input/output) INTEGER array, dimension (N+2)
12441 List of indices pointing to beginning of submatrices stored
12442 in QSTORE. The submatrices are numbered starting at the
12443 bottom left of the divide and conquer tree, from left to
12444 right and bottom to top.
12445
12446 PRMPTR (input) INTEGER array, dimension (N lg N)
12447 Contains a list of pointers which indicate where in PERM a
12448 level's permutation is stored. PRMPTR(i+1) - PRMPTR(i)
12449 indicates the size of the permutation and also the size of
12450 the full, non-deflated problem.
12451
12452 PERM (input) INTEGER array, dimension (N lg N)
12453 Contains the permutations (from deflation and sorting) to be
12454 applied to each eigenblock.
12455
12456 GIVPTR (input) INTEGER array, dimension (N lg N)
12457 Contains a list of pointers which indicate where in GIVCOL a
12458 level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i)
12459 indicates the number of Givens rotations.
12460
12461 GIVCOL (input) INTEGER array, dimension (2, N lg N)
12462 Each pair of numbers indicates a pair of columns to take place
12463 in a Givens rotation.
12464
12465 GIVNUM (input) REAL array, dimension (2, N lg N)
12466 Each number indicates the S value to be used in the
12467 corresponding Givens rotation.
12468
12469 WORK (workspace) REAL array, dimension (3*N+QSIZ*N)
12470
12471 IWORK (workspace) INTEGER array, dimension (4*N)
12472
12473 INFO (output) INTEGER
12474 = 0: successful exit.
12475 < 0: if INFO = -i, the i-th argument had an illegal value.
12476 > 0: if INFO = 1, an eigenvalue did not converge
12477
12478 Further Details
12479 ===============
12480
12481 Based on contributions by
12482 Jeff Rutter, Computer Science Division, University of California
12483 at Berkeley, USA
12484
12485 =====================================================================
12486
12487
12488 Test the input parameters.
12489
12490 Parameter adjustments */
12491 /* Table of constant values */
12492 static integer c__2 = 2;
12493 static integer c__1 = 1;
12494 static real c_b10 = 1.f;
12495 static real c_b11 = 0.f;
12496 static integer c_n1 = -1;
12497
12498 /* System generated locals */
12499 integer q_dim1, q_offset, i__1, i__2;
12500 /* Builtin functions */
12501 integer pow_ii(integer *, integer *);
12502 /* Local variables */
12503 static integer indx, curr, i__, k, indxc;
12504 extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *,
12505 integer *, real *, real *, integer *, real *, integer *, real *,
12506 real *, integer *);
12507 static integer indxp, n1, n2;
12508 extern /* Subroutine */ int slaed8_(integer *, integer *, integer *,
12509 integer *, real *, real *, integer *, integer *, real *, integer *
12510 , real *, real *, real *, integer *, real *, integer *, integer *,
12511 integer *, real *, integer *, integer *, integer *), slaed9_(
12512 integer *, integer *, integer *, integer *, real *, real *,
12513 integer *, real *, real *, real *, real *, integer *, integer *),
12514 slaeda_(integer *, integer *, integer *, integer *, integer *,
12515 integer *, integer *, integer *, real *, real *, integer *, real *
12516 , real *, integer *);
12517 static integer idlmda, is, iw, iz;
12518 extern /* Subroutine */ int xerbla_(const char *, integer *), slamrg_(
12519 integer *, integer *, real *, integer *, integer *, integer *);
12520 static integer coltyp, iq2, ptr, ldq2;
12521 #define givcol_ref(a_1,a_2) givcol[(a_2)*2 + a_1]
12522 #define givnum_ref(a_1,a_2) givnum[(a_2)*2 + a_1]
12523
12524
12525 --d__;
12526 q_dim1 = *ldq;
12527 q_offset = 1 + q_dim1 * 1;
12528 q -= q_offset;
12529 --indxq;
12530 --qstore;
12531 --qptr;
12532 --prmptr;
12533 --perm;
12534 --givptr;
12535 givcol -= 3;
12536 givnum -= 3;
12537 --work;
12538 --iwork;
12539
12540 /* Function Body */
12541 *info = 0;
12542
12543 if (*icompq < 0 || *icompq > 1) {
12544 *info = -1;
12545 } else if (*n < 0) {
12546 *info = -2;
12547 } else if (*icompq == 1 && *qsiz < *n) {
12548 *info = -4;
12549 } else if (*ldq < f2cmax(1,*n)) {
12550 *info = -9;
12551 } else if (f2cmin(1,*n) > *cutpnt || *n < *cutpnt) {
12552 *info = -12;
12553 }
12554 if (*info != 0) {
12555 i__1 = -(*info);
12556 xerbla_("SLAED7", &i__1);
12557 return 0;
12558 }
12559
12560 /* Quick return if possible */
12561
12562 if (*n == 0) {
12563 return 0;
12564 }
12565
12566 /* The following values are for bookkeeping purposes only. They are
12567 integer pointers which indicate the portion of the workspace
12568 used by a particular array in SLAED8 and SLAED9. */
12569
12570 if (*icompq == 1) {
12571 ldq2 = *qsiz;
12572 } else {
12573 ldq2 = *n;
12574 }
12575
12576 iz = 1;
12577 idlmda = iz + *n;
12578 iw = idlmda + *n;
12579 iq2 = iw + *n;
12580 is = iq2 + *n * ldq2;
12581
12582 indx = 1;
12583 indxc = indx + *n;
12584 coltyp = indxc + *n;
12585 indxp = coltyp + *n;
12586
12587 /* Form the z-vector which consists of the last row of Q_1 and the
12588 first row of Q_2. */
12589
12590 ptr = pow_ii(&c__2, tlvls) + 1;
12591 i__1 = *curlvl - 1;
12592 for (i__ = 1; i__ <= i__1; ++i__) {
12593 i__2 = *tlvls - i__;
12594 ptr += pow_ii(&c__2, &i__2);
12595 /* L10: */
12596 }
12597 curr = ptr + *curpbm;
12598 slaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], &
12599 givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz
12600 + *n], info);
12601
12602 /* When solving the final problem, we no longer need the stored data,
12603 so we will overwrite the data from this level onto the previously
12604 used storage space. */
12605
12606 if (*curlvl == *tlvls) {
12607 qptr[curr] = 1;
12608 prmptr[curr] = 1;
12609 givptr[curr] = 1;
12610 }
12611
12612 /* Sort and Deflate eigenvalues. */
12613
12614 slaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho,
12615 cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], &
12616 perm[prmptr[curr]], &givptr[curr + 1], &givcol_ref(1, givptr[curr]
12617 ), &givnum_ref(1, givptr[curr]), &iwork[indxp], &iwork[indx],
12618 info);
12619 prmptr[curr + 1] = prmptr[curr] + *n;
12620 givptr[curr + 1] += givptr[curr];
12621
12622 /* Solve Secular Equation. */
12623
12624 if (k != 0) {
12625 slaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda],
12626 &work[iw], &qstore[qptr[curr]], &k, info);
12627 if (*info != 0) {
12628 goto L30;
12629 }
12630 if (*icompq == 1) {
12631 sgemm_("N", "N", qsiz, &k, &k, &c_b10, &work[iq2], &ldq2, &qstore[
12632 qptr[curr]], &k, &c_b11, &q[q_offset], ldq);
12633 }
12634 /* Computing 2nd power */
12635 i__1 = k;
12636 qptr[curr + 1] = qptr[curr] + i__1 * i__1;
12637
12638 /* Prepare the INDXQ sorting permutation. */
12639
12640 n1 = k;
12641 n2 = *n - k;
12642 slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
12643 } else {
12644 qptr[curr + 1] = qptr[curr];
12645 i__1 = *n;
12646 for (i__ = 1; i__ <= i__1; ++i__) {
12647 indxq[i__] = i__;
12648 /* L20: */
12649 }
12650 }
12651
12652 L30:
12653 return 0;
12654
12655 /* End of SLAED7 */
12656
12657 } /* slaed7_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 13103 of file lapackblas.cpp. References c__1, c_b3, dabs, doublereal, f2cmax, givcol_ref, givnum_ref, integer, isamax_(), q, q2_ref, q_ref, real, scopy_(), slacpy_(), slamch_(), slamrg_(), slapy2_(), sqrt(), srot_(), sscal_(), t, and xerbla_(). Referenced by slaed7_(). 13108 {
13109 /* -- LAPACK routine (version 3.0) --
13110 Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
13111 Courant Institute, NAG Ltd., and Rice University
13112 September 30, 1994
13113
13114
13115 Purpose
13116 =======
13117
13118 SLAED8 merges the two sets of eigenvalues together into a single
13119 sorted set. Then it tries to deflate the size of the problem.
13120 There are two ways in which deflation can occur: when two or more
13121 eigenvalues are close together or if there is a tiny element in the
13122 Z vector. For each such occurrence the order of the related secular
13123 equation problem is reduced by one.
13124
13125 Arguments
13126 =========
13127
13128 ICOMPQ (input) INTEGER
13129 = 0: Compute eigenvalues only.
13130 = 1: Compute eigenvectors of original dense symmetric matrix
13131 also. On entry, Q contains the orthogonal matrix used
13132 to reduce the original matrix to tridiagonal form.
13133
13134 K (output) INTEGER
13135 The number of non-deflated eigenvalues, and the order of the
13136 related secular equation.
13137
13138 N (input) INTEGER
13139 The dimension of the symmetric tridiagonal matrix. N >= 0.
13140
13141 QSIZ (input) INTEGER
13142 The dimension of the orthogonal matrix used to reduce
13143 the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1.
13144
13145 D (input/output) REAL array, dimension (N)
13146 On entry, the eigenvalues of the two submatrices to be
13147 combined. On exit, the trailing (N-K) updated eigenvalues
13148 (those which were deflated) sorted into increasing order.
13149
13150 Q (input/output) REAL array, dimension (LDQ,N)
13151 If ICOMPQ = 0, Q is not referenced. Otherwise,
13152 on entry, Q contains the eigenvectors of the partially solved
13153 system which has been previously updated in matrix
13154 multiplies with other partially solved eigensystems.
13155 On exit, Q contains the trailing (N-K) updated eigenvectors
13156 (those which were deflated) in its last N-K columns.
13157
13158 LDQ (input) INTEGER
13159 The leading dimension of the array Q. LDQ >= max(1,N).
13160
13161 INDXQ (input) INTEGER array, dimension (N)
13162 The permutation which separately sorts the two sub-problems
13163 in D into ascending order. Note that elements in the second
13164 half of this permutation must first have CUTPNT added to
13165 their values in order to be accurate.
13166
13167 RHO (input/output) REAL
13168 On entry, the off-diagonal element associated with the rank-1
13169 cut which originally split the two submatrices which are now
13170 being recombined.
13171 On exit, RHO has been modified to the value required by
13172 SLAED3.
13173
13174 CUTPNT (input) INTEGER
13175 The location of the last eigenvalue in the leading
13176 sub-matrix. min(1,N) <= CUTPNT <= N.
13177
13178 Z (input) REAL array, dimension (N)
13179 On entry, Z contains the updating vector (the last row of
13180 the first sub-eigenvector matrix and the first row of the
13181 second sub-eigenvector matrix).
13182 On exit, the contents of Z are destroyed by the updating
13183 process.
13184
13185 DLAMDA (output) REAL array, dimension (N)
13186 A copy of the first K eigenvalues which will be used by
13187 SLAED3 to form the secular equation.
13188
13189 Q2 (output) REAL array, dimension (LDQ2,N)
13190 If ICOMPQ = 0, Q2 is not referenced. Otherwise,
13191 a copy of the first K eigenvectors which will be used by
13192 SLAED7 in a matrix multiply (SGEMM) to update the new
13193 eigenvectors.
13194
13195 LDQ2 (input) INTEGER
13196 The leading dimension of the array Q2. LDQ2 >= max(1,N).
13197
13198 W (output) REAL array, dimension (N)
13199 The first k values of the final deflation-altered z-vector and
13200 will be passed to SLAED3.
13201
13202 PERM (output) INTEGER array, dimension (N)
13203 The permutations (from deflation and sorting) to be applied
13204 to each eigenblock.
13205
13206 GIVPTR (output) INTEGER
13207 The number of Givens rotations which took place in this
13208 subproblem.
13209
13210 GIVCOL (output) INTEGER array, dimension (2, N)
13211 Each pair of numbers indicates a pair of columns to take place
13212 in a Givens rotation.
13213
13214 GIVNUM (output) REAL array, dimension (2, N)
13215 Each number indicates the S value to be used in the
13216 corresponding Givens rotation.
13217
13218 INDXP (workspace) INTEGER array, dimension (N)
13219 The permutation used to place deflated values of D at the end
13220 of the array. INDXP(1:K) points to the nondeflated D-values
13221 and INDXP(K+1:N) points to the deflated eigenvalues.
13222
13223 INDX (workspace) INTEGER array, dimension (N)
13224 The permutation used to sort the contents of D into ascending
13225 order.
13226
13227 INFO (output) INTEGER
13228 = 0: successful exit.
13229 < 0: if INFO = -i, the i-th argument had an illegal value.
13230
13231 Further Details
13232 ===============
13233
13234 Based on contributions by
13235 Jeff Rutter, Computer Science Division, University of California
13236 at Berkeley, USA
13237
13238 =====================================================================
13239
13240
13241
13242 Test the input parameters.
13243
13244 Parameter adjustments */
13245 /* Table of constant values */
13246 static real c_b3 = -1.f;
13247 static integer c__1 = 1;
13248
13249 /* System generated locals */
13250 integer q_dim1, q_offset, q2_dim1, q2_offset, i__1;
13251 real r__1;
13252 /* Builtin functions */
13253 // double sqrt(doublereal);
13254 /* Local variables */
13255 static integer jlam, imax, jmax;
13256 extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
13257 integer *, real *, real *);
13258 static real c__;
13259 static integer i__, j;
13260 static real s, t;
13261 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
13262 static integer k2;
13263 extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
13264 integer *);
13265 static integer n1, n2;
13266 extern doublereal slapy2_(real *, real *);
13267 static integer jp;
13268 extern doublereal slamch_(const char *);
13269 extern /* Subroutine */ int xerbla_(const char *, integer *);
13270 extern integer isamax_(integer *, real *, integer *);
13271 extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer
13272 *, integer *, integer *), slacpy_(const char *, integer *, integer *,
13273 real *, integer *, real *, integer *);
13274 static integer n1p1;
13275 static real eps, tau, tol;
13276 #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
13277 #define q2_ref(a_1,a_2) q2[(a_2)*q2_dim1 + a_1]
13278 #define givcol_ref(a_1,a_2) givcol[(a_2)*2 + a_1]
13279 #define givnum_ref(a_1,a_2) givnum[(a_2)*2 + a_1]
13280
13281
13282 --d__;
13283 q_dim1 = *ldq;
13284 q_offset = 1 + q_dim1 * 1;
13285 q -= q_offset;
13286 --indxq;
13287 --z__;
13288 --dlamda;
13289 q2_dim1 = *ldq2;
13290 q2_offset = 1 + q2_dim1 * 1;
13291 q2 -= q2_offset;
13292 --w;
13293 --perm;
13294 givcol -= 3;
13295 givnum -= 3;
13296 --indxp;
13297 --indx;
13298
13299 /* Function Body */
13300 *info = 0;
13301
13302 if (*icompq < 0 || *icompq > 1) {
13303 *info = -1;
13304 } else if (*n < 0) {
13305 *info = -3;
13306 } else if (*icompq == 1 && *qsiz < *n) {
13307 *info = -4;
13308 } else if (*ldq < f2cmax(1,*n)) {
13309 *info = -7;
13310 } else if (*cutpnt < f2cmin(1,*n) || *cutpnt > *n) {
13311 *info = -10;
13312 } else if (*ldq2 < f2cmax(1,*n)) {
13313 *info = -14;
13314 }
13315 if (*info != 0) {
13316 i__1 = -(*info);
13317 xerbla_("SLAED8", &i__1);
13318 return 0;
13319 }
13320
13321 /* Quick return if possible */
13322
13323 if (*n == 0) {
13324 return 0;
13325 }
13326
13327 n1 = *cutpnt;
13328 n2 = *n - n1;
13329 n1p1 = n1 + 1;
13330
13331 if (*rho < 0.f) {
13332 sscal_(&n2, &c_b3, &z__[n1p1], &c__1);
13333 }
13334
13335 /* Normalize z so that norm(z) = 1 */
13336
13337 t = 1.f / sqrt(2.f);
13338 i__1 = *n;
13339 for (j = 1; j <= i__1; ++j) {
13340 indx[j] = j;
13341 /* L10: */
13342 }
13343 sscal_(n, &t, &z__[1], &c__1);
13344 *rho = (r__1 = *rho * 2.f, dabs(r__1));
13345
13346 /* Sort the eigenvalues into increasing order */
13347
13348 i__1 = *n;
13349 for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) {
13350 indxq[i__] += *cutpnt;
13351 /* L20: */
13352 }
13353 i__1 = *n;
13354 for (i__ = 1; i__ <= i__1; ++i__) {
13355 dlamda[i__] = d__[indxq[i__]];
13356 w[i__] = z__[indxq[i__]];
13357 /* L30: */
13358 }
13359 i__ = 1;
13360 j = *cutpnt + 1;
13361 slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]);
13362 i__1 = *n;
13363 for (i__ = 1; i__ <= i__1; ++i__) {
13364 d__[i__] = dlamda[indx[i__]];
13365 z__[i__] = w[indx[i__]];
13366 /* L40: */
13367 }
13368
13369 /* Calculate the allowable deflation tolerence */
13370
13371 imax = isamax_(n, &z__[1], &c__1);
13372 jmax = isamax_(n, &d__[1], &c__1);
13373 eps = slamch_("Epsilon");
13374 tol = eps * 8.f * (r__1 = d__[jmax], dabs(r__1));
13375
13376 /* If the rank-1 modifier is small enough, no more needs to be done
13377 except to reorganize Q so that its columns correspond with the
13378 elements in D. */
13379
13380 if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) {
13381 *k = 0;
13382 if (*icompq == 0) {
13383 i__1 = *n;
13384 for (j = 1; j <= i__1; ++j) {
13385 perm[j] = indxq[indx[j]];
13386 /* L50: */
13387 }
13388 } else {
13389 i__1 = *n;
13390 for (j = 1; j <= i__1; ++j) {
13391 perm[j] = indxq[indx[j]];
13392 scopy_(qsiz, &q_ref(1, perm[j]), &c__1, &q2_ref(1, j), &c__1);
13393 /* L60: */
13394 }
13395 slacpy_("A", qsiz, n, &q2_ref(1, 1), ldq2, &q_ref(1, 1), ldq);
13396 }
13397 return 0;
13398 }
13399
13400 /* If there are multiple eigenvalues then the problem deflates. Here
13401 the number of equal eigenvalues are found. As each equal
13402 eigenvalue is found, an elementary reflector is computed to rotate
13403 the corresponding eigensubspace so that the corresponding
13404 components of Z are zero in this new basis. */
13405
13406 *k = 0;
13407 *givptr = 0;
13408 k2 = *n + 1;
13409 i__1 = *n;
13410 for (j = 1; j <= i__1; ++j) {
13411 if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) {
13412
13413 /* Deflate due to small z component. */
13414
13415 --k2;
13416 indxp[k2] = j;
13417 if (j == *n) {
13418 goto L110;
13419 }
13420 } else {
13421 jlam = j;
13422 goto L80;
13423 }
13424 /* L70: */
13425 }
13426 L80:
13427 ++j;
13428 if (j > *n) {
13429 goto L100;
13430 }
13431 if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) {
13432
13433 /* Deflate due to small z component. */
13434
13435 --k2;
13436 indxp[k2] = j;
13437 } else {
13438
13439 /* Check if eigenvalues are close enough to allow deflation. */
13440
13441 s = z__[jlam];
13442 c__ = z__[j];
13443
13444 /* Find sqrt(a**2+b**2) without overflow or
13445 destructive underflow. */
13446
13447 tau = slapy2_(&c__, &s);
13448 t = d__[j] - d__[jlam];
13449 c__ /= tau;
13450 s = -s / tau;
13451 if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) {
13452
13453 /* Deflation is possible. */
13454
13455 z__[j] = tau;
13456 z__[jlam] = 0.f;
13457
13458 /* Record the appropriate Givens rotation */
13459
13460 ++(*givptr);
13461 givcol_ref(1, *givptr) = indxq[indx[jlam]];
13462 givcol_ref(2, *givptr) = indxq[indx[j]];
13463 givnum_ref(1, *givptr) = c__;
13464 givnum_ref(2, *givptr) = s;
13465 if (*icompq == 1) {
13466 srot_(qsiz, &q_ref(1, indxq[indx[jlam]]), &c__1, &q_ref(1,
13467 indxq[indx[j]]), &c__1, &c__, &s);
13468 }
13469 t = d__[jlam] * c__ * c__ + d__[j] * s * s;
13470 d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__;
13471 d__[jlam] = t;
13472 --k2;
13473 i__ = 1;
13474 L90:
13475 if (k2 + i__ <= *n) {
13476 if (d__[jlam] < d__[indxp[k2 + i__]]) {
13477 indxp[k2 + i__ - 1] = indxp[k2 + i__];
13478 indxp[k2 + i__] = jlam;
13479 ++i__;
13480 goto L90;
13481 } else {
13482 indxp[k2 + i__ - 1] = jlam;
13483 }
13484 } else {
13485 indxp[k2 + i__ - 1] = jlam;
13486 }
13487 jlam = j;
13488 } else {
13489 ++(*k);
13490 w[*k] = z__[jlam];
13491 dlamda[*k] = d__[jlam];
13492 indxp[*k] = jlam;
13493 jlam = j;
13494 }
13495 }
13496 goto L80;
13497 L100:
13498
13499 /* Record the last eigenvalue. */
13500
13501 ++(*k);
13502 w[*k] = z__[jlam];
13503 dlamda[*k] = d__[jlam];
13504 indxp[*k] = jlam;
13505
13506 L110:
13507
13508 /* Sort the eigenvalues and corresponding eigenvectors into DLAMDA
13509 and Q2 respectively. The eigenvalues/vectors which were not
13510 deflated go into the first K slots of DLAMDA and Q2 respectively,
13511 while those which were deflated go into the last N - K slots. */
13512
13513 if (*icompq == 0) {
13514 i__1 = *n;
13515 for (j = 1; j <= i__1; ++j) {
13516 jp = indxp[j];
13517 dlamda[j] = d__[jp];
13518 perm[j] = indxq[indx[jp]];
13519 /* L120: */
13520 }
13521 } else {
13522 i__1 = *n;
13523 for (j = 1; j <= i__1; ++j) {
13524 jp = indxp[j];
13525 dlamda[j] = d__[jp];
13526 perm[j] = indxq[indx[jp]];
13527 scopy_(qsiz, &q_ref(1, perm[j]), &c__1, &q2_ref(1, j), &c__1);
13528 /* L130: */
13529 }
13530 }
13531
13532 /* The deflated eigenvalues and their corresponding vectors go back
13533 into the last N - K slots of D and Q respectively. */
13534
13535 if (*k < *n) {
13536 if (*icompq == 0) {
13537 i__1 = *n - *k;
13538 scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1);
13539 } else {
13540 i__1 = *n - *k;
13541 scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1);
13542 i__1 = *n - *k;
13543 slacpy_("A", qsiz, &i__1, &q2_ref(1, *k + 1), ldq2, &q_ref(1, *k
13544 + 1), ldq);
13545 }
13546 }
13547
13548 return 0;
13549
13550 /* End of SLAED8 */
13551
13552 } /* slaed8_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 14075 of file lapackblas.cpp. References c__1, doublereal, f2cmax, integer, q, q_ref, r_sign(), real, s_ref, scopy_(), slaed4_(), slamc3_(), snrm2_(), sqrt(), and xerbla_(). Referenced by slaed7_(). 14078 {
14079 /* -- LAPACK routine (version 3.0) --
14080 Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
14081 Courant Institute, NAG Ltd., and Rice University
14082 September 30, 1994
14083
14084
14085 Purpose
14086 =======
14087
14088 SLAED9 finds the roots of the secular equation, as defined by the
14089 values in D, Z, and RHO, between KSTART and KSTOP. It makes the
14090 appropriate calls to SLAED4 and then stores the new matrix of
14091 eigenvectors for use in calculating the next level of Z vectors.
14092
14093 Arguments
14094 =========
14095
14096 K (input) INTEGER
14097 The number of terms in the rational function to be solved by
14098 SLAED4. K >= 0.
14099
14100 KSTART (input) INTEGER
14101 KSTOP (input) INTEGER
14102 The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP
14103 are to be computed. 1 <= KSTART <= KSTOP <= K.
14104
14105 N (input) INTEGER
14106 The number of rows and columns in the Q matrix.
14107 N >= K (delation may result in N > K).
14108
14109 D (output) REAL array, dimension (N)
14110 D(I) contains the updated eigenvalues
14111 for KSTART <= I <= KSTOP.
14112
14113 Q (workspace) REAL array, dimension (LDQ,N)
14114
14115 LDQ (input) INTEGER
14116 The leading dimension of the array Q. LDQ >= max( 1, N ).
14117
14118 RHO (input) REAL
14119 The value of the parameter in the rank one update equation.
14120 RHO >= 0 required.
14121
14122 DLAMDA (input) REAL array, dimension (K)
14123 The first K elements of this array contain the old roots
14124 of the deflated updating problem. These are the poles
14125 of the secular equation.
14126
14127 W (input) REAL array, dimension (K)
14128 The first K elements of this array contain the components
14129 of the deflation-adjusted updating vector.
14130
14131 S (output) REAL array, dimension (LDS, K)
14132 Will contain the eigenvectors of the repaired matrix which
14133 will be stored for subsequent Z vector calculation and
14134 multiplied by the previously accumulated eigenvectors
14135 to update the system.
14136
14137 LDS (input) INTEGER
14138 The leading dimension of S. LDS >= max( 1, K ).
14139
14140 INFO (output) INTEGER
14141 = 0: successful exit.
14142 < 0: if INFO = -i, the i-th argument had an illegal value.
14143 > 0: if INFO = 1, an eigenvalue did not converge
14144
14145 Further Details
14146 ===============
14147
14148 Based on contributions by
14149 Jeff Rutter, Computer Science Division, University of California
14150 at Berkeley, USA
14151
14152 =====================================================================
14153
14154
14155 Test the input parameters.
14156
14157 Parameter adjustments */
14158 /* Table of constant values */
14159 static integer c__1 = 1;
14160
14161 /* System generated locals */
14162 integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2;
14163 real r__1;
14164 /* Builtin functions */
14165 // double sqrt(doublereal), r_sign(real *, real *);
14166 /* Local variables */
14167 static real temp;
14168 extern doublereal snrm2_(integer *, real *, integer *);
14169 static integer i__, j;
14170 extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
14171 integer *), slaed4_(integer *, integer *, real *, real *, real *,
14172 real *, real *, integer *);
14173 extern doublereal slamc3_(real *, real *);
14174 extern /* Subroutine */ int xerbla_(const char *, integer *);
14175 #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
14176 #define s_ref(a_1,a_2) s[(a_2)*s_dim1 + a_1]
14177
14178
14179 --d__;
14180 q_dim1 = *ldq;
14181 q_offset = 1 + q_dim1 * 1;
14182 q -= q_offset;
14183 --dlamda;
14184 --w;
14185 s_dim1 = *lds;
14186 s_offset = 1 + s_dim1 * 1;
14187 s -= s_offset;
14188
14189 /* Function Body */
14190 *info = 0;
14191
14192 if (*k < 0) {
14193 *info = -1;
14194 } else if (*kstart < 1 || *kstart > f2cmax(1,*k)) {
14195 *info = -2;
14196 } else if (f2cmax(1,*kstop) < *kstart || *kstop > f2cmax(1,*k)) {
14197 *info = -3;
14198 } else if (*n < *k) {
14199 *info = -4;
14200 } else if (*ldq < f2cmax(1,*k)) {
14201 *info = -7;
14202 } else if (*lds < f2cmax(1,*k)) {
14203 *info = -12;
14204 }
14205 if (*info != 0) {
14206 i__1 = -(*info);
14207 xerbla_("SLAED9", &i__1);
14208 return 0;
14209 }
14210
14211 /* Quick return if possible */
14212
14213 if (*k == 0) {
14214 return 0;
14215 }
14216
14217 /* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
14218 be computed with high relative accuracy (barring over/underflow).
14219 This is a problem on machines without a guard digit in
14220 add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
14221 The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
14222 which on any of these machines zeros out the bottommost
14223 bit of DLAMDA(I) if it is 1; this makes the subsequent
14224 subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
14225 occurs. On binary machines with a guard digit (almost all
14226 machines) it does not change DLAMDA(I) at all. On hexadecimal
14227 and decimal machines with a guard digit, it slightly
14228 changes the bottommost bits of DLAMDA(I). It does not account
14229 for hexadecimal or decimal machines without guard digits
14230 (we know of none). We use a subroutine call to compute
14231 2*DLAMBDA(I) to prevent optimizing compilers from eliminating
14232 this code. */
14233
14234 i__1 = *n;
14235 for (i__ = 1; i__ <= i__1; ++i__) {
14236 dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__];
14237 /* L10: */
14238 }
14239
14240 i__1 = *kstop;
14241 for (j = *kstart; j <= i__1; ++j) {
14242 slaed4_(k, &j, &dlamda[1], &w[1], &q_ref(1, j), rho, &d__[j], info);
14243
14244 /* If the zero finder fails, the computation is terminated. */
14245
14246 if (*info != 0) {
14247 goto L120;
14248 }
14249 /* L20: */
14250 }
14251
14252 if (*k == 1 || *k == 2) {
14253 i__1 = *k;
14254 for (i__ = 1; i__ <= i__1; ++i__) {
14255 i__2 = *k;
14256 for (j = 1; j <= i__2; ++j) {
14257 s_ref(j, i__) = q_ref(j, i__);
14258 /* L30: */
14259 }
14260 /* L40: */
14261 }
14262 goto L120;
14263 }
14264
14265 /* Compute updated W. */
14266
14267 scopy_(k, &w[1], &c__1, &s[s_offset], &c__1);
14268
14269 /* Initialize W(I) = Q(I,I) */
14270
14271 i__1 = *ldq + 1;
14272 scopy_(k, &q[q_offset], &i__1, &w[1], &c__1);
14273 i__1 = *k;
14274 for (j = 1; j <= i__1; ++j) {
14275 i__2 = j - 1;
14276 for (i__ = 1; i__ <= i__2; ++i__) {
14277 w[i__] *= q_ref(i__, j) / (dlamda[i__] - dlamda[j]);
14278 /* L50: */
14279 }
14280 i__2 = *k;
14281 for (i__ = j + 1; i__ <= i__2; ++i__) {
14282 w[i__] *= q_ref(i__, j) / (dlamda[i__] - dlamda[j]);
14283 /* L60: */
14284 }
14285 /* L70: */
14286 }
14287 i__1 = *k;
14288 for (i__ = 1; i__ <= i__1; ++i__) {
14289 r__1 = sqrt(-w[i__]);
14290 w[i__] = r_sign(&r__1, &s_ref(i__, 1));
14291 /* L80: */
14292 }
14293
14294 /* Compute eigenvectors of the modified rank-1 modification. */
14295
14296 i__1 = *k;
14297 for (j = 1; j <= i__1; ++j) {
14298 i__2 = *k;
14299 for (i__ = 1; i__ <= i__2; ++i__) {
14300 q_ref(i__, j) = w[i__] / q_ref(i__, j);
14301 /* L90: */
14302 }
14303 temp = snrm2_(k, &q_ref(1, j), &c__1);
14304 i__2 = *k;
14305 for (i__ = 1; i__ <= i__2; ++i__) {
14306 s_ref(i__, j) = q_ref(i__, j) / temp;
14307 /* L100: */
14308 }
14309 /* L110: */
14310 }
14311
14312 L120:
14313 return 0;
14314
14315 /* End of SLAED9 */
14316
14317 } /* slaed9_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 15355 of file lapackblas.cpp. References c__1, c__2, givcol_ref, givnum_ref, integer, pow_ii(), q, real, scopy_(), sgemv_(), sqrt(), srot_(), and xerbla_(). Referenced by slaed7_(). 15359 {
15360 /* -- LAPACK routine (version 3.0) --
15361 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
15362 Courant Institute, Argonne National Lab, and Rice University
15363 September 30, 1994
15364
15365
15366 Purpose
15367 =======
15368
15369 SLAEDA computes the Z vector corresponding to the merge step in the
15370 CURLVLth step of the merge process with TLVLS steps for the CURPBMth
15371 problem.
15372
15373 Arguments
15374 =========
15375
15376 N (input) INTEGER
15377 The dimension of the symmetric tridiagonal matrix. N >= 0.
15378
15379 TLVLS (input) INTEGER
15380 The total number of merging levels in the overall divide and
15381 conquer tree.
15382
15383 CURLVL (input) INTEGER
15384 The current level in the overall merge routine,
15385 0 <= curlvl <= tlvls.
15386
15387 CURPBM (input) INTEGER
15388 The current problem in the current level in the overall
15389 merge routine (counting from upper left to lower right).
15390
15391 PRMPTR (input) INTEGER array, dimension (N lg N)
15392 Contains a list of pointers which indicate where in PERM a
15393 level's permutation is stored. PRMPTR(i+1) - PRMPTR(i)
15394 indicates the size of the permutation and incidentally the
15395 size of the full, non-deflated problem.
15396
15397 PERM (input) INTEGER array, dimension (N lg N)
15398 Contains the permutations (from deflation and sorting) to be
15399 applied to each eigenblock.
15400
15401 GIVPTR (input) INTEGER array, dimension (N lg N)
15402 Contains a list of pointers which indicate where in GIVCOL a
15403 level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i)
15404 indicates the number of Givens rotations.
15405
15406 GIVCOL (input) INTEGER array, dimension (2, N lg N)
15407 Each pair of numbers indicates a pair of columns to take place
15408 in a Givens rotation.
15409
15410 GIVNUM (input) REAL array, dimension (2, N lg N)
15411 Each number indicates the S value to be used in the
15412 corresponding Givens rotation.
15413
15414 Q (input) REAL array, dimension (N**2)
15415 Contains the square eigenblocks from previous levels, the
15416 starting positions for blocks are given by QPTR.
15417
15418 QPTR (input) INTEGER array, dimension (N+2)
15419 Contains a list of pointers which indicate where in Q an
15420 eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates
15421 the size of the block.
15422
15423 Z (output) REAL array, dimension (N)
15424 On output this vector contains the updating vector (the last
15425 row of the first sub-eigenvector matrix and the first row of
15426 the second sub-eigenvector matrix).
15427
15428 ZTEMP (workspace) REAL array, dimension (N)
15429
15430 INFO (output) INTEGER
15431 = 0: successful exit.
15432 < 0: if INFO = -i, the i-th argument had an illegal value.
15433
15434 Further Details
15435 ===============
15436
15437 Based on contributions by
15438 Jeff Rutter, Computer Science Division, University of California
15439 at Berkeley, USA
15440
15441 =====================================================================
15442
15443
15444 Test the input parameters.
15445
15446 Parameter adjustments */
15447 /* Table of constant values */
15448 static integer c__2 = 2;
15449 static integer c__1 = 1;
15450 static real c_b24 = 1.f;
15451 static real c_b26 = 0.f;
15452
15453 /* System generated locals */
15454 integer i__1, i__2, i__3;
15455 /* Builtin functions */
15456 integer pow_ii(integer *, integer *);
15457 // double sqrt(doublereal);
15458 /* Local variables */
15459 static integer curr;
15460 extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
15461 integer *, real *, real *);
15462 static integer bsiz1, bsiz2, psiz1, psiz2, i__, k, zptr1;
15463 extern /* Subroutine */ int sgemv_(const char *, integer *, integer *, real *,
15464 real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *),
15465 xerbla_(const char *, integer *);
15466 static integer mid, ptr;
15467 #define givcol_ref(a_1,a_2) givcol[(a_2)*2 + a_1]
15468 #define givnum_ref(a_1,a_2) givnum[(a_2)*2 + a_1]
15469
15470
15471 --ztemp;
15472 --z__;
15473 --qptr;
15474 --q;
15475 givnum -= 3;
15476 givcol -= 3;
15477 --givptr;
15478 --perm;
15479 --prmptr;
15480
15481 /* Function Body */
15482 *info = 0;
15483
15484 if (*n < 0) {
15485 *info = -1;
15486 }
15487 if (*info != 0) {
15488 i__1 = -(*info);
15489 xerbla_("SLAEDA", &i__1);
15490 return 0;
15491 }
15492
15493 /* Quick return if possible */
15494
15495 if (*n == 0) {
15496 return 0;
15497 }
15498
15499 /* Determine location of first number in second half. */
15500
15501 mid = *n / 2 + 1;
15502
15503 /* Gather last/first rows of appropriate eigenblocks into center of Z */
15504
15505 ptr = 1;
15506
15507 /* Determine location of lowest level subproblem in the full storage
15508 scheme */
15509
15510 i__1 = *curlvl - 1;
15511 curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;
15512
15513 /* Determine size of these matrices. We add HALF to the value of
15514 the SQRT in case the machine underestimates one of these square
15515 roots. */
15516
15517 bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f);
15518 bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + .5f);
15519 i__1 = mid - bsiz1 - 1;
15520 for (k = 1; k <= i__1; ++k) {
15521 z__[k] = 0.f;
15522 /* L10: */
15523 }
15524 scopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], &
15525 c__1);
15526 scopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1);
15527 i__1 = *n;
15528 for (k = mid + bsiz2; k <= i__1; ++k) {
15529 z__[k] = 0.f;
15530 /* L20: */
15531 }
15532
15533 /* Loop thru remaining levels 1 -> CURLVL applying the Givens
15534 rotations and permutation and then multiplying the center matrices
15535 against the current Z. */
15536
15537 ptr = pow_ii(&c__2, tlvls) + 1;
15538 i__1 = *curlvl - 1;
15539 for (k = 1; k <= i__1; ++k) {
15540 i__2 = *curlvl - k;
15541 i__3 = *curlvl - k - 1;
15542 curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) -
15543 1;
15544 psiz1 = prmptr[curr + 1] - prmptr[curr];
15545 psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
15546 zptr1 = mid - psiz1;
15547
15548 /* Apply Givens at CURR and CURR+1 */
15549
15550 i__2 = givptr[curr + 1] - 1;
15551 for (i__ = givptr[curr]; i__ <= i__2; ++i__) {
15552 srot_(&c__1, &z__[zptr1 + givcol_ref(1, i__) - 1], &c__1, &z__[
15553 zptr1 + givcol_ref(2, i__) - 1], &c__1, &givnum_ref(1,
15554 i__), &givnum_ref(2, i__));
15555 /* L30: */
15556 }
15557 i__2 = givptr[curr + 2] - 1;
15558 for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) {
15559 srot_(&c__1, &z__[mid - 1 + givcol_ref(1, i__)], &c__1, &z__[mid
15560 - 1 + givcol_ref(2, i__)], &c__1, &givnum_ref(1, i__), &
15561 givnum_ref(2, i__));
15562 /* L40: */
15563 }
15564 psiz1 = prmptr[curr + 1] - prmptr[curr];
15565 psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
15566 i__2 = psiz1 - 1;
15567 for (i__ = 0; i__ <= i__2; ++i__) {
15568 ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1];
15569 /* L50: */
15570 }
15571 i__2 = psiz2 - 1;
15572 for (i__ = 0; i__ <= i__2; ++i__) {
15573 ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] -
15574 1];
15575 /* L60: */
15576 }
15577
15578 /* Multiply Blocks at CURR and CURR+1
15579
15580 Determine size of these matrices. We add HALF to the value of
15581 the SQRT in case the machine underestimates one of these
15582 square roots. */
15583
15584 bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f);
15585 bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) +
15586 .5f);
15587 if (bsiz1 > 0) {
15588 sgemv_("T", &bsiz1, &bsiz1, &c_b24, &q[qptr[curr]], &bsiz1, &
15589 ztemp[1], &c__1, &c_b26, &z__[zptr1], &c__1);
15590 }
15591 i__2 = psiz1 - bsiz1;
15592 scopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1);
15593 if (bsiz2 > 0) {
15594 sgemv_("T", &bsiz2, &bsiz2, &c_b24, &q[qptr[curr + 1]], &bsiz2, &
15595 ztemp[psiz1 + 1], &c__1, &c_b26, &z__[mid], &c__1);
15596 }
15597 i__2 = psiz2 - bsiz2;
15598 scopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], &
15599 c__1);
15600
15601 i__2 = *tlvls - k;
15602 ptr += pow_ii(&c__2, &i__2);
15603 /* L70: */
15604 }
15605
15606 return 0;
15607
15608 /* End of SLAEDA */
15609
15610 } /* slaeda_ */
|
|
||||||||||||||||||||||||||||||||
|
Definition at line 2131 of file lapackblas.cpp. References b, dabs, integer, real, and sqrt(). Referenced by ssteqr_(). 02133 {
02134 /* -- LAPACK auxiliary routine (version 3.0) --
02135 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
02136 Courant Institute, Argonne National Lab, and Rice University
02137 October 31, 1992
02138
02139
02140 Purpose
02141 =======
02142
02143 SLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix
02144 [ A B ]
02145 [ B C ].
02146 On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
02147 eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
02148 eigenvector for RT1, giving the decomposition
02149
02150 [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ]
02151 [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ].
02152
02153 Arguments
02154 =========
02155
02156 A (input) REAL
02157 The (1,1) element of the 2-by-2 matrix.
02158
02159 B (input) REAL
02160 The (1,2) element and the conjugate of the (2,1) element of
02161 the 2-by-2 matrix.
02162
02163 C (input) REAL
02164 The (2,2) element of the 2-by-2 matrix.
02165
02166 RT1 (output) REAL
02167 The eigenvalue of larger absolute value.
02168
02169 RT2 (output) REAL
02170 The eigenvalue of smaller absolute value.
02171
02172 CS1 (output) REAL
02173 SN1 (output) REAL
02174 The vector (CS1, SN1) is a unit right eigenvector for RT1.
02175
02176 Further Details
02177 ===============
02178
02179 RT1 is accurate to a few ulps barring over/underflow.
02180
02181 RT2 may be inaccurate if there is massive cancellation in the
02182 determinant A*C-B*B; higher precision or correctly rounded or
02183 correctly truncated arithmetic would be needed to compute RT2
02184 accurately in all cases.
02185
02186 CS1 and SN1 are accurate to a few ulps barring over/underflow.
02187
02188 Overflow is possible only if RT1 is within a factor of 5 of overflow.
02189 Underflow is harmless if the input data is 0 or exceeds
02190 underflow_threshold / macheps.
02191
02192 =====================================================================
02193
02194
02195 Compute the eigenvalues */
02196 /* System generated locals */
02197 real r__1;
02198 /* Builtin functions */
02199 // double sqrt(doublereal);
02200 /* Local variables */
02201 static real acmn, acmx, ab, df, cs, ct, tb, sm, tn, rt, adf, acs;
02202 static integer sgn1, sgn2;
02203
02204
02205 sm = *a + *c__;
02206 df = *a - *c__;
02207 adf = dabs(df);
02208 tb = *b + *b;
02209 ab = dabs(tb);
02210 if (dabs(*a) > dabs(*c__)) {
02211 acmx = *a;
02212 acmn = *c__;
02213 } else {
02214 acmx = *c__;
02215 acmn = *a;
02216 }
02217 if (adf > ab) {
02218 /* Computing 2nd power */
02219 r__1 = ab / adf;
02220 rt = adf * sqrt(r__1 * r__1 + 1.f);
02221 } else if (adf < ab) {
02222 /* Computing 2nd power */
02223 r__1 = adf / ab;
02224 rt = ab * sqrt(r__1 * r__1 + 1.f);
02225 } else {
02226
02227 /* Includes case AB=ADF=0 */
02228
02229 rt = ab * sqrt(2.f);
02230 }
02231 if (sm < 0.f) {
02232 *rt1 = (sm - rt) * .5f;
02233 sgn1 = -1;
02234
02235 /* Order of execution important.
02236 To get fully accurate smaller eigenvalue,
02237 next line needs to be executed in higher precision. */
02238
02239 *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
02240 } else if (sm > 0.f) {
02241 *rt1 = (sm + rt) * .5f;
02242 sgn1 = 1;
02243
02244 /* Order of execution important.
02245 To get fully accurate smaller eigenvalue,
02246 next line needs to be executed in higher precision. */
02247
02248 *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
02249 } else {
02250
02251 /* Includes case RT1 = RT2 = 0 */
02252
02253 *rt1 = rt * .5f;
02254 *rt2 = rt * -.5f;
02255 sgn1 = 1;
02256 }
02257
02258 /* Compute the eigenvector */
02259
02260 if (df >= 0.f) {
02261 cs = df + rt;
02262 sgn2 = 1;
02263 } else {
02264 cs = df - rt;
02265 sgn2 = -1;
02266 }
02267 acs = dabs(cs);
02268 if (acs > ab) {
02269 ct = -tb / cs;
02270 *sn1 = 1.f / sqrt(ct * ct + 1.f);
02271 *cs1 = ct * *sn1;
02272 } else {
02273 if (ab == 0.f) {
02274 *cs1 = 1.f;
02275 *sn1 = 0.f;
02276 } else {
02277 tn = -cs / tb;
02278 *cs1 = 1.f / sqrt(tn * tn + 1.f);
02279 *sn1 = tn * *cs1;
02280 }
02281 }
02282 if (sgn1 == sgn2) {
02283 tn = *cs1;
02284 *cs1 = -(*sn1);
02285 *sn1 = tn;
02286 }
02287 return 0;
02288
02289 /* End of SLAEV2 */
02290
02291 } /* slaev2_ */
|
|
||||||||||||||||||||
|
"e2msa.py test.hdf basis.hdf --nbasis=5 --verbose" will occupying 100% cpu and never finish on some platform. This printf statement can fix it. Don't now why. --Grant Tang Definition at line 2423 of file lapackblas.cpp. References b, doublereal, integer, logical, real, slamc3_(), and t. Referenced by slamc2_(). 02425 {
02426 /* -- LAPACK auxiliary routine (version 3.0) --
02427 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
02428 Courant Institute, Argonne National Lab, and Rice University
02429 October 31, 1992
02430
02431
02432 Purpose
02433 =======
02434
02435 SLAMC1 determines the machine parameters given by BETA, T, RND, and
02436 IEEE1.
02437
02438 Arguments
02439 =========
02440
02441 BETA (output) INTEGER
02442 The base of the machine.
02443
02444 T (output) INTEGER
02445 The number of ( BETA ) digits in the mantissa.
02446
02447 RND (output) LOGICAL
02448 Specifies whether proper rounding ( RND = .TRUE. ) or
02449 chopping ( RND = .FALSE. ) occurs in addition. This may not
02450
02451 be a reliable guide to the way in which the machine performs
02452
02453 its arithmetic.
02454
02455 IEEE1 (output) LOGICAL
02456 Specifies whether rounding appears to be done in the IEEE
02457 'round to nearest' style.
02458
02459 Further Details
02460 ===============
02461
02462 The routine is based on the routine ENVRON by Malcolm and
02463 incorporates suggestions by Gentleman and Marovich. See
02464
02465 Malcolm M. A. (1972) Algorithms to reveal properties of
02466 floating-point arithmetic. Comms. of the ACM, 15, 949-951.
02467
02468 Gentleman W. M. and Marovich S. B. (1974) More on algorithms
02469 that reveal properties of floating point arithmetic units.
02470 Comms. of the ACM, 17, 276-277.
02471
02472 =====================================================================
02473 */
02474 /* Initialized data */
02475 static logical first = TRUE_;
02476 /* System generated locals */
02477 real r__1, r__2;
02478 /* Local variables */
02479 static logical lrnd;
02480 static real a, b, c, f;
02481 static integer lbeta;
02482 static real savec;
02483 static logical lieee1;
02484 static real t1, t2;
02485 extern doublereal slamc3_(real *, real *);
02486 static integer lt;
02487 static real one, qtr;
02488
02489
02490
02491 if (first) {
02492 first = FALSE_;
02493 one = 1.f;
02494
02495 /* LBETA, LIEEE1, LT and LRND are the local values of BE
02496 TA,
02497 IEEE1, T and RND.
02498
02499 Throughout this routine we use the function SLAMC3 to ens
02500 ure
02501 that relevant values are stored and not held in registers,
02502 or
02503 are not affected by optimizers.
02504
02505 Compute a = 2.0**m with the smallest positive integer m s
02506 uch
02507 that
02508
02509 fl( a + 1.0 ) = a. */
02510
02511 a = 1.f;
02512 c = 1.f;
02513
02514 /* + WHILE( C.EQ.ONE )LOOP */
02515 L10:
02516 if (c == one) {
02517 a *= 2;
02518 c = slamc3_(&a, &one);
02519 r__1 = -(doublereal)a;
02520 c = slamc3_(&c, &r__1);
02521 goto L10;
02522 }
02523 /* + END WHILE
02524
02525 Now compute b = 2.0**m with the smallest positive integer
02526 m
02527 such that
02528
02529 fl( a + b ) .gt. a. */
02530
02531 b = 1.f;
02532 c = slamc3_(&a, &b);
02533
02537 printf("\n");
02538
02539 /* + WHILE( C.EQ.A )LOOP */
02540 L20:
02541 if (c == a) {
02542 b *= 2;
02543 c = slamc3_(&a, &b);
02544 goto L20;
02545 }
02546 /* + END WHILE
02547
02548 Now compute the base. a and c are neighbouring floating po
02549 int
02550 numbers in the interval ( beta**t, beta**( t + 1 ) ) and
02551 so
02552 their difference is beta. Adding 0.25 to c is to ensure that
02553 it
02554 is truncated to beta and not ( beta - 1 ). */
02555
02556 qtr = one / 4;
02557 savec = c;
02558 r__1 = -(doublereal)a;
02559 c = slamc3_(&c, &r__1);
02560 lbeta = static_cast<integer>(c + qtr);
02561
02562 /* Now determine whether rounding or chopping occurs, by addin
02563 g a
02564 bit less than beta/2 and a bit more than beta/2 to
02565 a. */
02566
02567 b = (real) lbeta;
02568 r__1 = b / 2;
02569 r__2 = -(doublereal)b / 100;
02570 f = slamc3_(&r__1, &r__2);
02571 c = slamc3_(&f, &a);
02572 if (c == a) {
02573 lrnd = TRUE_;
02574 } else {
02575 lrnd = FALSE_;
02576 }
02577 r__1 = b / 2;
02578 r__2 = b / 100;
02579 f = slamc3_(&r__1, &r__2);
02580 c = slamc3_(&f, &a);
02581 if (lrnd && c == a) {
02582 lrnd = FALSE_;
02583 }
02584
02585 /* Try and decide whether rounding is done in the IEEE 'round
02586 to
02587 nearest' style. B/2 is half a unit in the last place of the
02588 two
02589 numbers A and SAVEC. Furthermore, A is even, i.e. has last
02590 bit
02591 zero, and SAVEC is odd. Thus adding B/2 to A should not cha
02592 nge
02593 A, but adding B/2 to SAVEC should change SAVEC. */
02594
02595 r__1 = b / 2;
02596 t1 = slamc3_(&r__1, &a);
02597 r__1 = b / 2;
02598 t2 = slamc3_(&r__1, &savec);
02599 lieee1 = t1 == a && t2 > savec && lrnd;
02600
02601 /* Now find the mantissa, t. It should be the integer part
02602 of
02603 log to the base beta of a, however it is safer to determine
02604 t
02605 by powering. So we find t as the smallest positive integer
02606 for
02607 which
02608
02609 fl( beta**t + 1.0 ) = 1.0. */
02610
02611 lt = 0;
02612 a = 1.f;
02613 c = 1.f;
02614
02615 /* + WHILE( C.EQ.ONE )LOOP */
02616 L30:
02617 if (c == one) {
02618 ++lt;
02619 a *= lbeta;
02620 c = slamc3_(&a, &one);
02621 r__1 = -(doublereal)a;
02622 c = slamc3_(&c, &r__1);
02623 goto L30;
02624 }
02625 /* + END WHILE */
02626
02627 }
02628
02629 *beta = lbeta;
02630 *t = lt;
02631 *rnd = lrnd;
02632 *ieee1 = lieee1;
02633 return 0;
02634
02635 /* End of SLAMC1 */
02636
02637 } /* slamc1_ */
|
|
||||||||||||||||||||||||||||||||||||
|
Definition at line 2641 of file lapackblas.cpp. References abs, b, dabs, doublereal, f2cmax, f2cmin, integer, logical, pow_ri(), real, slamc1_(), slamc3_(), slamc4_(), slamc5_(), and t. Referenced by slamch_(). 02643 {
02644 /* -- LAPACK auxiliary routine (version 3.0) --
02645 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
02646 Courant Institute, Argonne National Lab, and Rice University
02647 October 31, 1992
02648
02649
02650 Purpose
02651 =======
02652
02653 SLAMC2 determines the machine parameters specified in its argument
02654 list.
02655
02656 Arguments
02657 =========
02658
02659 BETA (output) INTEGER
02660 The base of the machine.
02661
02662 T (output) INTEGER
02663 The number of ( BETA ) digits in the mantissa.
02664
02665 RND (output) LOGICAL
02666 Specifies whether proper rounding ( RND = .TRUE. ) or
02667 chopping ( RND = .FALSE. ) occurs in addition. This may not
02668
02669 be a reliable guide to the way in which the machine performs
02670
02671 its arithmetic.
02672
02673 EPS (output) REAL
02674 The smallest positive number such that
02675
02676 fl( 1.0 - EPS ) .LT. 1.0,
02677
02678 where fl denotes the computed value.
02679
02680 EMIN (output) INTEGER
02681 The minimum exponent before (gradual) underflow occurs.
02682
02683 RMIN (output) REAL
02684 The smallest normalized number for the machine, given by
02685 BASE**( EMIN - 1 ), where BASE is the floating point value
02686
02687 of BETA.
02688
02689 EMAX (output) INTEGER
02690 The maximum exponent before overflow occurs.
02691
02692 RMAX (output) REAL
02693 The largest positive number for the machine, given by
02694 BASE**EMAX * ( 1 - EPS ), where BASE is the floating point
02695
02696 value of BETA.
02697
02698 Further Details
02699 ===============
02700
02701 The computation of EPS is based on a routine PARANOIA by
02702 W. Kahan of the University of California at Berkeley.
02703
02704 =====================================================================
02705 */
02706 /* Table of constant values */
02707 // static integer c__1 = 1; //not used in this function
02708
02709 /* Initialized data */
02710 static logical first = TRUE_;
02711 static logical iwarn = FALSE_;
02712 /* System generated locals */
02713 integer i__1;
02714 real r__1, r__2, r__3, r__4, r__5;
02715 /* Builtin functions */
02716 double pow_ri(real *, integer *);
02717 /* Local variables */
02718 static logical ieee;
02719 static real half;
02720 static logical lrnd;
02721 static real leps, zero, a, b, c;
02722 static integer i, lbeta;
02723 static real rbase;
02724 static integer lemin, lemax, gnmin;
02725 static real small;
02726 static integer gpmin;
02727 static real third, lrmin, lrmax, sixth;
02728 static logical lieee1;
02729 extern /* Subroutine */ int slamc1_(integer *, integer *, logical *,
02730 logical *);
02731 extern doublereal slamc3_(real *, real *);
02732 extern /* Subroutine */ int slamc4_(integer *, real *, integer *),
02733 slamc5_(integer *, integer *, integer *, logical *, integer *,
02734 real *);
02735 static integer lt, ngnmin, ngpmin;
02736 static real one, two;
02737
02738
02739
02740 if (first) {
02741 first = FALSE_;
02742 zero = 0.f;
02743 one = 1.f;
02744 two = 2.f;
02745
02746 /* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values
02747 of
02748 BETA, T, RND, EPS, EMIN and RMIN.
02749
02750 Throughout this routine we use the function SLAMC3 to ens
02751 ure
02752 that relevant values are stored and not held in registers,
02753 or
02754 are not affected by optimizers.
02755
02756 SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1.
02757 */
02758
02759 slamc1_(&lbeta, <, &lrnd, &lieee1);
02760
02761 /* Start to find EPS. */
02762
02763 b = (real) lbeta;
02764 i__1 = -lt;
02765 a = pow_ri(&b, &i__1);
02766 leps = a;
02767
02768 /* Try some tricks to see whether or not this is the correct E
02769 PS. */
02770
02771 b = two / 3;
02772 half = one / 2;
02773 r__1 = -(doublereal)half;
02774 sixth = slamc3_(&b, &r__1);
02775 third = slamc3_(&sixth, &sixth);
02776 r__1 = -(doublereal)half;
02777 b = slamc3_(&third, &r__1);
02778 b = slamc3_(&b, &sixth);
02779 b = dabs(b);
02780 if (b < leps) {
02781 b = leps;
02782 }
02783
02784 leps = 1.f;
02785
02786 /* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */
02787 L10:
02788 if (leps > b && b > zero) {
02789 leps = b;
02790 r__1 = half * leps;
02791 /* Computing 5th power */
02792 r__3 = two, r__4 = r__3, r__3 *= r__3;
02793 /* Computing 2nd power */
02794 r__5 = leps;
02795 r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5);
02796 c = slamc3_(&r__1, &r__2);
02797 r__1 = -(doublereal)c;
02798 c = slamc3_(&half, &r__1);
02799 b = slamc3_(&half, &c);
02800 r__1 = -(doublereal)b;
02801 c = slamc3_(&half, &r__1);
02802 b = slamc3_(&half, &c);
02803 goto L10;
02804 }
02805 /* + END WHILE */
02806
02807 if (a < leps) {
02808 leps = a;
02809 }
02810
02811 /* Computation of EPS complete.
02812
02813 Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3
02814 )).
02815 Keep dividing A by BETA until (gradual) underflow occurs. T
02816 his
02817 is detected when we cannot recover the previous A. */
02818
02819 rbase = one / lbeta;
02820 small = one;
02821 for (i = 1; i <= 3; ++i) {
02822 r__1 = small * rbase;
02823 small = slamc3_(&r__1, &zero);
02824 /* L20: */
02825 }
02826 a = slamc3_(&one, &small);
02827 slamc4_(&ngpmin, &one, &lbeta);
02828 r__1 = -(doublereal)one;
02829 slamc4_(&ngnmin, &r__1, &lbeta);
02830 slamc4_(&gpmin, &a, &lbeta);
02831 r__1 = -(doublereal)a;
02832 slamc4_(&gnmin, &r__1, &lbeta);
02833 ieee = FALSE_;
02834
02835 if (ngpmin == ngnmin && gpmin == gnmin) {
02836 if (ngpmin == gpmin) {
02837 lemin = ngpmin;
02838 /* ( Non twos-complement machines, no gradual under
02839 flow;
02840 e.g., VAX ) */
02841 } else if (gpmin - ngpmin == 3) {
02842 lemin = ngpmin - 1 + lt;
02843 ieee = TRUE_;
02844 /* ( Non twos-complement machines, with gradual und
02845 erflow;
02846 e.g., IEEE standard followers ) */
02847 } else {
02848 lemin = f2cmin(ngpmin,gpmin);
02849 /* ( A guess; no known machine ) */
02850 iwarn = TRUE_;
02851 }
02852
02853 } else if (ngpmin == gpmin && ngnmin == gnmin) {
02854 if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) {
02855 lemin = f2cmax(ngpmin,ngnmin);
02856 /* ( Twos-complement machines, no gradual underflow
02857 ;
02858 e.g., CYBER 205 ) */
02859 } else {
02860 lemin = f2cmin(ngpmin,ngnmin);
02861 /* ( A guess; no known machine ) */
02862 iwarn = TRUE_;
02863 }
02864
02865 } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin)
02866 {
02867 if (gpmin - f2cmin(ngpmin,ngnmin) == 3) {
02868 lemin = f2cmax(ngpmin,ngnmin) - 1 + lt;
02869 /* ( Twos-complement machines with gradual underflo
02870 w;
02871 no known machine ) */
02872 } else {
02873 lemin = f2cmin(ngpmin,ngnmin);
02874 /* ( A guess; no known machine ) */
02875 iwarn = TRUE_;
02876 }
02877
02878 } else {
02879 /* Computing MIN */
02880 i__1 = f2cmin(ngpmin,ngnmin), i__1 = f2cmin(i__1,gpmin);
02881 lemin = f2cmin(i__1,gnmin);
02882 /* ( A guess; no known machine ) */
02883 iwarn = TRUE_;
02884 }
02885 /* **
02886 Comment out this if block if EMIN is ok */
02887 if (iwarn) {
02888 first = TRUE_;
02889 printf("\n\n WARNING. The value EMIN may be incorrect:- ");
02890 printf("EMIN = %8i\n",lemin);
02891 printf("If, after inspection, the value EMIN looks acceptable");
02892 printf("please comment out \n the IF block as marked within the");
02893 printf("code of routine SLAMC2, \n otherwise supply EMIN");
02894 printf("explicitly.\n");
02895 }
02896 /* **
02897
02898 Assume IEEE arithmetic if we found denormalised numbers abo
02899 ve,
02900 or if arithmetic seems to round in the IEEE style, determi
02901 ned
02902 in routine SLAMC1. A true IEEE machine should have both thi
02903 ngs
02904 true; however, faulty machines may have one or the other. */
02905
02906 ieee = ieee || lieee1;
02907
02908 /* Compute RMIN by successive division by BETA. We could comp
02909 ute
02910 RMIN as BASE**( EMIN - 1 ), but some machines underflow dur
02911 ing
02912 this computation. */
02913
02914 lrmin = 1.f;
02915 i__1 = 1 - lemin;
02916 for (i = 1; i <= 1-lemin; ++i) {
02917 r__1 = lrmin * rbase;
02918 lrmin = slamc3_(&r__1, &zero);
02919 /* L30: */
02920 }
02921
02922 /* Finally, call SLAMC5 to compute EMAX and RMAX. */
02923
02924 slamc5_(&lbeta, <, &lemin, &ieee, &lemax, &lrmax);
02925 }
02926
02927 *beta = lbeta;
02928 *t = lt;
02929 *rnd = lrnd;
02930 *eps = leps;
02931 *emin = lemin;
02932 *rmin = lrmin;
02933 *emax = lemax;
02934 *rmax = lrmax;
02935
02936 return 0;
02937
02938
02939 /* End of SLAMC2 */
02940
02941 } /* slamc2_ */
|
|
||||||||||||
|
Definition at line 2945 of file lapackblas.cpp. References doublereal, and real. Referenced by slaed3_(), slaed9_(), slamc1_(), slamc2_(), slamc4_(), and slamc5_(). 02946 {
02947 /* -- LAPACK auxiliary routine (version 3.0) --
02948 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
02949 Courant Institute, Argonne National Lab, and Rice University
02950 October 31, 1992
02951
02952
02953 Purpose
02954 =======
02955
02956 SLAMC3 is intended to force A and B to be stored prior to doing
02957
02958 the addition of A and B , for use in situations where optimizers
02959
02960 might hold one of these in a register.
02961
02962 Arguments
02963 =========
02964
02965 A, B (input) REAL
02966 The values A and B.
02967
02968 =====================================================================
02969 */
02970 /* >>Start of File<<
02971 System generated locals */
02972 real ret_val;
02973
02974
02975
02976 ret_val = *a + *b;
02977
02978 return ret_val;
02979
02980 /* End of SLAMC3 */
02981
02982 } /* slamc3_ */
|
|
||||||||||||||||
|
Definition at line 2986 of file lapackblas.cpp. References doublereal, integer, real, and slamc3_(). Referenced by slamc2_(). 02987 {
02988 /* -- LAPACK auxiliary routine (version 3.0) --
02989 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
02990 Courant Institute, Argonne National Lab, and Rice University
02991 October 31, 1992
02992
02993
02994 Purpose
02995 =======
02996
02997 SLAMC4 is a service routine for SLAMC2.
02998
02999 Arguments
03000 =========
03001
03002 EMIN (output) EMIN
03003 The minimum exponent before (gradual) underflow, computed by
03004
03005 setting A = START and dividing by BASE until the previous A
03006 can not be recovered.
03007
03008 START (input) REAL
03009 The starting point for determining EMIN.
03010
03011 BASE (input) INTEGER
03012 The base of the machine.
03013
03014 =====================================================================
03015 */
03016 /* System generated locals */
03017 integer i__1;
03018 real r__1;
03019 /* Local variables */
03020 static real zero, a;
03021 static integer i;
03022 static real rbase, b1, b2, c1, c2, d1, d2;
03023 extern doublereal slamc3_(real *, real *);
03024 static real one;
03025
03026
03027
03028 a = *start;
03029 one = 1.f;
03030 rbase = one / *base;
03031 zero = 0.f;
03032 *emin = 1;
03033 r__1 = a * rbase;
03034 b1 = slamc3_(&r__1, &zero);
03035 c1 = a;
03036 c2 = a;
03037 d1 = a;
03038 d2 = a;
03039 /* + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND.
03040 $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP */
03041 L10:
03042 if (c1 == a && c2 == a && d1 == a && d2 == a) {
03043 --(*emin);
03044 a = b1;
03045 r__1 = a / *base;
03046 b1 = slamc3_(&r__1, &zero);
03047 r__1 = b1 * *base;
03048 c1 = slamc3_(&r__1, &zero);
03049 d1 = zero;
03050 i__1 = *base;
03051 for (i = 1; i <= *base; ++i) {
03052 d1 += b1;
03053 /* L20: */
03054 }
03055 r__1 = a * rbase;
03056 b2 = slamc3_(&r__1, &zero);
03057 r__1 = b2 / rbase;
03058 c2 = slamc3_(&r__1, &zero);
03059 d2 = zero;
03060 i__1 = *base;
03061 for (i = 1; i <= *base; ++i) {
03062 d2 += b2;
03063 /* L30: */
03064 }
03065 goto L10;
03066 }
03067 /* + END WHILE */
03068
03069 return 0;
03070
03071 /* End of SLAMC4 */
03072
03073 } /* slamc4_ */
|
|
||||||||||||||||||||||||||||
|
Definition at line 3077 of file lapackblas.cpp. References doublereal, integer, real, slamc3_(), and y. Referenced by slamc2_(). 03079 {
03080 /* -- LAPACK auxiliary routine (version 3.0) --
03081 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
03082 Courant Institute, Argonne National Lab, and Rice University
03083 October 31, 1992
03084
03085
03086 Purpose
03087 =======
03088
03089 SLAMC5 attempts to compute RMAX, the largest machine floating-point
03090 number, without overflow. It assumes that EMAX + abs(EMIN) sum
03091 approximately to a power of 2. It will fail on machines where this
03092 assumption does not hold, for example, the Cyber 205 (EMIN = -28625,
03093
03094 EMAX = 28718). It will also fail if the value supplied for EMIN is
03095 too large (i.e. too close to zero), probably with overflow.
03096
03097 Arguments
03098 =========
03099
03100 BETA (input) INTEGER
03101 The base of floating-point arithmetic.
03102
03103 P (input) INTEGER
03104 The number of base BETA digits in the mantissa of a
03105 floating-point value.
03106
03107 EMIN (input) INTEGER
03108 The minimum exponent before (gradual) underflow.
03109
03110 IEEE (input) LOGICAL
03111 A logical flag specifying whether or not the arithmetic
03112 system is thought to comply with the IEEE standard.
03113
03114 EMAX (output) INTEGER
03115 The largest exponent before overflow
03116
03117 RMAX (output) REAL
03118 The largest machine floating-point number.
03119
03120 =====================================================================
03121
03122
03123
03124 First compute LEXP and UEXP, two powers of 2 that bound
03125 abs(EMIN). We then assume that EMAX + abs(EMIN) will sum
03126 approximately to the bound that is closest to abs(EMIN).
03127 (EMAX is the exponent of the required number RMAX). */
03128 /* Table of constant values */
03129 static real c_b5 = 0.f;
03130
03131 /* System generated locals */
03132 integer i__1;
03133 real r__1;
03134 /* Local variables */
03135 static integer lexp;
03136 static real oldy;
03137 static integer uexp, i;
03138 static real y, z;
03139 static integer nbits;
03140 extern doublereal slamc3_(real *, real *);
03141 static real recbas;
03142 static integer exbits, expsum, try__;
03143
03144
03145
03146 lexp = 1;
03147 exbits = 1;
03148 L10:
03149 try__ = lexp << 1;
03150 if (try__ <= -(*emin)) {
03151 lexp = try__;
03152 ++exbits;
03153 goto L10;
03154 }
03155 if (lexp == -(*emin)) {
03156 uexp = lexp;
03157 } else {
03158 uexp = try__;
03159 ++exbits;
03160 }
03161
03162 /* Now -LEXP is less than or equal to EMIN, and -UEXP is greater
03163 than or equal to EMIN. EXBITS is the number of bits needed to
03164 store the exponent. */
03165
03166 if (uexp + *emin > -lexp - *emin) {
03167 expsum = lexp << 1;
03168 } else {
03169 expsum = uexp << 1;
03170 }
03171
03172 /* EXPSUM is the exponent range, approximately equal to
03173 EMAX - EMIN + 1 . */
03174
03175 *emax = expsum + *emin - 1;
03176 nbits = exbits + 1 + *p;
03177
03178 /* NBITS is the total number of bits needed to store a
03179 floating-point number. */
03180
03181 if (nbits % 2 == 1 && *beta == 2) {
03182
03183 /* Either there are an odd number of bits used to store a
03184 floating-point number, which is unlikely, or some bits are
03185
03186 not used in the representation of numbers, which is possible
03187 ,
03188 (e.g. Cray machines) or the mantissa has an implicit bit,
03189 (e.g. IEEE machines, Dec Vax machines), which is perhaps the
03190
03191 most likely. We have to assume the last alternative.
03192 If this is true, then we need to reduce EMAX by one because
03193
03194 there must be some way of representing zero in an implicit-b
03195 it
03196 system. On machines like Cray, we are reducing EMAX by one
03197
03198 unnecessarily. */
03199
03200 --(*emax);
03201 }
03202
03203 if (*ieee) {
03204
03205 /* Assume we are on an IEEE machine which reserves one exponent
03206
03207 for infinity and NaN. */
03208
03209 --(*emax);
03210 }
03211
03212 /* Now create RMAX, the largest machine number, which should
03213 be equal to (1.0 - BETA**(-P)) * BETA**EMAX .
03214
03215 First compute 1.0 - BETA**(-P), being careful that the
03216 result is less than 1.0 . */
03217
03218 recbas = 1.f / *beta;
03219 z = *beta - 1.f;
03220 y = 0.f;
03221 i__1 = *p;
03222 for (i = 1; i <= *p; ++i) {
03223 z *= recbas;
03224 if (y < 1.f) {
03225 oldy = y;
03226 }
03227 y = slamc3_(&y, &z);
03228 /* L20: */
03229 }
03230 if (y >= 1.f) {
03231 y = oldy;
03232 }
03233
03234 /* Now multiply by BETA**EMAX to get RMAX. */
03235
03236 i__1 = *emax;
03237 for (i = 1; i <= *emax; ++i) {
03238 r__1 = y * *beta;
03239 y = slamc3_(&r__1, &c_b5);
03240 /* L30: */
03241 }
03242
03243 *rmax = y;
03244 return 0;
03245
03246 /* End of SLAMC5 */
03247
03248 } /* slamc5_ */
|
|
|
Definition at line 2295 of file lapackblas.cpp. References doublereal, integer, logical, lsame_(), pow_ri(), real, slamc2_(), and t. Referenced by sbdsqr_(), sgesvd_(), slaed2_(), slaed4_(), slaed6_(), slaed8_(), slarfg_(), slartg_(), slascl_(), slasq1_(), slasq2_(), slasq3_(), slasq6_(), slasv2_(), sstedc_(), ssteqr_(), ssterf_(), sstevd_(), and ssyev_(). 02296 {
02297 /* -- LAPACK auxiliary routine (version 3.0) --
02298 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
02299 Courant Institute, Argonne National Lab, and Rice University
02300 October 31, 1992
02301
02302
02303 Purpose
02304 =======
02305
02306 SLAMCH determines single precision machine parameters.
02307
02308 Arguments
02309 =========
02310
02311 CMACH (input) CHARACTER*1
02312 Specifies the value to be returned by SLAMCH:
02313 = 'E' or 'e', SLAMCH := eps
02314 = 'S' or 's , SLAMCH := sfmin
02315 = 'B' or 'b', SLAMCH := base
02316 = 'P' or 'p', SLAMCH := eps*base
02317 = 'N' or 'n', SLAMCH := t
02318 = 'R' or 'r', SLAMCH := rnd
02319 = 'M' or 'm', SLAMCH := emin
02320 = 'U' or 'u', SLAMCH := rmin
02321 = 'L' or 'l', SLAMCH := emax
02322 = 'O' or 'o', SLAMCH := rmax
02323
02324 where
02325
02326 eps = relative machine precision
02327 sfmin = safe minimum, such that 1/sfmin does not overflow
02328 base = base of the machine
02329 prec = eps*base
02330 t = number of (base) digits in the mantissa
02331 rnd = 1.0 when rounding occurs in addition, 0.0 otherwise
02332 emin = minimum exponent before (gradual) underflow
02333 rmin = underflow threshold - base**(emin-1)
02334 emax = largest exponent before overflow
02335 rmax = overflow threshold - (base**emax)*(1-eps)
02336
02337 =====================================================================
02338 */
02339 /* >>Start of File<<
02340 Initialized data */
02341 static logical first = TRUE_;
02342 /* System generated locals */
02343 integer i__1;
02344 real ret_val;
02345 /* Builtin functions */
02346 double pow_ri(real *, integer *);
02347 /* Local variables */
02348 static real base;
02349 static integer beta;
02350 static real emin, prec, emax;
02351 static integer imin, imax;
02352 static logical lrnd;
02353 static real rmin, rmax, t, rmach;
02354 extern logical lsame_(const char *, const char *);
02355 static real small, sfmin;
02356 extern /* Subroutine */ int slamc2_(integer *, integer *, logical *, real
02357 *, integer *, real *, integer *, real *);
02358 static integer it;
02359 static real rnd, eps;
02360
02361
02362
02363 if (first) {
02364 first = FALSE_;
02365 slamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax);
02366 base = (real) beta;
02367 t = (real) it;
02368 if (lrnd) {
02369 rnd = 1.f;
02370 i__1 = 1 - it;
02371 eps = pow_ri(&base, &i__1) / 2;
02372 } else {
02373 rnd = 0.f;
02374 i__1 = 1 - it;
02375 eps = pow_ri(&base, &i__1);
02376 }
02377 prec = eps * base;
02378 emin = (real) imin;
02379 emax = (real) imax;
02380 sfmin = rmin;
02381 small = 1.f / rmax;
02382 if (small >= sfmin) {
02383
02384 /* Use SMALL plus a bit, to avoid the possibility of rou
02385 nding
02386 causing overflow when computing 1/sfmin. */
02387
02388 sfmin = small * (eps + 1.f);
02389 }
02390 }
02391
02392 if (lsame_(cmach, "E")) {
02393 rmach = eps;
02394 } else if (lsame_(cmach, "S")) {
02395 rmach = sfmin;
02396 } else if (lsame_(cmach, "B")) {
02397 rmach = base;
02398 } else if (lsame_(cmach, "P")) {
02399 rmach = prec;
02400 } else if (lsame_(cmach, "N")) {
02401 rmach = t;
02402 } else if (lsame_(cmach, "R")) {
02403 rmach = rnd;
02404 } else if (lsame_(cmach, "M")) {
02405 rmach = emin;
02406 } else if (lsame_(cmach, "U")) {
02407 rmach = rmin;
02408 } else if (lsame_(cmach, "L")) {
02409 rmach = emax;
02410 } else if (lsame_(cmach, "O")) {
02411 rmach = rmax;
02412 }
02413
02414 ret_val = rmach;
02415 return ret_val;
02416
02417 /* End of SLAMCH */
02418
02419 } /* slamch_ */
|
|
||||||||||||||||||||||||||||
|
Definition at line 12994 of file lapackblas.cpp. References integer. Referenced by slaed1_(), slaed2_(), slaed7_(), and slaed8_(). 12996 {
12997 /* -- LAPACK routine (version 3.0) --
12998 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
12999 Courant Institute, Argonne National Lab, and Rice University
13000 September 30, 1994
13001
13002
13003 Purpose
13004 =======
13005
13006 SLAMRG will create a permutation list which will merge the elements
13007 of A (which is composed of two independently sorted sets) into a
13008 single set which is sorted in ascending order.
13009
13010 Arguments
13011 =========
13012
13013 N1 (input) INTEGER
13014 N2 (input) INTEGER
13015 These arguements contain the respective lengths of the two
13016 sorted lists to be merged.
13017
13018 A (input) REAL array, dimension (N1+N2)
13019 The first N1 elements of A contain a list of numbers which
13020 are sorted in either ascending or descending order. Likewise
13021 for the final N2 elements.
13022
13023 STRD1 (input) INTEGER
13024 STRD2 (input) INTEGER
13025 These are the strides to be taken through the array A.
13026 Allowable strides are 1 and -1. They indicate whether a
13027 subset of A is sorted in ascending (STRDx = 1) or descending
13028 (STRDx = -1) order.
13029
13030 INDEX (output) INTEGER array, dimension (N1+N2)
13031 On exit this array will contain a permutation such that
13032 if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be
13033 sorted in ascending order.
13034
13035 =====================================================================
13036
13037
13038 Parameter adjustments */
13039 /* System generated locals */
13040 integer i__1;
13041 /* Local variables */
13042 static integer i__, ind1, ind2, n1sv, n2sv;
13043
13044 --index;
13045 --a;
13046
13047 /* Function Body */
13048 n1sv = *n1;
13049 n2sv = *n2;
13050 if (*strd1 > 0) {
13051 ind1 = 1;
13052 } else {
13053 ind1 = *n1;
13054 }
13055 if (*strd2 > 0) {
13056 ind2 = *n1 + 1;
13057 } else {
13058 ind2 = *n1 + *n2;
13059 }
13060 i__ = 1;
13061 /* while ( (N1SV > 0) & (N2SV > 0) ) */
13062 L10:
13063 if (n1sv > 0 && n2sv > 0) {
13064 if (a[ind1] <= a[ind2]) {
13065 index[i__] = ind1;
13066 ++i__;
13067 ind1 += *strd1;
13068 --n1sv;
13069 } else {
13070 index[i__] = ind2;
13071 ++i__;
13072 ind2 += *strd2;
13073 --n2sv;
13074 }
13075 goto L10;
13076 }
13077 /* end while */
13078 if (n1sv == 0) {
13079 i__1 = n2sv;
13080 for (n1sv = 1; n1sv <= i__1; ++n1sv) {
13081 index[i__] = ind2;
13082 ++i__;
13083 ind2 += *strd2;
13084 /* L20: */
13085 }
13086 } else {
13087 /* N2SV .EQ. 0 */
13088 i__1 = n1sv;
13089 for (n2sv = 1; n2sv <= i__1; ++n2sv) {
13090 index[i__] = ind1;
13091 ++i__;
13092 ind1 += *strd1;
13093 /* L30: */
13094 }
13095 }
13096
13097 return 0;
13098
13099 /* End of SLAMRG */
13100
13101 } /* slamrg_ */
|
|
||||||||||||||||||||||||||||
|
Definition at line 20853 of file lapackblas.cpp. References a_ref, c__1, dabs, df2cmax, doublereal, f2cmin, integer, logical, lsame_(), norm(), real, slassq_(), and sqrt(). Referenced by sgesvd_(). 20855 {
20856 /* -- LAPACK auxiliary routine (version 3.0) --
20857 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
20858 Courant Institute, Argonne National Lab, and Rice University
20859 October 31, 1992
20860
20861
20862 Purpose
20863 =======
20864
20865 SLANGE returns the value of the one norm, or the Frobenius norm, or
20866 the infinity norm, or the element of largest absolute value of a
20867 real matrix A.
20868
20869 Description
20870 ===========
20871
20872 SLANGE returns the value
20873
20874 SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
20875 (
20876 ( norm1(A), NORM = '1', 'O' or 'o'
20877 (
20878 ( normI(A), NORM = 'I' or 'i'
20879 (
20880 ( normF(A), NORM = 'F', 'f', 'E' or 'e'
20881
20882 where norm1 denotes the one norm of a matrix (maximum column sum),
20883 normI denotes the infinity norm of a matrix (maximum row sum) and
20884 normF denotes the Frobenius norm of a matrix (square root of sum of
20885 squares). Note that max(abs(A(i,j))) is not a matrix norm.
20886
20887 Arguments
20888 =========
20889
20890 NORM (input) CHARACTER*1
20891 Specifies the value to be returned in SLANGE as described
20892 above.
20893
20894 M (input) INTEGER
20895 The number of rows of the matrix A. M >= 0. When M = 0,
20896 SLANGE is set to zero.
20897
20898 N (input) INTEGER
20899 The number of columns of the matrix A. N >= 0. When N = 0,
20900 SLANGE is set to zero.
20901
20902 A (input) REAL array, dimension (LDA,N)
20903 The m by n matrix A.
20904
20905 LDA (input) INTEGER
20906 The leading dimension of the array A. LDA >= max(M,1).
20907
20908 WORK (workspace) REAL array, dimension (LWORK),
20909 where LWORK >= M when NORM = 'I'; otherwise, WORK is not
20910 referenced.
20911
20912 =====================================================================
20913
20914
20915 Parameter adjustments */
20916 /* Table of constant values */
20917 static integer c__1 = 1;
20918
20919 /* System generated locals */
20920 integer a_dim1, a_offset, i__1, i__2;
20921 real ret_val, r__1, r__2, r__3;
20922 /* Builtin functions */
20923 //double sqrt(doublereal);
20924 /* Local variables */
20925 static integer i__, j;
20926 static real scale;
20927 extern logical lsame_(const char *, const char *);
20928 static real value;
20929 extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *,
20930 real *);
20931 static real sum;
20932 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
20933
20934
20935 a_dim1 = *lda;
20936 a_offset = 1 + a_dim1 * 1;
20937 a -= a_offset;
20938 --work;
20939
20940 /* Function Body */
20941 if (f2cmin(*m,*n) == 0) {
20942 value = 0.f;
20943 } else if (lsame_(norm, "M")) {
20944
20945 /* Find max(abs(A(i,j))). */
20946
20947 value = 0.f;
20948 i__1 = *n;
20949 for (j = 1; j <= i__1; ++j) {
20950 i__2 = *m;
20951 for (i__ = 1; i__ <= i__2; ++i__) {
20952 /* Computing MAX */
20953 r__2 = value, r__3 = (r__1 = a_ref(i__, j), dabs(r__1));
20954 value = df2cmax(r__2,r__3);
20955 /* L10: */
20956 }
20957 /* L20: */
20958 }
20959 } else if (lsame_(norm, "O") || *(unsigned char *)
20960 norm == '1') {
20961
20962 /* Find norm1(A). */
20963
20964 value = 0.f;
20965 i__1 = *n;
20966 for (j = 1; j <= i__1; ++j) {
20967 sum = 0.f;
20968 i__2 = *m;
20969 for (i__ = 1; i__ <= i__2; ++i__) {
20970 sum += (r__1 = a_ref(i__, j), dabs(r__1));
20971 /* L30: */
20972 }
20973 value = df2cmax(value,sum);
20974 /* L40: */
20975 }
20976 } else if (lsame_(norm, "I")) {
20977
20978 /* Find normI(A). */
20979
20980 i__1 = *m;
20981 for (i__ = 1; i__ <= i__1; ++i__) {
20982 work[i__] = 0.f;
20983 /* L50: */
20984 }
20985 i__1 = *n;
20986 for (j = 1; j <= i__1; ++j) {
20987 i__2 = *m;
20988 for (i__ = 1; i__ <= i__2; ++i__) {
20989 work[i__] += (r__1 = a_ref(i__, j), dabs(r__1));
20990 /* L60: */
20991 }
20992 /* L70: */
20993 }
20994 value = 0.f;
20995 i__1 = *m;
20996 for (i__ = 1; i__ <= i__1; ++i__) {
20997 /* Computing MAX */
20998 r__1 = value, r__2 = work[i__];
20999 value = df2cmax(r__1,r__2);
21000 /* L80: */
21001 }
21002 } else if (lsame_(norm, "F") || lsame_(norm, "E")) {
21003
21004 /* Find normF(A). */
21005
21006 scale = 0.f;
21007 sum = 1.f;
21008 i__1 = *n;
21009 for (j = 1; j <= i__1; ++j) {
21010 slassq_(m, &a_ref(1, j), &c__1, &scale, &sum);
21011 /* L90: */
21012 }
21013 value = scale * sqrt(sum);
21014 }
21015
21016 ret_val = value;
21017 return ret_val;
21018
21019 /* End of SLANGE */
21020
21021 } /* slange_ */
|
|
||||||||||||||||||||
|
Definition at line 3253 of file lapackblas.cpp. References c__1, dabs, df2cmax, doublereal, integer, logical, lsame_(), norm(), real, slassq_(), and sqrt(). Referenced by sstedc_(), ssteqr_(), ssterf_(), and sstevd_(). 03254 {
03255 /* -- LAPACK auxiliary routine (version 3.0) --
03256 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
03257 Courant Institute, Argonne National Lab, and Rice University
03258 February 29, 1992
03259
03260
03261 Purpose
03262 =======
03263
03264 SLANST returns the value of the one norm, or the Frobenius norm, or
03265 the infinity norm, or the element of largest absolute value of a
03266 real symmetric tridiagonal matrix A.
03267
03268 Description
03269 ===========
03270
03271 SLANST returns the value
03272
03273 SLANST = ( f2cmax(abs(A(i,j))), NORM = 'M' or 'm'
03274 (
03275 ( norm1(A), NORM = '1', 'O' or 'o'
03276 (
03277 ( normI(A), NORM = 'I' or 'i'
03278 (
03279 ( normF(A), NORM = 'F', 'f', 'E' or 'e'
03280
03281 where norm1 denotes the one norm of a matrix (maximum column sum),
03282 normI denotes the infinity norm of a matrix (maximum row sum) and
03283 normF denotes the Frobenius norm of a matrix (square root of sum of
03284 squares). Note that f2cmax(abs(A(i,j))) is not a matrix norm.
03285
03286 Arguments
03287 =========
03288
03289 NORM (input) CHARACTER*1
03290 Specifies the value to be returned in SLANST as described
03291 above.
03292
03293 N (input) INTEGER
03294 The order of the matrix A. N >= 0. When N = 0, SLANST is
03295 set to zero.
03296
03297 D (input) REAL array, dimension (N)
03298 The diagonal elements of A.
03299
03300 E (input) REAL array, dimension (N-1)
03301 The (n-1) sub-diagonal or super-diagonal elements of A.
03302
03303 =====================================================================
03304
03305
03306 Parameter adjustments */
03307 /* Table of constant values */
03308 static integer c__1 = 1;
03309
03310 /* System generated locals */
03311 integer i__1;
03312 real ret_val, r__1, r__2, r__3, r__4, r__5;
03313 /* Builtin functions */
03314 // double sqrt(doublereal);
03315 /* Local variables */
03316 static integer i__;
03317 static real scale;
03318 extern logical lsame_(const char *, const char *);
03319 static real anorm;
03320 extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *,
03321 real *);
03322 static real sum;
03323
03324
03325 --e;
03326 --d__;
03327
03328 /* Function Body */
03329 if (*n <= 0) {
03330 anorm = 0.f;
03331 } else if (lsame_(norm, "M")) {
03332
03333 /* Find f2cmax(abs(A(i,j))). */
03334
03335 anorm = (r__1 = d__[*n], dabs(r__1));
03336 i__1 = *n - 1;
03337 for (i__ = 1; i__ <= i__1; ++i__) {
03338 /* Computing MAX */
03339 r__2 = anorm, r__3 = (r__1 = d__[i__], dabs(r__1));
03340 anorm = df2cmax(r__2,r__3);
03341 /* Computing MAX */
03342 r__2 = anorm, r__3 = (r__1 = e[i__], dabs(r__1));
03343 anorm = df2cmax(r__2,r__3);
03344 /* L10: */
03345 }
03346 } else if (lsame_(norm, "O") || *(unsigned char *)
03347 norm == '1' || lsame_(norm, "I")) {
03348
03349 /* Find norm1(A). */
03350
03351 if (*n == 1) {
03352 anorm = dabs(d__[1]);
03353 } else {
03354 /* Computing MAX */
03355 r__3 = dabs(d__[1]) + dabs(e[1]), r__4 = (r__1 = e[*n - 1], dabs(
03356 r__1)) + (r__2 = d__[*n], dabs(r__2));
03357 anorm = df2cmax(r__3,r__4);
03358 i__1 = *n - 1;
03359 for (i__ = 2; i__ <= i__1; ++i__) {
03360 /* Computing MAX */
03361 r__4 = anorm, r__5 = (r__1 = d__[i__], dabs(r__1)) + (r__2 =
03362 e[i__], dabs(r__2)) + (r__3 = e[i__ - 1], dabs(r__3));
03363 anorm = df2cmax(r__4,r__5);
03364 /* L20: */
03365 }
03366 }
03367 } else if (lsame_(norm, "F") || lsame_(norm, "E")) {
03368
03369 /* Find normF(A). */
03370
03371 scale = 0.f;
03372 sum = 1.f;
03373 if (*n > 1) {
03374 i__1 = *n - 1;
03375 slassq_(&i__1, &e[1], &c__1, &scale, &sum);
03376 sum *= 2;
03377 }
03378 slassq_(n, &d__[1], &c__1, &scale, &sum);
03379 anorm = scale * sqrt(sum);
03380 }
03381
03382 ret_val = anorm;
03383 return ret_val;
03384
03385 /* End of SLANST */
03386
03387 } /* slanst_ */
|
|
||||||||||||||||||||||||||||
|
Definition at line 3392 of file lapackblas.cpp. References a_ref, c__1, dabs, df2cmax, doublereal, integer, logical, lsame_(), norm(), real, slassq_(), and sqrt(). Referenced by ssyev_(). 03394 {
03395 /* -- LAPACK auxiliary routine (version 3.0) --
03396 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
03397 Courant Institute, Argonne National Lab, and Rice University
03398 October 31, 1992
03399
03400
03401 Purpose
03402 =======
03403
03404 SLANSY returns the value of the one norm, or the Frobenius norm, or
03405 the infinity norm, or the element of largest absolute value of a
03406 real symmetric matrix A.
03407
03408 Description
03409 ===========
03410
03411 SLANSY returns the value
03412
03413 SLANSY = ( f2cmax(abs(A(i,j))), NORM = 'M' or 'm'
03414 (
03415 ( norm1(A), NORM = '1', 'O' or 'o'
03416 (
03417 ( normI(A), NORM = 'I' or 'i'
03418 (
03419 ( normF(A), NORM = 'F', 'f', 'E' or 'e'
03420
03421 where norm1 denotes the one norm of a matrix (maximum column sum),
03422 normI denotes the infinity norm of a matrix (maximum row sum) and
03423 normF denotes the Frobenius norm of a matrix (square root of sum of
03424 squares). Note that f2cmax(abs(A(i,j))) is not a matrix norm.
03425
03426 Arguments
03427 =========
03428
03429 NORM (input) CHARACTER*1
03430 Specifies the value to be returned in SLANSY as described
03431 above.
03432
03433 UPLO (input) CHARACTER*1
03434 Specifies whether the upper or lower triangular part of the
03435 symmetric matrix A is to be referenced.
03436 = 'U': Upper triangular part of A is referenced
03437 = 'L': Lower triangular part of A is referenced
03438
03439 N (input) INTEGER
03440 The order of the matrix A. N >= 0. When N = 0, SLANSY is
03441 set to zero.
03442
03443 A (input) REAL array, dimension (LDA,N)
03444 The symmetric matrix A. If UPLO = 'U', the leading n by n
03445 upper triangular part of A contains the upper triangular part
03446 of the matrix A, and the strictly lower triangular part of A
03447 is not referenced. If UPLO = 'L', the leading n by n lower
03448 triangular part of A contains the lower triangular part of
03449 the matrix A, and the strictly upper triangular part of A is
03450 not referenced.
03451
03452 LDA (input) INTEGER
03453 The leading dimension of the array A. LDA >= f2cmax(N,1).
03454
03455 WORK (workspace) REAL array, dimension (LWORK),
03456 where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
03457 WORK is not referenced.
03458
03459 =====================================================================
03460
03461
03462 Parameter adjustments */
03463 /* Table of constant values */
03464 static integer c__1 = 1;
03465
03466 /* System generated locals */
03467 integer a_dim1, a_offset, i__1, i__2;
03468 real ret_val, r__1, r__2, r__3;
03469 /* Builtin functions */
03470 // double sqrt(doublereal);
03471 /* Local variables */
03472 static real absa;
03473 static integer i__, j;
03474 static real scale;
03475 extern logical lsame_(const char *, const char *);
03476 static real value;
03477 extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *,
03478 real *);
03479 static real sum;
03480 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
03481
03482
03483 a_dim1 = *lda;
03484 a_offset = 1 + a_dim1 * 1;
03485 a -= a_offset;
03486 --work;
03487
03488 /* Function Body */
03489 if (*n == 0) {
03490 value = 0.f;
03491 } else if (lsame_(norm, "M")) {
03492
03493 /* Find f2cmax(abs(A(i,j))). */
03494
03495 value = 0.f;
03496 if (lsame_(uplo, "U")) {
03497 i__1 = *n;
03498 for (j = 1; j <= i__1; ++j) {
03499 i__2 = j;
03500 for (i__ = 1; i__ <= i__2; ++i__) {
03501 /* Computing MAX */
03502 r__2 = value, r__3 = (r__1 = a_ref(i__, j), dabs(r__1));
03503 value = df2cmax(r__2,r__3);
03504 /* L10: */
03505 }
03506 /* L20: */
03507 }
03508 } else {
03509 i__1 = *n;
03510 for (j = 1; j <= i__1; ++j) {
03511 i__2 = *n;
03512 for (i__ = j; i__ <= i__2; ++i__) {
03513 /* Computing MAX */
03514 r__2 = value, r__3 = (r__1 = a_ref(i__, j), dabs(r__1));
03515 value = df2cmax(r__2,r__3);
03516 /* L30: */
03517 }
03518 /* L40: */
03519 }
03520 }
03521 } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') {
03522
03523 /* Find normI(A) ( = norm1(A), since A is symmetric). */
03524
03525 value = 0.f;
03526 if (lsame_(uplo, "U")) {
03527 i__1 = *n;
03528 for (j = 1; j <= i__1; ++j) {
03529 sum = 0.f;
03530 i__2 = j - 1;
03531 for (i__ = 1; i__ <= i__2; ++i__) {
03532 absa = (r__1 = a_ref(i__, j), dabs(r__1));
03533 sum += absa;
03534 work[i__] += absa;
03535 /* L50: */
03536 }
03537 work[j] = sum + (r__1 = a_ref(j, j), dabs(r__1));
03538 /* L60: */
03539 }
03540 i__1 = *n;
03541 for (i__ = 1; i__ <= i__1; ++i__) {
03542 /* Computing MAX */
03543 r__1 = value, r__2 = work[i__];
03544 value = df2cmax(r__1,r__2);
03545 /* L70: */
03546 }
03547 } else {
03548 i__1 = *n;
03549 for (i__ = 1; i__ <= i__1; ++i__) {
03550 work[i__] = 0.f;
03551 /* L80: */
03552 }
03553 i__1 = *n;
03554 for (j = 1; j <= i__1; ++j) {
03555 sum = work[j] + (r__1 = a_ref(j, j), dabs(r__1));
03556 i__2 = *n;
03557 for (i__ = j + 1; i__ <= i__2; ++i__) {
03558 absa = (r__1 = a_ref(i__, j), dabs(r__1));
03559 sum += absa;
03560 work[i__] += absa;
03561 /* L90: */
03562 }
03563 value = df2cmax(value,sum);
03564 /* L100: */
03565 }
03566 }
03567 } else if (lsame_(norm, "F") || lsame_(norm, "E")) {
03568
03569 /* Find normF(A). */
03570
03571 scale = 0.f;
03572 sum = 1.f;
03573 if (lsame_(uplo, "U")) {
03574 i__1 = *n;
03575 for (j = 2; j <= i__1; ++j) {
03576 i__2 = j - 1;
03577 slassq_(&i__2, &a_ref(1, j), &c__1, &scale, &sum);
03578 /* L110: */
03579 }
03580 } else {
03581 i__1 = *n - 1;
03582 for (j = 1; j <= i__1; ++j) {
03583 i__2 = *n - j;
03584 slassq_(&i__2, &a_ref(j + 1, j), &c__1, &scale, &sum);
03585 /* L120: */
03586 }
03587 }
03588 sum *= 2;
03589 i__1 = *lda + 1;
03590 slassq_(n, &a[a_offset], &i__1, &scale, &sum);
03591 value = scale * sqrt(sum);
03592 }
03593
03594 ret_val = value;
03595 return ret_val;
03596
03597 /* End of SLANSY */
03598
03599 } /* slansy_ */
|
|
||||||||||||
|
Definition at line 3607 of file lapackblas.cpp. References dabs, df2cmax, df2cmin, doublereal, real, sqrt(), x, and y. Referenced by slaed2_(), slaed8_(), slarfg_(), ssteqr_(), and ssterf_(). 03608 {
03609 /* -- LAPACK auxiliary routine (version 3.0) --
03610 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
03611 Courant Institute, Argonne National Lab, and Rice University
03612 October 31, 1992
03613
03614
03615 Purpose
03616 =======
03617
03618 SLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary
03619 overflow.
03620
03621 Arguments
03622 =========
03623
03624 X (input) REAL
03625 Y (input) REAL
03626 X and Y specify the values x and y.
03627
03628 ===================================================================== */
03629 /* System generated locals */
03630 real ret_val, r__1;
03631 /* Builtin functions */
03632 // double sqrt(doublereal);
03633 /* Local variables */
03634 static real xabs, yabs, w, z__;
03635
03636
03637
03638 xabs = dabs(*x);
03639 yabs = dabs(*y);
03640 w = df2cmax(xabs,yabs);
03641 z__ = df2cmin(xabs,yabs);
03642 if (z__ == 0.f) {
03643 ret_val = w;
03644 } else {
03645 /* Computing 2nd power */
03646 r__1 = z__ / w;
03647 ret_val = w * sqrt(r__1 * r__1 + 1.f);
03648 }
03649 return ret_val;
03650
03651 /* End of SLAPY2 */
03652
03653 } /* slapy2_ */
|
|
||||||||||||||||||||||||||||||||||||||||
|
Definition at line 4362 of file lapackblas.cpp. References c__1, integer, logical, lsame_(), real, sgemv_(), sger_(), and v. Referenced by sgebd2_(), sgelq2_(), sgeqr2_(), sorg2l_(), sorg2r_(), sorgl2_(), sorm2r_(), and sorml2_(). 04364 {
04365 /* -- LAPACK auxiliary routine (version 3.0) --
04366 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
04367 Courant Institute, Argonne National Lab, and Rice University
04368 February 29, 1992
04369
04370
04371 Purpose
04372 =======
04373
04374 SLARF applies a real elementary reflector H to a real m by n matrix
04375 C, from either the left or the right. H is represented in the form
04376
04377 H = I - tau * v * v'
04378
04379 where tau is a real scalar and v is a real vector.
04380
04381 If tau = 0, then H is taken to be the unit matrix.
04382
04383 Arguments
04384 =========
04385
04386 SIDE (input) CHARACTER*1
04387 = 'L': form H * C
04388 = 'R': form C * H
04389
04390 M (input) INTEGER
04391 The number of rows of the matrix C.
04392
04393 N (input) INTEGER
04394 The number of columns of the matrix C.
04395
04396 V (input) REAL array, dimension
04397 (1 + (M-1)*abs(INCV)) if SIDE = 'L'
04398 or (1 + (N-1)*abs(INCV)) if SIDE = 'R'
04399 The vector v in the representation of H. V is not used if
04400 TAU = 0.
04401
04402 INCV (input) INTEGER
04403 The increment between elements of v. INCV <> 0.
04404
04405 TAU (input) REAL
04406 The value tau in the representation of H.
04407
04408 C (input/output) REAL array, dimension (LDC,N)
04409 On entry, the m by n matrix C.
04410 On exit, C is overwritten by the matrix H * C if SIDE = 'L',
04411 or C * H if SIDE = 'R'.
04412
04413 LDC (input) INTEGER
04414 The leading dimension of the array C. LDC >= f2cmax(1,M).
04415
04416 WORK (workspace) REAL array, dimension
04417 (N) if SIDE = 'L'
04418 or (M) if SIDE = 'R'
04419
04420 =====================================================================
04421
04422
04423 Parameter adjustments */
04424 /* Table of constant values */
04425 static real c_b4 = 1.f;
04426 static real c_b5 = 0.f;
04427 static integer c__1 = 1;
04428
04429 /* System generated locals */
04430 integer c_dim1, c_offset;
04431 real r__1;
04432 /* Local variables */
04433 extern /* Subroutine */ int sger_(integer *, integer *, real *, real *,
04434 integer *, real *, integer *, real *, integer *);
04435 extern logical lsame_(const char *, const char *);
04436 extern /* Subroutine */ int sgemv_(const char *, integer *, integer *, real *,
04437 real *, integer *, real *, integer *, real *, real *, integer *);
04438
04439
04440 --v;
04441 c_dim1 = *ldc;
04442 c_offset = 1 + c_dim1 * 1;
04443 c__ -= c_offset;
04444 --work;
04445
04446 /* Function Body */
04447 if (lsame_(side, "L")) {
04448
04449 /* Form H * C */
04450
04451 if (*tau != 0.f) {
04452
04453 /* w := C' * v */
04454
04455 sgemv_("Transpose", m, n, &c_b4, &c__[c_offset], ldc, &v[1], incv,
04456 &c_b5, &work[1], &c__1);
04457
04458 /* C := C - v * w' */
04459
04460 r__1 = -(*tau);
04461 sger_(m, n, &r__1, &v[1], incv, &work[1], &c__1, &c__[c_offset],
04462 ldc);
04463 }
04464 } else {
04465
04466 /* Form C * H */
04467
04468 if (*tau != 0.f) {
04469
04470 /* w := C * v */
04471
04472 sgemv_("No transpose", m, n, &c_b4, &c__[c_offset], ldc, &v[1],
04473 incv, &c_b5, &work[1], &c__1);
04474
04475 /* C := C - w * v' */
04476
04477 r__1 = -(*tau);
04478 sger_(m, n, &r__1, &work[1], &c__1, &v[1], incv, &c__[c_offset],
04479 ldc);
04480 }
04481 }
04482 return 0;
04483
04484 /* End of SLARF */
04485
04486 } /* slarf_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 3658 of file lapackblas.cpp. References c__1, c___ref, integer, logical, lsame_(), real, scopy_(), sgemm_(), strmm_(), t, v, v_ref, and work_ref. Referenced by sgelqf_(), sgeqrf_(), sorglq_(), sorgql_(), sorgqr_(), sormlq_(), and sormqr_(). 03662 {
03663 /* -- LAPACK auxiliary routine (version 3.0) --
03664 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
03665 Courant Institute, Argonne National Lab, and Rice University
03666 February 29, 1992
03667
03668
03669 Purpose
03670 =======
03671
03672 SLARFB applies a real block reflector H or its transpose H' to a
03673 real m by n matrix C, from either the left or the right.
03674
03675 Arguments
03676 =========
03677
03678 SIDE (input) CHARACTER*1
03679 = 'L': apply H or H' from the Left
03680 = 'R': apply H or H' from the Right
03681
03682 TRANS (input) CHARACTER*1
03683 = 'N': apply H (No transpose)
03684 = 'T': apply H' (Transpose)
03685
03686 DIRECT (input) CHARACTER*1
03687 Indicates how H is formed from a product of elementary
03688 reflectors
03689 = 'F': H = H(1) H(2) . . . H(k) (Forward)
03690 = 'B': H = H(k) . . . H(2) H(1) (Backward)
03691
03692 STOREV (input) CHARACTER*1
03693 Indicates how the vectors which define the elementary
03694 reflectors are stored:
03695 = 'C': Columnwise
03696 = 'R': Rowwise
03697
03698 M (input) INTEGER
03699 The number of rows of the matrix C.
03700
03701 N (input) INTEGER
03702 The number of columns of the matrix C.
03703
03704 K (input) INTEGER
03705 The order of the matrix T (= the number of elementary
03706 reflectors whose product defines the block reflector).
03707
03708 V (input) REAL array, dimension
03709 (LDV,K) if STOREV = 'C'
03710 (LDV,M) if STOREV = 'R' and SIDE = 'L'
03711 (LDV,N) if STOREV = 'R' and SIDE = 'R'
03712 The matrix V. See further details.
03713
03714 LDV (input) INTEGER
03715 The leading dimension of the array V.
03716 If STOREV = 'C' and SIDE = 'L', LDV >= f2cmax(1,M);
03717 if STOREV = 'C' and SIDE = 'R', LDV >= f2cmax(1,N);
03718 if STOREV = 'R', LDV >= K.
03719
03720 T (input) REAL array, dimension (LDT,K)
03721 The triangular k by k matrix T in the representation of the
03722 block reflector.
03723
03724 LDT (input) INTEGER
03725 The leading dimension of the array T. LDT >= K.
03726
03727 C (input/output) REAL array, dimension (LDC,N)
03728 On entry, the m by n matrix C.
03729 On exit, C is overwritten by H*C or H'*C or C*H or C*H'.
03730
03731 LDC (input) INTEGER
03732 The leading dimension of the array C. LDA >= f2cmax(1,M).
03733
03734 WORK (workspace) REAL array, dimension (LDWORK,K)
03735
03736 LDWORK (input) INTEGER
03737 The leading dimension of the array WORK.
03738 If SIDE = 'L', LDWORK >= f2cmax(1,N);
03739 if SIDE = 'R', LDWORK >= f2cmax(1,M).
03740
03741 =====================================================================
03742
03743
03744 Quick return if possible
03745
03746 Parameter adjustments */
03747 /* Table of constant values */
03748 static integer c__1 = 1;
03749 static real c_b14 = 1.f;
03750 static real c_b25 = -1.f;
03751
03752 /* System generated locals */
03753 integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1,
03754 work_offset, i__1, i__2;
03755 /* Local variables */
03756 static integer i__, j;
03757 extern logical lsame_(const char *, const char *);
03758 extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *,
03759 integer *, real *, real *, integer *, real *, integer *, real *,
03760 real *, integer *), scopy_(integer *, real *,
03761 integer *, real *, integer *), strmm_(const char *, const char *, const char *,
03762 const char *, integer *, integer *, real *, real *, integer *, real *,
03763 integer *);
03764 static char transt[1];
03765 #define work_ref(a_1,a_2) work[(a_2)*work_dim1 + a_1]
03766 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
03767 #define v_ref(a_1,a_2) v[(a_2)*v_dim1 + a_1]
03768
03769
03770 v_dim1 = *ldv;
03771 v_offset = 1 + v_dim1 * 1;
03772 v -= v_offset;
03773 t_dim1 = *ldt;
03774 t_offset = 1 + t_dim1 * 1;
03775 t -= t_offset;
03776 c_dim1 = *ldc;
03777 c_offset = 1 + c_dim1 * 1;
03778 c__ -= c_offset;
03779 work_dim1 = *ldwork;
03780 work_offset = 1 + work_dim1 * 1;
03781 work -= work_offset;
03782
03783 /* Function Body */
03784 if (*m <= 0 || *n <= 0) {
03785 return 0;
03786 }
03787
03788 if (lsame_(trans, "N")) {
03789 *(unsigned char *)transt = 'T';
03790 } else {
03791 *(unsigned char *)transt = 'N';
03792 }
03793
03794 if (lsame_(storev, "C")) {
03795
03796 if (lsame_(direct, "F")) {
03797
03798 /* Let V = ( V1 ) (first K rows)
03799 ( V2 )
03800 where V1 is unit lower triangular. */
03801
03802 if (lsame_(side, "L")) {
03803
03804 /* Form H * C or H' * C where C = ( C1 )
03805 ( C2 )
03806
03807 W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK)
03808
03809 W := C1' */
03810
03811 i__1 = *k;
03812 for (j = 1; j <= i__1; ++j) {
03813 scopy_(n, &c___ref(j, 1), ldc, &work_ref(1, j), &c__1);
03814 /* L10: */
03815 }
03816
03817 /* W := W * V1 */
03818
03819 strmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b14,
03820 &v[v_offset], ldv, &work[work_offset], ldwork);
03821 if (*m > *k) {
03822
03823 /* W := W + C2'*V2 */
03824
03825 i__1 = *m - *k;
03826 sgemm_("Transpose", "No transpose", n, k, &i__1, &c_b14, &
03827 c___ref(*k + 1, 1), ldc, &v_ref(*k + 1, 1), ldv, &
03828 c_b14, &work[work_offset], ldwork);
03829 }
03830
03831 /* W := W * T' or W * T */
03832
03833 strmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b14, &t[
03834 t_offset], ldt, &work[work_offset], ldwork);
03835
03836 /* C := C - V * W' */
03837
03838 if (*m > *k) {
03839
03840 /* C2 := C2 - V2 * W' */
03841
03842 i__1 = *m - *k;
03843 sgemm_("No transpose", "Transpose", &i__1, n, k, &c_b25, &
03844 v_ref(*k + 1, 1), ldv, &work[work_offset], ldwork,
03845 &c_b14, &c___ref(*k + 1, 1), ldc);
03846 }
03847
03848 /* W := W * V1' */
03849
03850 strmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b14, &
03851 v[v_offset], ldv, &work[work_offset], ldwork);
03852
03853 /* C1 := C1 - W' */
03854
03855 i__1 = *k;
03856 for (j = 1; j <= i__1; ++j) {
03857 i__2 = *n;
03858 for (i__ = 1; i__ <= i__2; ++i__) {
03859 c___ref(j, i__) = c___ref(j, i__) - work_ref(i__, j);
03860 /* L20: */
03861 }
03862 /* L30: */
03863 }
03864
03865 } else if (lsame_(side, "R")) {
03866
03867 /* Form C * H or C * H' where C = ( C1 C2 )
03868
03869 W := C * V = (C1*V1 + C2*V2) (stored in WORK)
03870
03871 W := C1 */
03872
03873 i__1 = *k;
03874 for (j = 1; j <= i__1; ++j) {
03875 scopy_(m, &c___ref(1, j), &c__1, &work_ref(1, j), &c__1);
03876 /* L40: */
03877 }
03878
03879 /* W := W * V1 */
03880
03881 strmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b14,
03882 &v[v_offset], ldv, &work[work_offset], ldwork);
03883 if (*n > *k) {
03884
03885 /* W := W + C2 * V2 */
03886
03887 i__1 = *n - *k;
03888 sgemm_("No transpose", "No transpose", m, k, &i__1, &
03889 c_b14, &c___ref(1, *k + 1), ldc, &v_ref(*k + 1, 1)
03890 , ldv, &c_b14, &work[work_offset], ldwork);
03891 }
03892
03893 /* W := W * T or W * T' */
03894
03895 strmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b14, &t[
03896 t_offset], ldt, &work[work_offset], ldwork);
03897
03898 /* C := C - W * V' */
03899
03900 if (*n > *k) {
03901
03902 /* C2 := C2 - W * V2' */
03903
03904 i__1 = *n - *k;
03905 sgemm_("No transpose", "Transpose", m, &i__1, k, &c_b25, &
03906 work[work_offset], ldwork, &v_ref(*k + 1, 1), ldv,
03907 &c_b14, &c___ref(1, *k + 1), ldc);
03908 }
03909
03910 /* W := W * V1' */
03911
03912 strmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b14, &
03913 v[v_offset], ldv, &work[work_offset], ldwork);
03914
03915 /* C1 := C1 - W */
03916
03917 i__1 = *k;
03918 for (j = 1; j <= i__1; ++j) {
03919 i__2 = *m;
03920 for (i__ = 1; i__ <= i__2; ++i__) {
03921 c___ref(i__, j) = c___ref(i__, j) - work_ref(i__, j);
03922 /* L50: */
03923 }
03924 /* L60: */
03925 }
03926 }
03927
03928 } else {
03929
03930 /* Let V = ( V1 )
03931 ( V2 ) (last K rows)
03932 where V2 is unit upper triangular. */
03933
03934 if (lsame_(side, "L")) {
03935
03936 /* Form H * C or H' * C where C = ( C1 )
03937 ( C2 )
03938
03939 W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK)
03940
03941 W := C2' */
03942
03943 i__1 = *k;
03944 for (j = 1; j <= i__1; ++j) {
03945 scopy_(n, &c___ref(*m - *k + j, 1), ldc, &work_ref(1, j),
03946 &c__1);
03947 /* L70: */
03948 }
03949
03950 /* W := W * V2 */
03951
03952 strmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b14,
03953 &v_ref(*m - *k + 1, 1), ldv, &work[work_offset],
03954 ldwork);
03955 if (*m > *k) {
03956
03957 /* W := W + C1'*V1 */
03958
03959 i__1 = *m - *k;
03960 sgemm_("Transpose", "No transpose", n, k, &i__1, &c_b14, &
03961 c__[c_offset], ldc, &v[v_offset], ldv, &c_b14, &
03962 work[work_offset], ldwork);
03963 }
03964
03965 /* W := W * T' or W * T */
03966
03967 strmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b14, &t[
03968 t_offset], ldt, &work[work_offset], ldwork);
03969
03970 /* C := C - V * W' */
03971
03972 if (*m > *k) {
03973
03974 /* C1 := C1 - V1 * W' */
03975
03976 i__1 = *m - *k;
03977 sgemm_("No transpose", "Transpose", &i__1, n, k, &c_b25, &
03978 v[v_offset], ldv, &work[work_offset], ldwork, &
03979 c_b14, &c__[c_offset], ldc)
03980 ;
03981 }
03982
03983 /* W := W * V2' */
03984
03985 strmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b14, &
03986 v_ref(*m - *k + 1, 1), ldv, &work[work_offset],
03987 ldwork);
03988
03989 /* C2 := C2 - W' */
03990
03991 i__1 = *k;
03992 for (j = 1; j <= i__1; ++j) {
03993 i__2 = *n;
03994 for (i__ = 1; i__ <= i__2; ++i__) {
03995 c___ref(*m - *k + j, i__) = c___ref(*m - *k + j, i__)
03996 - work_ref(i__, j);
03997 /* L80: */
03998 }
03999 /* L90: */
04000 }
04001
04002 } else if (lsame_(side, "R")) {
04003
04004 /* Form C * H or C * H' where C = ( C1 C2 )
04005
04006 W := C * V = (C1*V1 + C2*V2) (stored in WORK)
04007
04008 W := C2 */
04009
04010 i__1 = *k;
04011 for (j = 1; j <= i__1; ++j) {
04012 scopy_(m, &c___ref(1, *n - *k + j), &c__1, &work_ref(1, j)
04013 , &c__1);
04014 /* L100: */
04015 }
04016
04017 /* W := W * V2 */
04018
04019 strmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b14,
04020 &v_ref(*n - *k + 1, 1), ldv, &work[work_offset],
04021 ldwork);
04022 if (*n > *k) {
04023
04024 /* W := W + C1 * V1 */
04025
04026 i__1 = *n - *k;
04027 sgemm_("No transpose", "No transpose", m, k, &i__1, &
04028 c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, &
04029 c_b14, &work[work_offset], ldwork);
04030 }
04031
04032 /* W := W * T or W * T' */
04033
04034 strmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b14, &t[
04035 t_offset], ldt, &work[work_offset], ldwork);
04036
04037 /* C := C - W * V' */
04038
04039 if (*n > *k) {
04040
04041 /* C1 := C1 - W * V1' */
04042
04043 i__1 = *n - *k;
04044 sgemm_("No transpose", "Transpose", m, &i__1, k, &c_b25, &
04045 work[work_offset], ldwork, &v[v_offset], ldv, &
04046 c_b14, &c__[c_offset], ldc)
04047 ;
04048 }
04049
04050 /* W := W * V2' */
04051
04052 strmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b14, &
04053 v_ref(*n - *k + 1, 1), ldv, &work[work_offset],
04054 ldwork);
04055
04056 /* C2 := C2 - W */
04057
04058 i__1 = *k;
04059 for (j = 1; j <= i__1; ++j) {
04060 i__2 = *m;
04061 for (i__ = 1; i__ <= i__2; ++i__) {
04062 c___ref(i__, *n - *k + j) = c___ref(i__, *n - *k + j)
04063 - work_ref(i__, j);
04064 /* L110: */
04065 }
04066 /* L120: */
04067 }
04068 }
04069 }
04070
04071 } else if (lsame_(storev, "R")) {
04072
04073 if (lsame_(direct, "F")) {
04074
04075 /* Let V = ( V1 V2 ) (V1: first K columns)
04076 where V1 is unit upper triangular. */
04077
04078 if (lsame_(side, "L")) {
04079
04080 /* Form H * C or H' * C where C = ( C1 )
04081 ( C2 )
04082
04083 W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK)
04084
04085 W := C1' */
04086
04087 i__1 = *k;
04088 for (j = 1; j <= i__1; ++j) {
04089 scopy_(n, &c___ref(j, 1), ldc, &work_ref(1, j), &c__1);
04090 /* L130: */
04091 }
04092
04093 /* W := W * V1' */
04094
04095 strmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b14, &
04096 v[v_offset], ldv, &work[work_offset], ldwork);
04097 if (*m > *k) {
04098
04099 /* W := W + C2'*V2' */
04100
04101 i__1 = *m - *k;
04102 sgemm_("Transpose", "Transpose", n, k, &i__1, &c_b14, &
04103 c___ref(*k + 1, 1), ldc, &v_ref(1, *k + 1), ldv, &
04104 c_b14, &work[work_offset], ldwork);
04105 }
04106
04107 /* W := W * T' or W * T */
04108
04109 strmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b14, &t[
04110 t_offset], ldt, &work[work_offset], ldwork);
04111
04112 /* C := C - V' * W' */
04113
04114 if (*m > *k) {
04115
04116 /* C2 := C2 - V2' * W' */
04117
04118 i__1 = *m - *k;
04119 sgemm_("Transpose", "Transpose", &i__1, n, k, &c_b25, &
04120 v_ref(1, *k + 1), ldv, &work[work_offset], ldwork,
04121 &c_b14, &c___ref(*k + 1, 1), ldc);
04122 }
04123
04124 /* W := W * V1 */
04125
04126 strmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b14,
04127 &v[v_offset], ldv, &work[work_offset], ldwork);
04128
04129 /* C1 := C1 - W' */
04130
04131 i__1 = *k;
04132 for (j = 1; j <= i__1; ++j) {
04133 i__2 = *n;
04134 for (i__ = 1; i__ <= i__2; ++i__) {
04135 c___ref(j, i__) = c___ref(j, i__) - work_ref(i__, j);
04136 /* L140: */
04137 }
04138 /* L150: */
04139 }
04140
04141 } else if (lsame_(side, "R")) {
04142
04143 /* Form C * H or C * H' where C = ( C1 C2 )
04144
04145 W := C * V' = (C1*V1' + C2*V2') (stored in WORK)
04146
04147 W := C1 */
04148
04149 i__1 = *k;
04150 for (j = 1; j <= i__1; ++j) {
04151 scopy_(m, &c___ref(1, j), &c__1, &work_ref(1, j), &c__1);
04152 /* L160: */
04153 }
04154
04155 /* W := W * V1' */
04156
04157 strmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b14, &
04158 v[v_offset], ldv, &work[work_offset], ldwork);
04159 if (*n > *k) {
04160
04161 /* W := W + C2 * V2' */
04162
04163 i__1 = *n - *k;
04164 sgemm_("No transpose", "Transpose", m, k, &i__1, &c_b14, &
04165 c___ref(1, *k + 1), ldc, &v_ref(1, *k + 1), ldv, &
04166 c_b14, &work[work_offset], ldwork);
04167 }
04168
04169 /* W := W * T or W * T' */
04170
04171 strmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b14, &t[
04172 t_offset], ldt, &work[work_offset], ldwork);
04173
04174 /* C := C - W * V */
04175
04176 if (*n > *k) {
04177
04178 /* C2 := C2 - W * V2 */
04179
04180 i__1 = *n - *k;
04181 sgemm_("No transpose", "No transpose", m, &i__1, k, &
04182 c_b25, &work[work_offset], ldwork, &v_ref(1, *k +
04183 1), ldv, &c_b14, &c___ref(1, *k + 1), ldc);
04184 }
04185
04186 /* W := W * V1 */
04187
04188 strmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b14,
04189 &v[v_offset], ldv, &work[work_offset], ldwork);
04190
04191 /* C1 := C1 - W */
04192
04193 i__1 = *k;
04194 for (j = 1; j <= i__1; ++j) {
04195 i__2 = *m;
04196 for (i__ = 1; i__ <= i__2; ++i__) {
04197 c___ref(i__, j) = c___ref(i__, j) - work_ref(i__, j);
04198 /* L170: */
04199 }
04200 /* L180: */
04201 }
04202
04203 }
04204
04205 } else {
04206
04207 /* Let V = ( V1 V2 ) (V2: last K columns)
04208 where V2 is unit lower triangular. */
04209
04210 if (lsame_(side, "L")) {
04211
04212 /* Form H * C or H' * C where C = ( C1 )
04213 ( C2 )
04214
04215 W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK)
04216
04217 W := C2' */
04218
04219 i__1 = *k;
04220 for (j = 1; j <= i__1; ++j) {
04221 scopy_(n, &c___ref(*m - *k + j, 1), ldc, &work_ref(1, j),
04222 &c__1);
04223 /* L190: */
04224 }
04225
04226 /* W := W * V2' */
04227
04228 strmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b14, &
04229 v_ref(1, *m - *k + 1), ldv, &work[work_offset],
04230 ldwork);
04231 if (*m > *k) {
04232
04233 /* W := W + C1'*V1' */
04234
04235 i__1 = *m - *k;
04236 sgemm_("Transpose", "Transpose", n, k, &i__1, &c_b14, &
04237 c__[c_offset], ldc, &v[v_offset], ldv, &c_b14, &
04238 work[work_offset], ldwork);
04239 }
04240
04241 /* W := W * T' or W * T */
04242
04243 strmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b14, &t[
04244 t_offset], ldt, &work[work_offset], ldwork);
04245
04246 /* C := C - V' * W' */
04247
04248 if (*m > *k) {
04249
04250 /* C1 := C1 - V1' * W' */
04251
04252 i__1 = *m - *k;
04253 sgemm_("Transpose", "Transpose", &i__1, n, k, &c_b25, &v[
04254 v_offset], ldv, &work[work_offset], ldwork, &
04255 c_b14, &c__[c_offset], ldc);
04256 }
04257
04258 /* W := W * V2 */
04259
04260 strmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b14,
04261 &v_ref(1, *m - *k + 1), ldv, &work[work_offset],
04262 ldwork);
04263
04264 /* C2 := C2 - W' */
04265
04266 i__1 = *k;
04267 for (j = 1; j <= i__1; ++j) {
04268 i__2 = *n;
04269 for (i__ = 1; i__ <= i__2; ++i__) {
04270 c___ref(*m - *k + j, i__) = c___ref(*m - *k + j, i__)
04271 - work_ref(i__, j);
04272 /* L200: */
04273 }
04274 /* L210: */
04275 }
04276
04277 } else if (lsame_(side, "R")) {
04278
04279 /* Form C * H or C * H' where C = ( C1 C2 )
04280
04281 W := C * V' = (C1*V1' + C2*V2') (stored in WORK)
04282
04283 W := C2 */
04284
04285 i__1 = *k;
04286 for (j = 1; j <= i__1; ++j) {
04287 scopy_(m, &c___ref(1, *n - *k + j), &c__1, &work_ref(1, j)
04288 , &c__1);
04289 /* L220: */
04290 }
04291
04292 /* W := W * V2' */
04293
04294 strmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b14, &
04295 v_ref(1, *n - *k + 1), ldv, &work[work_offset],
04296 ldwork);
04297 if (*n > *k) {
04298
04299 /* W := W + C1 * V1' */
04300
04301 i__1 = *n - *k;
04302 sgemm_("No transpose", "Transpose", m, k, &i__1, &c_b14, &
04303 c__[c_offset], ldc, &v[v_offset], ldv, &c_b14, &
04304 work[work_offset], ldwork);
04305 }
04306
04307 /* W := W * T or W * T' */
04308
04309 strmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b14, &t[
04310 t_offset], ldt, &work[work_offset], ldwork);
04311
04312 /* C := C - W * V */
04313
04314 if (*n > *k) {
04315
04316 /* C1 := C1 - W * V1 */
04317
04318 i__1 = *n - *k;
04319 sgemm_("No transpose", "No transpose", m, &i__1, k, &
04320 c_b25, &work[work_offset], ldwork, &v[v_offset],
04321 ldv, &c_b14, &c__[c_offset], ldc);
04322 }
04323
04324 /* W := W * V2 */
04325
04326 strmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b14,
04327 &v_ref(1, *n - *k + 1), ldv, &work[work_offset],
04328 ldwork);
04329
04330 /* C1 := C1 - W */
04331
04332 i__1 = *k;
04333 for (j = 1; j <= i__1; ++j) {
04334 i__2 = *m;
04335 for (i__ = 1; i__ <= i__2; ++i__) {
04336 c___ref(i__, *n - *k + j) = c___ref(i__, *n - *k + j)
04337 - work_ref(i__, j);
04338 /* L230: */
04339 }
04340 /* L240: */
04341 }
04342
04343 }
04344
04345 }
04346 }
04347
04348 return 0;
04349
04350 /* End of SLARFB */
04351
04352 } /* slarfb_ */
|
|
||||||||||||||||||||||||
|
Definition at line 4491 of file lapackblas.cpp. References dabs, doublereal, integer, r_sign(), real, slamch_(), slapy2_(), snrm2_(), sscal_(), and x. Referenced by sgebd2_(), sgelq2_(), sgeqr2_(), slabrd_(), slatrd_(), and ssytd2_(). 04493 {
04494 /* -- LAPACK auxiliary routine (version 3.0) --
04495 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
04496 Courant Institute, Argonne National Lab, and Rice University
04497 September 30, 1994
04498
04499
04500 Purpose
04501 =======
04502
04503 SLARFG generates a real elementary reflector H of order n, such
04504 that
04505
04506 H * ( alpha ) = ( beta ), H' * H = I.
04507 ( x ) ( 0 )
04508
04509 where alpha and beta are scalars, and x is an (n-1)-element real
04510 vector. H is represented in the form
04511
04512 H = I - tau * ( 1 ) * ( 1 v' ) ,
04513 ( v )
04514
04515 where tau is a real scalar and v is a real (n-1)-element
04516 vector.
04517
04518 If the elements of x are all zero, then tau = 0 and H is taken to be
04519 the unit matrix.
04520
04521 Otherwise 1 <= tau <= 2.
04522
04523 Arguments
04524 =========
04525
04526 N (input) INTEGER
04527 The order of the elementary reflector.
04528
04529 ALPHA (input/output) REAL
04530 On entry, the value alpha.
04531 On exit, it is overwritten with the value beta.
04532
04533 X (input/output) REAL array, dimension
04534 (1+(N-2)*abs(INCX))
04535 On entry, the vector x.
04536 On exit, it is overwritten with the vector v.
04537
04538 INCX (input) INTEGER
04539 The increment between elements of X. INCX > 0.
04540
04541 TAU (output) REAL
04542 The value tau.
04543
04544 =====================================================================
04545
04546
04547 Parameter adjustments */
04548 /* System generated locals */
04549 integer i__1;
04550 real r__1;
04551 /* Builtin functions */
04552 double r_sign(real *, real *);
04553 /* Local variables */
04554 static real beta;
04555 extern doublereal snrm2_(integer *, real *, integer *);
04556 static integer j;
04557 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
04558 static real xnorm;
04559 extern doublereal slapy2_(real *, real *), slamch_(const char *);
04560 static real safmin, rsafmn;
04561 static integer knt;
04562
04563 --x;
04564
04565 /* Function Body */
04566 if (*n <= 1) {
04567 *tau = 0.f;
04568 return 0;
04569 }
04570
04571 i__1 = *n - 1;
04572 xnorm = snrm2_(&i__1, &x[1], incx);
04573
04574 if (xnorm == 0.f) {
04575
04576 /* H = I */
04577
04578 *tau = 0.f;
04579 } else {
04580
04581 /* general case */
04582
04583 r__1 = slapy2_(alpha, &xnorm);
04584 beta = -r_sign(&r__1, alpha);
04585 safmin = slamch_("S") / slamch_("E");
04586 if (dabs(beta) < safmin) {
04587
04588 /* XNORM, BETA may be inaccurate; scale X and recompute them */
04589
04590 rsafmn = 1.f / safmin;
04591 knt = 0;
04592 L10:
04593 ++knt;
04594 i__1 = *n - 1;
04595 sscal_(&i__1, &rsafmn, &x[1], incx);
04596 beta *= rsafmn;
04597 *alpha *= rsafmn;
04598 if (dabs(beta) < safmin) {
04599 goto L10;
04600 }
04601
04602 /* New BETA is at most 1, at least SAFMIN */
04603
04604 i__1 = *n - 1;
04605 xnorm = snrm2_(&i__1, &x[1], incx);
04606 r__1 = slapy2_(alpha, &xnorm);
04607 beta = -r_sign(&r__1, alpha);
04608 *tau = (beta - *alpha) / beta;
04609 i__1 = *n - 1;
04610 r__1 = 1.f / (*alpha - beta);
04611 sscal_(&i__1, &r__1, &x[1], incx);
04612
04613 /* If ALPHA is subnormal, it may lose relative accuracy */
04614
04615 *alpha = beta;
04616 i__1 = knt;
04617 for (j = 1; j <= i__1; ++j) {
04618 *alpha *= safmin;
04619 /* L20: */
04620 }
04621 } else {
04622 *tau = (beta - *alpha) / beta;
04623 i__1 = *n - 1;
04624 r__1 = 1.f / (*alpha - beta);
04625 sscal_(&i__1, &r__1, &x[1], incx);
04626 *alpha = beta;
04627 }
04628 }
04629
04630 return 0;
04631
04632 /* End of SLARFG */
04633
04634 } /* slarfg_ */
|
|
||||||||||||||||||||||||||||||||||||||||
|
Definition at line 4639 of file lapackblas.cpp. References c__1, integer, logical, lsame_(), real, sgemv_(), strmv_(), t, t_ref, v, and v_ref. Referenced by sgelqf_(), sgeqrf_(), sorglq_(), sorgql_(), sorgqr_(), sormlq_(), and sormqr_(). 04641 {
04642 /* -- LAPACK auxiliary routine (version 3.0) --
04643 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
04644 Courant Institute, Argonne National Lab, and Rice University
04645 February 29, 1992
04646
04647
04648 Purpose
04649 =======
04650
04651 SLARFT forms the triangular factor T of a real block reflector H
04652 of order n, which is defined as a product of k elementary reflectors.
04653
04654 If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
04655
04656 If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
04657
04658 If STOREV = 'C', the vector which defines the elementary reflector
04659 H(i) is stored in the i-th column of the array V, and
04660
04661 H = I - V * T * V'
04662
04663 If STOREV = 'R', the vector which defines the elementary reflector
04664 H(i) is stored in the i-th row of the array V, and
04665
04666 H = I - V' * T * V
04667
04668 Arguments
04669 =========
04670
04671 DIRECT (input) CHARACTER*1
04672 Specifies the order in which the elementary reflectors are
04673 multiplied to form the block reflector:
04674 = 'F': H = H(1) H(2) . . . H(k) (Forward)
04675 = 'B': H = H(k) . . . H(2) H(1) (Backward)
04676
04677 STOREV (input) CHARACTER*1
04678 Specifies how the vectors which define the elementary
04679 reflectors are stored (see also Further Details):
04680 = 'C': columnwise
04681 = 'R': rowwise
04682
04683 N (input) INTEGER
04684 The order of the block reflector H. N >= 0.
04685
04686 K (input) INTEGER
04687 The order of the triangular factor T (= the number of
04688 elementary reflectors). K >= 1.
04689
04690 V (input/output) REAL array, dimension
04691 (LDV,K) if STOREV = 'C'
04692 (LDV,N) if STOREV = 'R'
04693 The matrix V. See further details.
04694
04695 LDV (input) INTEGER
04696 The leading dimension of the array V.
04697 If STOREV = 'C', LDV >= f2cmax(1,N); if STOREV = 'R', LDV >= K.
04698
04699 TAU (input) REAL array, dimension (K)
04700 TAU(i) must contain the scalar factor of the elementary
04701 reflector H(i).
04702
04703 T (output) REAL array, dimension (LDT,K)
04704 The k by k triangular factor T of the block reflector.
04705 If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
04706 lower triangular. The rest of the array is not used.
04707
04708 LDT (input) INTEGER
04709 The leading dimension of the array T. LDT >= K.
04710
04711 Further Details
04712 ===============
04713
04714 The shape of the matrix V and the storage of the vectors which define
04715 the H(i) is best illustrated by the following example with n = 5 and
04716 k = 3. The elements equal to 1 are not stored; the corresponding
04717 array elements are modified but restored on exit. The rest of the
04718 array is not used.
04719
04720 DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
04721
04722 V = ( 1 ) V = ( 1 v1 v1 v1 v1 )
04723 ( v1 1 ) ( 1 v2 v2 v2 )
04724 ( v1 v2 1 ) ( 1 v3 v3 )
04725 ( v1 v2 v3 )
04726 ( v1 v2 v3 )
04727
04728 DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
04729
04730 V = ( v1 v2 v3 ) V = ( v1 v1 1 )
04731 ( v1 v2 v3 ) ( v2 v2 v2 1 )
04732 ( 1 v2 v3 ) ( v3 v3 v3 v3 1 )
04733 ( 1 v3 )
04734 ( 1 )
04735
04736 =====================================================================
04737
04738
04739 Quick return if possible
04740
04741 Parameter adjustments */
04742 /* Table of constant values */
04743 static integer c__1 = 1;
04744 static real c_b8 = 0.f;
04745
04746 /* System generated locals */
04747 integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3;
04748 real r__1;
04749 /* Local variables */
04750 static integer i__, j;
04751 extern logical lsame_(const char *, const char *);
04752 extern /* Subroutine */ int sgemv_(const char *, integer *, integer *, real *,
04753 real *, integer *, real *, integer *, real *, real *, integer *), strmv_(const char *, const char *, const char *, integer *, real *,
04754 integer *, real *, integer *);
04755 static real vii;
04756 #define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1]
04757 #define v_ref(a_1,a_2) v[(a_2)*v_dim1 + a_1]
04758
04759
04760 v_dim1 = *ldv;
04761 v_offset = 1 + v_dim1 * 1;
04762 v -= v_offset;
04763 --tau;
04764 t_dim1 = *ldt;
04765 t_offset = 1 + t_dim1 * 1;
04766 t -= t_offset;
04767
04768 /* Function Body */
04769 if (*n == 0) {
04770 return 0;
04771 }
04772
04773 if (lsame_(direct, "F")) {
04774 i__1 = *k;
04775 for (i__ = 1; i__ <= i__1; ++i__) {
04776 if (tau[i__] == 0.f) {
04777
04778 /* H(i) = I */
04779
04780 i__2 = i__;
04781 for (j = 1; j <= i__2; ++j) {
04782 t_ref(j, i__) = 0.f;
04783 /* L10: */
04784 }
04785 } else {
04786
04787 /* general case */
04788
04789 vii = v_ref(i__, i__);
04790 v_ref(i__, i__) = 1.f;
04791 if (lsame_(storev, "C")) {
04792
04793 /* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */
04794
04795 i__2 = *n - i__ + 1;
04796 i__3 = i__ - 1;
04797 r__1 = -tau[i__];
04798 sgemv_("Transpose", &i__2, &i__3, &r__1, &v_ref(i__, 1),
04799 ldv, &v_ref(i__, i__), &c__1, &c_b8, &t_ref(1,
04800 i__), &c__1);
04801 } else {
04802
04803 /* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */
04804
04805 i__2 = i__ - 1;
04806 i__3 = *n - i__ + 1;
04807 r__1 = -tau[i__];
04808 sgemv_("No transpose", &i__2, &i__3, &r__1, &v_ref(1, i__)
04809 , ldv, &v_ref(i__, i__), ldv, &c_b8, &t_ref(1,
04810 i__), &c__1);
04811 }
04812 v_ref(i__, i__) = vii;
04813
04814 /* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
04815
04816 i__2 = i__ - 1;
04817 strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
04818 t_offset], ldt, &t_ref(1, i__), &c__1);
04819 t_ref(i__, i__) = tau[i__];
04820 }
04821 /* L20: */
04822 }
04823 } else {
04824 for (i__ = *k; i__ >= 1; --i__) {
04825 if (tau[i__] == 0.f) {
04826
04827 /* H(i) = I */
04828
04829 i__1 = *k;
04830 for (j = i__; j <= i__1; ++j) {
04831 t_ref(j, i__) = 0.f;
04832 /* L30: */
04833 }
04834 } else {
04835
04836 /* general case */
04837
04838 if (i__ < *k) {
04839 if (lsame_(storev, "C")) {
04840 vii = v_ref(*n - *k + i__, i__);
04841 v_ref(*n - *k + i__, i__) = 1.f;
04842
04843 /* T(i+1:k,i) :=
04844 - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) */
04845
04846 i__1 = *n - *k + i__;
04847 i__2 = *k - i__;
04848 r__1 = -tau[i__];
04849 sgemv_("Transpose", &i__1, &i__2, &r__1, &v_ref(1,
04850 i__ + 1), ldv, &v_ref(1, i__), &c__1, &c_b8, &
04851 t_ref(i__ + 1, i__), &c__1);
04852 v_ref(*n - *k + i__, i__) = vii;
04853 } else {
04854 vii = v_ref(i__, *n - *k + i__);
04855 v_ref(i__, *n - *k + i__) = 1.f;
04856
04857 /* T(i+1:k,i) :=
04858 - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' */
04859
04860 i__1 = *k - i__;
04861 i__2 = *n - *k + i__;
04862 r__1 = -tau[i__];
04863 sgemv_("No transpose", &i__1, &i__2, &r__1, &v_ref(
04864 i__ + 1, 1), ldv, &v_ref(i__, 1), ldv, &c_b8,
04865 &t_ref(i__ + 1, i__), &c__1);
04866 v_ref(i__, *n - *k + i__) = vii;
04867 }
04868
04869 /* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
04870
04871 i__1 = *k - i__;
04872 strmv_("Lower", "No transpose", "Non-unit", &i__1, &t_ref(
04873 i__ + 1, i__ + 1), ldt, &t_ref(i__ + 1, i__), &
04874 c__1);
04875 }
04876 t_ref(i__, i__) = tau[i__];
04877 }
04878 /* L40: */
04879 }
04880 }
04881 return 0;
04882
04883 /* End of SLARFT */
04884
04885 } /* slarft_ */
|
|
||||||||||||||||||||||||
|
Definition at line 4894 of file lapackblas.cpp. References dabs, df2cmax, doublereal, integer, log(), logical, pow_ri(), real, slamch_(), and sqrt(). Referenced by sbdsqr_(), and ssteqr_(). 04895 {
04896 /* -- LAPACK auxiliary routine (version 3.0) --
04897 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
04898 Courant Institute, Argonne National Lab, and Rice University
04899 September 30, 1994
04900
04901
04902 Purpose
04903 =======
04904
04905 SLARTG generate a plane rotation so that
04906
04907 [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1.
04908 [ -SN CS ] [ G ] [ 0 ]
04909
04910 This is a slower, more accurate version of the BLAS1 routine SROTG,
04911 with the following other differences:
04912 F and G are unchanged on return.
04913 If G=0, then CS=1 and SN=0.
04914 If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any
04915 floating point operations (saves work in SBDSQR when
04916 there are zeros on the diagonal).
04917
04918 If F exceeds G in magnitude, CS will be positive.
04919
04920 Arguments
04921 =========
04922
04923 F (input) REAL
04924 The first component of vector to be rotated.
04925
04926 G (input) REAL
04927 The second component of vector to be rotated.
04928
04929 CS (output) REAL
04930 The cosine of the rotation.
04931
04932 SN (output) REAL
04933 The sine of the rotation.
04934
04935 R (output) REAL
04936 The nonzero component of the rotated vector.
04937
04938 ===================================================================== */
04939 /* Initialized data */
04940 static logical first = TRUE_;
04941 /* System generated locals */
04942 integer i__1;
04943 real r__1, r__2;
04944 /* Builtin functions */
04945 // double log(doublereal), pow_ri(real *, integer *), sqrt(doublereal);
04946 double pow_ri(real *, integer *);
04947 /* Local variables */
04948 static integer i__;
04949 static real scale;
04950 static integer count;
04951 static real f1, g1, safmn2, safmx2;
04952 extern doublereal slamch_(const char *);
04953 static real safmin, eps;
04954
04955
04956
04957 if (first) {
04958 first = FALSE_;
04959 safmin = slamch_("S");
04960 eps = slamch_("E");
04961 r__1 = slamch_("B");
04962 i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) /
04963 2.f);
04964 safmn2 = pow_ri(&r__1, &i__1);
04965 safmx2 = 1.f / safmn2;
04966 }
04967 if (*g == 0.f) {
04968 *cs = 1.f;
04969 *sn = 0.f;
04970 *r__ = *f;
04971 } else if (*f == 0.f) {
04972 *cs = 0.f;
04973 *sn = 1.f;
04974 *r__ = *g;
04975 } else {
04976 f1 = *f;
04977 g1 = *g;
04978 /* Computing MAX */
04979 r__1 = dabs(f1), r__2 = dabs(g1);
04980 scale = df2cmax(r__1,r__2);
04981 if (scale >= safmx2) {
04982 count = 0;
04983 L10:
04984 ++count;
04985 f1 *= safmn2;
04986 g1 *= safmn2;
04987 /* Computing MAX */
04988 r__1 = dabs(f1), r__2 = dabs(g1);
04989 scale = df2cmax(r__1,r__2);
04990 if (scale >= safmx2) {
04991 goto L10;
04992 }
04993 /* Computing 2nd power */
04994 r__1 = f1;
04995 /* Computing 2nd power */
04996 r__2 = g1;
04997 *r__ = sqrt(r__1 * r__1 + r__2 * r__2);
04998 *cs = f1 / *r__;
04999 *sn = g1 / *r__;
05000 i__1 = count;
05001 for (i__ = 1; i__ <= i__1; ++i__) {
05002 *r__ *= safmx2;
05003 /* L20: */
05004 }
05005 } else if (scale <= safmn2) {
05006 count = 0;
05007 L30:
05008 ++count;
05009 f1 *= safmx2;
05010 g1 *= safmx2;
05011 /* Computing MAX */
05012 r__1 = dabs(f1), r__2 = dabs(g1);
05013 scale = df2cmax(r__1,r__2);
05014 if (scale <= safmn2) {
05015 goto L30;
05016 }
05017 /* Computing 2nd power */
05018 r__1 = f1;
05019 /* Computing 2nd power */
05020 r__2 = g1;
05021 *r__ = sqrt(r__1 * r__1 + r__2 * r__2);
05022 *cs = f1 / *r__;
05023 *sn = g1 / *r__;
05024 i__1 = count;
05025 for (i__ = 1; i__ <= i__1; ++i__) {
05026 *r__ *= safmn2;
05027 /* L40: */
05028 }
05029 } else {
05030 /* Computing 2nd power */
05031 r__1 = f1;
05032 /* Computing 2nd power */
05033 r__2 = g1;
05034 *r__ = sqrt(r__1 * r__1 + r__2 * r__2);
05035 *cs = f1 / *r__;
05036 *sn = g1 / *r__;
05037 }
05038 if (dabs(*f) > dabs(*g) && *cs < 0.f) {
05039 *cs = -(*cs);
05040 *sn = -(*sn);
05041 *r__ = -(*r__);
05042 }
05043 }
05044 return 0;
05045
05046 /* End of SLARTG */
05047
05048 } /* slartg_ */
|
|
||||||||||||||||||||||||
|
Definition at line 27334 of file lapackblas.cpp. References dabs, df2cmax, df2cmin, real, and sqrt(). Referenced by sbdsqr_(), and slasq1_(). 27336 {
27337 /* -- LAPACK auxiliary routine (version 3.0) --
27338 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
27339 Courant Institute, Argonne National Lab, and Rice University
27340 September 30, 1994
27341
27342
27343 Purpose
27344 =======
27345
27346 SLAS2 computes the singular values of the 2-by-2 matrix
27347 [ F G ]
27348 [ 0 H ].
27349 On return, SSMIN is the smaller singular value and SSMAX is the
27350 larger singular value.
27351
27352 Arguments
27353 =========
27354
27355 F (input) REAL
27356 The (1,1) element of the 2-by-2 matrix.
27357
27358 G (input) REAL
27359 The (1,2) element of the 2-by-2 matrix.
27360
27361 H (input) REAL
27362 The (2,2) element of the 2-by-2 matrix.
27363
27364 SSMIN (output) REAL
27365 The smaller singular value.
27366
27367 SSMAX (output) REAL
27368 The larger singular value.
27369
27370 Further Details
27371 ===============
27372
27373 Barring over/underflow, all output quantities are correct to within
27374 a few units in the last place (ulps), even in the absence of a guard
27375 digit in addition/subtraction.
27376
27377 In IEEE arithmetic, the code works correctly if one matrix element is
27378 infinite.
27379
27380 Overflow will not occur unless the largest singular value itself
27381 overflows, or is within a few ulps of overflow. (On machines with
27382 partial overflow, like the Cray, overflow may occur if the largest
27383 singular value is within a factor of 2 of overflow.)
27384
27385 Underflow is harmless if underflow is gradual. Otherwise, results
27386 may correspond to a matrix modified by perturbations of size near
27387 the underflow threshold.
27388
27389 ==================================================================== */
27390 /* System generated locals */
27391 real r__1, r__2;
27392 /* Builtin functions */
27393 //double sqrt(doublereal);
27394 /* Local variables */
27395 static real fhmn, fhmx, c__, fa, ga, ha, as, at, au;
27396
27397
27398
27399 fa = dabs(*f);
27400 ga = dabs(*g);
27401 ha = dabs(*h__);
27402 fhmn = df2cmin(fa,ha);
27403 fhmx = df2cmax(fa,ha);
27404 if (fhmn == 0.f) {
27405 *ssmin = 0.f;
27406 if (fhmx == 0.f) {
27407 *ssmax = ga;
27408 } else {
27409 /* Computing 2nd power */
27410 r__1 = df2cmin(fhmx,ga) / df2cmax(fhmx,ga);
27411 *ssmax = df2cmax(fhmx,ga) * sqrt(r__1 * r__1 + 1.f);
27412 }
27413 } else {
27414 if (ga < fhmx) {
27415 as = fhmn / fhmx + 1.f;
27416 at = (fhmx - fhmn) / fhmx;
27417 /* Computing 2nd power */
27418 r__1 = ga / fhmx;
27419 au = r__1 * r__1;
27420 c__ = 2.f / (sqrt(as * as + au) + sqrt(at * at + au));
27421 *ssmin = fhmn * c__;
27422 *ssmax = fhmx / c__;
27423 } else {
27424 au = fhmx / ga;
27425 if (au == 0.f) {
27426
27427 /* Avoid possible harmful underflow if exponent range
27428 asymmetric (true SSMIN may not underflow even if
27429 AU underflows) */
27430
27431 *ssmin = fhmn * fhmx / ga;
27432 *ssmax = ga;
27433 } else {
27434 as = fhmn / fhmx + 1.f;
27435 at = (fhmx - fhmn) / fhmx;
27436 /* Computing 2nd power */
27437 r__1 = as * au;
27438 /* Computing 2nd power */
27439 r__2 = at * au;
27440 c__ = 1.f / (sqrt(r__1 * r__1 + 1.f) + sqrt(r__2 * r__2 + 1.f)
27441 );
27442 *ssmin = fhmn * c__ * au;
27443 *ssmin += *ssmin;
27444 *ssmax = ga / (c__ + c__);
27445 }
27446 }
27447 }
27448 return 0;
27449
27450 /* End of SLAS2 */
27451
27452 } /* slas2_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 5053 of file lapackblas.cpp. References a_ref, dabs, doublereal, f2cmax, f2cmin, integer, logical, lsame_(), real, slamch_(), and xerbla_(). Referenced by sgesvd_(), slasq1_(), sstedc_(), ssteqr_(), ssterf_(), and ssyev_(). 05056 {
05057 /* -- LAPACK auxiliary routine (version 3.0) --
05058 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
05059 Courant Institute, Argonne National Lab, and Rice University
05060 February 29, 1992
05061
05062
05063 Purpose
05064 =======
05065
05066 SLASCL multiplies the M by N real matrix A by the real scalar
05067 CTO/CFROM. This is done without over/underflow as long as the final
05068 result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that
05069 A may be full, upper triangular, lower triangular, upper Hessenberg,
05070 or banded.
05071
05072 Arguments
05073 =========
05074
05075 TYPE (input) CHARACTER*1
05076 TYPE indices the storage type of the input matrix.
05077 = 'G': A is a full matrix.
05078 = 'L': A is a lower triangular matrix.
05079 = 'U': A is an upper triangular matrix.
05080 = 'H': A is an upper Hessenberg matrix.
05081 = 'B': A is a symmetric band matrix with lower bandwidth KL
05082 and upper bandwidth KU and with the only the lower
05083 half stored.
05084 = 'Q': A is a symmetric band matrix with lower bandwidth KL
05085 and upper bandwidth KU and with the only the upper
05086 half stored.
05087 = 'Z': A is a band matrix with lower bandwidth KL and upper
05088 bandwidth KU.
05089
05090 KL (input) INTEGER
05091 The lower bandwidth of A. Referenced only if TYPE = 'B',
05092 'Q' or 'Z'.
05093
05094 KU (input) INTEGER
05095 The upper bandwidth of A. Referenced only if TYPE = 'B',
05096 'Q' or 'Z'.
05097
05098 CFROM (input) REAL
05099 CTO (input) REAL
05100 The matrix A is multiplied by CTO/CFROM. A(I,J) is computed
05101 without over/underflow if the final result CTO*A(I,J)/CFROM
05102 can be represented without over/underflow. CFROM must be
05103 nonzero.
05104
05105 M (input) INTEGER
05106 The number of rows of the matrix A. M >= 0.
05107
05108 N (input) INTEGER
05109 The number of columns of the matrix A. N >= 0.
05110
05111 A (input/output) REAL array, dimension (LDA,M)
05112 The matrix to be multiplied by CTO/CFROM. See TYPE for the
05113 storage type.
05114
05115 LDA (input) INTEGER
05116 The leading dimension of the array A. LDA >= f2cmax(1,M).
05117
05118 INFO (output) INTEGER
05119 0 - successful exit
05120 <0 - if INFO = -i, the i-th argument had an illegal value.
05121
05122 =====================================================================
05123
05124
05125 Test the input arguments
05126
05127 Parameter adjustments */
05128 /* System generated locals */
05129 integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
05130 /* Local variables */
05131 static logical done;
05132 static real ctoc;
05133 static integer i__, j;
05134 extern logical lsame_(const char *, const char *);
05135 static integer itype, k1, k2, k3, k4;
05136 static real cfrom1;
05137 extern doublereal slamch_(const char *);
05138 static real cfromc;
05139 extern /* Subroutine */ int xerbla_(const char *, integer *);
05140 static real bignum, smlnum, mul, cto1;
05141 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
05142
05143 a_dim1 = *lda;
05144 a_offset = 1 + a_dim1 * 1;
05145 a -= a_offset;
05146
05147 /* Function Body */
05148 *info = 0;
05149
05150 if (lsame_(type__, "G")) {
05151 itype = 0;
05152 } else if (lsame_(type__, "L")) {
05153 itype = 1;
05154 } else if (lsame_(type__, "U")) {
05155 itype = 2;
05156 } else if (lsame_(type__, "H")) {
05157 itype = 3;
05158 } else if (lsame_(type__, "B")) {
05159 itype = 4;
05160 } else if (lsame_(type__, "Q")) {
05161 itype = 5;
05162 } else if (lsame_(type__, "Z")) {
05163 itype = 6;
05164 } else {
05165 itype = -1;
05166 }
05167
05168 if (itype == -1) {
05169 *info = -1;
05170 } else if (*cfrom == 0.f) {
05171 *info = -4;
05172 } else if (*m < 0) {
05173 *info = -6;
05174 } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) {
05175 *info = -7;
05176 } else if (itype <= 3 && *lda < f2cmax(1,*m)) {
05177 *info = -9;
05178 } else if (itype >= 4) {
05179 /* Computing MAX */
05180 i__1 = *m - 1;
05181 if (*kl < 0 || *kl > f2cmax(i__1,0)) {
05182 *info = -2;
05183 } else /* if(complicated condition) */ {
05184 /* Computing MAX */
05185 i__1 = *n - 1;
05186 if (*ku < 0 || *ku > f2cmax(i__1,0) || (itype == 4 || itype == 5) &&
05187 *kl != *ku) {
05188 *info = -3;
05189 } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < *
05190 ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) {
05191 *info = -9;
05192 }
05193 }
05194 }
05195
05196 if (*info != 0) {
05197 i__1 = -(*info);
05198 xerbla_("SLASCL", &i__1);
05199 return 0;
05200 }
05201
05202 /* Quick return if possible */
05203
05204 if (*n == 0 || *m == 0) {
05205 return 0;
05206 }
05207
05208 /* Get machine parameters */
05209
05210 smlnum = slamch_("S");
05211 bignum = 1.f / smlnum;
05212
05213 cfromc = *cfrom;
05214 ctoc = *cto;
05215
05216 L10:
05217 cfrom1 = cfromc * smlnum;
05218 cto1 = ctoc / bignum;
05219 if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) {
05220 mul = smlnum;
05221 done = FALSE_;
05222 cfromc = cfrom1;
05223 } else if (dabs(cto1) > dabs(cfromc)) {
05224 mul = bignum;
05225 done = FALSE_;
05226 ctoc = cto1;
05227 } else {
05228 mul = ctoc / cfromc;
05229 done = TRUE_;
05230 }
05231
05232 if (itype == 0) {
05233
05234 /* Full matrix */
05235
05236 i__1 = *n;
05237 for (j = 1; j <= i__1; ++j) {
05238 i__2 = *m;
05239 for (i__ = 1; i__ <= i__2; ++i__) {
05240 a_ref(i__, j) = a_ref(i__, j) * mul;
05241 /* L20: */
05242 }
05243 /* L30: */
05244 }
05245
05246 } else if (itype == 1) {
05247
05248 /* Lower triangular matrix */
05249
05250 i__1 = *n;
05251 for (j = 1; j <= i__1; ++j) {
05252 i__2 = *m;
05253 for (i__ = j; i__ <= i__2; ++i__) {
05254 a_ref(i__, j) = a_ref(i__, j) * mul;
05255 /* L40: */
05256 }
05257 /* L50: */
05258 }
05259
05260 } else if (itype == 2) {
05261
05262 /* Upper triangular matrix */
05263
05264 i__1 = *n;
05265 for (j = 1; j <= i__1; ++j) {
05266 i__2 = f2cmin(j,*m);
05267 for (i__ = 1; i__ <= i__2; ++i__) {
05268 a_ref(i__, j) = a_ref(i__, j) * mul;
05269 /* L60: */
05270 }
05271 /* L70: */
05272 }
05273
05274 } else if (itype == 3) {
05275
05276 /* Upper Hessenberg matrix */
05277
05278 i__1 = *n;
05279 for (j = 1; j <= i__1; ++j) {
05280 /* Computing MIN */
05281 i__3 = j + 1;
05282 i__2 = f2cmin(i__3,*m);
05283 for (i__ = 1; i__ <= i__2; ++i__) {
05284 a_ref(i__, j) = a_ref(i__, j) * mul;
05285 /* L80: */
05286 }
05287 /* L90: */
05288 }
05289
05290 } else if (itype == 4) {
05291
05292 /* Lower half of a symmetric band matrix */
05293
05294 k3 = *kl + 1;
05295 k4 = *n + 1;
05296 i__1 = *n;
05297 for (j = 1; j <= i__1; ++j) {
05298 /* Computing MIN */
05299 i__3 = k3, i__4 = k4 - j;
05300 i__2 = f2cmin(i__3,i__4);
05301 for (i__ = 1; i__ <= i__2; ++i__) {
05302 a_ref(i__, j) = a_ref(i__, j) * mul;
05303 /* L100: */
05304 }
05305 /* L110: */
05306 }
05307
05308 } else if (itype == 5) {
05309
05310 /* Upper half of a symmetric band matrix */
05311
05312 k1 = *ku + 2;
05313 k3 = *ku + 1;
05314 i__1 = *n;
05315 for (j = 1; j <= i__1; ++j) {
05316 /* Computing MAX */
05317 i__2 = k1 - j;
05318 i__3 = k3;
05319 for (i__ = f2cmax(i__2,1); i__ <= i__3; ++i__) {
05320 a_ref(i__, j) = a_ref(i__, j) * mul;
05321 /* L120: */
05322 }
05323 /* L130: */
05324 }
05325
05326 } else if (itype == 6) {
05327
05328 /* Band matrix */
05329
05330 k1 = *kl + *ku + 2;
05331 k2 = *kl + 1;
05332 k3 = (*kl << 1) + *ku + 1;
05333 k4 = *kl + *ku + 1 + *m;
05334 i__1 = *n;
05335 for (j = 1; j <= i__1; ++j) {
05336 /* Computing MAX */
05337 i__3 = k1 - j;
05338 /* Computing MIN */
05339 i__4 = k3, i__5 = k4 - j;
05340 i__2 = f2cmin(i__4,i__5);
05341 for (i__ = f2cmax(i__3,k2); i__ <= i__2; ++i__) {
05342 a_ref(i__, j) = a_ref(i__, j) * mul;
05343 /* L140: */
05344 }
05345 /* L150: */
05346 }
05347
05348 }
05349
05350 if (! done) {
05351 goto L10;
05352 }
05353
05354 return 0;
05355
05356 /* End of SLASCL */
05357
05358 } /* slascl_ */
|
|
||||||||||||||||||||||||||||||||
|
Definition at line 5366 of file lapackblas.cpp. References a_ref, f2cmin, integer, logical, and lsame_(). Referenced by sgesvd_(), slaed3_(), sstedc_(), and ssteqr_(). 05368 {
05369 /* -- LAPACK auxiliary routine (version 3.0) --
05370 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
05371 Courant Institute, Argonne National Lab, and Rice University
05372 October 31, 1992
05373
05374
05375 Purpose
05376 =======
05377
05378 SLASET initializes an m-by-n matrix A to BETA on the diagonal and
05379 ALPHA on the offdiagonals.
05380
05381 Arguments
05382 =========
05383
05384 UPLO (input) CHARACTER*1
05385 Specifies the part of the matrix A to be set.
05386 = 'U': Upper triangular part is set; the strictly lower
05387 triangular part of A is not changed.
05388 = 'L': Lower triangular part is set; the strictly upper
05389 triangular part of A is not changed.
05390 Otherwise: All of the matrix A is set.
05391
05392 M (input) INTEGER
05393 The number of rows of the matrix A. M >= 0.
05394
05395 N (input) INTEGER
05396 The number of columns of the matrix A. N >= 0.
05397
05398 ALPHA (input) REAL
05399 The constant to which the offdiagonal elements are to be set.
05400
05401 BETA (input) REAL
05402 The constant to which the diagonal elements are to be set.
05403
05404 A (input/output) REAL array, dimension (LDA,N)
05405 On exit, the leading m-by-n submatrix of A is set as follows:
05406
05407 if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n,
05408 if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n,
05409 otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j,
05410
05411 and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n).
05412
05413 LDA (input) INTEGER
05414 The leading dimension of the array A. LDA >= f2cmax(1,M).
05415
05416 =====================================================================
05417
05418
05419 Parameter adjustments */
05420 /* System generated locals */
05421 integer a_dim1, a_offset, i__1, i__2, i__3;
05422 /* Local variables */
05423 static integer i__, j;
05424 extern logical lsame_(const char *, const char *);
05425 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
05426
05427 a_dim1 = *lda;
05428 a_offset = 1 + a_dim1 * 1;
05429 a -= a_offset;
05430
05431 /* Function Body */
05432 if (lsame_(uplo, "U")) {
05433
05434 /* Set the strictly upper triangular or trapezoidal part of the
05435 array to ALPHA. */
05436
05437 i__1 = *n;
05438 for (j = 2; j <= i__1; ++j) {
05439 /* Computing MIN */
05440 i__3 = j - 1;
05441 i__2 = f2cmin(i__3,*m);
05442 for (i__ = 1; i__ <= i__2; ++i__) {
05443 a_ref(i__, j) = *alpha;
05444 /* L10: */
05445 }
05446 /* L20: */
05447 }
05448
05449 } else if (lsame_(uplo, "L")) {
05450
05451 /* Set the strictly lower triangular or trapezoidal part of the
05452 array to ALPHA. */
05453
05454 i__1 = f2cmin(*m,*n);
05455 for (j = 1; j <= i__1; ++j) {
05456 i__2 = *m;
05457 for (i__ = j + 1; i__ <= i__2; ++i__) {
05458 a_ref(i__, j) = *alpha;
05459 /* L30: */
05460 }
05461 /* L40: */
05462 }
05463
05464 } else {
05465
05466 /* Set the leading m-by-n submatrix to ALPHA. */
05467
05468 i__1 = *n;
05469 for (j = 1; j <= i__1; ++j) {
05470 i__2 = *m;
05471 for (i__ = 1; i__ <= i__2; ++i__) {
05472 a_ref(i__, j) = *alpha;
05473 /* L50: */
05474 }
05475 /* L60: */
05476 }
05477 }
05478
05479 /* Set the first f2cmin(M,N) diagonal elements to BETA. */
05480
05481 i__1 = f2cmin(*m,*n);
05482 for (i__ = 1; i__ <= i__1; ++i__) {
05483 a_ref(i__, i__) = *beta;
05484 /* L70: */
05485 }
05486
05487 return 0;
05488
05489 /* End of SLASET */
05490
05491 } /* slaset_ */
|
|
||||||||||||||||||||||||
|
Definition at line 25318 of file lapackblas.cpp. References c__0, c__1, c__2, dabs, df2cmax, doublereal, integer, real, scopy_(), slamch_(), slas2_(), slascl_(), slasq2_(), slasrt_(), sqrt(), and xerbla_(). Referenced by sbdsqr_(). 25320 {
25321 /* System generated locals */
25322 integer i__1, i__2;
25323 real r__1, r__2, r__3;
25324
25325 /* Builtin functions */
25326 //double sqrt(doublereal);
25327
25328 /* Local variables */
25329 extern /* Subroutine */ int slas2_(real *, real *, real *, real *, real *)
25330 ;
25331 static integer i__;
25332 static real scale;
25333 static integer iinfo;
25334 static real sigmn, sigmx;
25335 extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
25336 integer *), slasq2_(integer *, real *, integer *);
25337 extern doublereal slamch_(const char *);
25338 static real safmin;
25339 extern /* Subroutine */ int xerbla_(const char *, integer *), slascl_(
25340 const char *, integer *, integer *, real *, real *, integer *, integer *
25341 , real *, integer *, integer *), slasrt_(const char *, integer *
25342 , real *, integer *);
25343 static real eps;
25344
25345
25346 /* -- LAPACK routine (version 3.0) --
25347 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
25348 Courant Institute, Argonne National Lab, and Rice University
25349 October 31, 1999
25350
25351
25352 Purpose
25353 =======
25354
25355 SLASQ1 computes the singular values of a real N-by-N bidiagonal
25356 matrix with diagonal D and off-diagonal E. The singular values
25357 are computed to high relative accuracy, in the absence of
25358 denormalization, underflow and overflow. The algorithm was first
25359 presented in
25360
25361 "Accurate singular values and differential qd algorithms" by K. V.
25362 Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230,
25363 1994,
25364
25365 and the present implementation is described in "An implementation of
25366 the dqds Algorithm (Positive Case)", LAPACK Working Note.
25367
25368 Arguments
25369 =========
25370
25371 N (input) INTEGER
25372 The number of rows and columns in the matrix. N >= 0.
25373
25374 D (input/output) REAL array, dimension (N)
25375 On entry, D contains the diagonal elements of the
25376 bidiagonal matrix whose SVD is desired. On normal exit,
25377 D contains the singular values in decreasing order.
25378
25379 E (input/output) REAL array, dimension (N)
25380 On entry, elements E(1:N-1) contain the off-diagonal elements
25381 of the bidiagonal matrix whose SVD is desired.
25382 On exit, E is overwritten.
25383
25384 WORK (workspace) REAL array, dimension (4*N)
25385
25386 INFO (output) INTEGER
25387 = 0: successful exit
25388 < 0: if INFO = -i, the i-th argument had an illegal value
25389 > 0: the algorithm failed
25390 = 1, a split was marked by a positive value in E
25391 = 2, current block of Z not diagonalized after 30*N
25392 iterations (in inner while loop)
25393 = 3, termination criterion of outer while loop not met
25394 (program created more than N unreduced blocks)
25395
25396 =====================================================================
25397
25398
25399 Parameter adjustments */
25400 --work;
25401 --e;
25402 --d__;
25403
25404 /* Function Body */
25405 *info = 0;
25406 if (*n < 0) {
25407 *info = -2;
25408 i__1 = -(*info);
25409 xerbla_("SLASQ1", &i__1);
25410 return 0;
25411 } else if (*n == 0) {
25412 return 0;
25413 } else if (*n == 1) {
25414 d__[1] = dabs(d__[1]);
25415 return 0;
25416 } else if (*n == 2) {
25417 slas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx);
25418 d__[1] = sigmx;
25419 d__[2] = sigmn;
25420 return 0;
25421 }
25422
25423 /* Estimate the largest singular value. */
25424
25425 sigmx = 0.f;
25426 i__1 = *n - 1;
25427 for (i__ = 1; i__ <= i__1; ++i__) {
25428 d__[i__] = (r__1 = d__[i__], dabs(r__1));
25429 /* Computing MAX */
25430 r__2 = sigmx, r__3 = (r__1 = e[i__], dabs(r__1));
25431 sigmx = df2cmax(r__2,r__3);
25432 /* L10: */
25433 }
25434 d__[*n] = (r__1 = d__[*n], dabs(r__1));
25435
25436 /* Early return if SIGMX is zero (matrix is already diagonal). */
25437
25438 if (sigmx == 0.f) {
25439 slasrt_("D", n, &d__[1], &iinfo);
25440 return 0;
25441 }
25442
25443 i__1 = *n;
25444 for (i__ = 1; i__ <= i__1; ++i__) {
25445 /* Computing MAX */
25446 r__1 = sigmx, r__2 = d__[i__];
25447 sigmx = df2cmax(r__1,r__2);
25448 /* L20: */
25449 }
25450
25451 /* Copy D and E into WORK (in the Z format) and scale (squaring the
25452 input data makes scaling by a power of the radix pointless). */
25453
25454 eps = slamch_("Precision");
25455 safmin = slamch_("Safe minimum");
25456 scale = sqrt(eps / safmin);
25457 scopy_(n, &d__[1], &c__1, &work[1], &c__2);
25458 i__1 = *n - 1;
25459 scopy_(&i__1, &e[1], &c__1, &work[2], &c__2);
25460 i__1 = (*n << 1) - 1;
25461 i__2 = (*n << 1) - 1;
25462 slascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2,
25463 &iinfo);
25464
25465 /* Compute the q's and e's. */
25466
25467 i__1 = (*n << 1) - 1;
25468 for (i__ = 1; i__ <= i__1; ++i__) {
25469 /* Computing 2nd power */
25470 r__1 = work[i__];
25471 work[i__] = r__1 * r__1;
25472 /* L30: */
25473 }
25474 work[*n * 2] = 0.f;
25475
25476 slasq2_(n, &work[1], info);
25477
25478 if (*info == 0) {
25479 i__1 = *n;
25480 for (i__ = 1; i__ <= i__1; ++i__) {
25481 d__[i__] = sqrt(work[i__]);
25482 /* L40: */
25483 }
25484 slascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, &
25485 iinfo);
25486 }
25487
25488 return 0;
25489
25490 /* End of SLASQ1 */
25491
25492 } /* slasq1_ */
|
|
||||||||||||||||
|
Definition at line 25503 of file lapackblas.cpp. References c__1, c__10, c__11, c__2, c__3, c__4, dabs, df2cmax, df2cmin, doublereal, f2cmax, ftnlen, ilaenv_(), integer, logical, real, slamch_(), slasq3_(), slasrt_(), sqrt(), t, and xerbla_(). Referenced by slasq1_(). 25504 {
25505 /* System generated locals */
25506 integer i__1, i__2, i__3;
25507 real r__1, r__2;
25508
25509 /* Builtin functions */
25510 //double sqrt(doublereal);
25511
25512 /* Local variables */
25513 static logical ieee;
25514 static integer nbig;
25515 static real dmin__, emin, emax;
25516 static integer ndiv, iter;
25517 static real qmin, temp, qmax, zmax;
25518 static integer splt;
25519 static real d__, e;
25520 static integer k;
25521 static real s, t;
25522 static integer nfail;
25523 static real desig, trace, sigma;
25524 static integer iinfo, i0, i4, n0;
25525 extern /* Subroutine */ int slasq3_(integer *, integer *, real *, integer
25526 *, real *, real *, real *, real *, integer *, integer *, integer *
25527 , logical *);
25528 static integer pp;
25529 extern doublereal slamch_(const char *);
25530 static integer iwhila, iwhilb;
25531 static real oldemn, safmin;
25532 extern /* Subroutine */ int xerbla_(const char *, integer *);
25533 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *,
25534 integer *, integer *, ftnlen, ftnlen);
25535 extern /* Subroutine */ int slasrt_(const char *, integer *, real *, integer *);
25536 static real eps, tol;
25537 static integer ipn4;
25538 static real tol2;
25539
25540
25541 /* -- LAPACK routine (version 3.0) --
25542 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
25543 Courant Institute, Argonne National Lab, and Rice University
25544 October 31, 1999
25545
25546
25547 Purpose
25548 =======
25549
25550 SLASQ2 computes all the eigenvalues of the symmetric positive
25551 definite tridiagonal matrix associated with the qd array Z to high
25552 relative accuracy are computed to high relative accuracy, in the
25553 absence of denormalization, underflow and overflow.
25554
25555 To see the relation of Z to the tridiagonal matrix, let L be a
25556 unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and
25557 let U be an upper bidiagonal matrix with 1's above and diagonal
25558 Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the
25559 symmetric tridiagonal to which it is similar.
25560
25561 Note : SLASQ2 defines a logical variable, IEEE, which is true
25562 on machines which follow ieee-754 floating-point standard in their
25563 handling of infinities and NaNs, and false otherwise. This variable
25564 is passed to SLASQ3.
25565
25566 Arguments
25567 =========
25568
25569 N (input) INTEGER
25570 The number of rows and columns in the matrix. N >= 0.
25571
25572 Z (workspace) REAL array, dimension ( 4*N )
25573 On entry Z holds the qd array. On exit, entries 1 to N hold
25574 the eigenvalues in decreasing order, Z( 2*N+1 ) holds the
25575 trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If
25576 N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 )
25577 holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of
25578 shifts that failed.
25579
25580 INFO (output) INTEGER
25581 = 0: successful exit
25582 < 0: if the i-th argument is a scalar and had an illegal
25583 value, then INFO = -i, if the i-th argument is an
25584 array and the j-entry had an illegal value, then
25585 INFO = -(i*100+j)
25586 > 0: the algorithm failed
25587 = 1, a split was marked by a positive value in E
25588 = 2, current block of Z not diagonalized after 30*N
25589 iterations (in inner while loop)
25590 = 3, termination criterion of outer while loop not met
25591 (program created more than N unreduced blocks)
25592
25593 Further Details
25594 ===============
25595 Local Variables: I0:N0 defines a current unreduced segment of Z.
25596 The shifts are accumulated in SIGMA. Iteration count is in ITER.
25597 Ping-pong is controlled by PP (alternates between 0 and 1).
25598
25599 =====================================================================
25600
25601
25602 Test the input arguments.
25603 (in case SLASQ2 is not called by SLASQ1)
25604
25605 Parameter adjustments */
25606 --z__;
25607
25608 /* Function Body */
25609 *info = 0;
25610 eps = slamch_("Precision");
25611 safmin = slamch_("Safe minimum");
25612 tol = eps * 100.f;
25613 /* Computing 2nd power */
25614 r__1 = tol;
25615 tol2 = r__1 * r__1;
25616
25617 if (*n < 0) {
25618 *info = -1;
25619 xerbla_("SLASQ2", &c__1);
25620 return 0;
25621 } else if (*n == 0) {
25622 return 0;
25623 } else if (*n == 1) {
25624
25625 /* 1-by-1 case. */
25626
25627 if (z__[1] < 0.f) {
25628 *info = -201;
25629 xerbla_("SLASQ2", &c__2);
25630 }
25631 return 0;
25632 } else if (*n == 2) {
25633
25634 /* 2-by-2 case. */
25635
25636 if (z__[2] < 0.f || z__[3] < 0.f) {
25637 *info = -2;
25638 xerbla_("SLASQ2", &c__2);
25639 return 0;
25640 } else if (z__[3] > z__[1]) {
25641 d__ = z__[3];
25642 z__[3] = z__[1];
25643 z__[1] = d__;
25644 }
25645 z__[5] = z__[1] + z__[2] + z__[3];
25646 if (z__[2] > z__[3] * tol2) {
25647 t = (z__[1] - z__[3] + z__[2]) * .5f;
25648 s = z__[3] * (z__[2] / t);
25649 if (s <= t) {
25650 s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.f) + 1.f)));
25651 } else {
25652 s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s)));
25653 }
25654 t = z__[1] + (s + z__[2]);
25655 z__[3] *= z__[1] / t;
25656 z__[1] = t;
25657 }
25658 z__[2] = z__[3];
25659 z__[6] = z__[2] + z__[1];
25660 return 0;
25661 }
25662
25663 /* Check for negative data and compute sums of q's and e's. */
25664
25665 z__[*n * 2] = 0.f;
25666 emin = z__[2];
25667 qmax = 0.f;
25668 zmax = 0.f;
25669 d__ = 0.f;
25670 e = 0.f;
25671
25672 i__1 = *n - 1 << 1;
25673 for (k = 1; k <= i__1; k += 2) {
25674 if (z__[k] < 0.f) {
25675 *info = -(k + 200);
25676 xerbla_("SLASQ2", &c__2);
25677 return 0;
25678 } else if (z__[k + 1] < 0.f) {
25679 *info = -(k + 201);
25680 xerbla_("SLASQ2", &c__2);
25681 return 0;
25682 }
25683 d__ += z__[k];
25684 e += z__[k + 1];
25685 /* Computing MAX */
25686 r__1 = qmax, r__2 = z__[k];
25687 qmax = df2cmax(r__1,r__2);
25688 /* Computing MIN */
25689 r__1 = emin, r__2 = z__[k + 1];
25690 emin = df2cmin(r__1,r__2);
25691 /* Computing MAX */
25692 r__1 = f2cmax(qmax,zmax), r__2 = z__[k + 1];
25693 zmax = df2cmax(r__1,r__2);
25694 /* L10: */
25695 }
25696 if (z__[(*n << 1) - 1] < 0.f) {
25697 *info = -((*n << 1) + 199);
25698 xerbla_("SLASQ2", &c__2);
25699 return 0;
25700 }
25701 d__ += z__[(*n << 1) - 1];
25702 /* Computing MAX */
25703 r__1 = qmax, r__2 = z__[(*n << 1) - 1];
25704 qmax = df2cmax(r__1,r__2);
25705 zmax = df2cmax(qmax,zmax);
25706
25707 /* Check for diagonality. */
25708
25709 if (e == 0.f) {
25710 i__1 = *n;
25711 for (k = 2; k <= i__1; ++k) {
25712 z__[k] = z__[(k << 1) - 1];
25713 /* L20: */
25714 }
25715 slasrt_("D", n, &z__[1], &iinfo);
25716 z__[(*n << 1) - 1] = d__;
25717 return 0;
25718 }
25719
25720 trace = d__ + e;
25721
25722 /* Check for zero data. */
25723
25724 if (trace == 0.f) {
25725 z__[(*n << 1) - 1] = 0.f;
25726 return 0;
25727 }
25728
25729 /* Check whether the machine is IEEE conformable. */
25730
25731 ieee = ilaenv_(&c__10, "SLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen)
25732 6, (ftnlen)1) == 1 && ilaenv_(&c__11, "SLASQ2", "N", &c__1, &c__2,
25733 &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1;
25734
25735 /* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */
25736
25737 for (k = *n << 1; k >= 2; k += -2) {
25738 z__[k * 2] = 0.f;
25739 z__[(k << 1) - 1] = z__[k];
25740 z__[(k << 1) - 2] = 0.f;
25741 z__[(k << 1) - 3] = z__[k - 1];
25742 /* L30: */
25743 }
25744
25745 i0 = 1;
25746 n0 = *n;
25747
25748 /* Reverse the qd-array, if warranted. */
25749
25750 if (z__[(i0 << 2) - 3] * 1.5f < z__[(n0 << 2) - 3]) {
25751 ipn4 = i0 + n0 << 2;
25752 i__1 = i0 + n0 - 1 << 1;
25753 for (i4 = i0 << 2; i4 <= i__1; i4 += 4) {
25754 temp = z__[i4 - 3];
25755 z__[i4 - 3] = z__[ipn4 - i4 - 3];
25756 z__[ipn4 - i4 - 3] = temp;
25757 temp = z__[i4 - 1];
25758 z__[i4 - 1] = z__[ipn4 - i4 - 5];
25759 z__[ipn4 - i4 - 5] = temp;
25760 /* L40: */
25761 }
25762 }
25763
25764 /* Initial split checking via dqd and Li's test. */
25765
25766 pp = 0;
25767
25768 for (k = 1; k <= 2; ++k) {
25769
25770 d__ = z__[(n0 << 2) + pp - 3];
25771 i__1 = (i0 << 2) + pp;
25772 for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) {
25773 if (z__[i4 - 1] <= tol2 * d__) {
25774 z__[i4 - 1] = 0.f;
25775 d__ = z__[i4 - 3];
25776 } else {
25777 d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1]));
25778 }
25779 /* L50: */
25780 }
25781
25782 /* dqd maps Z to ZZ plus Li's test. */
25783
25784 emin = z__[(i0 << 2) + pp + 1];
25785 d__ = z__[(i0 << 2) + pp - 3];
25786 i__1 = (n0 - 1 << 2) + pp;
25787 for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) {
25788 z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1];
25789 if (z__[i4 - 1] <= tol2 * d__) {
25790 z__[i4 - 1] = 0.f;
25791 z__[i4 - (pp << 1) - 2] = d__;
25792 z__[i4 - (pp << 1)] = 0.f;
25793 d__ = z__[i4 + 1];
25794 } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] &&
25795 safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) {
25796 temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2];
25797 z__[i4 - (pp << 1)] = z__[i4 - 1] * temp;
25798 d__ *= temp;
25799 } else {
25800 z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - (
25801 pp << 1) - 2]);
25802 d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]);
25803 }
25804 /* Computing MIN */
25805 r__1 = emin, r__2 = z__[i4 - (pp << 1)];
25806 emin = df2cmin(r__1,r__2);
25807 /* L60: */
25808 }
25809 z__[(n0 << 2) - pp - 2] = d__;
25810
25811 /* Now find qmax. */
25812
25813 qmax = z__[(i0 << 2) - pp - 2];
25814 i__1 = (n0 << 2) - pp - 2;
25815 for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) {
25816 /* Computing MAX */
25817 r__1 = qmax, r__2 = z__[i4];
25818 qmax = df2cmax(r__1,r__2);
25819 /* L70: */
25820 }
25821
25822 /* Prepare for the next iteration on K. */
25823
25824 pp = 1 - pp;
25825 /* L80: */
25826 }
25827
25828 iter = 2;
25829 nfail = 0;
25830 ndiv = n0 - i0 << 1;
25831
25832 i__1 = *n + 1;
25833 for (iwhila = 1; iwhila <= i__1; ++iwhila) {
25834 if (n0 < 1) {
25835 goto L150;
25836 }
25837
25838 /* While array unfinished do
25839
25840 E(N0) holds the value of SIGMA when submatrix in I0:N0
25841 splits from the rest of the array, but is negated. */
25842
25843 desig = 0.f;
25844 if (n0 == *n) {
25845 sigma = 0.f;
25846 } else {
25847 sigma = -z__[(n0 << 2) - 1];
25848 }
25849 if (sigma < 0.f) {
25850 *info = 1;
25851 return 0;
25852 }
25853
25854 /* Find last unreduced submatrix's top index I0, find QMAX and
25855 EMIN. Find Gershgorin-type bound if Q's much greater than E's. */
25856
25857 emax = 0.f;
25858 if (n0 > i0) {
25859 emin = (r__1 = z__[(n0 << 2) - 5], dabs(r__1));
25860 } else {
25861 emin = 0.f;
25862 }
25863 qmin = z__[(n0 << 2) - 3];
25864 qmax = qmin;
25865 for (i4 = n0 << 2; i4 >= 8; i4 += -4) {
25866 if (z__[i4 - 5] <= 0.f) {
25867 goto L100;
25868 }
25869 if (qmin >= emax * 4.f) {
25870 /* Computing MIN */
25871 r__1 = qmin, r__2 = z__[i4 - 3];
25872 qmin = df2cmin(r__1,r__2);
25873 /* Computing MAX */
25874 r__1 = emax, r__2 = z__[i4 - 5];
25875 emax = df2cmax(r__1,r__2);
25876 }
25877 /* Computing MAX */
25878 r__1 = qmax, r__2 = z__[i4 - 7] + z__[i4 - 5];
25879 qmax = df2cmax(r__1,r__2);
25880 /* Computing MIN */
25881 r__1 = emin, r__2 = z__[i4 - 5];
25882 emin = df2cmin(r__1,r__2);
25883 /* L90: */
25884 }
25885 i4 = 4;
25886
25887 L100:
25888 i0 = i4 / 4;
25889
25890 /* Store EMIN for passing to SLASQ3. */
25891
25892 z__[(n0 << 2) - 1] = emin;
25893
25894 /* Put -(initial shift) into DMIN.
25895
25896 Computing MAX */
25897 r__1 = 0.f, r__2 = qmin - sqrt(qmin) * 2.f * sqrt(emax);
25898 dmin__ = -df2cmax(r__1,r__2);
25899
25900 /* Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong. */
25901
25902 pp = 0;
25903
25904 nbig = (n0 - i0 + 1) * 30;
25905 i__2 = nbig;
25906 for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) {
25907 if (i0 > n0) {
25908 goto L130;
25909 }
25910
25911 /* While submatrix unfinished take a good dqds step. */
25912
25913 slasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, &
25914 nfail, &iter, &ndiv, &ieee);
25915
25916 pp = 1 - pp;
25917
25918 /* When EMIN is very small check for splits. */
25919
25920 if (pp == 0 && n0 - i0 >= 3) {
25921 if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 *
25922 sigma) {
25923 splt = i0 - 1;
25924 qmax = z__[(i0 << 2) - 3];
25925 emin = z__[(i0 << 2) - 1];
25926 oldemn = z__[i0 * 4];
25927 i__3 = n0 - 3 << 2;
25928 for (i4 = i0 << 2; i4 <= i__3; i4 += 4) {
25929 if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <=
25930 tol2 * sigma) {
25931 z__[i4 - 1] = -sigma;
25932 splt = i4 / 4;
25933 qmax = 0.f;
25934 emin = z__[i4 + 3];
25935 oldemn = z__[i4 + 4];
25936 } else {
25937 /* Computing MAX */
25938 r__1 = qmax, r__2 = z__[i4 + 1];
25939 qmax = df2cmax(r__1,r__2);
25940 /* Computing MIN */
25941 r__1 = emin, r__2 = z__[i4 - 1];
25942 emin = df2cmin(r__1,r__2);
25943 /* Computing MIN */
25944 r__1 = oldemn, r__2 = z__[i4];
25945 oldemn = df2cmin(r__1,r__2);
25946 }
25947 /* L110: */
25948 }
25949 z__[(n0 << 2) - 1] = emin;
25950 z__[n0 * 4] = oldemn;
25951 i0 = splt + 1;
25952 }
25953 }
25954
25955 /* L120: */
25956 }
25957
25958 *info = 2;
25959 return 0;
25960
25961 /* end IWHILB */
25962
25963 L130:
25964
25965 /* L140: */
25966 ;
25967 }
25968
25969 *info = 3;
25970 return 0;
25971
25972 /* end IWHILA */
25973
25974 L150:
25975
25976 /* Move q's to the front. */
25977
25978 i__1 = *n;
25979 for (k = 2; k <= i__1; ++k) {
25980 z__[k] = z__[(k << 2) - 3];
25981 /* L160: */
25982 }
25983
25984 /* Sort and compute sum of eigenvalues. */
25985
25986 slasrt_("D", n, &z__[1], &iinfo);
25987
25988 e = 0.f;
25989 for (k = *n; k >= 1; --k) {
25990 e += z__[k];
25991 /* L170: */
25992 }
25993
25994 /* Store trace, sum(eigenvalues) and information on performance. */
25995
25996 z__[(*n << 1) + 1] = trace;
25997 z__[(*n << 1) + 2] = e;
25998 z__[(*n << 1) + 3] = (real) iter;
25999 /* Computing 2nd power */
26000 i__1 = *n;
26001 z__[(*n << 1) + 4] = (real) ndiv / (real) (i__1 * i__1);
26002 z__[(*n << 1) + 5] = nfail * 100.f / (real) iter;
26003 return 0;
26004
26005 /* End of SLASQ2 */
26006
26007 } /* slasq2_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 26011 of file lapackblas.cpp. References dabs, df2cmax, df2cmin, doublereal, f2cmax, f2cmin, integer, logical, nn(), real, slamch_(), slasq4_(), slasq5_(), slasq6_(), sqrt(), and t. Referenced by slasq2_(). 26014 {
26015 /* -- LAPACK auxiliary routine (version 3.0) --
26016 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
26017 Courant Institute, Argonne National Lab, and Rice University
26018 May 17, 2000
26019
26020
26021 Purpose
26022 =======
26023
26024 SLASQ3 checks for deflation, computes a shift (TAU) and calls dqds.
26025 In case of failure it changes shifts, and tries again until output
26026 is positive.
26027
26028 Arguments
26029 =========
26030
26031 I0 (input) INTEGER
26032 First index.
26033
26034 N0 (input) INTEGER
26035 Last index.
26036
26037 Z (input) REAL array, dimension ( 4*N )
26038 Z holds the qd array.
26039
26040 PP (input) INTEGER
26041 PP=0 for ping, PP=1 for pong.
26042
26043 DMIN (output) REAL
26044 Minimum value of d.
26045
26046 SIGMA (output) REAL
26047 Sum of shifts used in current segment.
26048
26049 DESIG (input/output) REAL
26050 Lower order part of SIGMA
26051
26052 QMAX (input) REAL
26053 Maximum value of q.
26054
26055 NFAIL (output) INTEGER
26056 Number of times shift was too big.
26057
26058 ITER (output) INTEGER
26059 Number of iterations.
26060
26061 NDIV (output) INTEGER
26062 Number of divisions.
26063
26064 TTYPE (output) INTEGER
26065 Shift type.
26066
26067 IEEE (input) LOGICAL
26068 Flag for IEEE or non IEEE arithmetic (passed to SLASQ5).
26069
26070 =====================================================================
26071
26072 Parameter adjustments */
26073 /* Initialized data */
26074 static integer ttype = 0;
26075 static real dmin1 = 0.f;
26076 static real dmin2 = 0.f;
26077 static real dn = 0.f;
26078 static real dn1 = 0.f;
26079 static real dn2 = 0.f;
26080 static real tau = 0.f;
26081 /* System generated locals */
26082 integer i__1;
26083 real r__1, r__2;
26084 /* Builtin functions */
26085 //double sqrt(doublereal);
26086 /* Local variables */
26087 static real temp, s, t;
26088 static integer j4;
26089 extern /* Subroutine */ int slasq4_(integer *, integer *, real *, integer
26090 *, integer *, real *, real *, real *, real *, real *, real *,
26091 real *, integer *), slasq5_(integer *, integer *, real *, integer
26092 *, real *, real *, real *, real *, real *, real *, real *,
26093 logical *), slasq6_(integer *, integer *, real *, integer *, real
26094 *, real *, real *, real *, real *, real *);
26095 static integer nn;
26096 extern doublereal slamch_(const char *);
26097 static real safmin, eps, tol;
26098 static integer n0in, ipn4;
26099 static real tol2;
26100
26101 --z__;
26102
26103 /* Function Body */
26104
26105 n0in = *n0;
26106 eps = slamch_("Precision");
26107 safmin = slamch_("Safe minimum");
26108 tol = eps * 100.f;
26109 /* Computing 2nd power */
26110 r__1 = tol;
26111 tol2 = r__1 * r__1;
26112
26113 /* Check for deflation. */
26114
26115 L10:
26116
26117 if (*n0 < *i0) {
26118 return 0;
26119 }
26120 if (*n0 == *i0) {
26121 goto L20;
26122 }
26123 nn = (*n0 << 2) + *pp;
26124 if (*n0 == *i0 + 1) {
26125 goto L40;
26126 }
26127
26128 /* Check whether E(N0-1) is negligible, 1 eigenvalue. */
26129
26130 if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) -
26131 4] > tol2 * z__[nn - 7]) {
26132 goto L30;
26133 }
26134
26135 L20:
26136
26137 z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma;
26138 --(*n0);
26139 goto L10;
26140
26141 /* Check whether E(N0-2) is negligible, 2 eigenvalues. */
26142
26143 L30:
26144
26145 if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[
26146 nn - 11]) {
26147 goto L50;
26148 }
26149
26150 L40:
26151
26152 if (z__[nn - 3] > z__[nn - 7]) {
26153 s = z__[nn - 3];
26154 z__[nn - 3] = z__[nn - 7];
26155 z__[nn - 7] = s;
26156 }
26157 if (z__[nn - 5] > z__[nn - 3] * tol2) {
26158 t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5f;
26159 s = z__[nn - 3] * (z__[nn - 5] / t);
26160 if (s <= t) {
26161 s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.f) + 1.f)));
26162 } else {
26163 s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s)));
26164 }
26165 t = z__[nn - 7] + (s + z__[nn - 5]);
26166 z__[nn - 3] *= z__[nn - 7] / t;
26167 z__[nn - 7] = t;
26168 }
26169 z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma;
26170 z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma;
26171 *n0 += -2;
26172 goto L10;
26173
26174 L50:
26175
26176 /* Reverse the qd-array, if warranted. */
26177
26178 if (*dmin__ <= 0.f || *n0 < n0in) {
26179 if (z__[(*i0 << 2) + *pp - 3] * 1.5f < z__[(*n0 << 2) + *pp - 3]) {
26180 ipn4 = *i0 + *n0 << 2;
26181 i__1 = *i0 + *n0 - 1 << 1;
26182 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
26183 temp = z__[j4 - 3];
26184 z__[j4 - 3] = z__[ipn4 - j4 - 3];
26185 z__[ipn4 - j4 - 3] = temp;
26186 temp = z__[j4 - 2];
26187 z__[j4 - 2] = z__[ipn4 - j4 - 2];
26188 z__[ipn4 - j4 - 2] = temp;
26189 temp = z__[j4 - 1];
26190 z__[j4 - 1] = z__[ipn4 - j4 - 5];
26191 z__[ipn4 - j4 - 5] = temp;
26192 temp = z__[j4];
26193 z__[j4] = z__[ipn4 - j4 - 4];
26194 z__[ipn4 - j4 - 4] = temp;
26195 /* L60: */
26196 }
26197 if (*n0 - *i0 <= 4) {
26198 z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1];
26199 z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp];
26200 }
26201 /* Computing MIN */
26202 r__1 = dmin2, r__2 = z__[(*n0 << 2) + *pp - 1];
26203 dmin2 = df2cmin(r__1,r__2);
26204 /* Computing MIN */
26205 r__1 = z__[(*n0 << 2) + *pp - 1], r__2 = z__[(*i0 << 2) + *pp - 1]
26206 , r__1 = f2cmin(r__1,r__2), r__2 = z__[(*i0 << 2) + *pp + 3];
26207 z__[(*n0 << 2) + *pp - 1] = df2cmin(r__1,r__2);
26208 /* Computing MIN */
26209 r__1 = z__[(*n0 << 2) - *pp], r__2 = z__[(*i0 << 2) - *pp], r__1 =
26210 f2cmin(r__1,r__2), r__2 = z__[(*i0 << 2) - *pp + 4];
26211 z__[(*n0 << 2) - *pp] = df2cmin(r__1,r__2);
26212 /* Computing MAX */
26213 r__1 = *qmax, r__2 = z__[(*i0 << 2) + *pp - 3], r__1 = f2cmax(r__1,
26214 r__2), r__2 = z__[(*i0 << 2) + *pp + 1];
26215 *qmax = df2cmax(r__1,r__2);
26216 *dmin__ = 0.f;
26217 }
26218 }
26219
26220 /* L70:
26221
26222 Computing MIN */
26223 r__1 = z__[(*n0 << 2) + *pp - 1], r__2 = z__[(*n0 << 2) + *pp - 9], r__1 =
26224 f2cmin(r__1,r__2), r__2 = dmin2 + z__[(*n0 << 2) - *pp];
26225 if (*dmin__ < 0.f || safmin * *qmax < df2cmin(r__1,r__2)) {
26226
26227 /* Choose a shift. */
26228
26229 slasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, &dmin1, &dmin2, &dn, &dn1,
26230 &dn2, &tau, &ttype);
26231
26232 /* Call dqds until DMIN > 0. */
26233
26234 L80:
26235
26236 slasq5_(i0, n0, &z__[1], pp, &tau, dmin__, &dmin1, &dmin2, &dn, &dn1,
26237 &dn2, ieee);
26238
26239 *ndiv += *n0 - *i0 + 2;
26240 ++(*iter);
26241
26242 /* Check status. */
26243
26244 if (*dmin__ >= 0.f && dmin1 > 0.f) {
26245
26246 /* Success. */
26247
26248 goto L100;
26249
26250 } else if (*dmin__ < 0.f && dmin1 > 0.f && z__[(*n0 - 1 << 2) - *pp] <
26251 tol * (*sigma + dn1) && dabs(dn) < tol * *sigma) {
26252
26253 /* Convergence hidden by negative DN. */
26254
26255 z__[(*n0 - 1 << 2) - *pp + 2] = 0.f;
26256 *dmin__ = 0.f;
26257 goto L100;
26258 } else if (*dmin__ < 0.f) {
26259
26260 /* TAU too big. Select new TAU and try again. */
26261
26262 ++(*nfail);
26263 if (ttype < -22) {
26264
26265 /* Failed twice. Play it safe. */
26266
26267 tau = 0.f;
26268 } else if (dmin1 > 0.f) {
26269
26270 /* Late failure. Gives excellent shift. */
26271
26272 tau = (tau + *dmin__) * (1.f - eps * 2.f);
26273 ttype += -11;
26274 } else {
26275
26276 /* Early failure. Divide by 4. */
26277
26278 tau *= .25f;
26279 ttype += -12;
26280 }
26281 goto L80;
26282 } else if (*dmin__ != *dmin__) {
26283
26284 /* NaN. */
26285
26286 tau = 0.f;
26287 goto L80;
26288 } else {
26289
26290 /* Possible underflow. Play it safe. */
26291
26292 goto L90;
26293 }
26294 }
26295
26296 /* Risk of underflow. */
26297
26298 L90:
26299 slasq6_(i0, n0, &z__[1], pp, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2);
26300 *ndiv += *n0 - *i0 + 2;
26301 ++(*iter);
26302 tau = 0.f;
26303
26304 L100:
26305 if (tau < *sigma) {
26306 *desig += tau;
26307 t = *sigma + *desig;
26308 *desig -= t - *sigma;
26309 } else {
26310 t = *sigma + tau;
26311 *desig = *sigma - (t - tau) + *desig;
26312 }
26313 *sigma = t;
26314
26315 return 0;
26316
26317 /* End of SLASQ3 */
26318
26319 } /* slasq3_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 26323 of file lapackblas.cpp. References df2cmax, df2cmin, integer, nn(), real, and sqrt(). Referenced by slasq3_(). 26326 {
26327 /* Initialized data */
26328
26329 static real g = 0.f;
26330
26331 /* System generated locals */
26332 integer i__1;
26333 real r__1, r__2;
26334
26335 /* Builtin functions */
26336 //double sqrt(doublereal);
26337
26338 /* Local variables */
26339 static real s, a2, b1, b2;
26340 static integer i4, nn, np;
26341 static real gam, gap1, gap2;
26342
26343
26344 /* -- LAPACK auxiliary routine (version 3.0) --
26345 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
26346 Courant Institute, Argonne National Lab, and Rice University
26347 October 31, 1999
26348
26349
26350 Purpose
26351 =======
26352
26353 SLASQ4 computes an approximation TAU to the smallest eigenvalue
26354 using values of d from the previous transform.
26355
26356 I0 (input) INTEGER
26357 First index.
26358
26359 N0 (input) INTEGER
26360 Last index.
26361
26362 Z (input) REAL array, dimension ( 4*N )
26363 Z holds the qd array.
26364
26365 PP (input) INTEGER
26366 PP=0 for ping, PP=1 for pong.
26367
26368 NOIN (input) INTEGER
26369 The value of N0 at start of EIGTEST.
26370
26371 DMIN (input) REAL
26372 Minimum value of d.
26373
26374 DMIN1 (input) REAL
26375 Minimum value of d, excluding D( N0 ).
26376
26377 DMIN2 (input) REAL
26378 Minimum value of d, excluding D( N0 ) and D( N0-1 ).
26379
26380 DN (input) REAL
26381 d(N)
26382
26383 DN1 (input) REAL
26384 d(N-1)
26385
26386 DN2 (input) REAL
26387 d(N-2)
26388
26389 TAU (output) REAL
26390 This is the shift.
26391
26392 TTYPE (output) INTEGER
26393 Shift type.
26394
26395 Further Details
26396 ===============
26397 CNST1 = 9/16
26398
26399 =====================================================================
26400
26401 Parameter adjustments */
26402 --z__;
26403
26404 /* Function Body
26405
26406 A negative DMIN forces the shift to take that absolute value
26407 TTYPE records the type of shift. */
26408
26409 if (*dmin__ <= 0.f) {
26410 *tau = -(*dmin__);
26411 *ttype = -1;
26412 return 0;
26413 }
26414
26415 nn = (*n0 << 2) + *pp;
26416 if (*n0in == *n0) {
26417
26418 /* No eigenvalues deflated. */
26419
26420 if (*dmin__ == *dn || *dmin__ == *dn1) {
26421
26422 b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]);
26423 b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]);
26424 a2 = z__[nn - 7] + z__[nn - 5];
26425
26426 /* Cases 2 and 3. */
26427
26428 if (*dmin__ == *dn && *dmin1 == *dn1) {
26429 gap2 = *dmin2 - a2 - *dmin2 * .25f;
26430 if (gap2 > 0.f && gap2 > b2) {
26431 gap1 = a2 - *dn - b2 / gap2 * b2;
26432 } else {
26433 gap1 = a2 - *dn - (b1 + b2);
26434 }
26435 if (gap1 > 0.f && gap1 > b1) {
26436 /* Computing MAX */
26437 r__1 = *dn - b1 / gap1 * b1, r__2 = *dmin__ * .5f;
26438 s = df2cmax(r__1,r__2);
26439 *ttype = -2;
26440 } else {
26441 s = 0.f;
26442 if (*dn > b1) {
26443 s = *dn - b1;
26444 }
26445 if (a2 > b1 + b2) {
26446 /* Computing MIN */
26447 r__1 = s, r__2 = a2 - (b1 + b2);
26448 s = df2cmin(r__1,r__2);
26449 }
26450 /* Computing MAX */
26451 r__1 = s, r__2 = *dmin__ * .333f;
26452 s = df2cmax(r__1,r__2);
26453 *ttype = -3;
26454 }
26455 } else {
26456
26457 /* Case 4. */
26458
26459 *ttype = -4;
26460 s = *dmin__ * .25f;
26461 if (*dmin__ == *dn) {
26462 gam = *dn;
26463 a2 = 0.f;
26464 if (z__[nn - 5] > z__[nn - 7]) {
26465 return 0;
26466 }
26467 b2 = z__[nn - 5] / z__[nn - 7];
26468 np = nn - 9;
26469 } else {
26470 np = nn - (*pp << 1);
26471 b2 = z__[np - 2];
26472 gam = *dn1;
26473 if (z__[np - 4] > z__[np - 2]) {
26474 return 0;
26475 }
26476 a2 = z__[np - 4] / z__[np - 2];
26477 if (z__[nn - 9] > z__[nn - 11]) {
26478 return 0;
26479 }
26480 b2 = z__[nn - 9] / z__[nn - 11];
26481 np = nn - 13;
26482 }
26483
26484 /* Approximate contribution to norm squared from I < NN-1. */
26485
26486 a2 += b2;
26487 i__1 = (*i0 << 2) - 1 + *pp;
26488 for (i4 = np; i4 >= i__1; i4 += -4) {
26489 if (b2 == 0.f) {
26490 goto L20;
26491 }
26492 b1 = b2;
26493 if (z__[i4] > z__[i4 - 2]) {
26494 return 0;
26495 }
26496 b2 *= z__[i4] / z__[i4 - 2];
26497 a2 += b2;
26498 if (df2cmax(b2,b1) * 100.f < a2 || .563f < a2) {
26499 goto L20;
26500 }
26501 /* L10: */
26502 }
26503 L20:
26504 a2 *= 1.05f;
26505
26506 /* Rayleigh quotient residual bound. */
26507
26508 if (a2 < .563f) {
26509 s = gam * (1.f - sqrt(a2)) / (a2 + 1.f);
26510 }
26511 }
26512 } else if (*dmin__ == *dn2) {
26513
26514 /* Case 5. */
26515
26516 *ttype = -5;
26517 s = *dmin__ * .25f;
26518
26519 /* Compute contribution to norm squared from I > NN-2. */
26520
26521 np = nn - (*pp << 1);
26522 b1 = z__[np - 2];
26523 b2 = z__[np - 6];
26524 gam = *dn2;
26525 if (z__[np - 8] > b2 || z__[np - 4] > b1) {
26526 return 0;
26527 }
26528 a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.f);
26529
26530 /* Approximate contribution to norm squared from I < NN-2. */
26531
26532 if (*n0 - *i0 > 2) {
26533 b2 = z__[nn - 13] / z__[nn - 15];
26534 a2 += b2;
26535 i__1 = (*i0 << 2) - 1 + *pp;
26536 for (i4 = nn - 17; i4 >= i__1; i4 += -4) {
26537 if (b2 == 0.f) {
26538 goto L40;
26539 }
26540 b1 = b2;
26541 if (z__[i4] > z__[i4 - 2]) {
26542 return 0;
26543 }
26544 b2 *= z__[i4] / z__[i4 - 2];
26545 a2 += b2;
26546 if (df2cmax(b2,b1) * 100.f < a2 || .563f < a2) {
26547 goto L40;
26548 }
26549 /* L30: */
26550 }
26551 L40:
26552 a2 *= 1.05f;
26553 }
26554
26555 if (a2 < .563f) {
26556 s = gam * (1.f - sqrt(a2)) / (a2 + 1.f);
26557 }
26558 } else {
26559
26560 /* Case 6, no information to guide us. */
26561
26562 if (*ttype == -6) {
26563 g += (1.f - g) * .333f;
26564 } else if (*ttype == -18) {
26565 g = .083250000000000005f;
26566 } else {
26567 g = .25f;
26568 }
26569 s = g * *dmin__;
26570 *ttype = -6;
26571 }
26572
26573 } else if (*n0in == *n0 + 1) {
26574
26575 /* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */
26576
26577 if (*dmin1 == *dn1 && *dmin2 == *dn2) {
26578
26579 /* Cases 7 and 8. */
26580
26581 *ttype = -7;
26582 s = *dmin1 * .333f;
26583 if (z__[nn - 5] > z__[nn - 7]) {
26584 return 0;
26585 }
26586 b1 = z__[nn - 5] / z__[nn - 7];
26587 b2 = b1;
26588 if (b2 == 0.f) {
26589 goto L60;
26590 }
26591 i__1 = (*i0 << 2) - 1 + *pp;
26592 for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
26593 a2 = b1;
26594 if (z__[i4] > z__[i4 - 2]) {
26595 return 0;
26596 }
26597 b1 *= z__[i4] / z__[i4 - 2];
26598 b2 += b1;
26599 if (df2cmax(b1,a2) * 100.f < b2) {
26600 goto L60;
26601 }
26602 /* L50: */
26603 }
26604 L60:
26605 b2 = sqrt(b2 * 1.05f);
26606 /* Computing 2nd power */
26607 r__1 = b2;
26608 a2 = *dmin1 / (r__1 * r__1 + 1.f);
26609 gap2 = *dmin2 * .5f - a2;
26610 if (gap2 > 0.f && gap2 > b2 * a2) {
26611 /* Computing MAX */
26612 r__1 = s, r__2 = a2 * (1.f - a2 * 1.01f * (b2 / gap2) * b2);
26613 s = df2cmax(r__1,r__2);
26614 } else {
26615 /* Computing MAX */
26616 r__1 = s, r__2 = a2 * (1.f - b2 * 1.01f);
26617 s = df2cmax(r__1,r__2);
26618 *ttype = -8;
26619 }
26620 } else {
26621
26622 /* Case 9. */
26623
26624 s = *dmin1 * .25f;
26625 if (*dmin1 == *dn1) {
26626 s = *dmin1 * .5f;
26627 }
26628 *ttype = -9;
26629 }
26630
26631 } else if (*n0in == *n0 + 2) {
26632
26633 /* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN.
26634
26635 Cases 10 and 11. */
26636
26637 if (*dmin2 == *dn2 && z__[nn - 5] * 2.f < z__[nn - 7]) {
26638 *ttype = -10;
26639 s = *dmin2 * .333f;
26640 if (z__[nn - 5] > z__[nn - 7]) {
26641 return 0;
26642 }
26643 b1 = z__[nn - 5] / z__[nn - 7];
26644 b2 = b1;
26645 if (b2 == 0.f) {
26646 goto L80;
26647 }
26648 i__1 = (*i0 << 2) - 1 + *pp;
26649 for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
26650 if (z__[i4] > z__[i4 - 2]) {
26651 return 0;
26652 }
26653 b1 *= z__[i4] / z__[i4 - 2];
26654 b2 += b1;
26655 if (b1 * 100.f < b2) {
26656 goto L80;
26657 }
26658 /* L70: */
26659 }
26660 L80:
26661 b2 = sqrt(b2 * 1.05f);
26662 /* Computing 2nd power */
26663 r__1 = b2;
26664 a2 = *dmin2 / (r__1 * r__1 + 1.f);
26665 gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[
26666 nn - 9]) - a2;
26667 if (gap2 > 0.f && gap2 > b2 * a2) {
26668 /* Computing MAX */
26669 r__1 = s, r__2 = a2 * (1.f - a2 * 1.01f * (b2 / gap2) * b2);
26670 s = df2cmax(r__1,r__2);
26671 } else {
26672 /* Computing MAX */
26673 r__1 = s, r__2 = a2 * (1.f - b2 * 1.01f);
26674 s = df2cmax(r__1,r__2);
26675 }
26676 } else {
26677 s = *dmin2 * .25f;
26678 *ttype = -11;
26679 }
26680 } else if (*n0in > *n0 + 2) {
26681
26682 /* Case 12, more than two eigenvalues deflated. No information. */
26683
26684 s = 0.f;
26685 *ttype = -12;
26686 }
26687
26688 *tau = s;
26689 return 0;
26690
26691 /* End of SLASQ4 */
26692
26693 } /* slasq4_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 26697 of file lapackblas.cpp. References df2cmin, integer, and real. Referenced by slasq3_(). 26700 {
26701 /* -- LAPACK auxiliary routine (version 3.0) --
26702 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
26703 Courant Institute, Argonne National Lab, and Rice University
26704 May 17, 2000
26705
26706
26707 Purpose
26708 =======
26709
26710 SLASQ5 computes one dqds transform in ping-pong form, one
26711 version for IEEE machines another for non IEEE machines.
26712
26713 Arguments
26714 =========
26715
26716 I0 (input) INTEGER
26717 First index.
26718
26719 N0 (input) INTEGER
26720 Last index.
26721
26722 Z (input) REAL array, dimension ( 4*N )
26723 Z holds the qd array. EMIN is stored in Z(4*N0) to avoid
26724 an extra argument.
26725
26726 PP (input) INTEGER
26727 PP=0 for ping, PP=1 for pong.
26728
26729 TAU (input) REAL
26730 This is the shift.
26731
26732 DMIN (output) REAL
26733 Minimum value of d.
26734
26735 DMIN1 (output) REAL
26736 Minimum value of d, excluding D( N0 ).
26737
26738 DMIN2 (output) REAL
26739 Minimum value of d, excluding D( N0 ) and D( N0-1 ).
26740
26741 DN (output) REAL
26742 d(N0), the last value of d.
26743
26744 DNM1 (output) REAL
26745 d(N0-1).
26746
26747 DNM2 (output) REAL
26748 d(N0-2).
26749
26750 IEEE (input) LOGICAL
26751 Flag for IEEE or non IEEE arithmetic.
26752
26753 =====================================================================
26754
26755
26756 Parameter adjustments */
26757 /* System generated locals */
26758 integer i__1;
26759 real r__1, r__2;
26760 /* Local variables */
26761 static real emin, temp, d__;
26762 static integer j4, j4p2;
26763
26764 --z__;
26765
26766 /* Function Body */
26767 if (*n0 - *i0 - 1 <= 0) {
26768 return 0;
26769 }
26770
26771 j4 = (*i0 << 2) + *pp - 3;
26772 emin = z__[j4 + 4];
26773 d__ = z__[j4] - *tau;
26774 *dmin__ = d__;
26775 *dmin1 = -z__[j4];
26776
26777 if (*ieee) {
26778
26779 /* Code for IEEE arithmetic. */
26780
26781 if (*pp == 0) {
26782 i__1 = *n0 - 3 << 2;
26783 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
26784 z__[j4 - 2] = d__ + z__[j4 - 1];
26785 temp = z__[j4 + 1] / z__[j4 - 2];
26786 d__ = d__ * temp - *tau;
26787 *dmin__ = df2cmin(*dmin__,d__);
26788 z__[j4] = z__[j4 - 1] * temp;
26789 /* Computing MIN */
26790 r__1 = z__[j4];
26791 emin = df2cmin(r__1,emin);
26792 /* L10: */
26793 }
26794 } else {
26795 i__1 = *n0 - 3 << 2;
26796 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
26797 z__[j4 - 3] = d__ + z__[j4];
26798 temp = z__[j4 + 2] / z__[j4 - 3];
26799 d__ = d__ * temp - *tau;
26800 *dmin__ = df2cmin(*dmin__,d__);
26801 z__[j4 - 1] = z__[j4] * temp;
26802 /* Computing MIN */
26803 r__1 = z__[j4 - 1];
26804 emin = df2cmin(r__1,emin);
26805 /* L20: */
26806 }
26807 }
26808
26809 /* Unroll last two steps. */
26810
26811 *dnm2 = d__;
26812 *dmin2 = *dmin__;
26813 j4 = (*n0 - 2 << 2) - *pp;
26814 j4p2 = j4 + (*pp << 1) - 1;
26815 z__[j4 - 2] = *dnm2 + z__[j4p2];
26816 z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
26817 *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau;
26818 *dmin__ = df2cmin(*dmin__,*dnm1);
26819
26820 *dmin1 = *dmin__;
26821 j4 += 4;
26822 j4p2 = j4 + (*pp << 1) - 1;
26823 z__[j4 - 2] = *dnm1 + z__[j4p2];
26824 z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
26825 *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau;
26826 *dmin__ = df2cmin(*dmin__,*dn);
26827
26828 } else {
26829
26830 /* Code for non IEEE arithmetic. */
26831
26832 if (*pp == 0) {
26833 i__1 = *n0 - 3 << 2;
26834 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
26835 z__[j4 - 2] = d__ + z__[j4 - 1];
26836 if (d__ < 0.f) {
26837 return 0;
26838 } else {
26839 z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]);
26840 d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau;
26841 }
26842 *dmin__ = df2cmin(*dmin__,d__);
26843 /* Computing MIN */
26844 r__1 = emin, r__2 = z__[j4];
26845 emin = df2cmin(r__1,r__2);
26846 /* L30: */
26847 }
26848 } else {
26849 i__1 = *n0 - 3 << 2;
26850 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
26851 z__[j4 - 3] = d__ + z__[j4];
26852 if (d__ < 0.f) {
26853 return 0;
26854 } else {
26855 z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]);
26856 d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau;
26857 }
26858 *dmin__ = df2cmin(*dmin__,d__);
26859 /* Computing MIN */
26860 r__1 = emin, r__2 = z__[j4 - 1];
26861 emin = df2cmin(r__1,r__2);
26862 /* L40: */
26863 }
26864 }
26865
26866 /* Unroll last two steps. */
26867
26868 *dnm2 = d__;
26869 *dmin2 = *dmin__;
26870 j4 = (*n0 - 2 << 2) - *pp;
26871 j4p2 = j4 + (*pp << 1) - 1;
26872 z__[j4 - 2] = *dnm2 + z__[j4p2];
26873 if (*dnm2 < 0.f) {
26874 return 0;
26875 } else {
26876 z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
26877 *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau;
26878 }
26879 *dmin__ = df2cmin(*dmin__,*dnm1);
26880
26881 *dmin1 = *dmin__;
26882 j4 += 4;
26883 j4p2 = j4 + (*pp << 1) - 1;
26884 z__[j4 - 2] = *dnm1 + z__[j4p2];
26885 if (*dnm1 < 0.f) {
26886 return 0;
26887 } else {
26888 z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
26889 *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau;
26890 }
26891 *dmin__ = df2cmin(*dmin__,*dn);
26892
26893 }
26894
26895 z__[j4 + 2] = *dn;
26896 z__[(*n0 << 2) - *pp] = emin;
26897 return 0;
26898
26899 /* End of SLASQ5 */
26900
26901 } /* slasq5_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 26905 of file lapackblas.cpp. References df2cmin, doublereal, integer, real, and slamch_(). Referenced by slasq3_(). 26908 {
26909 /* System generated locals */
26910 integer i__1;
26911 real r__1, r__2;
26912
26913 /* Local variables */
26914 static real emin, temp, d__;
26915 static integer j4;
26916 extern doublereal slamch_(const char *);
26917 static real safmin;
26918 static integer j4p2;
26919
26920
26921 /* -- LAPACK auxiliary routine (version 3.0) --
26922 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
26923 Courant Institute, Argonne National Lab, and Rice University
26924 October 31, 1999
26925
26926
26927 Purpose
26928 =======
26929
26930 SLASQ6 computes one dqd (shift equal to zero) transform in
26931 ping-pong form, with protection against underflow and overflow.
26932
26933 Arguments
26934 =========
26935
26936 I0 (input) INTEGER
26937 First index.
26938
26939 N0 (input) INTEGER
26940 Last index.
26941
26942 Z (input) REAL array, dimension ( 4*N )
26943 Z holds the qd array. EMIN is stored in Z(4*N0) to avoid
26944 an extra argument.
26945
26946 PP (input) INTEGER
26947 PP=0 for ping, PP=1 for pong.
26948
26949 DMIN (output) REAL
26950 Minimum value of d.
26951
26952 DMIN1 (output) REAL
26953 Minimum value of d, excluding D( N0 ).
26954
26955 DMIN2 (output) REAL
26956 Minimum value of d, excluding D( N0 ) and D( N0-1 ).
26957
26958 DN (output) REAL
26959 d(N0), the last value of d.
26960
26961 DNM1 (output) REAL
26962 d(N0-1).
26963
26964 DNM2 (output) REAL
26965 d(N0-2).
26966
26967 =====================================================================
26968
26969
26970 Parameter adjustments */
26971 --z__;
26972
26973 /* Function Body */
26974 if (*n0 - *i0 - 1 <= 0) {
26975 return 0;
26976 }
26977
26978 safmin = slamch_("Safe minimum");
26979 j4 = (*i0 << 2) + *pp - 3;
26980 emin = z__[j4 + 4];
26981 d__ = z__[j4];
26982 *dmin__ = d__;
26983
26984 if (*pp == 0) {
26985 i__1 = *n0 - 3 << 2;
26986 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
26987 z__[j4 - 2] = d__ + z__[j4 - 1];
26988 if (z__[j4 - 2] == 0.f) {
26989 z__[j4] = 0.f;
26990 d__ = z__[j4 + 1];
26991 *dmin__ = d__;
26992 emin = 0.f;
26993 } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4
26994 - 2] < z__[j4 + 1]) {
26995 temp = z__[j4 + 1] / z__[j4 - 2];
26996 z__[j4] = z__[j4 - 1] * temp;
26997 d__ *= temp;
26998 } else {
26999 z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]);
27000 d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]);
27001 }
27002 *dmin__ = df2cmin(*dmin__,d__);
27003 /* Computing MIN */
27004 r__1 = emin, r__2 = z__[j4];
27005 emin = df2cmin(r__1,r__2);
27006 /* L10: */
27007 }
27008 } else {
27009 i__1 = *n0 - 3 << 2;
27010 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
27011 z__[j4 - 3] = d__ + z__[j4];
27012 if (z__[j4 - 3] == 0.f) {
27013 z__[j4 - 1] = 0.f;
27014 d__ = z__[j4 + 2];
27015 *dmin__ = d__;
27016 emin = 0.f;
27017 } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4
27018 - 3] < z__[j4 + 2]) {
27019 temp = z__[j4 + 2] / z__[j4 - 3];
27020 z__[j4 - 1] = z__[j4] * temp;
27021 d__ *= temp;
27022 } else {
27023 z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]);
27024 d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]);
27025 }
27026 *dmin__ = df2cmin(*dmin__,d__);
27027 /* Computing MIN */
27028 r__1 = emin, r__2 = z__[j4 - 1];
27029 emin = df2cmin(r__1,r__2);
27030 /* L20: */
27031 }
27032 }
27033
27034 /* Unroll last two steps. */
27035
27036 *dnm2 = d__;
27037 *dmin2 = *dmin__;
27038 j4 = (*n0 - 2 << 2) - *pp;
27039 j4p2 = j4 + (*pp << 1) - 1;
27040 z__[j4 - 2] = *dnm2 + z__[j4p2];
27041 if (z__[j4 - 2] == 0.f) {
27042 z__[j4] = 0.f;
27043 *dnm1 = z__[j4p2 + 2];
27044 *dmin__ = *dnm1;
27045 emin = 0.f;
27046 } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] <
27047 z__[j4p2 + 2]) {
27048 temp = z__[j4p2 + 2] / z__[j4 - 2];
27049 z__[j4] = z__[j4p2] * temp;
27050 *dnm1 = *dnm2 * temp;
27051 } else {
27052 z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
27053 *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]);
27054 }
27055 *dmin__ = df2cmin(*dmin__,*dnm1);
27056
27057 *dmin1 = *dmin__;
27058 j4 += 4;
27059 j4p2 = j4 + (*pp << 1) - 1;
27060 z__[j4 - 2] = *dnm1 + z__[j4p2];
27061 if (z__[j4 - 2] == 0.f) {
27062 z__[j4] = 0.f;
27063 *dn = z__[j4p2 + 2];
27064 *dmin__ = *dn;
27065 emin = 0.f;
27066 } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] <
27067 z__[j4p2 + 2]) {
27068 temp = z__[j4p2 + 2] / z__[j4 - 2];
27069 z__[j4] = z__[j4p2] * temp;
27070 *dn = *dnm1 * temp;
27071 } else {
27072 z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
27073 *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]);
27074 }
27075 *dmin__ = df2cmin(*dmin__,*dn);
27076
27077 z__[j4 + 2] = *dn;
27078 z__[(*n0 << 2) - *pp] = emin;
27079 return 0;
27080
27081 /* End of SLASQ6 */
27082
27083 } /* slasq6_ */
|
|
||||||||||||||||||||||||||||||||||||||||
|
Definition at line 5499 of file lapackblas.cpp. References a_ref, f2cmax, integer, logical, lsame_(), real, and xerbla_(). Referenced by sbdsqr_(), and ssteqr_(). 05501 {
05502 /* -- LAPACK auxiliary routine (version 3.0) --
05503 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
05504 Courant Institute, Argonne National Lab, and Rice University
05505 October 31, 1992
05506
05507
05508 Purpose
05509 =======
05510
05511 SLASR performs the transformation
05512
05513 A := P*A, when SIDE = 'L' or 'l' ( Left-hand side )
05514
05515 A := A*P', when SIDE = 'R' or 'r' ( Right-hand side )
05516
05517 where A is an m by n real matrix and P is an orthogonal matrix,
05518 consisting of a sequence of plane rotations determined by the
05519 parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l'
05520 and z = n when SIDE = 'R' or 'r' ):
05521
05522 When DIRECT = 'F' or 'f' ( Forward sequence ) then
05523
05524 P = P( z - 1 )*...*P( 2 )*P( 1 ),
05525
05526 and when DIRECT = 'B' or 'b' ( Backward sequence ) then
05527
05528 P = P( 1 )*P( 2 )*...*P( z - 1 ),
05529
05530 where P( k ) is a plane rotation matrix for the following planes:
05531
05532 when PIVOT = 'V' or 'v' ( Variable pivot ),
05533 the plane ( k, k + 1 )
05534
05535 when PIVOT = 'T' or 't' ( Top pivot ),
05536 the plane ( 1, k + 1 )
05537
05538 when PIVOT = 'B' or 'b' ( Bottom pivot ),
05539 the plane ( k, z )
05540
05541 c( k ) and s( k ) must contain the cosine and sine that define the
05542 matrix P( k ). The two by two plane rotation part of the matrix
05543 P( k ), R( k ), is assumed to be of the form
05544
05545 R( k ) = ( c( k ) s( k ) ).
05546 ( -s( k ) c( k ) )
05547
05548 This version vectorises across rows of the array A when SIDE = 'L'.
05549
05550 Arguments
05551 =========
05552
05553 SIDE (input) CHARACTER*1
05554 Specifies whether the plane rotation matrix P is applied to
05555 A on the left or the right.
05556 = 'L': Left, compute A := P*A
05557 = 'R': Right, compute A:= A*P'
05558
05559 DIRECT (input) CHARACTER*1
05560 Specifies whether P is a forward or backward sequence of
05561 plane rotations.
05562 = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 )
05563 = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 )
05564
05565 PIVOT (input) CHARACTER*1
05566 Specifies the plane for which P(k) is a plane rotation
05567 matrix.
05568 = 'V': Variable pivot, the plane (k,k+1)
05569 = 'T': Top pivot, the plane (1,k+1)
05570 = 'B': Bottom pivot, the plane (k,z)
05571
05572 M (input) INTEGER
05573 The number of rows of the matrix A. If m <= 1, an immediate
05574 return is effected.
05575
05576 N (input) INTEGER
05577 The number of columns of the matrix A. If n <= 1, an
05578 immediate return is effected.
05579
05580 C, S (input) REAL arrays, dimension
05581 (M-1) if SIDE = 'L'
05582 (N-1) if SIDE = 'R'
05583 c(k) and s(k) contain the cosine and sine that define the
05584 matrix P(k). The two by two plane rotation part of the
05585 matrix P(k), R(k), is assumed to be of the form
05586 R( k ) = ( c( k ) s( k ) ).
05587 ( -s( k ) c( k ) )
05588
05589 A (input/output) REAL array, dimension (LDA,N)
05590 The m by n matrix A. On exit, A is overwritten by P*A if
05591 SIDE = 'R' or by A*P' if SIDE = 'L'.
05592
05593 LDA (input) INTEGER
05594 The leading dimension of the array A. LDA >= f2cmax(1,M).
05595
05596 =====================================================================
05597
05598
05599 Test the input parameters
05600
05601 Parameter adjustments */
05602 /* System generated locals */
05603 integer a_dim1, a_offset, i__1, i__2;
05604 /* Local variables */
05605 static integer info;
05606 static real temp;
05607 static integer i__, j;
05608 extern logical lsame_(const char *, const char *);
05609 static real ctemp, stemp;
05610 extern /* Subroutine */ int xerbla_(const char *, integer *);
05611 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
05612
05613 --c__;
05614 --s;
05615 a_dim1 = *lda;
05616 a_offset = 1 + a_dim1 * 1;
05617 a -= a_offset;
05618
05619 /* Function Body */
05620 info = 0;
05621 if (! (lsame_(side, "L") || lsame_(side, "R"))) {
05622 info = 1;
05623 } else if (! (lsame_(pivot, "V") || lsame_(pivot,
05624 "T") || lsame_(pivot, "B"))) {
05625 info = 2;
05626 } else if (! (lsame_(direct, "F") || lsame_(direct,
05627 "B"))) {
05628 info = 3;
05629 } else if (*m < 0) {
05630 info = 4;
05631 } else if (*n < 0) {
05632 info = 5;
05633 } else if (*lda < f2cmax(1,*m)) {
05634 info = 9;
05635 }
05636 if (info != 0) {
05637 xerbla_("SLASR ", &info);
05638 return 0;
05639 }
05640
05641 /* Quick return if possible */
05642
05643 if (*m == 0 || *n == 0) {
05644 return 0;
05645 }
05646 if (lsame_(side, "L")) {
05647
05648 /* Form P * A */
05649
05650 if (lsame_(pivot, "V")) {
05651 if (lsame_(direct, "F")) {
05652 i__1 = *m - 1;
05653 for (j = 1; j <= i__1; ++j) {
05654 ctemp = c__[j];
05655 stemp = s[j];
05656 if (ctemp != 1.f || stemp != 0.f) {
05657 i__2 = *n;
05658 for (i__ = 1; i__ <= i__2; ++i__) {
05659 temp = a_ref(j + 1, i__);
05660 a_ref(j + 1, i__) = ctemp * temp - stemp * a_ref(
05661 j, i__);
05662 a_ref(j, i__) = stemp * temp + ctemp * a_ref(j,
05663 i__);
05664 /* L10: */
05665 }
05666 }
05667 /* L20: */
05668 }
05669 } else if (lsame_(direct, "B")) {
05670 for (j = *m - 1; j >= 1; --j) {
05671 ctemp = c__[j];
05672 stemp = s[j];
05673 if (ctemp != 1.f || stemp != 0.f) {
05674 i__1 = *n;
05675 for (i__ = 1; i__ <= i__1; ++i__) {
05676 temp = a_ref(j + 1, i__);
05677 a_ref(j + 1, i__) = ctemp * temp - stemp * a_ref(
05678 j, i__);
05679 a_ref(j, i__) = stemp * temp + ctemp * a_ref(j,
05680 i__);
05681 /* L30: */
05682 }
05683 }
05684 /* L40: */
05685 }
05686 }
05687 } else if (lsame_(pivot, "T")) {
05688 if (lsame_(direct, "F")) {
05689 i__1 = *m;
05690 for (j = 2; j <= i__1; ++j) {
05691 ctemp = c__[j - 1];
05692 stemp = s[j - 1];
05693 if (ctemp != 1.f || stemp != 0.f) {
05694 i__2 = *n;
05695 for (i__ = 1; i__ <= i__2; ++i__) {
05696 temp = a_ref(j, i__);
05697 a_ref(j, i__) = ctemp * temp - stemp * a_ref(1,
05698 i__);
05699 a_ref(1, i__) = stemp * temp + ctemp * a_ref(1,
05700 i__);
05701 /* L50: */
05702 }
05703 }
05704 /* L60: */
05705 }
05706 } else if (lsame_(direct, "B")) {
05707 for (j = *m; j >= 2; --j) {
05708 ctemp = c__[j - 1];
05709 stemp = s[j - 1];
05710 if (ctemp != 1.f || stemp != 0.f) {
05711 i__1 = *n;
05712 for (i__ = 1; i__ <= i__1; ++i__) {
05713 temp = a_ref(j, i__);
05714 a_ref(j, i__) = ctemp * temp - stemp * a_ref(1,
05715 i__);
05716 a_ref(1, i__) = stemp * temp + ctemp * a_ref(1,
05717 i__);
05718 /* L70: */
05719 }
05720 }
05721 /* L80: */
05722 }
05723 }
05724 } else if (lsame_(pivot, "B")) {
05725 if (lsame_(direct, "F")) {
05726 i__1 = *m - 1;
05727 for (j = 1; j <= i__1; ++j) {
05728 ctemp = c__[j];
05729 stemp = s[j];
05730 if (ctemp != 1.f || stemp != 0.f) {
05731 i__2 = *n;
05732 for (i__ = 1; i__ <= i__2; ++i__) {
05733 temp = a_ref(j, i__);
05734 a_ref(j, i__) = stemp * a_ref(*m, i__) + ctemp *
05735 temp;
05736 a_ref(*m, i__) = ctemp * a_ref(*m, i__) - stemp *
05737 temp;
05738 /* L90: */
05739 }
05740 }
05741 /* L100: */
05742 }
05743 } else if (lsame_(direct, "B")) {
05744 for (j = *m - 1; j >= 1; --j) {
05745 ctemp = c__[j];
05746 stemp = s[j];
05747 if (ctemp != 1.f || stemp != 0.f) {
05748 i__1 = *n;
05749 for (i__ = 1; i__ <= i__1; ++i__) {
05750 temp = a_ref(j, i__);
05751 a_ref(j, i__) = stemp * a_ref(*m, i__) + ctemp *
05752 temp;
05753 a_ref(*m, i__) = ctemp * a_ref(*m, i__) - stemp *
05754 temp;
05755 /* L110: */
05756 }
05757 }
05758 /* L120: */
05759 }
05760 }
05761 }
05762 } else if (lsame_(side, "R")) {
05763
05764 /* Form A * P' */
05765
05766 if (lsame_(pivot, "V")) {
05767 if (lsame_(direct, "F")) {
05768 i__1 = *n - 1;
05769 for (j = 1; j <= i__1; ++j) {
05770 ctemp = c__[j];
05771 stemp = s[j];
05772 if (ctemp != 1.f || stemp != 0.f) {
05773 i__2 = *m;
05774 for (i__ = 1; i__ <= i__2; ++i__) {
05775 temp = a_ref(i__, j + 1);
05776 a_ref(i__, j + 1) = ctemp * temp - stemp * a_ref(
05777 i__, j);
05778 a_ref(i__, j) = stemp * temp + ctemp * a_ref(i__,
05779 j);
05780 /* L130: */
05781 }
05782 }
05783 /* L140: */
05784 }
05785 } else if (lsame_(direct, "B")) {
05786 for (j = *n - 1; j >= 1; --j) {
05787 ctemp = c__[j];
05788 stemp = s[j];
05789 if (ctemp != 1.f || stemp != 0.f) {
05790 i__1 = *m;
05791 for (i__ = 1; i__ <= i__1; ++i__) {
05792 temp = a_ref(i__, j + 1);
05793 a_ref(i__, j + 1) = ctemp * temp - stemp * a_ref(
05794 i__, j);
05795 a_ref(i__, j) = stemp * temp + ctemp * a_ref(i__,
05796 j);
05797 /* L150: */
05798 }
05799 }
05800 /* L160: */
05801 }
05802 }
05803 } else if (lsame_(pivot, "T")) {
05804 if (lsame_(direct, "F")) {
05805 i__1 = *n;
05806 for (j = 2; j <= i__1; ++j) {
05807 ctemp = c__[j - 1];
05808 stemp = s[j - 1];
05809 if (ctemp != 1.f || stemp != 0.f) {
05810 i__2 = *m;
05811 for (i__ = 1; i__ <= i__2; ++i__) {
05812 temp = a_ref(i__, j);
05813 a_ref(i__, j) = ctemp * temp - stemp * a_ref(i__,
05814 1);
05815 a_ref(i__, 1) = stemp * temp + ctemp * a_ref(i__,
05816 1);
05817 /* L170: */
05818 }
05819 }
05820 /* L180: */
05821 }
05822 } else if (lsame_(direct, "B")) {
05823 for (j = *n; j >= 2; --j) {
05824 ctemp = c__[j - 1];
05825 stemp = s[j - 1];
05826 if (ctemp != 1.f || stemp != 0.f) {
05827 i__1 = *m;
05828 for (i__ = 1; i__ <= i__1; ++i__) {
05829 temp = a_ref(i__, j);
05830 a_ref(i__, j) = ctemp * temp - stemp * a_ref(i__,
05831 1);
05832 a_ref(i__, 1) = stemp * temp + ctemp * a_ref(i__,
05833 1);
05834 /* L190: */
05835 }
05836 }
05837 /* L200: */
05838 }
05839 }
05840 } else if (lsame_(pivot, "B")) {
05841 if (lsame_(direct, "F")) {
05842 i__1 = *n - 1;
05843 for (j = 1; j <= i__1; ++j) {
05844 ctemp = c__[j];
05845 stemp = s[j];
05846 if (ctemp != 1.f || stemp != 0.f) {
05847 i__2 = *m;
05848 for (i__ = 1; i__ <= i__2; ++i__) {
05849 temp = a_ref(i__, j);
05850 a_ref(i__, j) = stemp * a_ref(i__, *n) + ctemp *
05851 temp;
05852 a_ref(i__, *n) = ctemp * a_ref(i__, *n) - stemp *
05853 temp;
05854 /* L210: */
05855 }
05856 }
05857 /* L220: */
05858 }
05859 } else if (lsame_(direct, "B")) {
05860 for (j = *n - 1; j >= 1; --j) {
05861 ctemp = c__[j];
05862 stemp = s[j];
05863 if (ctemp != 1.f || stemp != 0.f) {
05864 i__1 = *m;
05865 for (i__ = 1; i__ <= i__1; ++i__) {
05866 temp = a_ref(i__, j);
05867 a_ref(i__, j) = stemp * a_ref(i__, *n) + ctemp *
05868 temp;
05869 a_ref(i__, *n) = ctemp * a_ref(i__, *n) - stemp *
05870 temp;
05871 /* L230: */
05872 }
05873 }
05874 /* L240: */
05875 }
05876 }
05877 }
05878 }
05879
05880 return 0;
05881
05882 /* End of SLASR */
05883
05884 } /* slasr_ */
|
|
||||||||||||||||||||
|
Definition at line 5892 of file lapackblas.cpp. References integer, logical, lsame_(), real, stack_ref, and xerbla_(). Referenced by slasq1_(), slasq2_(), sstedc_(), ssteqr_(), and ssterf_(). 05893 {
05894 /* -- LAPACK routine (version 3.0) --
05895 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
05896 Courant Institute, Argonne National Lab, and Rice University
05897 September 30, 1994
05898
05899
05900 Purpose
05901 =======
05902
05903 Sort the numbers in D in increasing order (if ID = 'I') or
05904 in decreasing order (if ID = 'D' ).
05905
05906 Use Quick Sort, reverting to Insertion sort on arrays of
05907 size <= 20. Dimension of STACK limits N to about 2**32.
05908
05909 Arguments
05910 =========
05911
05912 ID (input) CHARACTER*1
05913 = 'I': sort D in increasing order;
05914 = 'D': sort D in decreasing order.
05915
05916 N (input) INTEGER
05917 The length of the array D.
05918
05919 D (input/output) REAL array, dimension (N)
05920 On entry, the array to be sorted.
05921 On exit, D has been sorted into increasing order
05922 (D(1) <= ... <= D(N) ) or into decreasing order
05923 (D(1) >= ... >= D(N) ), depending on ID.
05924
05925 INFO (output) INTEGER
05926 = 0: successful exit
05927 < 0: if INFO = -i, the i-th argument had an illegal value
05928
05929 =====================================================================
05930
05931
05932 Test the input paramters.
05933
05934 Parameter adjustments */
05935 /* System generated locals */
05936 integer i__1, i__2;
05937 /* Local variables */
05938 static integer endd, i__, j;
05939 extern logical lsame_(const char *, const char *);
05940 static integer stack[64] /* was [2][32] */;
05941 static real dmnmx, d1, d2, d3;
05942 static integer start;
05943 extern /* Subroutine */ int xerbla_(const char *, integer *);
05944 static integer stkpnt, dir;
05945 static real tmp;
05946 #define stack_ref(a_1,a_2) stack[(a_2)*2 + a_1 - 3]
05947
05948 --d__;
05949
05950 /* Function Body */
05951 *info = 0;
05952 dir = -1;
05953 if (lsame_(id, "D")) {
05954 dir = 0;
05955 } else if (lsame_(id, "I")) {
05956 dir = 1;
05957 }
05958 if (dir == -1) {
05959 *info = -1;
05960 } else if (*n < 0) {
05961 *info = -2;
05962 }
05963 if (*info != 0) {
05964 i__1 = -(*info);
05965 xerbla_("SLASRT", &i__1);
05966 return 0;
05967 }
05968
05969 /* Quick return if possible */
05970
05971 if (*n <= 1) {
05972 return 0;
05973 }
05974
05975 stkpnt = 1;
05976 stack_ref(1, 1) = 1;
05977 stack_ref(2, 1) = *n;
05978 L10:
05979 start = stack_ref(1, stkpnt);
05980 endd = stack_ref(2, stkpnt);
05981 --stkpnt;
05982 if (endd - start <= 20 && endd - start > 0) {
05983
05984 /* Do Insertion sort on D( START:ENDD ) */
05985
05986 if (dir == 0) {
05987
05988 /* Sort into decreasing order */
05989
05990 i__1 = endd;
05991 for (i__ = start + 1; i__ <= i__1; ++i__) {
05992 i__2 = start + 1;
05993 for (j = i__; j >= i__2; --j) {
05994 if (d__[j] > d__[j - 1]) {
05995 dmnmx = d__[j];
05996 d__[j] = d__[j - 1];
05997 d__[j - 1] = dmnmx;
05998 } else {
05999 goto L30;
06000 }
06001 /* L20: */
06002 }
06003 L30:
06004 ;
06005 }
06006
06007 } else {
06008
06009 /* Sort into increasing order */
06010
06011 i__1 = endd;
06012 for (i__ = start + 1; i__ <= i__1; ++i__) {
06013 i__2 = start + 1;
06014 for (j = i__; j >= i__2; --j) {
06015 if (d__[j] < d__[j - 1]) {
06016 dmnmx = d__[j];
06017 d__[j] = d__[j - 1];
06018 d__[j - 1] = dmnmx;
06019 } else {
06020 goto L50;
06021 }
06022 /* L40: */
06023 }
06024 L50:
06025 ;
06026 }
06027
06028 }
06029
06030 } else if (endd - start > 20) {
06031
06032 /* Partition D( START:ENDD ) and stack parts, largest one first
06033
06034 Choose partition entry as median of 3 */
06035
06036 d1 = d__[start];
06037 d2 = d__[endd];
06038 i__ = (start + endd) / 2;
06039 d3 = d__[i__];
06040 if (d1 < d2) {
06041 if (d3 < d1) {
06042 dmnmx = d1;
06043 } else if (d3 < d2) {
06044 dmnmx = d3;
06045 } else {
06046 dmnmx = d2;
06047 }
06048 } else {
06049 if (d3 < d2) {
06050 dmnmx = d2;
06051 } else if (d3 < d1) {
06052 dmnmx = d3;
06053 } else {
06054 dmnmx = d1;
06055 }
06056 }
06057
06058 if (dir == 0) {
06059
06060 /* Sort into decreasing order */
06061
06062 i__ = start - 1;
06063 j = endd + 1;
06064 L60:
06065 L70:
06066 --j;
06067 if (d__[j] < dmnmx) {
06068 goto L70;
06069 }
06070 L80:
06071 ++i__;
06072 if (d__[i__] > dmnmx) {
06073 goto L80;
06074 }
06075 if (i__ < j) {
06076 tmp = d__[i__];
06077 d__[i__] = d__[j];
06078 d__[j] = tmp;
06079 goto L60;
06080 }
06081 if (j - start > endd - j - 1) {
06082 ++stkpnt;
06083 stack_ref(1, stkpnt) = start;
06084 stack_ref(2, stkpnt) = j;
06085 ++stkpnt;
06086 stack_ref(1, stkpnt) = j + 1;
06087 stack_ref(2, stkpnt) = endd;
06088 } else {
06089 ++stkpnt;
06090 stack_ref(1, stkpnt) = j + 1;
06091 stack_ref(2, stkpnt) = endd;
06092 ++stkpnt;
06093 stack_ref(1, stkpnt) = start;
06094 stack_ref(2, stkpnt) = j;
06095 }
06096 } else {
06097
06098 /* Sort into increasing order */
06099
06100 i__ = start - 1;
06101 j = endd + 1;
06102 L90:
06103 L100:
06104 --j;
06105 if (d__[j] > dmnmx) {
06106 goto L100;
06107 }
06108 L110:
06109 ++i__;
06110 if (d__[i__] < dmnmx) {
06111 goto L110;
06112 }
06113 if (i__ < j) {
06114 tmp = d__[i__];
06115 d__[i__] = d__[j];
06116 d__[j] = tmp;
06117 goto L90;
06118 }
06119 if (j - start > endd - j - 1) {
06120 ++stkpnt;
06121 stack_ref(1, stkpnt) = start;
06122 stack_ref(2, stkpnt) = j;
06123 ++stkpnt;
06124 stack_ref(1, stkpnt) = j + 1;
06125 stack_ref(2, stkpnt) = endd;
06126 } else {
06127 ++stkpnt;
06128 stack_ref(1, stkpnt) = j + 1;
06129 stack_ref(2, stkpnt) = endd;
06130 ++stkpnt;
06131 stack_ref(1, stkpnt) = start;
06132 stack_ref(2, stkpnt) = j;
06133 }
06134 }
06135 }
06136 if (stkpnt > 0) {
06137 goto L10;
06138 }
06139 return 0;
06140
06141 /* End of SLASRT */
06142
06143 } /* slasrt_ */
|
|
||||||||||||||||||||||||
|
Definition at line 6151 of file lapackblas.cpp. References dabs, integer, real, and x. Referenced by slange_(), slanst_(), and slansy_(). 06153 {
06154 /* -- LAPACK auxiliary routine (version 3.0) --
06155 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
06156 Courant Institute, Argonne National Lab, and Rice University
06157 June 30, 1999
06158
06159
06160 Purpose
06161 =======
06162
06163 SLASSQ returns the values scl and smsq such that
06164
06165 ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq,
06166
06167 where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is
06168 assumed to be non-negative and scl returns the value
06169
06170 scl = f2cmax( scale, abs( x( i ) ) ).
06171
06172 scale and sumsq must be supplied in SCALE and SUMSQ and
06173 scl and smsq are overwritten on SCALE and SUMSQ respectively.
06174
06175 The routine makes only one pass through the vector x.
06176
06177 Arguments
06178 =========
06179
06180 N (input) INTEGER
06181 The number of elements to be used from the vector X.
06182
06183 X (input) REAL array, dimension (N)
06184 The vector for which a scaled sum of squares is computed.
06185 x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n.
06186
06187 INCX (input) INTEGER
06188 The increment between successive values of the vector X.
06189 INCX > 0.
06190
06191 SCALE (input/output) REAL
06192 On entry, the value scale in the equation above.
06193 On exit, SCALE is overwritten with scl , the scaling factor
06194 for the sum of squares.
06195
06196 SUMSQ (input/output) REAL
06197 On entry, the value sumsq in the equation above.
06198 On exit, SUMSQ is overwritten with smsq , the basic sum of
06199 squares from which scl has been factored out.
06200
06201 =====================================================================
06202
06203
06204 Parameter adjustments */
06205 /* System generated locals */
06206 integer i__1, i__2;
06207 real r__1;
06208 /* Local variables */
06209 static real absxi;
06210 static integer ix;
06211
06212 --x;
06213
06214 /* Function Body */
06215 if (*n > 0) {
06216 i__1 = (*n - 1) * *incx + 1;
06217 i__2 = *incx;
06218 for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
06219 if (x[ix] != 0.f) {
06220 absxi = (r__1 = x[ix], dabs(r__1));
06221 if (*scale < absxi) {
06222 /* Computing 2nd power */
06223 r__1 = *scale / absxi;
06224 *sumsq = *sumsq * (r__1 * r__1) + 1;
06225 *scale = absxi;
06226 } else {
06227 /* Computing 2nd power */
06228 r__1 = absxi / *scale;
06229 *sumsq += r__1 * r__1;
06230 }
06231 }
06232 /* L10: */
06233 }
06234 }
06235 return 0;
06236
06237 /* End of SLASSQ */
06238
06239 } /* slassq_ */
|
|
||||||||||||||||||||||||||||||||||||||||
|
Definition at line 27087 of file lapackblas.cpp. References c_b3, dabs, doublereal, integer, logical, r_sign(), real, slamch_(), sqrt(), and t. Referenced by sbdsqr_(). 27089 {
27090 /* -- LAPACK auxiliary routine (version 3.0) --
27091 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
27092 Courant Institute, Argonne National Lab, and Rice University
27093 October 31, 1992
27094
27095
27096 Purpose
27097 =======
27098
27099 SLASV2 computes the singular value decomposition of a 2-by-2
27100 triangular matrix
27101 [ F G ]
27102 [ 0 H ].
27103 On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the
27104 smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and
27105 right singular vectors for abs(SSMAX), giving the decomposition
27106
27107 [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ]
27108 [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ].
27109
27110 Arguments
27111 =========
27112
27113 F (input) REAL
27114 The (1,1) element of the 2-by-2 matrix.
27115
27116 G (input) REAL
27117 The (1,2) element of the 2-by-2 matrix.
27118
27119 H (input) REAL
27120 The (2,2) element of the 2-by-2 matrix.
27121
27122 SSMIN (output) REAL
27123 abs(SSMIN) is the smaller singular value.
27124
27125 SSMAX (output) REAL
27126 abs(SSMAX) is the larger singular value.
27127
27128 SNL (output) REAL
27129 CSL (output) REAL
27130 The vector (CSL, SNL) is a unit left singular vector for the
27131 singular value abs(SSMAX).
27132
27133 SNR (output) REAL
27134 CSR (output) REAL
27135 The vector (CSR, SNR) is a unit right singular vector for the
27136 singular value abs(SSMAX).
27137
27138 Further Details
27139 ===============
27140
27141 Any input parameter may be aliased with any output parameter.
27142
27143 Barring over/underflow and assuming a guard digit in subtraction, all
27144 output quantities are correct to within a few units in the last
27145 place (ulps).
27146
27147 In IEEE arithmetic, the code works correctly if one matrix element is
27148 infinite.
27149
27150 Overflow will not occur unless the largest singular value itself
27151 overflows or is within a few ulps of overflow. (On machines with
27152 partial overflow, like the Cray, overflow may occur if the largest
27153 singular value is within a factor of 2 of overflow.)
27154
27155 Underflow is harmless if underflow is gradual. Otherwise, results
27156 may correspond to a matrix modified by perturbations of size near
27157 the underflow threshold.
27158
27159 ===================================================================== */
27160 /* Table of constant values */
27161 static real c_b3 = 2.f;
27162 static real c_b4 = 1.f;
27163
27164 /* System generated locals */
27165 real r__1;
27166 /* Builtin functions */
27167 //double sqrt(doublereal), r_sign(real *, real *);
27168 /* Local variables */
27169 static integer pmax;
27170 static real temp;
27171 static logical swap;
27172 static real a, d__, l, m, r__, s, t, tsign, fa, ga, ha, ft, gt, ht, mm;
27173 static logical gasmal;
27174 extern doublereal slamch_(const char *);
27175 static real tt, clt, crt, slt, srt;
27176
27177
27178
27179
27180 ft = *f;
27181 fa = dabs(ft);
27182 ht = *h__;
27183 ha = dabs(*h__);
27184
27185 /* PMAX points to the maximum absolute element of matrix
27186 PMAX = 1 if F largest in absolute values
27187 PMAX = 2 if G largest in absolute values
27188 PMAX = 3 if H largest in absolute values */
27189
27190 pmax = 1;
27191 swap = ha > fa;
27192 if (swap) {
27193 pmax = 3;
27194 temp = ft;
27195 ft = ht;
27196 ht = temp;
27197 temp = fa;
27198 fa = ha;
27199 ha = temp;
27200
27201 /* Now FA .ge. HA */
27202
27203 }
27204 gt = *g;
27205 ga = dabs(gt);
27206 if (ga == 0.f) {
27207
27208 /* Diagonal matrix */
27209
27210 *ssmin = ha;
27211 *ssmax = fa;
27212 clt = 1.f;
27213 crt = 1.f;
27214 slt = 0.f;
27215 srt = 0.f;
27216 } else {
27217 gasmal = TRUE_;
27218 if (ga > fa) {
27219 pmax = 2;
27220 if (fa / ga < slamch_("EPS")) {
27221
27222 /* Case of very large GA */
27223
27224 gasmal = FALSE_;
27225 *ssmax = ga;
27226 if (ha > 1.f) {
27227 *ssmin = fa / (ga / ha);
27228 } else {
27229 *ssmin = fa / ga * ha;
27230 }
27231 clt = 1.f;
27232 slt = ht / gt;
27233 srt = 1.f;
27234 crt = ft / gt;
27235 }
27236 }
27237 if (gasmal) {
27238
27239 /* Normal case */
27240
27241 d__ = fa - ha;
27242 if (d__ == fa) {
27243
27244 /* Copes with infinite F or H */
27245
27246 l = 1.f;
27247 } else {
27248 l = d__ / fa;
27249 }
27250
27251 /* Note that 0 .le. L .le. 1 */
27252
27253 m = gt / ft;
27254
27255 /* Note that abs(M) .le. 1/macheps */
27256
27257 t = 2.f - l;
27258
27259 /* Note that T .ge. 1 */
27260
27261 mm = m * m;
27262 tt = t * t;
27263 s = sqrt(tt + mm);
27264
27265 /* Note that 1 .le. S .le. 1 + 1/macheps */
27266
27267 if (l == 0.f) {
27268 r__ = dabs(m);
27269 } else {
27270 r__ = sqrt(l * l + mm);
27271 }
27272
27273 /* Note that 0 .le. R .le. 1 + 1/macheps */
27274
27275 a = (s + r__) * .5f;
27276
27277 /* Note that 1 .le. A .le. 1 + abs(M) */
27278
27279 *ssmin = ha / a;
27280 *ssmax = fa * a;
27281 if (mm == 0.f) {
27282
27283 /* Note that M is very tiny */
27284
27285 if (l == 0.f) {
27286 t = r_sign(&c_b3, &ft) * r_sign(&c_b4, >);
27287 } else {
27288 t = gt / r_sign(&d__, &ft) + m / t;
27289 }
27290 } else {
27291 t = (m / (s + t) + m / (r__ + l)) * (a + 1.f);
27292 }
27293 l = sqrt(t * t + 4.f);
27294 crt = 2.f / l;
27295 srt = t / l;
27296 clt = (crt + srt * m) / a;
27297 slt = ht / ft * srt / a;
27298 }
27299 }
27300 if (swap) {
27301 *csl = srt;
27302 *snl = crt;
27303 *csr = slt;
27304 *snr = clt;
27305 } else {
27306 *csl = clt;
27307 *snl = slt;
27308 *csr = crt;
27309 *snr = srt;
27310 }
27311
27312 /* Correct signs of SSMAX and SSMIN */
27313
27314 if (pmax == 1) {
27315 tsign = r_sign(&c_b4, csr) * r_sign(&c_b4, csl) * r_sign(&c_b4, f);
27316 }
27317 if (pmax == 2) {
27318 tsign = r_sign(&c_b4, snr) * r_sign(&c_b4, csl) * r_sign(&c_b4, g);
27319 }
27320 if (pmax == 3) {
27321 tsign = r_sign(&c_b4, snr) * r_sign(&c_b4, snl) * r_sign(&c_b4, h__);
27322 }
27323 *ssmax = r_sign(ssmax, &tsign);
27324 r__1 = tsign * r_sign(&c_b4, f) * r_sign(&c_b4, h__);
27325 *ssmin = r_sign(ssmin, &r__1);
27326 return 0;
27327
27328 /* End of SLASV2 */
27329
27330 } /* slasv2_ */
|
|
||||||||||||||||||||||||||||||||||||||||
|
Definition at line 6244 of file lapackblas.cpp. References a_ref, c__1, doublereal, f2cmin, integer, logical, lsame_(), real, saxpy_(), sdot_(), sgemv_(), slarfg_(), sscal_(), ssymv_(), and w_ref. Referenced by ssytrd_(). 06246 {
06247 /* -- LAPACK auxiliary routine (version 3.0) --
06248 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
06249 Courant Institute, Argonne National Lab, and Rice University
06250 October 31, 1992
06251
06252
06253 Purpose
06254 =======
06255
06256 SLATRD reduces NB rows and columns of a real symmetric matrix A to
06257 symmetric tridiagonal form by an orthogonal similarity
06258 transformation Q' * A * Q, and returns the matrices V and W which are
06259 needed to apply the transformation to the unreduced part of A.
06260
06261 If UPLO = 'U', SLATRD reduces the last NB rows and columns of a
06262 matrix, of which the upper triangle is supplied;
06263 if UPLO = 'L', SLATRD reduces the first NB rows and columns of a
06264 matrix, of which the lower triangle is supplied.
06265
06266 This is an auxiliary routine called by SSYTRD.
06267
06268 Arguments
06269 =========
06270
06271 UPLO (input) CHARACTER
06272 Specifies whether the upper or lower triangular part of the
06273 symmetric matrix A is stored:
06274 = 'U': Upper triangular
06275 = 'L': Lower triangular
06276
06277 N (input) INTEGER
06278 The order of the matrix A.
06279
06280 NB (input) INTEGER
06281 The number of rows and columns to be reduced.
06282
06283 A (input/output) REAL array, dimension (LDA,N)
06284 On entry, the symmetric matrix A. If UPLO = 'U', the leading
06285 n-by-n upper triangular part of A contains the upper
06286 triangular part of the matrix A, and the strictly lower
06287 triangular part of A is not referenced. If UPLO = 'L', the
06288 leading n-by-n lower triangular part of A contains the lower
06289 triangular part of the matrix A, and the strictly upper
06290 triangular part of A is not referenced.
06291 On exit:
06292 if UPLO = 'U', the last NB columns have been reduced to
06293 tridiagonal form, with the diagonal elements overwriting
06294 the diagonal elements of A; the elements above the diagonal
06295 with the array TAU, represent the orthogonal matrix Q as a
06296 product of elementary reflectors;
06297 if UPLO = 'L', the first NB columns have been reduced to
06298 tridiagonal form, with the diagonal elements overwriting
06299 the diagonal elements of A; the elements below the diagonal
06300 with the array TAU, represent the orthogonal matrix Q as a
06301 product of elementary reflectors.
06302 See Further Details.
06303
06304 LDA (input) INTEGER
06305 The leading dimension of the array A. LDA >= (1,N).
06306
06307 E (output) REAL array, dimension (N-1)
06308 If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal
06309 elements of the last NB columns of the reduced matrix;
06310 if UPLO = 'L', E(1:nb) contains the subdiagonal elements of
06311 the first NB columns of the reduced matrix.
06312
06313 TAU (output) REAL array, dimension (N-1)
06314 The scalar factors of the elementary reflectors, stored in
06315 TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'.
06316 See Further Details.
06317
06318 W (output) REAL array, dimension (LDW,NB)
06319 The n-by-nb matrix W required to update the unreduced part
06320 of A.
06321
06322 LDW (input) INTEGER
06323 The leading dimension of the array W. LDW >= f2cmax(1,N).
06324
06325 Further Details
06326 ===============
06327
06328 If UPLO = 'U', the matrix Q is represented as a product of elementary
06329 reflectors
06330
06331 Q = H(n) H(n-1) . . . H(n-nb+1).
06332
06333 Each H(i) has the form
06334
06335 H(i) = I - tau * v * v'
06336
06337 where tau is a real scalar, and v is a real vector with
06338 v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i),
06339 and tau in TAU(i-1).
06340
06341 If UPLO = 'L', the matrix Q is represented as a product of elementary
06342 reflectors
06343
06344 Q = H(1) H(2) . . . H(nb).
06345
06346 Each H(i) has the form
06347
06348 H(i) = I - tau * v * v'
06349
06350 where tau is a real scalar, and v is a real vector with
06351 v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
06352 and tau in TAU(i).
06353
06354 The elements of the vectors v together form the n-by-nb matrix V
06355 which is needed, with W, to apply the transformation to the unreduced
06356 part of the matrix, using a symmetric rank-2k update of the form:
06357 A := A - V*W' - W*V'.
06358
06359 The contents of A on exit are illustrated by the following examples
06360 with n = 5 and nb = 2:
06361
06362 if UPLO = 'U': if UPLO = 'L':
06363
06364 ( a a a v4 v5 ) ( d )
06365 ( a a v4 v5 ) ( 1 d )
06366 ( a 1 v5 ) ( v1 1 a )
06367 ( d 1 ) ( v1 v2 a a )
06368 ( d ) ( v1 v2 a a a )
06369
06370 where d denotes a diagonal element of the reduced matrix, a denotes
06371 an element of the original matrix that is unchanged, and vi denotes
06372 an element of the vector defining H(i).
06373
06374 =====================================================================
06375
06376
06377 Quick return if possible
06378
06379 Parameter adjustments */
06380 /* Table of constant values */
06381 static real c_b5 = -1.f;
06382 static real c_b6 = 1.f;
06383 static integer c__1 = 1;
06384 static real c_b16 = 0.f;
06385
06386 /* System generated locals */
06387 integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3;
06388 /* Local variables */
06389 extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
06390 static integer i__;
06391 static real alpha;
06392 extern logical lsame_(const char *, const char *);
06393 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
06394 sgemv_(const char *, integer *, integer *, real *, real *, integer *,
06395 real *, integer *, real *, real *, integer *), saxpy_(
06396 integer *, real *, real *, integer *, real *, integer *), ssymv_(
06397 const char *, integer *, real *, real *, integer *, real *, integer *,
06398 real *, real *, integer *);
06399 static integer iw;
06400 extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *,
06401 real *);
06402 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
06403 #define w_ref(a_1,a_2) w[(a_2)*w_dim1 + a_1]
06404
06405
06406 a_dim1 = *lda;
06407 a_offset = 1 + a_dim1 * 1;
06408 a -= a_offset;
06409 --e;
06410 --tau;
06411 w_dim1 = *ldw;
06412 w_offset = 1 + w_dim1 * 1;
06413 w -= w_offset;
06414
06415 /* Function Body */
06416 if (*n <= 0) {
06417 return 0;
06418 }
06419
06420 if (lsame_(uplo, "U")) {
06421
06422 /* Reduce last NB columns of upper triangle */
06423
06424 i__1 = *n - *nb + 1;
06425 for (i__ = *n; i__ >= i__1; --i__) {
06426 iw = i__ - *n + *nb;
06427 if (i__ < *n) {
06428
06429 /* Update A(1:i,i) */
06430
06431 i__2 = *n - i__;
06432 sgemv_("No transpose", &i__, &i__2, &c_b5, &a_ref(1, i__ + 1),
06433 lda, &w_ref(i__, iw + 1), ldw, &c_b6, &a_ref(1, i__),
06434 &c__1);
06435 i__2 = *n - i__;
06436 sgemv_("No transpose", &i__, &i__2, &c_b5, &w_ref(1, iw + 1),
06437 ldw, &a_ref(i__, i__ + 1), lda, &c_b6, &a_ref(1, i__),
06438 &c__1);
06439 }
06440 if (i__ > 1) {
06441
06442 /* Generate elementary reflector H(i) to annihilate
06443 A(1:i-2,i) */
06444
06445 i__2 = i__ - 1;
06446 slarfg_(&i__2, &a_ref(i__ - 1, i__), &a_ref(1, i__), &c__1, &
06447 tau[i__ - 1]);
06448 e[i__ - 1] = a_ref(i__ - 1, i__);
06449 a_ref(i__ - 1, i__) = 1.f;
06450
06451 /* Compute W(1:i-1,i) */
06452
06453 i__2 = i__ - 1;
06454 ssymv_("Upper", &i__2, &c_b6, &a[a_offset], lda, &a_ref(1,
06455 i__), &c__1, &c_b16, &w_ref(1, iw), &c__1);
06456 if (i__ < *n) {
06457 i__2 = i__ - 1;
06458 i__3 = *n - i__;
06459 sgemv_("Transpose", &i__2, &i__3, &c_b6, &w_ref(1, iw + 1)
06460 , ldw, &a_ref(1, i__), &c__1, &c_b16, &w_ref(i__
06461 + 1, iw), &c__1);
06462 i__2 = i__ - 1;
06463 i__3 = *n - i__;
06464 sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(1, i__
06465 + 1), lda, &w_ref(i__ + 1, iw), &c__1, &c_b6, &
06466 w_ref(1, iw), &c__1);
06467 i__2 = i__ - 1;
06468 i__3 = *n - i__;
06469 sgemv_("Transpose", &i__2, &i__3, &c_b6, &a_ref(1, i__ +
06470 1), lda, &a_ref(1, i__), &c__1, &c_b16, &w_ref(
06471 i__ + 1, iw), &c__1);
06472 i__2 = i__ - 1;
06473 i__3 = *n - i__;
06474 sgemv_("No transpose", &i__2, &i__3, &c_b5, &w_ref(1, iw
06475 + 1), ldw, &w_ref(i__ + 1, iw), &c__1, &c_b6, &
06476 w_ref(1, iw), &c__1);
06477 }
06478 i__2 = i__ - 1;
06479 sscal_(&i__2, &tau[i__ - 1], &w_ref(1, iw), &c__1);
06480 i__2 = i__ - 1;
06481 alpha = tau[i__ - 1] * -.5f * sdot_(&i__2, &w_ref(1, iw), &
06482 c__1, &a_ref(1, i__), &c__1);
06483 i__2 = i__ - 1;
06484 saxpy_(&i__2, &alpha, &a_ref(1, i__), &c__1, &w_ref(1, iw), &
06485 c__1);
06486 }
06487
06488 /* L10: */
06489 }
06490 } else {
06491
06492 /* Reduce first NB columns of lower triangle */
06493
06494 i__1 = *nb;
06495 for (i__ = 1; i__ <= i__1; ++i__) {
06496
06497 /* Update A(i:n,i) */
06498
06499 i__2 = *n - i__ + 1;
06500 i__3 = i__ - 1;
06501 sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__, 1), lda, &
06502 w_ref(i__, 1), ldw, &c_b6, &a_ref(i__, i__), &c__1);
06503 i__2 = *n - i__ + 1;
06504 i__3 = i__ - 1;
06505 sgemv_("No transpose", &i__2, &i__3, &c_b5, &w_ref(i__, 1), ldw, &
06506 a_ref(i__, 1), lda, &c_b6, &a_ref(i__, i__), &c__1);
06507 if (i__ < *n) {
06508
06509 /* Generate elementary reflector H(i) to annihilate
06510 A(i+2:n,i)
06511
06512 Computing MIN */
06513 i__2 = i__ + 2;
06514 i__3 = *n - i__;
06515 slarfg_(&i__3, &a_ref(i__ + 1, i__), &a_ref(f2cmin(i__2,*n), i__)
06516 , &c__1, &tau[i__]);
06517 e[i__] = a_ref(i__ + 1, i__);
06518 a_ref(i__ + 1, i__) = 1.f;
06519
06520 /* Compute W(i+1:n,i) */
06521
06522 i__2 = *n - i__;
06523 ssymv_("Lower", &i__2, &c_b6, &a_ref(i__ + 1, i__ + 1), lda, &
06524 a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(i__ + 1,
06525 i__), &c__1);
06526 i__2 = *n - i__;
06527 i__3 = i__ - 1;
06528 sgemv_("Transpose", &i__2, &i__3, &c_b6, &w_ref(i__ + 1, 1),
06529 ldw, &a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(1,
06530 i__), &c__1);
06531 i__2 = *n - i__;
06532 i__3 = i__ - 1;
06533 sgemv_("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1, 1)
06534 , lda, &w_ref(1, i__), &c__1, &c_b6, &w_ref(i__ + 1,
06535 i__), &c__1);
06536 i__2 = *n - i__;
06537 i__3 = i__ - 1;
06538 sgemv_("Transpose", &i__2, &i__3, &c_b6, &a_ref(i__ + 1, 1),
06539 lda, &a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(1,
06540 i__), &c__1);
06541 i__2 = *n - i__;
06542 i__3 = i__ - 1;
06543 sgemv_("No transpose", &i__2, &i__3, &c_b5, &w_ref(i__ + 1, 1)
06544 , ldw, &w_ref(1, i__), &c__1, &c_b6, &w_ref(i__ + 1,
06545 i__), &c__1);
06546 i__2 = *n - i__;
06547 sscal_(&i__2, &tau[i__], &w_ref(i__ + 1, i__), &c__1);
06548 i__2 = *n - i__;
06549 alpha = tau[i__] * -.5f * sdot_(&i__2, &w_ref(i__ + 1, i__), &
06550 c__1, &a_ref(i__ + 1, i__), &c__1);
06551 i__2 = *n - i__;
06552 saxpy_(&i__2, &alpha, &a_ref(i__ + 1, i__), &c__1, &w_ref(i__
06553 + 1, i__), &c__1);
06554 }
06555
06556 /* L20: */
06557 }
06558 }
06559
06560 return 0;
06561
06562 /* End of SLATRD */
06563
06564 } /* slatrd_ */
|
|
||||||||||||||||
|
Definition at line 6573 of file lapackblas.cpp. References dabs, doublereal, integer, norm(), real, sqrt(), and x. Referenced by EMAN::PCAlarge::Lanczos(), EMAN::PCA::Lanczos(), EMAN::PCA::Lanczos_ooc(), slaed3_(), slaed9_(), and slarfg_(). 06574 {
06575 /* The following loop is equivalent to this call to the LAPACK
06576 auxiliary routine:
06577 CALL SLASSQ( N, X, INCX, SCALE, SSQ ) */
06578 /* System generated locals */
06579 integer i__1, i__2;
06580 real ret_val, r__1;
06581 /* Builtin functions */
06582 // double sqrt(doublereal);
06583 /* Local variables */
06584 static real norm, scale, absxi;
06585 static integer ix;
06586 static real ssq;
06587 /* SNRM2 returns the euclidean norm of a vector via the function
06588 name, so that
06589 SNRM2 := sqrt( x'*x )
06590 -- This version written on 25-October-1982.
06591 Modified on 14-October-1993 to inline the call to SLASSQ.
06592 Sven Hammarling, Nag Ltd.
06593 Parameter adjustments */
06594 --x;
06595 /* Function Body */
06596 if (*n < 1 || *incx < 1) {
06597 norm = 0.f;
06598 } else if (*n == 1) {
06599 norm = dabs(x[1]);
06600 } else {
06601 scale = 0.f;
06602 ssq = 1.f;
06603
06604
06605 i__1 = (*n - 1) * *incx + 1;
06606 i__2 = *incx;
06607 for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
06608 if (x[ix] != 0.f) {
06609 absxi = (r__1 = x[ix], dabs(r__1));
06610 if (scale < absxi) {
06611 /* Computing 2nd power */
06612 r__1 = scale / absxi;
06613 ssq = ssq * (r__1 * r__1) + 1.f;
06614 scale = absxi;
06615 } else {
06616 /* Computing 2nd power */
06617 r__1 = absxi / scale;
06618 ssq += r__1 * r__1;
06619 }
06620 }
06621 /* L10: */
06622 }
06623 norm = scale * sqrt(ssq);
06624 }
06625
06626 ret_val = norm;
06627 return ret_val;
06628
06629 /* End of SNRM2. */
06630
06631 } /* snrm2_ */
|
|
||||||||||||||||||||||||||||||||||||
|
Definition at line 6636 of file lapackblas.cpp. References a_ref, c__1, f2cmax, integer, real, slarf_(), sscal_(), and xerbla_(). Referenced by sorgql_(). 06638 {
06639 /* -- LAPACK routine (version 3.0) --
06640 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
06641 Courant Institute, Argonne National Lab, and Rice University
06642 February 29, 1992
06643
06644
06645 Purpose
06646 =======
06647
06648 SORG2L generates an m by n real matrix Q with orthonormal columns,
06649 which is defined as the last n columns of a product of k elementary
06650 reflectors of order m
06651
06652 Q = H(k) . . . H(2) H(1)
06653
06654 as returned by SGEQLF.
06655
06656 Arguments
06657 =========
06658
06659 M (input) INTEGER
06660 The number of rows of the matrix Q. M >= 0.
06661
06662 N (input) INTEGER
06663 The number of columns of the matrix Q. M >= N >= 0.
06664
06665 K (input) INTEGER
06666 The number of elementary reflectors whose product defines the
06667 matrix Q. N >= K >= 0.
06668
06669 A (input/output) REAL array, dimension (LDA,N)
06670 On entry, the (n-k+i)-th column must contain the vector which
06671 defines the elementary reflector H(i), for i = 1,2,...,k, as
06672 returned by SGEQLF in the last k columns of its array
06673 argument A.
06674 On exit, the m by n matrix Q.
06675
06676 LDA (input) INTEGER
06677 The first dimension of the array A. LDA >= f2cmax(1,M).
06678
06679 TAU (input) REAL array, dimension (K)
06680 TAU(i) must contain the scalar factor of the elementary
06681 reflector H(i), as returned by SGEQLF.
06682
06683 WORK (workspace) REAL array, dimension (N)
06684
06685 INFO (output) INTEGER
06686 = 0: successful exit
06687 < 0: if INFO = -i, the i-th argument has an illegal value
06688
06689 =====================================================================
06690
06691
06692 Test the input arguments
06693
06694 Parameter adjustments */
06695 /* Table of constant values */
06696 static integer c__1 = 1;
06697
06698 /* System generated locals */
06699 integer a_dim1, a_offset, i__1, i__2, i__3;
06700 real r__1;
06701 /* Local variables */
06702 static integer i__, j, l;
06703 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
06704 slarf_(const char *, integer *, integer *, real *, integer *, real *,
06705 real *, integer *, real *);
06706 static integer ii;
06707 extern /* Subroutine */ int xerbla_(const char *, integer *);
06708 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
06709
06710
06711 a_dim1 = *lda;
06712 a_offset = 1 + a_dim1 * 1;
06713 a -= a_offset;
06714 --tau;
06715 --work;
06716
06717 /* Function Body */
06718 *info = 0;
06719 if (*m < 0) {
06720 *info = -1;
06721 } else if (*n < 0 || *n > *m) {
06722 *info = -2;
06723 } else if (*k < 0 || *k > *n) {
06724 *info = -3;
06725 } else if (*lda < f2cmax(1,*m)) {
06726 *info = -5;
06727 }
06728 if (*info != 0) {
06729 i__1 = -(*info);
06730 xerbla_("SORG2L", &i__1);
06731 return 0;
06732 }
06733
06734 /* Quick return if possible */
06735
06736 if (*n <= 0) {
06737 return 0;
06738 }
06739
06740 /* Initialise columns 1:n-k to columns of the unit matrix */
06741
06742 i__1 = *n - *k;
06743 for (j = 1; j <= i__1; ++j) {
06744 i__2 = *m;
06745 for (l = 1; l <= i__2; ++l) {
06746 a_ref(l, j) = 0.f;
06747 /* L10: */
06748 }
06749 a_ref(*m - *n + j, j) = 1.f;
06750 /* L20: */
06751 }
06752
06753 i__1 = *k;
06754 for (i__ = 1; i__ <= i__1; ++i__) {
06755 ii = *n - *k + i__;
06756
06757 /* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left */
06758
06759 a_ref(*m - *n + ii, ii) = 1.f;
06760 i__2 = *m - *n + ii;
06761 i__3 = ii - 1;
06762 slarf_("Left", &i__2, &i__3, &a_ref(1, ii), &c__1, &tau[i__], &a[
06763 a_offset], lda, &work[1]);
06764 i__2 = *m - *n + ii - 1;
06765 r__1 = -tau[i__];
06766 sscal_(&i__2, &r__1, &a_ref(1, ii), &c__1);
06767 a_ref(*m - *n + ii, ii) = 1.f - tau[i__];
06768
06769 /* Set A(m-k+i+1:m,n-k+i) to zero */
06770
06771 i__2 = *m;
06772 for (l = *m - *n + ii + 1; l <= i__2; ++l) {
06773 a_ref(l, ii) = 0.f;
06774 /* L30: */
06775 }
06776 /* L40: */
06777 }
06778 return 0;
06779
06780 /* End of SORG2L */
06781
06782 } /* sorg2l_ */
|
|
||||||||||||||||||||||||||||||||||||
|
Definition at line 6790 of file lapackblas.cpp. References a_ref, c__1, f2cmax, integer, real, slarf_(), sscal_(), and xerbla_(). Referenced by sorgqr_(). 06792 {
06793 /* -- LAPACK routine (version 3.0) --
06794 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
06795 Courant Institute, Argonne National Lab, and Rice University
06796 February 29, 1992
06797
06798
06799 Purpose
06800 =======
06801
06802 SORG2R generates an m by n real matrix Q with orthonormal columns,
06803 which is defined as the first n columns of a product of k elementary
06804 reflectors of order m
06805
06806 Q = H(1) H(2) . . . H(k)
06807
06808 as returned by SGEQRF.
06809
06810 Arguments
06811 =========
06812
06813 M (input) INTEGER
06814 The number of rows of the matrix Q. M >= 0.
06815
06816 N (input) INTEGER
06817 The number of columns of the matrix Q. M >= N >= 0.
06818
06819 K (input) INTEGER
06820 The number of elementary reflectors whose product defines the
06821 matrix Q. N >= K >= 0.
06822
06823 A (input/output) REAL array, dimension (LDA,N)
06824 On entry, the i-th column must contain the vector which
06825 defines the elementary reflector H(i), for i = 1,2,...,k, as
06826 returned by SGEQRF in the first k columns of its array
06827 argument A.
06828 On exit, the m-by-n matrix Q.
06829
06830 LDA (input) INTEGER
06831 The first dimension of the array A. LDA >= f2cmax(1,M).
06832
06833 TAU (input) REAL array, dimension (K)
06834 TAU(i) must contain the scalar factor of the elementary
06835 reflector H(i), as returned by SGEQRF.
06836
06837 WORK (workspace) REAL array, dimension (N)
06838
06839 INFO (output) INTEGER
06840 = 0: successful exit
06841 < 0: if INFO = -i, the i-th argument has an illegal value
06842
06843 =====================================================================
06844
06845
06846 Test the input arguments
06847
06848 Parameter adjustments */
06849 /* Table of constant values */
06850 static integer c__1 = 1;
06851
06852 /* System generated locals */
06853 integer a_dim1, a_offset, i__1, i__2;
06854 real r__1;
06855 /* Local variables */
06856 static integer i__, j, l;
06857 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
06858 slarf_(const char *, integer *, integer *, real *, integer *, real *,
06859 real *, integer *, real *), xerbla_(const char *, integer *);
06860 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
06861
06862
06863 a_dim1 = *lda;
06864 a_offset = 1 + a_dim1 * 1;
06865 a -= a_offset;
06866 --tau;
06867 --work;
06868
06869 /* Function Body */
06870 *info = 0;
06871 if (*m < 0) {
06872 *info = -1;
06873 } else if (*n < 0 || *n > *m) {
06874 *info = -2;
06875 } else if (*k < 0 || *k > *n) {
06876 *info = -3;
06877 } else if (*lda < f2cmax(1,*m)) {
06878 *info = -5;
06879 }
06880 if (*info != 0) {
06881 i__1 = -(*info);
06882 xerbla_("SORG2R", &i__1);
06883 return 0;
06884 }
06885
06886 /* Quick return if possible */
06887
06888 if (*n <= 0) {
06889 return 0;
06890 }
06891
06892 /* Initialise columns k+1:n to columns of the unit matrix */
06893
06894 i__1 = *n;
06895 for (j = *k + 1; j <= i__1; ++j) {
06896 i__2 = *m;
06897 for (l = 1; l <= i__2; ++l) {
06898 a_ref(l, j) = 0.f;
06899 /* L10: */
06900 }
06901 a_ref(j, j) = 1.f;
06902 /* L20: */
06903 }
06904
06905 for (i__ = *k; i__ >= 1; --i__) {
06906
06907 /* Apply H(i) to A(i:m,i:n) from the left */
06908
06909 if (i__ < *n) {
06910 a_ref(i__, i__) = 1.f;
06911 i__1 = *m - i__ + 1;
06912 i__2 = *n - i__;
06913 slarf_("Left", &i__1, &i__2, &a_ref(i__, i__), &c__1, &tau[i__], &
06914 a_ref(i__, i__ + 1), lda, &work[1]);
06915 }
06916 if (i__ < *m) {
06917 i__1 = *m - i__;
06918 r__1 = -tau[i__];
06919 sscal_(&i__1, &r__1, &a_ref(i__ + 1, i__), &c__1);
06920 }
06921 a_ref(i__, i__) = 1.f - tau[i__];
06922
06923 /* Set A(1:i-1,i) to zero */
06924
06925 i__1 = i__ - 1;
06926 for (l = 1; l <= i__1; ++l) {
06927 a_ref(l, i__) = 0.f;
06928 /* L30: */
06929 }
06930 /* L40: */
06931 }
06932 return 0;
06933
06934 /* End of SORG2R */
06935
06936 } /* sorg2r_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 25035 of file lapackblas.cpp. References a_ref, c__1, c_n1, f2cmax, f2cmin, ftnlen, ilaenv_(), integer, logical, lsame_(), real, sorglq_(), sorgqr_(), and xerbla_(). Referenced by sgesvd_(). 25038 {
25039 /* -- LAPACK routine (version 3.0) --
25040 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
25041 Courant Institute, Argonne National Lab, and Rice University
25042 June 30, 1999
25043
25044
25045 Purpose
25046 =======
25047
25048 SORGBR generates one of the real orthogonal matrices Q or P**T
25049 determined by SGEBRD when reducing a real matrix A to bidiagonal
25050 form: A = Q * B * P**T. Q and P**T are defined as products of
25051 elementary reflectors H(i) or G(i) respectively.
25052
25053 If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q
25054 is of order M:
25055 if m >= k, Q = H(1) H(2) . . . H(k) and SORGBR returns the first n
25056 columns of Q, where m >= n >= k;
25057 if m < k, Q = H(1) H(2) . . . H(m-1) and SORGBR returns Q as an
25058 M-by-M matrix.
25059
25060 If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T
25061 is of order N:
25062 if k < n, P**T = G(k) . . . G(2) G(1) and SORGBR returns the first m
25063 rows of P**T, where n >= m >= k;
25064 if k >= n, P**T = G(n-1) . . . G(2) G(1) and SORGBR returns P**T as
25065 an N-by-N matrix.
25066
25067 Arguments
25068 =========
25069
25070 VECT (input) CHARACTER*1
25071 Specifies whether the matrix Q or the matrix P**T is
25072 required, as defined in the transformation applied by SGEBRD:
25073 = 'Q': generate Q;
25074 = 'P': generate P**T.
25075
25076 M (input) INTEGER
25077 The number of rows of the matrix Q or P**T to be returned.
25078 M >= 0.
25079
25080 N (input) INTEGER
25081 The number of columns of the matrix Q or P**T to be returned.
25082 N >= 0.
25083 If VECT = 'Q', M >= N >= min(M,K);
25084 if VECT = 'P', N >= M >= min(N,K).
25085
25086 K (input) INTEGER
25087 If VECT = 'Q', the number of columns in the original M-by-K
25088 matrix reduced by SGEBRD.
25089 If VECT = 'P', the number of rows in the original K-by-N
25090 matrix reduced by SGEBRD.
25091 K >= 0.
25092
25093 A (input/output) REAL array, dimension (LDA,N)
25094 On entry, the vectors which define the elementary reflectors,
25095 as returned by SGEBRD.
25096 On exit, the M-by-N matrix Q or P**T.
25097
25098 LDA (input) INTEGER
25099 The leading dimension of the array A. LDA >= max(1,M).
25100
25101 TAU (input) REAL array, dimension
25102 (min(M,K)) if VECT = 'Q'
25103 (min(N,K)) if VECT = 'P'
25104 TAU(i) must contain the scalar factor of the elementary
25105 reflector H(i) or G(i), which determines Q or P**T, as
25106 returned by SGEBRD in its array argument TAUQ or TAUP.
25107
25108 WORK (workspace/output) REAL array, dimension (LWORK)
25109 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
25110
25111 LWORK (input) INTEGER
25112 The dimension of the array WORK. LWORK >= max(1,min(M,N)).
25113 For optimum performance LWORK >= min(M,N)*NB, where NB
25114 is the optimal blocksize.
25115
25116 If LWORK = -1, then a workspace query is assumed; the routine
25117 only calculates the optimal size of the WORK array, returns
25118 this value as the first entry of the WORK array, and no error
25119 message related to LWORK is issued by XERBLA.
25120
25121 INFO (output) INTEGER
25122 = 0: successful exit
25123 < 0: if INFO = -i, the i-th argument had an illegal value
25124
25125 =====================================================================
25126
25127
25128 Test the input arguments
25129
25130 Parameter adjustments */
25131 /* Table of constant values */
25132 static integer c__1 = 1;
25133 static integer c_n1 = -1;
25134
25135 /* System generated locals */
25136 integer a_dim1, a_offset, i__1, i__2, i__3;
25137 /* Local variables */
25138 static integer i__, j;
25139 extern logical lsame_(const char *, const char *);
25140 static integer iinfo;
25141 static logical wantq;
25142 static integer nb, mn;
25143 extern /* Subroutine */ int xerbla_(const char *, integer *);
25144 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *,
25145 integer *, integer *, ftnlen, ftnlen);
25146 extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real
25147 *, integer *, real *, real *, integer *, integer *), sorgqr_(
25148 integer *, integer *, integer *, real *, integer *, real *, real *
25149 , integer *, integer *);
25150 static integer lwkopt;
25151 static logical lquery;
25152 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
25153
25154
25155 a_dim1 = *lda;
25156 a_offset = 1 + a_dim1 * 1;
25157 a -= a_offset;
25158 --tau;
25159 --work;
25160
25161 /* Function Body */
25162 *info = 0;
25163 wantq = lsame_(vect, "Q");
25164 mn = f2cmin(*m,*n);
25165 lquery = *lwork == -1;
25166 if (! wantq && ! lsame_(vect, "P")) {
25167 *info = -1;
25168 } else if (*m < 0) {
25169 *info = -2;
25170 } else if (*n < 0 || wantq && (*n > *m || *n < f2cmin(*m,*k)) || ! wantq && (
25171 *m > *n || *m < f2cmin(*n,*k))) {
25172 *info = -3;
25173 } else if (*k < 0) {
25174 *info = -4;
25175 } else if (*lda < f2cmax(1,*m)) {
25176 *info = -6;
25177 } else if (*lwork < f2cmax(1,mn) && ! lquery) {
25178 *info = -9;
25179 }
25180
25181 if (*info == 0) {
25182 if (wantq) {
25183 nb = ilaenv_(&c__1, "SORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (
25184 ftnlen)1);
25185 } else {
25186 nb = ilaenv_(&c__1, "SORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (
25187 ftnlen)1);
25188 }
25189 lwkopt = f2cmax(1,mn) * nb;
25190 work[1] = (real) lwkopt;
25191 }
25192
25193 if (*info != 0) {
25194 i__1 = -(*info);
25195 xerbla_("SORGBR", &i__1);
25196 return 0;
25197 } else if (lquery) {
25198 return 0;
25199 }
25200
25201 /* Quick return if possible */
25202
25203 if (*m == 0 || *n == 0) {
25204 work[1] = 1.f;
25205 return 0;
25206 }
25207
25208 if (wantq) {
25209
25210 /* Form Q, determined by a call to SGEBRD to reduce an m-by-k
25211 matrix */
25212
25213 if (*m >= *k) {
25214
25215 /* If m >= k, assume m >= n >= k */
25216
25217 sorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, &
25218 iinfo);
25219
25220 } else {
25221
25222 /* If m < k, assume m = n
25223
25224 Shift the vectors which define the elementary reflectors one
25225 column to the right, and set the first row and column of Q
25226 to those of the unit matrix */
25227
25228 for (j = *m; j >= 2; --j) {
25229 a_ref(1, j) = 0.f;
25230 i__1 = *m;
25231 for (i__ = j + 1; i__ <= i__1; ++i__) {
25232 a_ref(i__, j) = a_ref(i__, j - 1);
25233 /* L10: */
25234 }
25235 /* L20: */
25236 }
25237 a_ref(1, 1) = 1.f;
25238 i__1 = *m;
25239 for (i__ = 2; i__ <= i__1; ++i__) {
25240 a_ref(i__, 1) = 0.f;
25241 /* L30: */
25242 }
25243 if (*m > 1) {
25244
25245 /* Form Q(2:m,2:m) */
25246
25247 i__1 = *m - 1;
25248 i__2 = *m - 1;
25249 i__3 = *m - 1;
25250 sorgqr_(&i__1, &i__2, &i__3, &a_ref(2, 2), lda, &tau[1], &
25251 work[1], lwork, &iinfo);
25252 }
25253 }
25254 } else {
25255
25256 /* Form P', determined by a call to SGEBRD to reduce a k-by-n
25257 matrix */
25258
25259 if (*k < *n) {
25260
25261 /* If k < n, assume k <= m <= n */
25262
25263 sorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, &
25264 iinfo);
25265
25266 } else {
25267
25268 /* If k >= n, assume m = n
25269
25270 Shift the vectors which define the elementary reflectors one
25271 row downward, and set the first row and column of P' to
25272 those of the unit matrix */
25273
25274 a_ref(1, 1) = 1.f;
25275 i__1 = *n;
25276 for (i__ = 2; i__ <= i__1; ++i__) {
25277 a_ref(i__, 1) = 0.f;
25278 /* L40: */
25279 }
25280 i__1 = *n;
25281 for (j = 2; j <= i__1; ++j) {
25282 for (i__ = j - 1; i__ >= 2; --i__) {
25283 a_ref(i__, j) = a_ref(i__ - 1, j);
25284 /* L50: */
25285 }
25286 a_ref(1, j) = 0.f;
25287 /* L60: */
25288 }
25289 if (*n > 1) {
25290
25291 /* Form P'(2:n,2:n) */
25292
25293 i__1 = *n - 1;
25294 i__2 = *n - 1;
25295 i__3 = *n - 1;
25296 sorglq_(&i__1, &i__2, &i__3, &a_ref(2, 2), lda, &tau[1], &
25297 work[1], lwork, &iinfo);
25298 }
25299 }
25300 }
25301 work[1] = (real) lwkopt;
25302 return 0;
25303
25304 /* End of SORGBR */
25305
25306 } /* sorgbr_ */
|
|
||||||||||||||||||||||||||||||||||||
|
Definition at line 20442 of file lapackblas.cpp. References a_ref, f2cmax, integer, real, slarf_(), sscal_(), and xerbla_(). Referenced by sorglq_(). 20445 {
20446 /* -- LAPACK routine (version 3.0) --
20447 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
20448 Courant Institute, Argonne National Lab, and Rice University
20449 June 30, 1999
20450
20451
20452 Purpose
20453 =======
20454
20455 SORGL2 generates an m by n real matrix Q with orthonormal rows,
20456 which is defined as the first m rows of a product of k elementary
20457 reflectors of order n
20458
20459 Q = H(k) . . . H(2) H(1)
20460
20461 as returned by SGELQF.
20462
20463 Arguments
20464 =========
20465
20466 M (input) INTEGER
20467 The number of rows of the matrix Q. M >= 0.
20468
20469 N (input) INTEGER
20470 The number of columns of the matrix Q. N >= M.
20471
20472 K (input) INTEGER
20473 The number of elementary reflectors whose product defines the
20474 matrix Q. M >= K >= 0.
20475
20476 A (input/output) REAL array, dimension (LDA,N)
20477 On entry, the i-th row must contain the vector which defines
20478 the elementary reflector H(i), for i = 1,2,...,k, as returned
20479 by SGELQF in the first k rows of its array argument A.
20480 On exit, the m-by-n matrix Q.
20481
20482 LDA (input) INTEGER
20483 The first dimension of the array A. LDA >= max(1,M).
20484
20485 TAU (input) REAL array, dimension (K)
20486 TAU(i) must contain the scalar factor of the elementary
20487 reflector H(i), as returned by SGELQF.
20488
20489 WORK (workspace) REAL array, dimension (M)
20490
20491 INFO (output) INTEGER
20492 = 0: successful exit
20493 < 0: if INFO = -i, the i-th argument has an illegal value
20494
20495 =====================================================================
20496
20497
20498 Test the input arguments
20499
20500 Parameter adjustments */
20501 /* System generated locals */
20502 integer a_dim1, a_offset, i__1, i__2;
20503 real r__1;
20504 /* Local variables */
20505 static integer i__, j, l;
20506 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
20507 slarf_(const char *, integer *, integer *, real *, integer *, real *,
20508 real *, integer *, real *), xerbla_(const char *, integer *);
20509 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
20510
20511 a_dim1 = *lda;
20512 a_offset = 1 + a_dim1 * 1;
20513 a -= a_offset;
20514 --tau;
20515 --work;
20516
20517 /* Function Body */
20518 *info = 0;
20519 if (*m < 0) {
20520 *info = -1;
20521 } else if (*n < *m) {
20522 *info = -2;
20523 } else if (*k < 0 || *k > *m) {
20524 *info = -3;
20525 } else if (*lda < f2cmax(1,*m)) {
20526 *info = -5;
20527 }
20528 if (*info != 0) {
20529 i__1 = -(*info);
20530 xerbla_("SORGL2", &i__1);
20531 return 0;
20532 }
20533
20534 /* Quick return if possible */
20535
20536 if (*m <= 0) {
20537 return 0;
20538 }
20539
20540 if (*k < *m) {
20541
20542 /* Initialise rows k+1:m to rows of the unit matrix */
20543
20544 i__1 = *n;
20545 for (j = 1; j <= i__1; ++j) {
20546 i__2 = *m;
20547 for (l = *k + 1; l <= i__2; ++l) {
20548 a_ref(l, j) = 0.f;
20549 /* L10: */
20550 }
20551 if (j > *k && j <= *m) {
20552 a_ref(j, j) = 1.f;
20553 }
20554 /* L20: */
20555 }
20556 }
20557
20558 for (i__ = *k; i__ >= 1; --i__) {
20559
20560 /* Apply H(i) to A(i:m,i:n) from the right */
20561
20562 if (i__ < *n) {
20563 if (i__ < *m) {
20564 a_ref(i__, i__) = 1.f;
20565 i__1 = *m - i__;
20566 i__2 = *n - i__ + 1;
20567 slarf_("Right", &i__1, &i__2, &a_ref(i__, i__), lda, &tau[i__]
20568 , &a_ref(i__ + 1, i__), lda, &work[1]);
20569 }
20570 i__1 = *n - i__;
20571 r__1 = -tau[i__];
20572 sscal_(&i__1, &r__1, &a_ref(i__, i__ + 1), lda);
20573 }
20574 a_ref(i__, i__) = 1.f - tau[i__];
20575
20576 /* Set A(i,1:i-1) to zero */
20577
20578 i__1 = i__ - 1;
20579 for (l = 1; l <= i__1; ++l) {
20580 a_ref(i__, l) = 0.f;
20581 /* L30: */
20582 }
20583 /* L40: */
20584 }
20585 return 0;
20586
20587 /* End of SORGL2 */
20588
20589 } /* sorgl2_ */
|
|
||||||||||||||||||||||||||||||||||||||||
|
Definition at line 20595 of file lapackblas.cpp. References a_ref, c__1, c__2, c__3, c_n1, f2cmax, f2cmin, ftnlen, ilaenv_(), integer, logical, nx, real, slarfb_(), slarft_(), sorgl2_(), and xerbla_(). Referenced by sgesvd_(), and sorgbr_(). 20597 {
20598 /* -- LAPACK routine (version 3.0) --
20599 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
20600 Courant Institute, Argonne National Lab, and Rice University
20601 June 30, 1999
20602
20603
20604 Purpose
20605 =======
20606
20607 SORGLQ generates an M-by-N real matrix Q with orthonormal rows,
20608 which is defined as the first M rows of a product of K elementary
20609 reflectors of order N
20610
20611 Q = H(k) . . . H(2) H(1)
20612
20613 as returned by SGELQF.
20614
20615 Arguments
20616 =========
20617
20618 M (input) INTEGER
20619 The number of rows of the matrix Q. M >= 0.
20620
20621 N (input) INTEGER
20622 The number of columns of the matrix Q. N >= M.
20623
20624 K (input) INTEGER
20625 The number of elementary reflectors whose product defines the
20626 matrix Q. M >= K >= 0.
20627
20628 A (input/output) REAL array, dimension (LDA,N)
20629 On entry, the i-th row must contain the vector which defines
20630 the elementary reflector H(i), for i = 1,2,...,k, as returned
20631 by SGELQF in the first k rows of its array argument A.
20632 On exit, the M-by-N matrix Q.
20633
20634 LDA (input) INTEGER
20635 The first dimension of the array A. LDA >= max(1,M).
20636
20637 TAU (input) REAL array, dimension (K)
20638 TAU(i) must contain the scalar factor of the elementary
20639 reflector H(i), as returned by SGELQF.
20640
20641 WORK (workspace/output) REAL array, dimension (LWORK)
20642 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
20643
20644 LWORK (input) INTEGER
20645 The dimension of the array WORK. LWORK >= max(1,M).
20646 For optimum performance LWORK >= M*NB, where NB is
20647 the optimal blocksize.
20648
20649 If LWORK = -1, then a workspace query is assumed; the routine
20650 only calculates the optimal size of the WORK array, returns
20651 this value as the first entry of the WORK array, and no error
20652 message related to LWORK is issued by XERBLA.
20653
20654 INFO (output) INTEGER
20655 = 0: successful exit
20656 < 0: if INFO = -i, the i-th argument has an illegal value
20657
20658 =====================================================================
20659
20660
20661 Test the input arguments
20662
20663 Parameter adjustments */
20664 /* Table of constant values */
20665 static integer c__1 = 1;
20666 static integer c_n1 = -1;
20667 static integer c__3 = 3;
20668 static integer c__2 = 2;
20669
20670 /* System generated locals */
20671 integer a_dim1, a_offset, i__1, i__2, i__3;
20672 /* Local variables */
20673 static integer i__, j, l, nbmin, iinfo;
20674 extern /* Subroutine */ int sorgl2_(integer *, integer *, integer *, real
20675 *, integer *, real *, real *, integer *);
20676 static integer ib, nb, ki, kk, nx;
20677 extern /* Subroutine */ int slarfb_(const char *, const char *, const char *, const char *,
20678 integer *, integer *, integer *, real *, integer *, real *,
20679 integer *, real *, integer *, real *, integer *), xerbla_(const char *, integer *);
20680 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *,
20681 integer *, integer *, ftnlen, ftnlen);
20682 extern /* Subroutine */ int slarft_(const char *, const char *, integer *, integer *,
20683 real *, integer *, real *, real *, integer *);
20684 static integer ldwork, lwkopt;
20685 static logical lquery;
20686 static integer iws;
20687 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
20688
20689
20690 a_dim1 = *lda;
20691 a_offset = 1 + a_dim1 * 1;
20692 a -= a_offset;
20693 --tau;
20694 --work;
20695
20696 /* Function Body */
20697 *info = 0;
20698 nb = ilaenv_(&c__1, "SORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
20699 lwkopt = f2cmax(1,*m) * nb;
20700 work[1] = (real) lwkopt;
20701 lquery = *lwork == -1;
20702 if (*m < 0) {
20703 *info = -1;
20704 } else if (*n < *m) {
20705 *info = -2;
20706 } else if (*k < 0 || *k > *m) {
20707 *info = -3;
20708 } else if (*lda < f2cmax(1,*m)) {
20709 *info = -5;
20710 } else if (*lwork < f2cmax(1,*m) && ! lquery) {
20711 *info = -8;
20712 }
20713 if (*info != 0) {
20714 i__1 = -(*info);
20715 xerbla_("SORGLQ", &i__1);
20716 return 0;
20717 } else if (lquery) {
20718 return 0;
20719 }
20720
20721 /* Quick return if possible */
20722
20723 if (*m <= 0) {
20724 work[1] = 1.f;
20725 return 0;
20726 }
20727
20728 nbmin = 2;
20729 nx = 0;
20730 iws = *m;
20731 if (nb > 1 && nb < *k) {
20732
20733 /* Determine when to cross over from blocked to unblocked code.
20734
20735 Computing MAX */
20736 i__1 = 0, i__2 = ilaenv_(&c__3, "SORGLQ", " ", m, n, k, &c_n1, (
20737 ftnlen)6, (ftnlen)1);
20738 nx = f2cmax(i__1,i__2);
20739 if (nx < *k) {
20740
20741 /* Determine if workspace is large enough for blocked code. */
20742
20743 ldwork = *m;
20744 iws = ldwork * nb;
20745 if (*lwork < iws) {
20746
20747 /* Not enough workspace to use optimal NB: reduce NB and
20748 determine the minimum value of NB. */
20749
20750 nb = *lwork / ldwork;
20751 /* Computing MAX */
20752 i__1 = 2, i__2 = ilaenv_(&c__2, "SORGLQ", " ", m, n, k, &c_n1,
20753 (ftnlen)6, (ftnlen)1);
20754 nbmin = f2cmax(i__1,i__2);
20755 }
20756 }
20757 }
20758
20759 if (nb >= nbmin && nb < *k && nx < *k) {
20760
20761 /* Use blocked code after the last block.
20762 The first kk rows are handled by the block method. */
20763
20764 ki = (*k - nx - 1) / nb * nb;
20765 /* Computing MIN */
20766 i__1 = *k, i__2 = ki + nb;
20767 kk = f2cmin(i__1,i__2);
20768
20769 /* Set A(kk+1:m,1:kk) to zero. */
20770
20771 i__1 = kk;
20772 for (j = 1; j <= i__1; ++j) {
20773 i__2 = *m;
20774 for (i__ = kk + 1; i__ <= i__2; ++i__) {
20775 a_ref(i__, j) = 0.f;
20776 /* L10: */
20777 }
20778 /* L20: */
20779 }
20780 } else {
20781 kk = 0;
20782 }
20783
20784 /* Use unblocked code for the last or only block. */
20785
20786 if (kk < *m) {
20787 i__1 = *m - kk;
20788 i__2 = *n - kk;
20789 i__3 = *k - kk;
20790 sorgl2_(&i__1, &i__2, &i__3, &a_ref(kk + 1, kk + 1), lda, &tau[kk + 1]
20791 , &work[1], &iinfo);
20792 }
20793
20794 if (kk > 0) {
20795
20796 /* Use blocked code */
20797
20798 i__1 = -nb;
20799 for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
20800 /* Computing MIN */
20801 i__2 = nb, i__3 = *k - i__ + 1;
20802 ib = f2cmin(i__2,i__3);
20803 if (i__ + ib <= *m) {
20804
20805 /* Form the triangular factor of the block reflector
20806 H = H(i) H(i+1) . . . H(i+ib-1) */
20807
20808 i__2 = *n - i__ + 1;
20809 slarft_("Forward", "Rowwise", &i__2, &ib, &a_ref(i__, i__),
20810 lda, &tau[i__], &work[1], &ldwork);
20811
20812 /* Apply H' to A(i+ib:m,i:n) from the right */
20813
20814 i__2 = *m - i__ - ib + 1;
20815 i__3 = *n - i__ + 1;
20816 slarfb_("Right", "Transpose", "Forward", "Rowwise", &i__2, &
20817 i__3, &ib, &a_ref(i__, i__), lda, &work[1], &ldwork, &
20818 a_ref(i__ + ib, i__), lda, &work[ib + 1], &ldwork);
20819 }
20820
20821 /* Apply H' to columns i:n of current block */
20822
20823 i__2 = *n - i__ + 1;
20824 sorgl2_(&ib, &i__2, &ib, &a_ref(i__, i__), lda, &tau[i__], &work[
20825 1], &iinfo);
20826
20827 /* Set columns 1:i-1 of current block to zero */
20828
20829 i__2 = i__ - 1;
20830 for (j = 1; j <= i__2; ++j) {
20831 i__3 = i__ + ib - 1;
20832 for (l = i__; l <= i__3; ++l) {
20833 a_ref(l, j) = 0.f;
20834 /* L30: */
20835 }
20836 /* L40: */
20837 }
20838 /* L50: */
20839 }
20840 }
20841
20842 work[1] = (real) iws;
20843 return 0;
20844
20845 /* End of SORGLQ */
20846
20847 } /* sorglq_ */
|
|
||||||||||||||||||||||||||||||||||||||||
|
Definition at line 6944 of file lapackblas.cpp. References a_ref, c__1, c__2, c__3, c_n1, f2cmax, f2cmin, ftnlen, ilaenv_(), integer, logical, nx, real, slarfb_(), slarft_(), sorg2l_(), and xerbla_(). Referenced by sorgtr_(). 06946 {
06947 /* -- LAPACK routine (version 3.0) --
06948 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
06949 Courant Institute, Argonne National Lab, and Rice University
06950 June 30, 1999
06951
06952
06953 Purpose
06954 =======
06955
06956 SORGQL generates an M-by-N real matrix Q with orthonormal columns,
06957 which is defined as the last N columns of a product of K elementary
06958 reflectors of order M
06959
06960 Q = H(k) . . . H(2) H(1)
06961
06962 as returned by SGEQLF.
06963
06964 Arguments
06965 =========
06966
06967 M (input) INTEGER
06968 The number of rows of the matrix Q. M >= 0.
06969
06970 N (input) INTEGER
06971 The number of columns of the matrix Q. M >= N >= 0.
06972
06973 K (input) INTEGER
06974 The number of elementary reflectors whose product defines the
06975 matrix Q. N >= K >= 0.
06976
06977 A (input/output) REAL array, dimension (LDA,N)
06978 On entry, the (n-k+i)-th column must contain the vector which
06979 defines the elementary reflector H(i), for i = 1,2,...,k, as
06980 returned by SGEQLF in the last k columns of its array
06981 argument A.
06982 On exit, the M-by-N matrix Q.
06983
06984 LDA (input) INTEGER
06985 The first dimension of the array A. LDA >= f2cmax(1,M).
06986
06987 TAU (input) REAL array, dimension (K)
06988 TAU(i) must contain the scalar factor of the elementary
06989 reflector H(i), as returned by SGEQLF.
06990
06991 WORK (workspace/output) REAL array, dimension (LWORK)
06992 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
06993
06994 LWORK (input) INTEGER
06995 The dimension of the array WORK. LWORK >= f2cmax(1,N).
06996 For optimum performance LWORK >= N*NB, where NB is the
06997 optimal blocksize.
06998
06999 If LWORK = -1, then a workspace query is assumed; the routine
07000 only calculates the optimal size of the WORK array, returns
07001 this value as the first entry of the WORK array, and no error
07002 message related to LWORK is issued by XERBLA.
07003
07004 INFO (output) INTEGER
07005 = 0: successful exit
07006 < 0: if INFO = -i, the i-th argument has an illegal value
07007
07008 =====================================================================
07009
07010
07011 Test the input arguments
07012
07013 Parameter adjustments */
07014 /* Table of constant values */
07015 static integer c__1 = 1;
07016 static integer c_n1 = -1;
07017 static integer c__3 = 3;
07018 static integer c__2 = 2;
07019
07020 /* System generated locals */
07021 integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
07022 /* Local variables */
07023 static integer i__, j, l, nbmin, iinfo;
07024 extern /* Subroutine */ int sorg2l_(integer *, integer *, integer *, real
07025 *, integer *, real *, real *, integer *);
07026 static integer ib, nb, kk, nx;
07027 extern /* Subroutine */ int slarfb_(const char *, const char *, const char *, const char *,
07028 integer *, integer *, integer *, real *, integer *, real *,
07029 integer *, real *, integer *, real *, integer *), xerbla_(const char *, integer *);
07030 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *,
07031 integer *, integer *, ftnlen, ftnlen);
07032 extern /* Subroutine */ int slarft_(const char *, const char *, integer *, integer *,
07033 real *, integer *, real *, real *, integer *);
07034 static integer ldwork, lwkopt;
07035 static logical lquery;
07036 static integer iws;
07037 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
07038
07039
07040 a_dim1 = *lda;
07041 a_offset = 1 + a_dim1 * 1;
07042 a -= a_offset;
07043 --tau;
07044 --work;
07045
07046 /* Function Body */
07047 *info = 0;
07048 nb = ilaenv_(&c__1, "SORGQL", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
07049 lwkopt = f2cmax(1,*n) * nb;
07050 work[1] = (real) lwkopt;
07051 lquery = *lwork == -1;
07052 if (*m < 0) {
07053 *info = -1;
07054 } else if (*n < 0 || *n > *m) {
07055 *info = -2;
07056 } else if (*k < 0 || *k > *n) {
07057 *info = -3;
07058 } else if (*lda < f2cmax(1,*m)) {
07059 *info = -5;
07060 } else if (*lwork < f2cmax(1,*n) && ! lquery) {
07061 *info = -8;
07062 }
07063 if (*info != 0) {
07064 i__1 = -(*info);
07065 xerbla_("SORGQL", &i__1);
07066 return 0;
07067 } else if (lquery) {
07068 return 0;
07069 }
07070
07071 /* Quick return if possible */
07072
07073 if (*n <= 0) {
07074 work[1] = 1.f;
07075 return 0;
07076 }
07077
07078 nbmin = 2;
07079 nx = 0;
07080 iws = *n;
07081 if (nb > 1 && nb < *k) {
07082
07083 /* Determine when to cross over from blocked to unblocked code.
07084
07085 Computing MAX */
07086 i__1 = 0, i__2 = ilaenv_(&c__3, "SORGQL", " ", m, n, k, &c_n1, (
07087 ftnlen)6, (ftnlen)1);
07088 nx = f2cmax(i__1,i__2);
07089 if (nx < *k) {
07090
07091 /* Determine if workspace is large enough for blocked code. */
07092
07093 ldwork = *n;
07094 iws = ldwork * nb;
07095 if (*lwork < iws) {
07096
07097 /* Not enough workspace to use optimal NB: reduce NB and
07098 determine the minimum value of NB. */
07099
07100 nb = *lwork / ldwork;
07101 /* Computing MAX */
07102 i__1 = 2, i__2 = ilaenv_(&c__2, "SORGQL", " ", m, n, k, &c_n1,
07103 (ftnlen)6, (ftnlen)1);
07104 nbmin = f2cmax(i__1,i__2);
07105 }
07106 }
07107 }
07108
07109 if (nb >= nbmin && nb < *k && nx < *k) {
07110
07111 /* Use blocked code after the first block.
07112 The last kk columns are handled by the block method.
07113
07114 Computing MIN */
07115 i__1 = *k, i__2 = (*k - nx + nb - 1) / nb * nb;
07116 kk = f2cmin(i__1,i__2);
07117
07118 /* Set A(m-kk+1:m,1:n-kk) to zero. */
07119
07120 i__1 = *n - kk;
07121 for (j = 1; j <= i__1; ++j) {
07122 i__2 = *m;
07123 for (i__ = *m - kk + 1; i__ <= i__2; ++i__) {
07124 a_ref(i__, j) = 0.f;
07125 /* L10: */
07126 }
07127 /* L20: */
07128 }
07129 } else {
07130 kk = 0;
07131 }
07132
07133 /* Use unblocked code for the first or only block. */
07134
07135 i__1 = *m - kk;
07136 i__2 = *n - kk;
07137 i__3 = *k - kk;
07138 sorg2l_(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], &iinfo)
07139 ;
07140
07141 if (kk > 0) {
07142
07143 /* Use blocked code */
07144
07145 i__1 = *k;
07146 i__2 = nb;
07147 for (i__ = *k - kk + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
07148 i__2) {
07149 /* Computing MIN */
07150 i__3 = nb, i__4 = *k - i__ + 1;
07151 ib = f2cmin(i__3,i__4);
07152 if (*n - *k + i__ > 1) {
07153
07154 /* Form the triangular factor of the block reflector
07155 H = H(i+ib-1) . . . H(i+1) H(i) */
07156
07157 i__3 = *m - *k + i__ + ib - 1;
07158 slarft_("Backward", "Columnwise", &i__3, &ib, &a_ref(1, *n - *
07159 k + i__), lda, &tau[i__], &work[1], &ldwork);
07160
07161 /* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */
07162
07163 i__3 = *m - *k + i__ + ib - 1;
07164 i__4 = *n - *k + i__ - 1;
07165 slarfb_("Left", "No transpose", "Backward", "Columnwise", &
07166 i__3, &i__4, &ib, &a_ref(1, *n - *k + i__), lda, &
07167 work[1], &ldwork, &a[a_offset], lda, &work[ib + 1], &
07168 ldwork);
07169 }
07170
07171 /* Apply H to rows 1:m-k+i+ib-1 of current block */
07172
07173 i__3 = *m - *k + i__ + ib - 1;
07174 sorg2l_(&i__3, &ib, &ib, &a_ref(1, *n - *k + i__), lda, &tau[i__],
07175 &work[1], &iinfo);
07176
07177 /* Set rows m-k+i+ib:m of current block to zero */
07178
07179 i__3 = *n - *k + i__ + ib - 1;
07180 for (j = *n - *k + i__; j <= i__3; ++j) {
07181 i__4 = *m;
07182 for (l = *m - *k + i__ + ib; l <= i__4; ++l) {
07183 a_ref(l, j) = 0.f;
07184 /* L30: */
07185 }
07186 /* L40: */
07187 }
07188 /* L50: */
07189 }
07190 }
07191
07192 work[1] = (real) iws;
07193 return 0;
07194
07195 /* End of SORGQL */
07196
07197 } /* sorgql_ */
|
|
||||||||||||||||||||||||||||||||||||||||
|
Definition at line 7205 of file lapackblas.cpp. References a_ref, c__1, c__2, c__3, c_n1, f2cmax, f2cmin, ftnlen, ilaenv_(), integer, logical, nx, real, slarfb_(), slarft_(), sorg2r_(), and xerbla_(). Referenced by sgesvd_(), sorgbr_(), and sorgtr_(). 07207 {
07208 /* -- LAPACK routine (version 3.0) --
07209 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
07210 Courant Institute, Argonne National Lab, and Rice University
07211 June 30, 1999
07212
07213
07214 Purpose
07215 =======
07216
07217 SORGQR generates an M-by-N real matrix Q with orthonormal columns,
07218 which is defined as the first N columns of a product of K elementary
07219 reflectors of order M
07220
07221 Q = H(1) H(2) . . . H(k)
07222
07223 as returned by SGEQRF.
07224
07225 Arguments
07226 =========
07227
07228 M (input) INTEGER
07229 The number of rows of the matrix Q. M >= 0.
07230
07231 N (input) INTEGER
07232 The number of columns of the matrix Q. M >= N >= 0.
07233
07234 K (input) INTEGER
07235 The number of elementary reflectors whose product defines the
07236 matrix Q. N >= K >= 0.
07237
07238 A (input/output) REAL array, dimension (LDA,N)
07239 On entry, the i-th column must contain the vector which
07240 defines the elementary reflector H(i), for i = 1,2,...,k, as
07241 returned by SGEQRF in the first k columns of its array
07242 argument A.
07243 On exit, the M-by-N matrix Q.
07244
07245 LDA (input) INTEGER
07246 The first dimension of the array A. LDA >= f2cmax(1,M).
07247
07248 TAU (input) REAL array, dimension (K)
07249 TAU(i) must contain the scalar factor of the elementary
07250 reflector H(i), as returned by SGEQRF.
07251
07252 WORK (workspace/output) REAL array, dimension (LWORK)
07253 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
07254
07255 LWORK (input) INTEGER
07256 The dimension of the array WORK. LWORK >= f2cmax(1,N).
07257 For optimum performance LWORK >= N*NB, where NB is the
07258 optimal blocksize.
07259
07260 If LWORK = -1, then a workspace query is assumed; the routine
07261 only calculates the optimal size of the WORK array, returns
07262 this value as the first entry of the WORK array, and no error
07263 message related to LWORK is issued by XERBLA.
07264
07265 INFO (output) INTEGER
07266 = 0: successful exit
07267 < 0: if INFO = -i, the i-th argument has an illegal value
07268
07269 =====================================================================
07270
07271
07272 Test the input arguments
07273
07274 Parameter adjustments */
07275 /* Table of constant values */
07276 static integer c__1 = 1;
07277 static integer c_n1 = -1;
07278 static integer c__3 = 3;
07279 static integer c__2 = 2;
07280
07281 /* System generated locals */
07282 integer a_dim1, a_offset, i__1, i__2, i__3;
07283 /* Local variables */
07284 static integer i__, j, l, nbmin, iinfo, ib;
07285 extern /* Subroutine */ int sorg2r_(integer *, integer *, integer *, real
07286 *, integer *, real *, real *, integer *);
07287 static integer nb, ki, kk, nx;
07288 extern /* Subroutine */ int slarfb_(const char *, const char *, const char *, const char *,
07289 integer *, integer *, integer *, real *, integer *, real *,
07290 integer *, real *, integer *, real *, integer *), xerbla_(const char *, integer *);
07291 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *,
07292 integer *, integer *, ftnlen, ftnlen);
07293 extern /* Subroutine */ int slarft_(const char *, const char *, integer *, integer *,
07294 real *, integer *, real *, real *, integer *);
07295 static integer ldwork, lwkopt;
07296 static logical lquery;
07297 static integer iws;
07298 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
07299
07300
07301 a_dim1 = *lda;
07302 a_offset = 1 + a_dim1 * 1;
07303 a -= a_offset;
07304 --tau;
07305 --work;
07306
07307 /* Function Body */
07308 *info = 0;
07309 nb = ilaenv_(&c__1, "SORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
07310 lwkopt = f2cmax(1,*n) * nb;
07311 work[1] = (real) lwkopt;
07312 lquery = *lwork == -1;
07313 if (*m < 0) {
07314 *info = -1;
07315 } else if (*n < 0 || *n > *m) {
07316 *info = -2;
07317 } else if (*k < 0 || *k > *n) {
07318 *info = -3;
07319 } else if (*lda < f2cmax(1,*m)) {
07320 *info = -5;
07321 } else if (*lwork < f2cmax(1,*n) && ! lquery) {
07322 *info = -8;
07323 }
07324 if (*info != 0) {
07325 i__1 = -(*info);
07326 xerbla_("SORGQR", &i__1);
07327 return 0;
07328 } else if (lquery) {
07329 return 0;
07330 }
07331
07332 /* Quick return if possible */
07333
07334 if (*n <= 0) {
07335 work[1] = 1.f;
07336 return 0;
07337 }
07338
07339 nbmin = 2;
07340 nx = 0;
07341 iws = *n;
07342 if (nb > 1 && nb < *k) {
07343
07344 /* Determine when to cross over from blocked to unblocked code.
07345
07346 Computing MAX */
07347 i__1 = 0, i__2 = ilaenv_(&c__3, "SORGQR", " ", m, n, k, &c_n1, (
07348 ftnlen)6, (ftnlen)1);
07349 nx = f2cmax(i__1,i__2);
07350 if (nx < *k) {
07351
07352 /* Determine if workspace is large enough for blocked code. */
07353
07354 ldwork = *n;
07355 iws = ldwork * nb;
07356 if (*lwork < iws) {
07357
07358 /* Not enough workspace to use optimal NB: reduce NB and
07359 determine the minimum value of NB. */
07360
07361 nb = *lwork / ldwork;
07362 /* Computing MAX */
07363 i__1 = 2, i__2 = ilaenv_(&c__2, "SORGQR", " ", m, n, k, &c_n1,
07364 (ftnlen)6, (ftnlen)1);
07365 nbmin = f2cmax(i__1,i__2);
07366 }
07367 }
07368 }
07369
07370 if (nb >= nbmin && nb < *k && nx < *k) {
07371
07372 /* Use blocked code after the last block.
07373 The first kk columns are handled by the block method. */
07374
07375 ki = (*k - nx - 1) / nb * nb;
07376 /* Computing MIN */
07377 i__1 = *k, i__2 = ki + nb;
07378 kk = f2cmin(i__1,i__2);
07379
07380 /* Set A(1:kk,kk+1:n) to zero. */
07381
07382 i__1 = *n;
07383 for (j = kk + 1; j <= i__1; ++j) {
07384 i__2 = kk;
07385 for (i__ = 1; i__ <= i__2; ++i__) {
07386 a_ref(i__, j) = 0.f;
07387 /* L10: */
07388 }
07389 /* L20: */
07390 }
07391 } else {
07392 kk = 0;
07393 }
07394
07395 /* Use unblocked code for the last or only block. */
07396
07397 if (kk < *n) {
07398 i__1 = *m - kk;
07399 i__2 = *n - kk;
07400 i__3 = *k - kk;
07401 sorg2r_(&i__1, &i__2, &i__3, &a_ref(kk + 1, kk + 1), lda, &tau[kk + 1]
07402 , &work[1], &iinfo);
07403 }
07404
07405 if (kk > 0) {
07406
07407 /* Use blocked code */
07408
07409 i__1 = -nb;
07410 for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
07411 /* Computing MIN */
07412 i__2 = nb, i__3 = *k - i__ + 1;
07413 ib = f2cmin(i__2,i__3);
07414 if (i__ + ib <= *n) {
07415
07416 /* Form the triangular factor of the block reflector
07417 H = H(i) H(i+1) . . . H(i+ib-1) */
07418
07419 i__2 = *m - i__ + 1;
07420 slarft_("Forward", "Columnwise", &i__2, &ib, &a_ref(i__, i__),
07421 lda, &tau[i__], &work[1], &ldwork);
07422
07423 /* Apply H to A(i:m,i+ib:n) from the left */
07424
07425 i__2 = *m - i__ + 1;
07426 i__3 = *n - i__ - ib + 1;
07427 slarfb_("Left", "No transpose", "Forward", "Columnwise", &
07428 i__2, &i__3, &ib, &a_ref(i__, i__), lda, &work[1], &
07429 ldwork, &a_ref(i__, i__ + ib), lda, &work[ib + 1], &
07430 ldwork);
07431 }
07432
07433 /* Apply H to rows i:m of current block */
07434
07435 i__2 = *m - i__ + 1;
07436 sorg2r_(&i__2, &ib, &ib, &a_ref(i__, i__), lda, &tau[i__], &work[
07437 1], &iinfo);
07438
07439 /* Set rows 1:i-1 of current block to zero */
07440
07441 i__2 = i__ + ib - 1;
07442 for (j = i__; j <= i__2; ++j) {
07443 i__3 = i__ - 1;
07444 for (l = 1; l <= i__3; ++l) {
07445 a_ref(l, j) = 0.f;
07446 /* L30: */
07447 }
07448 /* L40: */
07449 }
07450 /* L50: */
07451 }
07452 }
07453
07454 work[1] = (real) iws;
07455 return 0;
07456
07457 /* End of SORGQR */
07458
07459 } /* sorgqr_ */
|
|
||||||||||||||||||||||||||||||||||||
|
Definition at line 7467 of file lapackblas.cpp. References a_ref, c__1, c_n1, f2cmax, ftnlen, ilaenv_(), integer, logical, lsame_(), real, sorgql_(), sorgqr_(), and xerbla_(). Referenced by ssyev_(). 07469 {
07470 /* -- LAPACK routine (version 3.0) --
07471 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
07472 Courant Institute, Argonne National Lab, and Rice University
07473 June 30, 1999
07474
07475
07476 Purpose
07477 =======
07478
07479 SORGTR generates a real orthogonal matrix Q which is defined as the
07480 product of n-1 elementary reflectors of order N, as returned by
07481 SSYTRD:
07482
07483 if UPLO = 'U', Q = H(n-1) . . . H(2) H(1),
07484
07485 if UPLO = 'L', Q = H(1) H(2) . . . H(n-1).
07486
07487 Arguments
07488 =========
07489
07490 UPLO (input) CHARACTER*1
07491 = 'U': Upper triangle of A contains elementary reflectors
07492 from SSYTRD;
07493 = 'L': Lower triangle of A contains elementary reflectors
07494 from SSYTRD.
07495
07496 N (input) INTEGER
07497 The order of the matrix Q. N >= 0.
07498
07499 A (input/output) REAL array, dimension (LDA,N)
07500 On entry, the vectors which define the elementary reflectors,
07501 as returned by SSYTRD.
07502 On exit, the N-by-N orthogonal matrix Q.
07503
07504 LDA (input) INTEGER
07505 The leading dimension of the array A. LDA >= f2cmax(1,N).
07506
07507 TAU (input) REAL array, dimension (N-1)
07508 TAU(i) must contain the scalar factor of the elementary
07509 reflector H(i), as returned by SSYTRD.
07510
07511 WORK (workspace/output) REAL array, dimension (LWORK)
07512 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
07513
07514 LWORK (input) INTEGER
07515 The dimension of the array WORK. LWORK >= f2cmax(1,N-1).
07516 For optimum performance LWORK >= (N-1)*NB, where NB is
07517 the optimal blocksize.
07518
07519 If LWORK = -1, then a workspace query is assumed; the routine
07520 only calculates the optimal size of the WORK array, returns
07521 this value as the first entry of the WORK array, and no error
07522 message related to LWORK is issued by XERBLA.
07523
07524 INFO (output) INTEGER
07525 = 0: successful exit
07526 < 0: if INFO = -i, the i-th argument had an illegal value
07527
07528 =====================================================================
07529
07530
07531 Test the input arguments
07532
07533 Parameter adjustments */
07534 /* Table of constant values */
07535 static integer c__1 = 1;
07536 static integer c_n1 = -1;
07537
07538 /* System generated locals */
07539 integer a_dim1, a_offset, i__1, i__2, i__3;
07540 /* Local variables */
07541 static integer i__, j;
07542 extern logical lsame_(const char *, const char *);
07543 static integer iinfo;
07544 static logical upper;
07545 static integer nb;
07546 extern /* Subroutine */ int xerbla_(const char *, integer *);
07547 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *,
07548 integer *, integer *, ftnlen, ftnlen);
07549 extern /* Subroutine */ int sorgql_(integer *, integer *, integer *, real
07550 *, integer *, real *, real *, integer *, integer *), sorgqr_(
07551 integer *, integer *, integer *, real *, integer *, real *, real *
07552 , integer *, integer *);
07553 static logical lquery;
07554 static integer lwkopt;
07555 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
07556
07557
07558 a_dim1 = *lda;
07559 a_offset = 1 + a_dim1 * 1;
07560 a -= a_offset;
07561 --tau;
07562 --work;
07563
07564 /* Function Body */
07565 *info = 0;
07566 lquery = *lwork == -1;
07567 upper = lsame_(uplo, "U");
07568 if (! upper && ! lsame_(uplo, "L")) {
07569 *info = -1;
07570 } else if (*n < 0) {
07571 *info = -2;
07572 } else if (*lda < f2cmax(1,*n)) {
07573 *info = -4;
07574 } else /* if(complicated condition) */ {
07575 /* Computing MAX */
07576 i__1 = 1, i__2 = *n - 1;
07577 if (*lwork < f2cmax(i__1,i__2) && ! lquery) {
07578 *info = -7;
07579 }
07580 }
07581
07582 if (*info == 0) {
07583 if (upper) {
07584 i__1 = *n - 1;
07585 i__2 = *n - 1;
07586 i__3 = *n - 1;
07587 nb = ilaenv_(&c__1, "SORGQL", " ", &i__1, &i__2, &i__3, &c_n1, (
07588 ftnlen)6, (ftnlen)1);
07589 } else {
07590 i__1 = *n - 1;
07591 i__2 = *n - 1;
07592 i__3 = *n - 1;
07593 nb = ilaenv_(&c__1, "SORGQR", " ", &i__1, &i__2, &i__3, &c_n1, (
07594 ftnlen)6, (ftnlen)1);
07595 }
07596 /* Computing MAX */
07597 i__1 = 1, i__2 = *n - 1;
07598 lwkopt = f2cmax(i__1,i__2) * nb;
07599 work[1] = (real) lwkopt;
07600 }
07601
07602 if (*info != 0) {
07603 i__1 = -(*info);
07604 xerbla_("SORGTR", &i__1);
07605 return 0;
07606 } else if (lquery) {
07607 return 0;
07608 }
07609
07610 /* Quick return if possible */
07611
07612 if (*n == 0) {
07613 work[1] = 1.f;
07614 return 0;
07615 }
07616
07617 if (upper) {
07618
07619 /* Q was determined by a call to SSYTRD with UPLO = 'U'
07620
07621 Shift the vectors which define the elementary reflectors one
07622 column to the left, and set the last row and column of Q to
07623 those of the unit matrix */
07624
07625 i__1 = *n - 1;
07626 for (j = 1; j <= i__1; ++j) {
07627 i__2 = j - 1;
07628 for (i__ = 1; i__ <= i__2; ++i__) {
07629 a_ref(i__, j) = a_ref(i__, j + 1);
07630 /* L10: */
07631 }
07632 a_ref(*n, j) = 0.f;
07633 /* L20: */
07634 }
07635 i__1 = *n - 1;
07636 for (i__ = 1; i__ <= i__1; ++i__) {
07637 a_ref(i__, *n) = 0.f;
07638 /* L30: */
07639 }
07640 a_ref(*n, *n) = 1.f;
07641
07642 /* Generate Q(1:n-1,1:n-1) */
07643
07644 i__1 = *n - 1;
07645 i__2 = *n - 1;
07646 i__3 = *n - 1;
07647 sorgql_(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1],
07648 lwork, &iinfo);
07649
07650 } else {
07651
07652 /* Q was determined by a call to SSYTRD with UPLO = 'L'.
07653
07654 Shift the vectors which define the elementary reflectors one
07655 column to the right, and set the first row and column of Q to
07656 those of the unit matrix */
07657
07658 for (j = *n; j >= 2; --j) {
07659 a_ref(1, j) = 0.f;
07660 i__1 = *n;
07661 for (i__ = j + 1; i__ <= i__1; ++i__) {
07662 a_ref(i__, j) = a_ref(i__, j - 1);
07663 /* L40: */
07664 }
07665 /* L50: */
07666 }
07667 a_ref(1, 1) = 1.f;
07668 i__1 = *n;
07669 for (i__ = 2; i__ <= i__1; ++i__) {
07670 a_ref(i__, 1) = 0.f;
07671 /* L60: */
07672 }
07673 if (*n > 1) {
07674
07675 /* Generate Q(2:n,2:n) */
07676
07677 i__1 = *n - 1;
07678 i__2 = *n - 1;
07679 i__3 = *n - 1;
07680 sorgqr_(&i__1, &i__2, &i__3, &a_ref(2, 2), lda, &tau[1], &work[1],
07681 lwork, &iinfo);
07682 }
07683 }
07684 work[1] = (real) lwkopt;
07685 return 0;
07686
07687 /* End of SORGTR */
07688
07689 } /* sorgtr_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 24823 of file lapackblas.cpp. References a_ref, c__1, c___ref, f2cmax, integer, logical, lsame_(), real, slarf_(), and xerbla_(). Referenced by sormqr_(). 24826 {
24827 /* -- LAPACK routine (version 3.0) --
24828 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
24829 Courant Institute, Argonne National Lab, and Rice University
24830 February 29, 1992
24831
24832
24833 Purpose
24834 =======
24835
24836 SORM2R overwrites the general real m by n matrix C with
24837
24838 Q * C if SIDE = 'L' and TRANS = 'N', or
24839
24840 Q'* C if SIDE = 'L' and TRANS = 'T', or
24841
24842 C * Q if SIDE = 'R' and TRANS = 'N', or
24843
24844 C * Q' if SIDE = 'R' and TRANS = 'T',
24845
24846 where Q is a real orthogonal matrix defined as the product of k
24847 elementary reflectors
24848
24849 Q = H(1) H(2) . . . H(k)
24850
24851 as returned by SGEQRF. Q is of order m if SIDE = 'L' and of order n
24852 if SIDE = 'R'.
24853
24854 Arguments
24855 =========
24856
24857 SIDE (input) CHARACTER*1
24858 = 'L': apply Q or Q' from the Left
24859 = 'R': apply Q or Q' from the Right
24860
24861 TRANS (input) CHARACTER*1
24862 = 'N': apply Q (No transpose)
24863 = 'T': apply Q' (Transpose)
24864
24865 M (input) INTEGER
24866 The number of rows of the matrix C. M >= 0.
24867
24868 N (input) INTEGER
24869 The number of columns of the matrix C. N >= 0.
24870
24871 K (input) INTEGER
24872 The number of elementary reflectors whose product defines
24873 the matrix Q.
24874 If SIDE = 'L', M >= K >= 0;
24875 if SIDE = 'R', N >= K >= 0.
24876
24877 A (input) REAL array, dimension (LDA,K)
24878 The i-th column must contain the vector which defines the
24879 elementary reflector H(i), for i = 1,2,...,k, as returned by
24880 SGEQRF in the first k columns of its array argument A.
24881 A is modified by the routine but restored on exit.
24882
24883 LDA (input) INTEGER
24884 The leading dimension of the array A.
24885 If SIDE = 'L', LDA >= max(1,M);
24886 if SIDE = 'R', LDA >= max(1,N).
24887
24888 TAU (input) REAL array, dimension (K)
24889 TAU(i) must contain the scalar factor of the elementary
24890 reflector H(i), as returned by SGEQRF.
24891
24892 C (input/output) REAL array, dimension (LDC,N)
24893 On entry, the m by n matrix C.
24894 On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
24895
24896 LDC (input) INTEGER
24897 The leading dimension of the array C. LDC >= max(1,M).
24898
24899 WORK (workspace) REAL array, dimension
24900 (N) if SIDE = 'L',
24901 (M) if SIDE = 'R'
24902
24903 INFO (output) INTEGER
24904 = 0: successful exit
24905 < 0: if INFO = -i, the i-th argument had an illegal value
24906
24907 =====================================================================
24908
24909
24910 Test the input arguments
24911
24912 Parameter adjustments */
24913 /* Table of constant values */
24914 static integer c__1 = 1;
24915
24916 /* System generated locals */
24917 integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2;
24918 /* Local variables */
24919 static logical left;
24920 static integer i__;
24921 extern logical lsame_(const char *, const char *);
24922 extern /* Subroutine */ int slarf_(const char *, integer *, integer *, real *,
24923 integer *, real *, real *, integer *, real *);
24924 static integer i1, i2, i3, ic, jc, mi, ni, nq;
24925 extern /* Subroutine */ int xerbla_(const char *, integer *);
24926 static logical notran;
24927 static real aii;
24928 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
24929 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
24930
24931
24932 a_dim1 = *lda;
24933 a_offset = 1 + a_dim1 * 1;
24934 a -= a_offset;
24935 --tau;
24936 c_dim1 = *ldc;
24937 c_offset = 1 + c_dim1 * 1;
24938 c__ -= c_offset;
24939 --work;
24940
24941 /* Function Body */
24942 *info = 0;
24943 left = lsame_(side, "L");
24944 notran = lsame_(trans, "N");
24945
24946 /* NQ is the order of Q */
24947
24948 if (left) {
24949 nq = *m;
24950 } else {
24951 nq = *n;
24952 }
24953 if (! left && ! lsame_(side, "R")) {
24954 *info = -1;
24955 } else if (! notran && ! lsame_(trans, "T")) {
24956 *info = -2;
24957 } else if (*m < 0) {
24958 *info = -3;
24959 } else if (*n < 0) {
24960 *info = -4;
24961 } else if (*k < 0 || *k > nq) {
24962 *info = -5;
24963 } else if (*lda < f2cmax(1,nq)) {
24964 *info = -7;
24965 } else if (*ldc < f2cmax(1,*m)) {
24966 *info = -10;
24967 }
24968 if (*info != 0) {
24969 i__1 = -(*info);
24970 xerbla_("SORM2R", &i__1);
24971 return 0;
24972 }
24973
24974 /* Quick return if possible */
24975
24976 if (*m == 0 || *n == 0 || *k == 0) {
24977 return 0;
24978 }
24979
24980 if (left && ! notran || ! left && notran) {
24981 i1 = 1;
24982 i2 = *k;
24983 i3 = 1;
24984 } else {
24985 i1 = *k;
24986 i2 = 1;
24987 i3 = -1;
24988 }
24989
24990 if (left) {
24991 ni = *n;
24992 jc = 1;
24993 } else {
24994 mi = *m;
24995 ic = 1;
24996 }
24997
24998 i__1 = i2;
24999 i__2 = i3;
25000 for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
25001 if (left) {
25002
25003 /* H(i) is applied to C(i:m,1:n) */
25004
25005 mi = *m - i__ + 1;
25006 ic = i__;
25007 } else {
25008
25009 /* H(i) is applied to C(1:m,i:n) */
25010
25011 ni = *n - i__ + 1;
25012 jc = i__;
25013 }
25014
25015 /* Apply H(i) */
25016
25017 aii = a_ref(i__, i__);
25018 a_ref(i__, i__) = 1.f;
25019 slarf_(side, &mi, &ni, &a_ref(i__, i__), &c__1, &tau[i__], &c___ref(
25020 ic, jc), ldc, &work[1]);
25021 a_ref(i__, i__) = aii;
25022 /* L10: */
25023 }
25024 return 0;
25025
25026 /* End of SORM2R */
25027
25028 } /* sorm2r_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 21618 of file lapackblas.cpp. References a_ref, c__1, c__2, c___ref, c_n1, f2cmax, f2cmin, ftnlen, ilaenv_(), integer, logical, lsame_(), real, s_cat(), sormlq_(), sormqr_(), and xerbla_(). Referenced by sgesvd_(). 21621 {
21622 /* -- LAPACK routine (version 3.0) --
21623 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
21624 Courant Institute, Argonne National Lab, and Rice University
21625 June 30, 1999
21626
21627
21628 Purpose
21629 =======
21630
21631 If VECT = 'Q', SORMBR overwrites the general real M-by-N matrix C
21632 with
21633 SIDE = 'L' SIDE = 'R'
21634 TRANS = 'N': Q * C C * Q
21635 TRANS = 'T': Q**T * C C * Q**T
21636
21637 If VECT = 'P', SORMBR overwrites the general real M-by-N matrix C
21638 with
21639 SIDE = 'L' SIDE = 'R'
21640 TRANS = 'N': P * C C * P
21641 TRANS = 'T': P**T * C C * P**T
21642
21643 Here Q and P**T are the orthogonal matrices determined by SGEBRD when
21644 reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and
21645 P**T are defined as products of elementary reflectors H(i) and G(i)
21646 respectively.
21647
21648 Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the
21649 order of the orthogonal matrix Q or P**T that is applied.
21650
21651 If VECT = 'Q', A is assumed to have been an NQ-by-K matrix:
21652 if nq >= k, Q = H(1) H(2) . . . H(k);
21653 if nq < k, Q = H(1) H(2) . . . H(nq-1).
21654
21655 If VECT = 'P', A is assumed to have been a K-by-NQ matrix:
21656 if k < nq, P = G(1) G(2) . . . G(k);
21657 if k >= nq, P = G(1) G(2) . . . G(nq-1).
21658
21659 Arguments
21660 =========
21661
21662 VECT (input) CHARACTER*1
21663 = 'Q': apply Q or Q**T;
21664 = 'P': apply P or P**T.
21665
21666 SIDE (input) CHARACTER*1
21667 = 'L': apply Q, Q**T, P or P**T from the Left;
21668 = 'R': apply Q, Q**T, P or P**T from the Right.
21669
21670 TRANS (input) CHARACTER*1
21671 = 'N': No transpose, apply Q or P;
21672 = 'T': Transpose, apply Q**T or P**T.
21673
21674 M (input) INTEGER
21675 The number of rows of the matrix C. M >= 0.
21676
21677 N (input) INTEGER
21678 The number of columns of the matrix C. N >= 0.
21679
21680 K (input) INTEGER
21681 If VECT = 'Q', the number of columns in the original
21682 matrix reduced by SGEBRD.
21683 If VECT = 'P', the number of rows in the original
21684 matrix reduced by SGEBRD.
21685 K >= 0.
21686
21687 A (input) REAL array, dimension
21688 (LDA,min(nq,K)) if VECT = 'Q'
21689 (LDA,nq) if VECT = 'P'
21690 The vectors which define the elementary reflectors H(i) and
21691 G(i), whose products determine the matrices Q and P, as
21692 returned by SGEBRD.
21693
21694 LDA (input) INTEGER
21695 The leading dimension of the array A.
21696 If VECT = 'Q', LDA >= max(1,nq);
21697 if VECT = 'P', LDA >= max(1,min(nq,K)).
21698
21699 TAU (input) REAL array, dimension (min(nq,K))
21700 TAU(i) must contain the scalar factor of the elementary
21701 reflector H(i) or G(i) which determines Q or P, as returned
21702 by SGEBRD in the array argument TAUQ or TAUP.
21703
21704 C (input/output) REAL array, dimension (LDC,N)
21705 On entry, the M-by-N matrix C.
21706 On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q
21707 or P*C or P**T*C or C*P or C*P**T.
21708
21709 LDC (input) INTEGER
21710 The leading dimension of the array C. LDC >= max(1,M).
21711
21712 WORK (workspace/output) REAL array, dimension (LWORK)
21713 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
21714
21715 LWORK (input) INTEGER
21716 The dimension of the array WORK.
21717 If SIDE = 'L', LWORK >= max(1,N);
21718 if SIDE = 'R', LWORK >= max(1,M).
21719 For optimum performance LWORK >= N*NB if SIDE = 'L', and
21720 LWORK >= M*NB if SIDE = 'R', where NB is the optimal
21721 blocksize.
21722
21723 If LWORK = -1, then a workspace query is assumed; the routine
21724 only calculates the optimal size of the WORK array, returns
21725 this value as the first entry of the WORK array, and no error
21726 message related to LWORK is issued by XERBLA.
21727
21728 INFO (output) INTEGER
21729 = 0: successful exit
21730 < 0: if INFO = -i, the i-th argument had an illegal value
21731
21732 =====================================================================
21733
21734
21735 Test the input arguments
21736
21737 Parameter adjustments */
21738 /* Table of constant values */
21739 static integer c__1 = 1;
21740 static integer c_n1 = -1;
21741 static integer c__2 = 2;
21742
21743 typedef const char *address;
21744
21745 /* System generated locals */
21746 address a__1[2];
21747 integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2];
21748 char ch__1[2];
21749 /* Builtin functions
21750 Subroutine */ int s_cat(char *, const char **, integer *, integer *, ftnlen);
21751 /* Local variables */
21752 static logical left;
21753 extern logical lsame_(const char *, const char *);
21754 static integer iinfo, i1, i2, nb, mi, ni, nq, nw;
21755 extern /* Subroutine */ int xerbla_(const char *, integer *);
21756 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *,
21757 integer *, integer *, ftnlen, ftnlen);
21758 static logical notran, applyq;
21759 static char transt[1];
21760 extern /* Subroutine */ int sormlq_(const char *, const char *, integer *, integer *,
21761 integer *, real *, integer *, real *, real *, integer *, real *,
21762 integer *, integer *);
21763 static integer lwkopt;
21764 static logical lquery;
21765 extern /* Subroutine */ int sormqr_(const char *, const char *, integer *, integer *,
21766 integer *, real *, integer *, real *, real *, integer *, real *,
21767 integer *, integer *);
21768 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
21769 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
21770
21771
21772 a_dim1 = *lda;
21773 a_offset = 1 + a_dim1 * 1;
21774 a -= a_offset;
21775 --tau;
21776 c_dim1 = *ldc;
21777 c_offset = 1 + c_dim1 * 1;
21778 c__ -= c_offset;
21779 --work;
21780
21781 /* Function Body */
21782 *info = 0;
21783 applyq = lsame_(vect, "Q");
21784 left = lsame_(side, "L");
21785 notran = lsame_(trans, "N");
21786 lquery = *lwork == -1;
21787
21788 /* NQ is the order of Q or P and NW is the minimum dimension of WORK */
21789
21790 if (left) {
21791 nq = *m;
21792 nw = *n;
21793 } else {
21794 nq = *n;
21795 nw = *m;
21796 }
21797 if (! applyq && ! lsame_(vect, "P")) {
21798 *info = -1;
21799 } else if (! left && ! lsame_(side, "R")) {
21800 *info = -2;
21801 } else if (! notran && ! lsame_(trans, "T")) {
21802 *info = -3;
21803 } else if (*m < 0) {
21804 *info = -4;
21805 } else if (*n < 0) {
21806 *info = -5;
21807 } else if (*k < 0) {
21808 *info = -6;
21809 } else /* if(complicated condition) */ {
21810 /* Computing MAX */
21811 i__1 = 1, i__2 = f2cmin(nq,*k);
21812 if (applyq && *lda < f2cmax(1,nq) || ! applyq && *lda < f2cmax(i__1,i__2)) {
21813 *info = -8;
21814 } else if (*ldc < f2cmax(1,*m)) {
21815 *info = -11;
21816 } else if (*lwork < f2cmax(1,nw) && ! lquery) {
21817 *info = -13;
21818 }
21819 }
21820
21821 if (*info == 0) {
21822 if (applyq) {
21823 if (left) {
21824 /* Writing concatenation */
21825 i__3[0] = 1, a__1[0] = side;
21826 i__3[1] = 1, a__1[1] = trans;
21827 s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
21828 i__1 = *m - 1;
21829 i__2 = *m - 1;
21830 nb = ilaenv_(&c__1, "SORMQR", ch__1, &i__1, n, &i__2, &c_n1, (
21831 ftnlen)6, (ftnlen)2);
21832 } else {
21833 /* Writing concatenation */
21834 i__3[0] = 1, a__1[0] = side;
21835 i__3[1] = 1, a__1[1] = trans;
21836 s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
21837 i__1 = *n - 1;
21838 i__2 = *n - 1;
21839 nb = ilaenv_(&c__1, "SORMQR", ch__1, m, &i__1, &i__2, &c_n1, (
21840 ftnlen)6, (ftnlen)2);
21841 }
21842 } else {
21843 if (left) {
21844 /* Writing concatenation */
21845 i__3[0] = 1, a__1[0] = side;
21846 i__3[1] = 1, a__1[1] = trans;
21847 s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
21848 i__1 = *m - 1;
21849 i__2 = *m - 1;
21850 nb = ilaenv_(&c__1, "SORMLQ", ch__1, &i__1, n, &i__2, &c_n1, (
21851 ftnlen)6, (ftnlen)2);
21852 } else {
21853 /* Writing concatenation */
21854 i__3[0] = 1, a__1[0] = side;
21855 i__3[1] = 1, a__1[1] = trans;
21856 s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
21857 i__1 = *n - 1;
21858 i__2 = *n - 1;
21859 nb = ilaenv_(&c__1, "SORMLQ", ch__1, m, &i__1, &i__2, &c_n1, (
21860 ftnlen)6, (ftnlen)2);
21861 }
21862 }
21863 lwkopt = f2cmax(1,nw) * nb;
21864 work[1] = (real) lwkopt;
21865 }
21866
21867 if (*info != 0) {
21868 i__1 = -(*info);
21869 xerbla_("SORMBR", &i__1);
21870 return 0;
21871 } else if (lquery) {
21872 return 0;
21873 }
21874
21875 /* Quick return if possible */
21876
21877 work[1] = 1.f;
21878 if (*m == 0 || *n == 0) {
21879 return 0;
21880 }
21881
21882 if (applyq) {
21883
21884 /* Apply Q */
21885
21886 if (nq >= *k) {
21887
21888 /* Q was determined by a call to SGEBRD with nq >= k */
21889
21890 sormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
21891 c_offset], ldc, &work[1], lwork, &iinfo);
21892 } else if (nq > 1) {
21893
21894 /* Q was determined by a call to SGEBRD with nq < k */
21895
21896 if (left) {
21897 mi = *m - 1;
21898 ni = *n;
21899 i1 = 2;
21900 i2 = 1;
21901 } else {
21902 mi = *m;
21903 ni = *n - 1;
21904 i1 = 1;
21905 i2 = 2;
21906 }
21907 i__1 = nq - 1;
21908 sormqr_(side, trans, &mi, &ni, &i__1, &a_ref(2, 1), lda, &tau[1],
21909 &c___ref(i1, i2), ldc, &work[1], lwork, &iinfo);
21910 }
21911 } else {
21912
21913 /* Apply P */
21914
21915 if (notran) {
21916 *(unsigned char *)transt = 'T';
21917 } else {
21918 *(unsigned char *)transt = 'N';
21919 }
21920 if (nq > *k) {
21921
21922 /* P was determined by a call to SGEBRD with nq > k */
21923
21924 sormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[
21925 c_offset], ldc, &work[1], lwork, &iinfo);
21926 } else if (nq > 1) {
21927
21928 /* P was determined by a call to SGEBRD with nq <= k */
21929
21930 if (left) {
21931 mi = *m - 1;
21932 ni = *n;
21933 i1 = 2;
21934 i2 = 1;
21935 } else {
21936 mi = *m;
21937 ni = *n - 1;
21938 i1 = 1;
21939 i2 = 2;
21940 }
21941 i__1 = nq - 1;
21942 sormlq_(side, transt, &mi, &ni, &i__1, &a_ref(1, 2), lda, &tau[1],
21943 &c___ref(i1, i2), ldc, &work[1], lwork, &iinfo);
21944 }
21945 }
21946 work[1] = (real) lwkopt;
21947 return 0;
21948
21949 /* End of SORMBR */
21950
21951 } /* sormbr_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 24069 of file lapackblas.cpp. References a_ref, c___ref, f2cmax, integer, logical, lsame_(), real, slarf_(), and xerbla_(). Referenced by sormlq_(). 24072 {
24073 /* -- LAPACK routine (version 3.0) --
24074 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
24075 Courant Institute, Argonne National Lab, and Rice University
24076 February 29, 1992
24077
24078
24079 Purpose
24080 =======
24081
24082 SORML2 overwrites the general real m by n matrix C with
24083
24084 Q * C if SIDE = 'L' and TRANS = 'N', or
24085
24086 Q'* C if SIDE = 'L' and TRANS = 'T', or
24087
24088 C * Q if SIDE = 'R' and TRANS = 'N', or
24089
24090 C * Q' if SIDE = 'R' and TRANS = 'T',
24091
24092 where Q is a real orthogonal matrix defined as the product of k
24093 elementary reflectors
24094
24095 Q = H(k) . . . H(2) H(1)
24096
24097 as returned by SGELQF. Q is of order m if SIDE = 'L' and of order n
24098 if SIDE = 'R'.
24099
24100 Arguments
24101 =========
24102
24103 SIDE (input) CHARACTER*1
24104 = 'L': apply Q or Q' from the Left
24105 = 'R': apply Q or Q' from the Right
24106
24107 TRANS (input) CHARACTER*1
24108 = 'N': apply Q (No transpose)
24109 = 'T': apply Q' (Transpose)
24110
24111 M (input) INTEGER
24112 The number of rows of the matrix C. M >= 0.
24113
24114 N (input) INTEGER
24115 The number of columns of the matrix C. N >= 0.
24116
24117 K (input) INTEGER
24118 The number of elementary reflectors whose product defines
24119 the matrix Q.
24120 If SIDE = 'L', M >= K >= 0;
24121 if SIDE = 'R', N >= K >= 0.
24122
24123 A (input) REAL array, dimension
24124 (LDA,M) if SIDE = 'L',
24125 (LDA,N) if SIDE = 'R'
24126 The i-th row must contain the vector which defines the
24127 elementary reflector H(i), for i = 1,2,...,k, as returned by
24128 SGELQF in the first k rows of its array argument A.
24129 A is modified by the routine but restored on exit.
24130
24131 LDA (input) INTEGER
24132 The leading dimension of the array A. LDA >= max(1,K).
24133
24134 TAU (input) REAL array, dimension (K)
24135 TAU(i) must contain the scalar factor of the elementary
24136 reflector H(i), as returned by SGELQF.
24137
24138 C (input/output) REAL array, dimension (LDC,N)
24139 On entry, the m by n matrix C.
24140 On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
24141
24142 LDC (input) INTEGER
24143 The leading dimension of the array C. LDC >= max(1,M).
24144
24145 WORK (workspace) REAL array, dimension
24146 (N) if SIDE = 'L',
24147 (M) if SIDE = 'R'
24148
24149 INFO (output) INTEGER
24150 = 0: successful exit
24151 < 0: if INFO = -i, the i-th argument had an illegal value
24152
24153 =====================================================================
24154
24155
24156 Test the input arguments
24157
24158 Parameter adjustments */
24159 /* System generated locals */
24160 integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2;
24161 /* Local variables */
24162 static logical left;
24163 static integer i__;
24164 extern logical lsame_(const char *, const char *);
24165 extern /* Subroutine */ int slarf_(const char *, integer *, integer *, real *,
24166 integer *, real *, real *, integer *, real *);
24167 static integer i1, i2, i3, ic, jc, mi, ni, nq;
24168 extern /* Subroutine */ int xerbla_(const char *, integer *);
24169 static logical notran;
24170 static real aii;
24171 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
24172 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
24173
24174 a_dim1 = *lda;
24175 a_offset = 1 + a_dim1 * 1;
24176 a -= a_offset;
24177 --tau;
24178 c_dim1 = *ldc;
24179 c_offset = 1 + c_dim1 * 1;
24180 c__ -= c_offset;
24181 --work;
24182
24183 /* Function Body */
24184 *info = 0;
24185 left = lsame_(side, "L");
24186 notran = lsame_(trans, "N");
24187
24188 /* NQ is the order of Q */
24189
24190 if (left) {
24191 nq = *m;
24192 } else {
24193 nq = *n;
24194 }
24195 if (! left && ! lsame_(side, "R")) {
24196 *info = -1;
24197 } else if (! notran && ! lsame_(trans, "T")) {
24198 *info = -2;
24199 } else if (*m < 0) {
24200 *info = -3;
24201 } else if (*n < 0) {
24202 *info = -4;
24203 } else if (*k < 0 || *k > nq) {
24204 *info = -5;
24205 } else if (*lda < f2cmax(1,*k)) {
24206 *info = -7;
24207 } else if (*ldc < f2cmax(1,*m)) {
24208 *info = -10;
24209 }
24210 if (*info != 0) {
24211 i__1 = -(*info);
24212 xerbla_("SORML2", &i__1);
24213 return 0;
24214 }
24215
24216 /* Quick return if possible */
24217
24218 if (*m == 0 || *n == 0 || *k == 0) {
24219 return 0;
24220 }
24221
24222 if (left && notran || ! left && ! notran) {
24223 i1 = 1;
24224 i2 = *k;
24225 i3 = 1;
24226 } else {
24227 i1 = *k;
24228 i2 = 1;
24229 i3 = -1;
24230 }
24231
24232 if (left) {
24233 ni = *n;
24234 jc = 1;
24235 } else {
24236 mi = *m;
24237 ic = 1;
24238 }
24239
24240 i__1 = i2;
24241 i__2 = i3;
24242 for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
24243 if (left) {
24244
24245 /* H(i) is applied to C(i:m,1:n) */
24246
24247 mi = *m - i__ + 1;
24248 ic = i__;
24249 } else {
24250
24251 /* H(i) is applied to C(1:m,i:n) */
24252
24253 ni = *n - i__ + 1;
24254 jc = i__;
24255 }
24256
24257 /* Apply H(i) */
24258
24259 aii = a_ref(i__, i__);
24260 a_ref(i__, i__) = 1.f;
24261 slarf_(side, &mi, &ni, &a_ref(i__, i__), lda, &tau[i__], &c___ref(ic,
24262 jc), ldc, &work[1]);
24263 a_ref(i__, i__) = aii;
24264 /* L10: */
24265 }
24266 return 0;
24267
24268 /* End of SORML2 */
24269
24270 } /* sorml2_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 22190 of file lapackblas.cpp. References a_ref, c__1, c__2, c___ref, c_n1, f2cmax, f2cmin, ftnlen, ilaenv_(), integer, logical, lsame_(), real, s_cat(), slarfb_(), slarft_(), sorml2_(), t, and xerbla_(). Referenced by sormbr_(). 22193 {
22194 /* -- LAPACK routine (version 3.0) --
22195 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
22196 Courant Institute, Argonne National Lab, and Rice University
22197 June 30, 1999
22198
22199
22200 Purpose
22201 =======
22202
22203 SORMLQ overwrites the general real M-by-N matrix C with
22204
22205 SIDE = 'L' SIDE = 'R'
22206 TRANS = 'N': Q * C C * Q
22207 TRANS = 'T': Q**T * C C * Q**T
22208
22209 where Q is a real orthogonal matrix defined as the product of k
22210 elementary reflectors
22211
22212 Q = H(k) . . . H(2) H(1)
22213
22214 as returned by SGELQF. Q is of order M if SIDE = 'L' and of order N
22215 if SIDE = 'R'.
22216
22217 Arguments
22218 =========
22219
22220 SIDE (input) CHARACTER*1
22221 = 'L': apply Q or Q**T from the Left;
22222 = 'R': apply Q or Q**T from the Right.
22223
22224 TRANS (input) CHARACTER*1
22225 = 'N': No transpose, apply Q;
22226 = 'T': Transpose, apply Q**T.
22227
22228 M (input) INTEGER
22229 The number of rows of the matrix C. M >= 0.
22230
22231 N (input) INTEGER
22232 The number of columns of the matrix C. N >= 0.
22233
22234 K (input) INTEGER
22235 The number of elementary reflectors whose product defines
22236 the matrix Q.
22237 If SIDE = 'L', M >= K >= 0;
22238 if SIDE = 'R', N >= K >= 0.
22239
22240 A (input) REAL array, dimension
22241 (LDA,M) if SIDE = 'L',
22242 (LDA,N) if SIDE = 'R'
22243 The i-th row must contain the vector which defines the
22244 elementary reflector H(i), for i = 1,2,...,k, as returned by
22245 SGELQF in the first k rows of its array argument A.
22246 A is modified by the routine but restored on exit.
22247
22248 LDA (input) INTEGER
22249 The leading dimension of the array A. LDA >= max(1,K).
22250
22251 TAU (input) REAL array, dimension (K)
22252 TAU(i) must contain the scalar factor of the elementary
22253 reflector H(i), as returned by SGELQF.
22254
22255 C (input/output) REAL array, dimension (LDC,N)
22256 On entry, the M-by-N matrix C.
22257 On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
22258
22259 LDC (input) INTEGER
22260 The leading dimension of the array C. LDC >= max(1,M).
22261
22262 WORK (workspace/output) REAL array, dimension (LWORK)
22263 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
22264
22265 LWORK (input) INTEGER
22266 The dimension of the array WORK.
22267 If SIDE = 'L', LWORK >= max(1,N);
22268 if SIDE = 'R', LWORK >= max(1,M).
22269 For optimum performance LWORK >= N*NB if SIDE = 'L', and
22270 LWORK >= M*NB if SIDE = 'R', where NB is the optimal
22271 blocksize.
22272
22273 If LWORK = -1, then a workspace query is assumed; the routine
22274 only calculates the optimal size of the WORK array, returns
22275 this value as the first entry of the WORK array, and no error
22276 message related to LWORK is issued by XERBLA.
22277
22278 INFO (output) INTEGER
22279 = 0: successful exit
22280 < 0: if INFO = -i, the i-th argument had an illegal value
22281
22282 =====================================================================
22283
22284
22285 Test the input arguments
22286
22287 Parameter adjustments */
22288 /* Table of constant values */
22289 static integer c__1 = 1;
22290 static integer c_n1 = -1;
22291 static integer c__2 = 2;
22292 static integer c__65 = 65;
22293
22294 typedef const char *address;
22295 /* System generated locals */
22296 address a__1[2];
22297 integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4,
22298 i__5;
22299 char ch__1[2];
22300 /* Builtin functions
22301 Subroutine */ int s_cat(char *, const char **, integer *, integer *, ftnlen);
22302 /* Local variables */
22303 static logical left;
22304 static integer i__;
22305 static real t[4160] /* was [65][64] */;
22306 extern logical lsame_(const char *, const char *);
22307 static integer nbmin, iinfo, i1, i2, i3, ib, ic, jc;
22308 extern /* Subroutine */ int sorml2_(const char *, const char *, integer *, integer *,
22309 integer *, real *, integer *, real *, real *, integer *, real *,
22310 integer *);
22311 static integer nb, mi, ni, nq, nw;
22312 extern /* Subroutine */ int slarfb_(const char *, const char *, const char *, const char *,
22313 integer *, integer *, integer *, real *, integer *, real *,
22314 integer *, real *, integer *, real *, integer *), xerbla_(const char *, integer *);
22315 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *,
22316 integer *, integer *, ftnlen, ftnlen);
22317 extern /* Subroutine */ int slarft_(const char *, const char *, integer *, integer *,
22318 real *, integer *, real *, real *, integer *);
22319 static logical notran;
22320 static integer ldwork;
22321 static char transt[1];
22322 static integer lwkopt;
22323 static logical lquery;
22324 static integer iws;
22325 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
22326 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
22327
22328
22329 a_dim1 = *lda;
22330 a_offset = 1 + a_dim1 * 1;
22331 a -= a_offset;
22332 --tau;
22333 c_dim1 = *ldc;
22334 c_offset = 1 + c_dim1 * 1;
22335 c__ -= c_offset;
22336 --work;
22337
22338 /* Function Body */
22339 *info = 0;
22340 left = lsame_(side, "L");
22341 notran = lsame_(trans, "N");
22342 lquery = *lwork == -1;
22343
22344 /* NQ is the order of Q and NW is the minimum dimension of WORK */
22345
22346 if (left) {
22347 nq = *m;
22348 nw = *n;
22349 } else {
22350 nq = *n;
22351 nw = *m;
22352 }
22353 if (! left && ! lsame_(side, "R")) {
22354 *info = -1;
22355 } else if (! notran && ! lsame_(trans, "T")) {
22356 *info = -2;
22357 } else if (*m < 0) {
22358 *info = -3;
22359 } else if (*n < 0) {
22360 *info = -4;
22361 } else if (*k < 0 || *k > nq) {
22362 *info = -5;
22363 } else if (*lda < f2cmax(1,*k)) {
22364 *info = -7;
22365 } else if (*ldc < f2cmax(1,*m)) {
22366 *info = -10;
22367 } else if (*lwork < f2cmax(1,nw) && ! lquery) {
22368 *info = -12;
22369 }
22370
22371 if (*info == 0) {
22372
22373 /* Determine the block size. NB may be at most NBMAX, where NBMAX
22374 is used to define the local array T.
22375
22376 Computing MIN
22377 Writing concatenation */
22378 i__3[0] = 1, a__1[0] = side;
22379 i__3[1] = 1, a__1[1] = trans;
22380 s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
22381 i__1 = 64, i__2 = ilaenv_(&c__1, "SORMLQ", ch__1, m, n, k, &c_n1, (
22382 ftnlen)6, (ftnlen)2);
22383 nb = f2cmin(i__1,i__2);
22384 lwkopt = f2cmax(1,nw) * nb;
22385 work[1] = (real) lwkopt;
22386 }
22387
22388 if (*info != 0) {
22389 i__1 = -(*info);
22390 xerbla_("SORMLQ", &i__1);
22391 return 0;
22392 } else if (lquery) {
22393 return 0;
22394 }
22395
22396 /* Quick return if possible */
22397
22398 if (*m == 0 || *n == 0 || *k == 0) {
22399 work[1] = 1.f;
22400 return 0;
22401 }
22402
22403 nbmin = 2;
22404 ldwork = nw;
22405 if (nb > 1 && nb < *k) {
22406 iws = nw * nb;
22407 if (*lwork < iws) {
22408 nb = *lwork / ldwork;
22409 /* Computing MAX
22410 Writing concatenation */
22411 i__3[0] = 1, a__1[0] = side;
22412 i__3[1] = 1, a__1[1] = trans;
22413 s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
22414 i__1 = 2, i__2 = ilaenv_(&c__2, "SORMLQ", ch__1, m, n, k, &c_n1, (
22415 ftnlen)6, (ftnlen)2);
22416 nbmin = f2cmax(i__1,i__2);
22417 }
22418 } else {
22419 iws = nw;
22420 }
22421
22422 if (nb < nbmin || nb >= *k) {
22423
22424 /* Use unblocked code */
22425
22426 sorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
22427 c_offset], ldc, &work[1], &iinfo);
22428 } else {
22429
22430 /* Use blocked code */
22431
22432 if (left && notran || ! left && ! notran) {
22433 i1 = 1;
22434 i2 = *k;
22435 i3 = nb;
22436 } else {
22437 i1 = (*k - 1) / nb * nb + 1;
22438 i2 = 1;
22439 i3 = -nb;
22440 }
22441
22442 if (left) {
22443 ni = *n;
22444 jc = 1;
22445 } else {
22446 mi = *m;
22447 ic = 1;
22448 }
22449
22450 if (notran) {
22451 *(unsigned char *)transt = 'T';
22452 } else {
22453 *(unsigned char *)transt = 'N';
22454 }
22455
22456 i__1 = i2;
22457 i__2 = i3;
22458 for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
22459 /* Computing MIN */
22460 i__4 = nb, i__5 = *k - i__ + 1;
22461 ib = f2cmin(i__4,i__5);
22462
22463 /* Form the triangular factor of the block reflector
22464 H = H(i) H(i+1) . . . H(i+ib-1) */
22465
22466 i__4 = nq - i__ + 1;
22467 slarft_("Forward", "Rowwise", &i__4, &ib, &a_ref(i__, i__), lda, &
22468 tau[i__], t, &c__65);
22469 if (left) {
22470
22471 /* H or H' is applied to C(i:m,1:n) */
22472
22473 mi = *m - i__ + 1;
22474 ic = i__;
22475 } else {
22476
22477 /* H or H' is applied to C(1:m,i:n) */
22478
22479 ni = *n - i__ + 1;
22480 jc = i__;
22481 }
22482
22483 /* Apply H or H' */
22484
22485 slarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a_ref(
22486 i__, i__), lda, t, &c__65, &c___ref(ic, jc), ldc, &work[1]
22487 , &ldwork);
22488 /* L10: */
22489 }
22490 }
22491 work[1] = (real) lwkopt;
22492 return 0;
22493
22494 /* End of SORMLQ */
22495
22496 } /* sormlq_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 22503 of file lapackblas.cpp. References a_ref, c__1, c__2, c___ref, c_n1, f2cmax, f2cmin, ftnlen, ilaenv_(), integer, logical, lsame_(), real, s_cat(), slarfb_(), slarft_(), sorm2r_(), t, and xerbla_(). Referenced by sormbr_(). 22506 {
22507 /* -- LAPACK routine (version 3.0) --
22508 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
22509 Courant Institute, Argonne National Lab, and Rice University
22510 June 30, 1999
22511
22512
22513 Purpose
22514 =======
22515
22516 SORMQR overwrites the general real M-by-N matrix C with
22517
22518 SIDE = 'L' SIDE = 'R'
22519 TRANS = 'N': Q * C C * Q
22520 TRANS = 'T': Q**T * C C * Q**T
22521
22522 where Q is a real orthogonal matrix defined as the product of k
22523 elementary reflectors
22524
22525 Q = H(1) H(2) . . . H(k)
22526
22527 as returned by SGEQRF. Q is of order M if SIDE = 'L' and of order N
22528 if SIDE = 'R'.
22529
22530 Arguments
22531 =========
22532
22533 SIDE (input) CHARACTER*1
22534 = 'L': apply Q or Q**T from the Left;
22535 = 'R': apply Q or Q**T from the Right.
22536
22537 TRANS (input) CHARACTER*1
22538 = 'N': No transpose, apply Q;
22539 = 'T': Transpose, apply Q**T.
22540
22541 M (input) INTEGER
22542 The number of rows of the matrix C. M >= 0.
22543
22544 N (input) INTEGER
22545 The number of columns of the matrix C. N >= 0.
22546
22547 K (input) INTEGER
22548 The number of elementary reflectors whose product defines
22549 the matrix Q.
22550 If SIDE = 'L', M >= K >= 0;
22551 if SIDE = 'R', N >= K >= 0.
22552
22553 A (input) REAL array, dimension (LDA,K)
22554 The i-th column must contain the vector which defines the
22555 elementary reflector H(i), for i = 1,2,...,k, as returned by
22556 SGEQRF in the first k columns of its array argument A.
22557 A is modified by the routine but restored on exit.
22558
22559 LDA (input) INTEGER
22560 The leading dimension of the array A.
22561 If SIDE = 'L', LDA >= max(1,M);
22562 if SIDE = 'R', LDA >= max(1,N).
22563
22564 TAU (input) REAL array, dimension (K)
22565 TAU(i) must contain the scalar factor of the elementary
22566 reflector H(i), as returned by SGEQRF.
22567
22568 C (input/output) REAL array, dimension (LDC,N)
22569 On entry, the M-by-N matrix C.
22570 On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
22571
22572 LDC (input) INTEGER
22573 The leading dimension of the array C. LDC >= max(1,M).
22574
22575 WORK (workspace/output) REAL array, dimension (LWORK)
22576 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
22577
22578 LWORK (input) INTEGER
22579 The dimension of the array WORK.
22580 If SIDE = 'L', LWORK >= max(1,N);
22581 if SIDE = 'R', LWORK >= max(1,M).
22582 For optimum performance LWORK >= N*NB if SIDE = 'L', and
22583 LWORK >= M*NB if SIDE = 'R', where NB is the optimal
22584 blocksize.
22585
22586 If LWORK = -1, then a workspace query is assumed; the routine
22587 only calculates the optimal size of the WORK array, returns
22588 this value as the first entry of the WORK array, and no error
22589 message related to LWORK is issued by XERBLA.
22590
22591 INFO (output) INTEGER
22592 = 0: successful exit
22593 < 0: if INFO = -i, the i-th argument had an illegal value
22594
22595 =====================================================================
22596
22597
22598 Test the input arguments
22599
22600 Parameter adjustments */
22601 /* Table of constant values */
22602 static integer c__1 = 1;
22603 static integer c_n1 = -1;
22604 static integer c__2 = 2;
22605 static integer c__65 = 65;
22606
22607 /* System generated locals */
22608 typedef const char *address;
22609 address a__1[2];
22610 integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4,
22611 i__5;
22612 char ch__1[2];
22613 /* Builtin functions
22614 Subroutine */ int s_cat(char *, const char **, integer *, integer *, ftnlen);
22615 /* Local variables */
22616 static logical left;
22617 static integer i__;
22618 static real t[4160] /* was [65][64] */;
22619 extern logical lsame_(const char *, const char *);
22620 static integer nbmin, iinfo, i1, i2, i3, ib, ic, jc, nb;
22621 extern /* Subroutine */ int sorm2r_(const char *, const char *, integer *, integer *,
22622 integer *, real *, integer *, real *, real *, integer *, real *,
22623 integer *);
22624 static integer mi, ni, nq, nw;
22625 extern /* Subroutine */ int slarfb_(const char *, const char *, const char *, const char *,
22626 integer *, integer *, integer *, real *, integer *, real *,
22627 integer *, real *, integer *, real *, integer *), xerbla_(const char *, integer *);
22628 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *,
22629 integer *, integer *, ftnlen, ftnlen);
22630 extern /* Subroutine */ int slarft_(const char *, const char *, integer *, integer *,
22631 real *, integer *, real *, real *, integer *);
22632 static logical notran;
22633 static integer ldwork, lwkopt;
22634 static logical lquery;
22635 static integer iws;
22636 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
22637 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
22638
22639
22640 a_dim1 = *lda;
22641 a_offset = 1 + a_dim1 * 1;
22642 a -= a_offset;
22643 --tau;
22644 c_dim1 = *ldc;
22645 c_offset = 1 + c_dim1 * 1;
22646 c__ -= c_offset;
22647 --work;
22648
22649 /* Function Body */
22650 *info = 0;
22651 left = lsame_(side, "L");
22652 notran = lsame_(trans, "N");
22653 lquery = *lwork == -1;
22654
22655 /* NQ is the order of Q and NW is the minimum dimension of WORK */
22656
22657 if (left) {
22658 nq = *m;
22659 nw = *n;
22660 } else {
22661 nq = *n;
22662 nw = *m;
22663 }
22664 if (! left && ! lsame_(side, "R")) {
22665 *info = -1;
22666 } else if (! notran && ! lsame_(trans, "T")) {
22667 *info = -2;
22668 } else if (*m < 0) {
22669 *info = -3;
22670 } else if (*n < 0) {
22671 *info = -4;
22672 } else if (*k < 0 || *k > nq) {
22673 *info = -5;
22674 } else if (*lda < f2cmax(1,nq)) {
22675 *info = -7;
22676 } else if (*ldc < f2cmax(1,*m)) {
22677 *info = -10;
22678 } else if (*lwork < f2cmax(1,nw) && ! lquery) {
22679 *info = -12;
22680 }
22681
22682 if (*info == 0) {
22683
22684 /* Determine the block size. NB may be at most NBMAX, where NBMAX
22685 is used to define the local array T.
22686
22687 Computing MIN
22688 Writing concatenation */
22689 i__3[0] = 1, a__1[0] = side;
22690 i__3[1] = 1, a__1[1] = trans;
22691 s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
22692 i__1 = 64, i__2 = ilaenv_(&c__1, "SORMQR", ch__1, m, n, k, &c_n1, (
22693 ftnlen)6, (ftnlen)2);
22694 nb = f2cmin(i__1,i__2);
22695 lwkopt = f2cmax(1,nw) * nb;
22696 work[1] = (real) lwkopt;
22697 }
22698
22699 if (*info != 0) {
22700 i__1 = -(*info);
22701 xerbla_("SORMQR", &i__1);
22702 return 0;
22703 } else if (lquery) {
22704 return 0;
22705 }
22706
22707 /* Quick return if possible */
22708
22709 if (*m == 0 || *n == 0 || *k == 0) {
22710 work[1] = 1.f;
22711 return 0;
22712 }
22713
22714 nbmin = 2;
22715 ldwork = nw;
22716 if (nb > 1 && nb < *k) {
22717 iws = nw * nb;
22718 if (*lwork < iws) {
22719 nb = *lwork / ldwork;
22720 /* Computing MAX
22721 Writing concatenation */
22722 i__3[0] = 1, a__1[0] = side;
22723 i__3[1] = 1, a__1[1] = trans;
22724 s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
22725 i__1 = 2, i__2 = ilaenv_(&c__2, "SORMQR", ch__1, m, n, k, &c_n1, (
22726 ftnlen)6, (ftnlen)2);
22727 nbmin = f2cmax(i__1,i__2);
22728 }
22729 } else {
22730 iws = nw;
22731 }
22732
22733 if (nb < nbmin || nb >= *k) {
22734
22735 /* Use unblocked code */
22736
22737 sorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
22738 c_offset], ldc, &work[1], &iinfo);
22739 } else {
22740
22741 /* Use blocked code */
22742
22743 if (left && ! notran || ! left && notran) {
22744 i1 = 1;
22745 i2 = *k;
22746 i3 = nb;
22747 } else {
22748 i1 = (*k - 1) / nb * nb + 1;
22749 i2 = 1;
22750 i3 = -nb;
22751 }
22752
22753 if (left) {
22754 ni = *n;
22755 jc = 1;
22756 } else {
22757 mi = *m;
22758 ic = 1;
22759 }
22760
22761 i__1 = i2;
22762 i__2 = i3;
22763 for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
22764 /* Computing MIN */
22765 i__4 = nb, i__5 = *k - i__ + 1;
22766 ib = f2cmin(i__4,i__5);
22767
22768 /* Form the triangular factor of the block reflector
22769 H = H(i) H(i+1) . . . H(i+ib-1) */
22770
22771 i__4 = nq - i__ + 1;
22772 slarft_("Forward", "Columnwise", &i__4, &ib, &a_ref(i__, i__),
22773 lda, &tau[i__], t, &c__65);
22774 if (left) {
22775
22776 /* H or H' is applied to C(i:m,1:n) */
22777
22778 mi = *m - i__ + 1;
22779 ic = i__;
22780 } else {
22781
22782 /* H or H' is applied to C(1:m,i:n) */
22783
22784 ni = *n - i__ + 1;
22785 jc = i__;
22786 }
22787
22788 /* Apply H or H' */
22789
22790 slarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &
22791 a_ref(i__, i__), lda, t, &c__65, &c___ref(ic, jc), ldc, &
22792 work[1], &ldwork);
22793 /* L10: */
22794 }
22795 }
22796 work[1] = (real) lwkopt;
22797 return 0;
22798
22799 /* End of SORMQR */
22800
22801 } /* sormqr_ */
|
|
||||||||||||||||||||||||||||||||
|
Definition at line 14381 of file lapackblas.cpp. Referenced by sbdsqr_(), slaed2_(), slaed8_(), and slaeda_(). 14383 {
14384 /* System generated locals */
14385 integer i__1;
14386 /* Local variables */
14387 static integer i__;
14388 static real stemp;
14389 static integer ix, iy;
14390 /* applies a plane rotation.
14391 jack dongarra, linpack, 3/11/78.
14392 modified 12/3/93, array(1) declarations changed to array(*)
14393 Parameter adjustments */
14394 --sy;
14395 --sx;
14396 /* Function Body */
14397 if (*n <= 0) {
14398 return 0;
14399 }
14400 if (*incx == 1 && *incy == 1) {
14401 goto L20;
14402 }
14403 /* code for unequal increments or equal increments not equal
14404 to 1 */
14405 ix = 1;
14406 iy = 1;
14407 if (*incx < 0) {
14408 ix = (-(*n) + 1) * *incx + 1;
14409 }
14410 if (*incy < 0) {
14411 iy = (-(*n) + 1) * *incy + 1;
14412 }
14413 i__1 = *n;
14414 for (i__ = 1; i__ <= i__1; ++i__) {
14415 stemp = *c__ * sx[ix] + *s * sy[iy];
14416 sy[iy] = *c__ * sy[iy] - *s * sx[ix];
14417 sx[ix] = stemp;
14418 ix += *incx;
14419 iy += *incy;
14420 /* L10: */
14421 }
14422 return 0;
14423 /* code for both increments equal to 1 */
14424 L20:
14425 i__1 = *n;
14426 for (i__ = 1; i__ <= i__1; ++i__) {
14427 stemp = *c__ * sx[i__] + *s * sy[i__];
14428 sy[i__] = *c__ * sy[i__] - *s * sx[i__];
14429 sx[i__] = stemp;
14430 /* L30: */
14431 }
14432 return 0;
14433 } /* srot_ */
|
|
||||||||||||||||||||
|
Definition at line 7697 of file lapackblas.cpp. References integer. Referenced by EMAN::PCA::Lanczos_ooc(), sbdsqr_(), slabrd_(), slaed2_(), slaed8_(), slarfg_(), slatrd_(), sorg2l_(), sorg2r_(), sorgl2_(), sstevd_(), and ssyev_(). 07698 {
07699 /* System generated locals */
07700 integer i__1, i__2;
07701 /* Local variables */
07702 static integer i__, m, nincx, mp1;
07703 /* scales a vector by a constant.
07704 uses unrolled loops for increment equal to 1.
07705 jack dongarra, linpack, 3/11/78.
07706 modified 3/93 to return if incx .le. 0.
07707 modified 12/3/93, array(1) declarations changed to array(*)
07708 Parameter adjustments */
07709 --sx;
07710 /* Function Body */
07711 if (*n <= 0 || *incx <= 0) {
07712 return 0;
07713 }
07714 if (*incx == 1) {
07715 goto L20;
07716 }
07717 /* code for increment not equal to 1 */
07718 nincx = *n * *incx;
07719 i__1 = nincx;
07720 i__2 = *incx;
07721 for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
07722 sx[i__] = *sa * sx[i__];
07723 /* L10: */
07724 }
07725 return 0;
07726 /* code for increment equal to 1
07727 clean-up loop */
07728 L20:
07729 m = *n % 5;
07730 if (m == 0) {
07731 goto L40;
07732 }
07733 i__2 = m;
07734 for (i__ = 1; i__ <= i__2; ++i__) {
07735 sx[i__] = *sa * sx[i__];
07736 /* L30: */
07737 }
07738 if (*n < 5) {
07739 return 0;
07740 }
07741 L40:
07742 mp1 = m + 1;
07743 i__2 = *n;
07744 for (i__ = mp1; i__ <= i__2; i__ += 5) {
07745 sx[i__] = *sa * sx[i__];
07746 sx[i__ + 1] = *sa * sx[i__ + 1];
07747 sx[i__ + 2] = *sa * sx[i__ + 2];
07748 sx[i__ + 3] = *sa * sx[i__ + 3];
07749 sx[i__ + 4] = *sa * sx[i__ + 4];
07750 /* L50: */
07751 }
07752 return 0;
07753 } /* sscal_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 11235 of file lapackblas.cpp. References c__0, c__1, c__2, dabs, doublereal, f2cmax, ftnlen, ilaenv_(), integer, log(), logical, lsame_(), pow_ii(), real, sgemm_(), slacpy_(), slaed0_(), slamch_(), slanst_(), slascl_(), slaset_(), slasrt_(), sqrt(), ssteqr_(), ssterf_(), sswap_(), xerbla_(), and z___ref. Referenced by sstevd_(). 11238 {
11239 /* -- LAPACK driver routine (version 3.0) --
11240 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
11241 Courant Institute, Argonne National Lab, and Rice University
11242 June 30, 1999
11243
11244
11245 Purpose
11246 =======
11247
11248 SSTEDC computes all eigenvalues and, optionally, eigenvectors of a
11249 symmetric tridiagonal matrix using the divide and conquer method.
11250 The eigenvectors of a full or band real symmetric matrix can also be
11251 found if SSYTRD or SSPTRD or SSBTRD has been used to reduce this
11252 matrix to tridiagonal form.
11253
11254 This code makes very mild assumptions about floating point
11255 arithmetic. It will work on machines with a guard digit in
11256 add/subtract, or on those binary machines without guard digits
11257 which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
11258 It could conceivably fail on hexadecimal or decimal machines
11259 without guard digits, but we know of none. See SLAED3 for details.
11260
11261 Arguments
11262 =========
11263
11264 COMPZ (input) CHARACTER*1
11265 = 'N': Compute eigenvalues only.
11266 = 'I': Compute eigenvectors of tridiagonal matrix also.
11267 = 'V': Compute eigenvectors of original dense symmetric
11268 matrix also. On entry, Z contains the orthogonal
11269 matrix used to reduce the original matrix to
11270 tridiagonal form.
11271
11272 N (input) INTEGER
11273 The dimension of the symmetric tridiagonal matrix. N >= 0.
11274
11275 D (input/output) REAL array, dimension (N)
11276 On entry, the diagonal elements of the tridiagonal matrix.
11277 On exit, if INFO = 0, the eigenvalues in ascending order.
11278
11279 E (input/output) REAL array, dimension (N-1)
11280 On entry, the subdiagonal elements of the tridiagonal matrix.
11281 On exit, E has been destroyed.
11282
11283 Z (input/output) REAL array, dimension (LDZ,N)
11284 On entry, if COMPZ = 'V', then Z contains the orthogonal
11285 matrix used in the reduction to tridiagonal form.
11286 On exit, if INFO = 0, then if COMPZ = 'V', Z contains the
11287 orthonormal eigenvectors of the original symmetric matrix,
11288 and if COMPZ = 'I', Z contains the orthonormal eigenvectors
11289 of the symmetric tridiagonal matrix.
11290 If COMPZ = 'N', then Z is not referenced.
11291
11292 LDZ (input) INTEGER
11293 The leading dimension of the array Z. LDZ >= 1.
11294 If eigenvectors are desired, then LDZ >= max(1,N).
11295
11296 WORK (workspace/output) REAL array,
11297 dimension (LWORK)
11298 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
11299
11300 LWORK (input) INTEGER
11301 The dimension of the array WORK.
11302 If COMPZ = 'N' or N <= 1 then LWORK must be at least 1.
11303 If COMPZ = 'V' and N > 1 then LWORK must be at least
11304 ( 1 + 3*N + 2*N*lg N + 3*N**2 ),
11305 where lg( N ) = smallest integer k such
11306 that 2**k >= N.
11307 If COMPZ = 'I' and N > 1 then LWORK must be at least
11308 ( 1 + 4*N + N**2 ).
11309
11310 If LWORK = -1, then a workspace query is assumed; the routine
11311 only calculates the optimal size of the WORK array, returns
11312 this value as the first entry of the WORK array, and no error
11313 message related to LWORK is issued by XERBLA.
11314
11315 IWORK (workspace/output) INTEGER array, dimension (LIWORK)
11316 On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
11317
11318 LIWORK (input) INTEGER
11319 The dimension of the array IWORK.
11320 If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1.
11321 If COMPZ = 'V' and N > 1 then LIWORK must be at least
11322 ( 6 + 6*N + 5*N*lg N ).
11323 If COMPZ = 'I' and N > 1 then LIWORK must be at least
11324 ( 3 + 5*N ).
11325
11326 If LIWORK = -1, then a workspace query is assumed; the
11327 routine only calculates the optimal size of the IWORK array,
11328 returns this value as the first entry of the IWORK array, and
11329 no error message related to LIWORK is issued by XERBLA.
11330
11331 INFO (output) INTEGER
11332 = 0: successful exit.
11333 < 0: if INFO = -i, the i-th argument had an illegal value.
11334 > 0: The algorithm failed to compute an eigenvalue while
11335 working on the submatrix lying in rows and columns
11336 INFO/(N+1) through mod(INFO,N+1).
11337
11338 Further Details
11339 ===============
11340
11341 Based on contributions by
11342 Jeff Rutter, Computer Science Division, University of California
11343 at Berkeley, USA
11344 Modified by Francoise Tisseur, University of Tennessee.
11345
11346 =====================================================================
11347
11348
11349 Test the input parameters.
11350
11351 Parameter adjustments */
11352 /* Table of constant values */
11353 static integer c__2 = 2;
11354 static integer c__9 = 9;
11355 static integer c__0 = 0;
11356 static real c_b18 = 0.f;
11357 static real c_b19 = 1.f;
11358 static integer c__1 = 1;
11359
11360 /* System generated locals */
11361 integer z_dim1, z_offset, i__1, i__2;
11362 real r__1, r__2;
11363 /* Builtin functions */
11364 //double log(doublereal);
11365 integer pow_ii(integer *, integer *);
11366 //double sqrt(doublereal);
11367 /* Local variables */
11368 static real tiny;
11369 static integer i__, j, k, m;
11370 static real p;
11371 extern logical lsame_(const char *, const char *);
11372 extern /* Subroutine */ int sgemm_(const char *, const char *, integer *, integer *,
11373 integer *, real *, real *, integer *, real *, integer *, real *,
11374 real *, integer *);
11375 static integer lwmin, start;
11376 extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *,
11377 integer *), slaed0_(integer *, integer *, integer *, real *, real
11378 *, real *, integer *, real *, integer *, real *, integer *,
11379 integer *);
11380 static integer ii;
11381 extern doublereal slamch_(const char *);
11382 extern /* Subroutine */ int xerbla_(const char *, integer *);
11383 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *,
11384 integer *, integer *, ftnlen, ftnlen);
11385 extern /* Subroutine */ int slascl_(const char *, integer *, integer *, real *,
11386 real *, integer *, integer *, real *, integer *, integer *), slacpy_(const char *, integer *, integer *, real *, integer *,
11387 real *, integer *), slaset_(const char *, integer *, integer *,
11388 real *, real *, real *, integer *);
11389 static integer liwmin, icompz;
11390 static real orgnrm;
11391 extern doublereal slanst_(const char *, integer *, real *, real *);
11392 extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *),
11393 slasrt_(const char *, integer *, real *, integer *);
11394 static logical lquery;
11395 static integer smlsiz;
11396 extern /* Subroutine */ int ssteqr_(const char *, integer *, real *, real *,
11397 real *, integer *, real *, integer *);
11398 static integer storez, strtrw, end, lgn;
11399 static real eps;
11400 #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
11401
11402
11403 --d__;
11404 --e;
11405 z_dim1 = *ldz;
11406 z_offset = 1 + z_dim1 * 1;
11407 z__ -= z_offset;
11408 --work;
11409 --iwork;
11410
11411 /* Function Body */
11412 *info = 0;
11413 lquery = *lwork == -1 || *liwork == -1;
11414
11415 if (lsame_(compz, "N")) {
11416 icompz = 0;
11417 } else if (lsame_(compz, "V")) {
11418 icompz = 1;
11419 } else if (lsame_(compz, "I")) {
11420 icompz = 2;
11421 } else {
11422 icompz = -1;
11423 }
11424 if (*n <= 1 || icompz <= 0) {
11425 liwmin = 1;
11426 lwmin = 1;
11427 } else {
11428 lgn = (integer) (log((real) (*n)) / log(2.f));
11429 if (pow_ii(&c__2, &lgn) < *n) {
11430 ++lgn;
11431 }
11432 if (pow_ii(&c__2, &lgn) < *n) {
11433 ++lgn;
11434 }
11435 if (icompz == 1) {
11436 /* Computing 2nd power */
11437 i__1 = *n;
11438 lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3;
11439 liwmin = *n * 6 + 6 + *n * 5 * lgn;
11440 } else if (icompz == 2) {
11441 /* Computing 2nd power */
11442 i__1 = *n;
11443 lwmin = (*n << 2) + 1 + i__1 * i__1;
11444 liwmin = *n * 5 + 3;
11445 }
11446 }
11447 if (icompz < 0) {
11448 *info = -1;
11449 } else if (*n < 0) {
11450 *info = -2;
11451 } else if (*ldz < 1 || icompz > 0 && *ldz < f2cmax(1,*n)) {
11452 *info = -6;
11453 } else if (*lwork < lwmin && ! lquery) {
11454 *info = -8;
11455 } else if (*liwork < liwmin && ! lquery) {
11456 *info = -10;
11457 }
11458
11459 if (*info == 0) {
11460 work[1] = (real) lwmin;
11461 iwork[1] = liwmin;
11462 }
11463
11464 if (*info != 0) {
11465 i__1 = -(*info);
11466 xerbla_("SSTEDC", &i__1);
11467 return 0;
11468 } else if (lquery) {
11469 return 0;
11470 }
11471
11472 /* Quick return if possible */
11473
11474 if (*n == 0) {
11475 return 0;
11476 }
11477 if (*n == 1) {
11478 if (icompz != 0) {
11479 z___ref(1, 1) = 1.f;
11480 }
11481 return 0;
11482 }
11483
11484 smlsiz = ilaenv_(&c__9, "SSTEDC", " ", &c__0, &c__0, &c__0, &c__0, (
11485 ftnlen)6, (ftnlen)1);
11486
11487 /* If the following conditional clause is removed, then the routine
11488 will use the Divide and Conquer routine to compute only the
11489 eigenvalues, which requires (3N + 3N**2) real workspace and
11490 (2 + 5N + 2N lg(N)) integer workspace.
11491 Since on many architectures SSTERF is much faster than any other
11492 algorithm for finding eigenvalues only, it is used here
11493 as the default.
11494
11495 If COMPZ = 'N', use SSTERF to compute the eigenvalues. */
11496
11497 if (icompz == 0) {
11498 ssterf_(n, &d__[1], &e[1], info);
11499 return 0;
11500 }
11501
11502 /* If N is smaller than the minimum divide size (SMLSIZ+1), then
11503 solve the problem with another solver. */
11504
11505 if (*n <= smlsiz) {
11506 if (icompz == 0) {
11507 ssterf_(n, &d__[1], &e[1], info);
11508 return 0;
11509 } else if (icompz == 2) {
11510 ssteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1],
11511 info);
11512 return 0;
11513 } else {
11514 ssteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1],
11515 info);
11516 return 0;
11517 }
11518 }
11519
11520 /* If COMPZ = 'V', the Z matrix must be stored elsewhere for later
11521 use. */
11522
11523 if (icompz == 1) {
11524 storez = *n * *n + 1;
11525 } else {
11526 storez = 1;
11527 }
11528
11529 if (icompz == 2) {
11530 slaset_("Full", n, n, &c_b18, &c_b19, &z__[z_offset], ldz);
11531 }
11532
11533 /* Scale. */
11534
11535 orgnrm = slanst_("M", n, &d__[1], &e[1]);
11536 if (orgnrm == 0.f) {
11537 return 0;
11538 }
11539
11540 eps = slamch_("Epsilon");
11541
11542 start = 1;
11543
11544 /* while ( START <= N ) */
11545
11546 L10:
11547 if (start <= *n) {
11548
11549 /* Let END be the position of the next subdiagonal entry such that
11550 E( END ) <= TINY or END = N if no such subdiagonal exists. The
11551 matrix identified by the elements between START and END
11552 constitutes an independent sub-problem. */
11553
11554 end = start;
11555 L20:
11556 if (end < *n) {
11557 tiny = eps * sqrt((r__1 = d__[end], dabs(r__1))) * sqrt((r__2 =
11558 d__[end + 1], dabs(r__2)));
11559 if ((r__1 = e[end], dabs(r__1)) > tiny) {
11560 ++end;
11561 goto L20;
11562 }
11563 }
11564
11565 /* (Sub) Problem determined. Compute its size and solve it. */
11566
11567 m = end - start + 1;
11568 if (m == 1) {
11569 start = end + 1;
11570 goto L10;
11571 }
11572 if (m > smlsiz) {
11573 *info = smlsiz;
11574
11575 /* Scale. */
11576
11577 orgnrm = slanst_("M", &m, &d__[start], &e[start]);
11578 slascl_("G", &c__0, &c__0, &orgnrm, &c_b19, &m, &c__1, &d__[start]
11579 , &m, info);
11580 i__1 = m - 1;
11581 i__2 = m - 1;
11582 slascl_("G", &c__0, &c__0, &orgnrm, &c_b19, &i__1, &c__1, &e[
11583 start], &i__2, info);
11584
11585 if (icompz == 1) {
11586 strtrw = 1;
11587 } else {
11588 strtrw = start;
11589 }
11590 slaed0_(&icompz, n, &m, &d__[start], &e[start], &z___ref(strtrw,
11591 start), ldz, &work[1], n, &work[storez], &iwork[1], info);
11592 if (*info != 0) {
11593 *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m
11594 + 1) + start - 1;
11595 return 0;
11596 }
11597
11598 /* Scale back. */
11599
11600 slascl_("G", &c__0, &c__0, &c_b19, &orgnrm, &m, &c__1, &d__[start]
11601 , &m, info);
11602
11603 } else {
11604 if (icompz == 1) {
11605
11606 /* Since QR won't update a Z matrix which is larger than the
11607 length of D, we must solve the sub-problem in a workspace and
11608 then multiply back into Z. */
11609
11610 ssteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &work[
11611 m * m + 1], info);
11612 slacpy_("A", n, &m, &z___ref(1, start), ldz, &work[storez], n);
11613 sgemm_("N", "N", n, &m, &m, &c_b19, &work[storez], ldz, &work[
11614 1], &m, &c_b18, &z___ref(1, start), ldz);
11615 } else if (icompz == 2) {
11616 ssteqr_("I", &m, &d__[start], &e[start], &z___ref(start,
11617 start), ldz, &work[1], info);
11618 } else {
11619 ssterf_(&m, &d__[start], &e[start], info);
11620 }
11621 if (*info != 0) {
11622 *info = start * (*n + 1) + end;
11623 return 0;
11624 }
11625 }
11626
11627 start = end + 1;
11628 goto L10;
11629 }
11630
11631 /* endwhile
11632
11633 If the problem split any number of times, then the eigenvalues
11634 will not be properly ordered. Here we permute the eigenvalues
11635 (and the associated eigenvectors) into ascending order. */
11636
11637 if (m != *n) {
11638 if (icompz == 0) {
11639
11640 /* Use Quick Sort */
11641
11642 slasrt_("I", n, &d__[1], info);
11643
11644 } else {
11645
11646 /* Use Selection Sort to minimize swaps of eigenvectors */
11647
11648 i__1 = *n;
11649 for (ii = 2; ii <= i__1; ++ii) {
11650 i__ = ii - 1;
11651 k = i__;
11652 p = d__[i__];
11653 i__2 = *n;
11654 for (j = ii; j <= i__2; ++j) {
11655 if (d__[j] < p) {
11656 k = j;
11657 p = d__[j];
11658 }
11659 /* L30: */
11660 }
11661 if (k != i__) {
11662 d__[k] = d__[i__];
11663 d__[i__] = p;
11664 sswap_(n, &z___ref(1, i__), &c__1, &z___ref(1, k), &c__1);
11665 }
11666 /* L40: */
11667 }
11668 }
11669 }
11670
11671 work[1] = (real) lwmin;
11672 iwork[1] = liwmin;
11673
11674 return 0;
11675
11676 /* End of SSTEDC */
11677
11678 } /* sstedc_ */
|
|
||||||||||||||||||||||||||||||||||||
|
Definition at line 7758 of file lapackblas.cpp. References b, c__0, c__1, c__2, dabs, doublereal, f2cmax, integer, logical, lsame_(), r_sign(), real, slae2_(), slaev2_(), slamch_(), slanst_(), slapy2_(), slartg_(), slascl_(), slaset_(), slasr_(), slasrt_(), sqrt(), sswap_(), xerbla_(), and z___ref. Referenced by slaed0_(), sstedc_(), and ssyev_(). 07760 {
07761 /* -- LAPACK routine (version 3.0) --
07762 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
07763 Courant Institute, Argonne National Lab, and Rice University
07764 September 30, 1994
07765
07766
07767 Purpose
07768 =======
07769
07770 SSTEQR computes all eigenvalues and, optionally, eigenvectors of a
07771 symmetric tridiagonal matrix using the implicit QL or QR method.
07772 The eigenvectors of a full or band symmetric matrix can also be found
07773 if SSYTRD or SSPTRD or SSBTRD has been used to reduce this matrix to
07774 tridiagonal form.
07775
07776 Arguments
07777 =========
07778
07779 COMPZ (input) CHARACTER*1
07780 = 'N': Compute eigenvalues only.
07781 = 'V': Compute eigenvalues and eigenvectors of the original
07782 symmetric matrix. On entry, Z must contain the
07783 orthogonal matrix used to reduce the original matrix
07784 to tridiagonal form.
07785 = 'I': Compute eigenvalues and eigenvectors of the
07786 tridiagonal matrix. Z is initialized to the identity
07787 matrix.
07788
07789 N (input) INTEGER
07790 The order of the matrix. N >= 0.
07791
07792 D (input/output) REAL array, dimension (N)
07793 On entry, the diagonal elements of the tridiagonal matrix.
07794 On exit, if INFO = 0, the eigenvalues in ascending order.
07795
07796 E (input/output) REAL array, dimension (N-1)
07797 On entry, the (n-1) subdiagonal elements of the tridiagonal
07798 matrix.
07799 On exit, E has been destroyed.
07800
07801 Z (input/output) REAL array, dimension (LDZ, N)
07802 On entry, if COMPZ = 'V', then Z contains the orthogonal
07803 matrix used in the reduction to tridiagonal form.
07804 On exit, if INFO = 0, then if COMPZ = 'V', Z contains the
07805 orthonormal eigenvectors of the original symmetric matrix,
07806 and if COMPZ = 'I', Z contains the orthonormal eigenvectors
07807 of the symmetric tridiagonal matrix.
07808 If COMPZ = 'N', then Z is not referenced.
07809
07810 LDZ (input) INTEGER
07811 The leading dimension of the array Z. LDZ >= 1, and if
07812 eigenvectors are desired, then LDZ >= f2cmax(1,N).
07813
07814 WORK (workspace) REAL array, dimension (f2cmax(1,2*N-2))
07815 If COMPZ = 'N', then WORK is not referenced.
07816
07817 INFO (output) INTEGER
07818 = 0: successful exit
07819 < 0: if INFO = -i, the i-th argument had an illegal value
07820 > 0: the algorithm has failed to find all the eigenvalues in
07821 a total of 30*N iterations; if INFO = i, then i
07822 elements of E have not converged to zero; on exit, D
07823 and E contain the elements of a symmetric tridiagonal
07824 matrix which is orthogonally similar to the original
07825 matrix.
07826
07827 =====================================================================
07828
07829
07830 Test the input parameters.
07831
07832 Parameter adjustments */
07833 /* Table of constant values */
07834 static real c_b9 = 0.f;
07835 static real c_b10 = 1.f;
07836 static integer c__0 = 0;
07837 static integer c__1 = 1;
07838 static integer c__2 = 2;
07839
07840 /* System generated locals */
07841 integer z_dim1, z_offset, i__1, i__2;
07842 real r__1, r__2;
07843 /* Builtin functions */
07844 // double sqrt(doublereal), r_sign(real *, real *);
07845 double r_sign(real *, real *);
07846 /* Local variables */
07847 static integer lend, jtot;
07848 extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *)
07849 ;
07850 static real b, c__, f, g;
07851 static integer i__, j, k, l, m;
07852 static real p, r__, s;
07853 extern logical lsame_(const char *, const char *);
07854 static real anorm;
07855 extern /* Subroutine */ int slasr_(const char *, const char *, const char *, integer *,
07856 integer *, real *, real *, real *, integer *);
07857 static integer l1;
07858 extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *,
07859 integer *);
07860 static integer lendm1, lendp1;
07861 extern /* Subroutine */ int slaev2_(real *, real *, real *, real *, real *
07862 , real *, real *);
07863 extern doublereal slapy2_(real *, real *);
07864 static integer ii, mm, iscale;
07865 extern doublereal slamch_(const char *);
07866 static real safmin;
07867 extern /* Subroutine */ int xerbla_(const char *, integer *);
07868 static real safmax;
07869 extern /* Subroutine */ int slascl_(const char *, integer *, integer *, real *,
07870 real *, integer *, integer *, real *, integer *, integer *);
07871 static integer lendsv;
07872 extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real *
07873 ), slaset_(const char *, integer *, integer *, real *, real *, real *,
07874 integer *);
07875 static real ssfmin;
07876 static integer nmaxit, icompz;
07877 static real ssfmax;
07878 extern doublereal slanst_(const char *, integer *, real *, real *);
07879 extern /* Subroutine */ int slasrt_(const char *, integer *, real *, integer *);
07880 static integer lm1, mm1, nm1;
07881 static real rt1, rt2, eps;
07882 static integer lsv;
07883 static real tst, eps2;
07884 #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
07885
07886
07887 --d__;
07888 --e;
07889 z_dim1 = *ldz;
07890 z_offset = 1 + z_dim1 * 1;
07891 z__ -= z_offset;
07892 --work;
07893
07894 /* Function Body */
07895 *info = 0;
07896
07897 if (lsame_(compz, "N")) {
07898 icompz = 0;
07899 } else if (lsame_(compz, "V")) {
07900 icompz = 1;
07901 } else if (lsame_(compz, "I")) {
07902 icompz = 2;
07903 } else {
07904 icompz = -1;
07905 }
07906 if (icompz < 0) {
07907 *info = -1;
07908 } else if (*n < 0) {
07909 *info = -2;
07910 } else if (*ldz < 1 || icompz > 0 && *ldz < f2cmax(1,*n)) {
07911 *info = -6;
07912 }
07913 if (*info != 0) {
07914 i__1 = -(*info);
07915 xerbla_("SSTEQR", &i__1);
07916 return 0;
07917 }
07918
07919 /* Quick return if possible */
07920
07921 if (*n == 0) {
07922 return 0;
07923 }
07924
07925 if (*n == 1) {
07926 if (icompz == 2) {
07927 z___ref(1, 1) = 1.f;
07928 }
07929 return 0;
07930 }
07931
07932 /* Determine the unit roundoff and over/underflow thresholds. */
07933
07934 eps = slamch_("E");
07935 /* Computing 2nd power */
07936 r__1 = eps;
07937 eps2 = r__1 * r__1;
07938 safmin = slamch_("S");
07939 safmax = 1.f / safmin;
07940 ssfmax = sqrt(safmax) / 3.f;
07941 ssfmin = sqrt(safmin) / eps2;
07942
07943 /* Compute the eigenvalues and eigenvectors of the tridiagonal
07944 matrix. */
07945
07946 if (icompz == 2) {
07947 slaset_("Full", n, n, &c_b9, &c_b10, &z__[z_offset], ldz);
07948 }
07949
07950 nmaxit = *n * 30;
07951 jtot = 0;
07952
07953 /* Determine where the matrix splits and choose QL or QR iteration
07954 for each block, according to whether top or bottom diagonal
07955 element is smaller. */
07956
07957 l1 = 1;
07958 nm1 = *n - 1;
07959
07960 L10:
07961 if (l1 > *n) {
07962 goto L160;
07963 }
07964 if (l1 > 1) {
07965 e[l1 - 1] = 0.f;
07966 }
07967 if (l1 <= nm1) {
07968 i__1 = nm1;
07969 for (m = l1; m <= i__1; ++m) {
07970 tst = (r__1 = e[m], dabs(r__1));
07971 if (tst == 0.f) {
07972 goto L30;
07973 }
07974 if (tst <= sqrt((r__1 = d__[m], dabs(r__1))) * sqrt((r__2 = d__[m
07975 + 1], dabs(r__2))) * eps) {
07976 e[m] = 0.f;
07977 goto L30;
07978 }
07979 /* L20: */
07980 }
07981 }
07982 m = *n;
07983
07984 L30:
07985 l = l1;
07986 lsv = l;
07987 lend = m;
07988 lendsv = lend;
07989 l1 = m + 1;
07990 if (lend == l) {
07991 goto L10;
07992 }
07993
07994 /* Scale submatrix in rows and columns L to LEND */
07995
07996 i__1 = lend - l + 1;
07997 anorm = slanst_("I", &i__1, &d__[l], &e[l]);
07998 iscale = 0;
07999 if (anorm == 0.f) {
08000 goto L10;
08001 }
08002 if (anorm > ssfmax) {
08003 iscale = 1;
08004 i__1 = lend - l + 1;
08005 slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n,
08006 info);
08007 i__1 = lend - l;
08008 slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n,
08009 info);
08010 } else if (anorm < ssfmin) {
08011 iscale = 2;
08012 i__1 = lend - l + 1;
08013 slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n,
08014 info);
08015 i__1 = lend - l;
08016 slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n,
08017 info);
08018 }
08019
08020 /* Choose between QL and QR iteration */
08021
08022 if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) {
08023 lend = lsv;
08024 l = lendsv;
08025 }
08026
08027 if (lend > l) {
08028
08029 /* QL Iteration
08030
08031 Look for small subdiagonal element. */
08032
08033 L40:
08034 if (l != lend) {
08035 lendm1 = lend - 1;
08036 i__1 = lendm1;
08037 for (m = l; m <= i__1; ++m) {
08038 /* Computing 2nd power */
08039 r__2 = (r__1 = e[m], dabs(r__1));
08040 tst = r__2 * r__2;
08041 if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m
08042 + 1], dabs(r__2)) + safmin) {
08043 goto L60;
08044 }
08045 /* L50: */
08046 }
08047 }
08048
08049 m = lend;
08050
08051 L60:
08052 if (m < lend) {
08053 e[m] = 0.f;
08054 }
08055 p = d__[l];
08056 if (m == l) {
08057 goto L80;
08058 }
08059
08060 /* If remaining matrix is 2-by-2, use SLAE2 or SLAEV2
08061 to compute its eigensystem. */
08062
08063 if (m == l + 1) {
08064 if (icompz > 0) {
08065 slaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s);
08066 work[l] = c__;
08067 work[*n - 1 + l] = s;
08068 slasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], &
08069 z___ref(1, l), ldz);
08070 } else {
08071 slae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2);
08072 }
08073 d__[l] = rt1;
08074 d__[l + 1] = rt2;
08075 e[l] = 0.f;
08076 l += 2;
08077 if (l <= lend) {
08078 goto L40;
08079 }
08080 goto L140;
08081 }
08082
08083 if (jtot == nmaxit) {
08084 goto L140;
08085 }
08086 ++jtot;
08087
08088 /* Form shift. */
08089
08090 g = (d__[l + 1] - p) / (e[l] * 2.f);
08091 r__ = slapy2_(&g, &c_b10);
08092 g = d__[m] - p + e[l] / (g + r_sign(&r__, &g));
08093
08094 s = 1.f;
08095 c__ = 1.f;
08096 p = 0.f;
08097
08098 /* Inner loop */
08099
08100 mm1 = m - 1;
08101 i__1 = l;
08102 for (i__ = mm1; i__ >= i__1; --i__) {
08103 f = s * e[i__];
08104 b = c__ * e[i__];
08105 slartg_(&g, &f, &c__, &s, &r__);
08106 if (i__ != m - 1) {
08107 e[i__ + 1] = r__;
08108 }
08109 g = d__[i__ + 1] - p;
08110 r__ = (d__[i__] - g) * s + c__ * 2.f * b;
08111 p = s * r__;
08112 d__[i__ + 1] = g + p;
08113 g = c__ * r__ - b;
08114
08115 /* If eigenvectors are desired, then save rotations. */
08116
08117 if (icompz > 0) {
08118 work[i__] = c__;
08119 work[*n - 1 + i__] = -s;
08120 }
08121
08122 /* L70: */
08123 }
08124
08125 /* If eigenvectors are desired, then apply saved rotations. */
08126
08127 if (icompz > 0) {
08128 mm = m - l + 1;
08129 slasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &
08130 z___ref(1, l), ldz);
08131 }
08132
08133 d__[l] -= p;
08134 e[l] = g;
08135 goto L40;
08136
08137 /* Eigenvalue found. */
08138
08139 L80:
08140 d__[l] = p;
08141
08142 ++l;
08143 if (l <= lend) {
08144 goto L40;
08145 }
08146 goto L140;
08147
08148 } else {
08149
08150 /* QR Iteration
08151
08152 Look for small superdiagonal element. */
08153
08154 L90:
08155 if (l != lend) {
08156 lendp1 = lend + 1;
08157 i__1 = lendp1;
08158 for (m = l; m >= i__1; --m) {
08159 /* Computing 2nd power */
08160 r__2 = (r__1 = e[m - 1], dabs(r__1));
08161 tst = r__2 * r__2;
08162 if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m
08163 - 1], dabs(r__2)) + safmin) {
08164 goto L110;
08165 }
08166 /* L100: */
08167 }
08168 }
08169
08170 m = lend;
08171
08172 L110:
08173 if (m > lend) {
08174 e[m - 1] = 0.f;
08175 }
08176 p = d__[l];
08177 if (m == l) {
08178 goto L130;
08179 }
08180
08181 /* If remaining matrix is 2-by-2, use SLAE2 or SLAEV2
08182 to compute its eigensystem. */
08183
08184 if (m == l - 1) {
08185 if (icompz > 0) {
08186 slaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s)
08187 ;
08188 work[m] = c__;
08189 work[*n - 1 + m] = s;
08190 slasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], &
08191 z___ref(1, l - 1), ldz);
08192 } else {
08193 slae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2);
08194 }
08195 d__[l - 1] = rt1;
08196 d__[l] = rt2;
08197 e[l - 1] = 0.f;
08198 l += -2;
08199 if (l >= lend) {
08200 goto L90;
08201 }
08202 goto L140;
08203 }
08204
08205 if (jtot == nmaxit) {
08206 goto L140;
08207 }
08208 ++jtot;
08209
08210 /* Form shift. */
08211
08212 g = (d__[l - 1] - p) / (e[l - 1] * 2.f);
08213 r__ = slapy2_(&g, &c_b10);
08214 g = d__[m] - p + e[l - 1] / (g + r_sign(&r__, &g));
08215
08216 s = 1.f;
08217 c__ = 1.f;
08218 p = 0.f;
08219
08220 /* Inner loop */
08221
08222 lm1 = l - 1;
08223 i__1 = lm1;
08224 for (i__ = m; i__ <= i__1; ++i__) {
08225 f = s * e[i__];
08226 b = c__ * e[i__];
08227 slartg_(&g, &f, &c__, &s, &r__);
08228 if (i__ != m) {
08229 e[i__ - 1] = r__;
08230 }
08231 g = d__[i__] - p;
08232 r__ = (d__[i__ + 1] - g) * s + c__ * 2.f * b;
08233 p = s * r__;
08234 d__[i__] = g + p;
08235 g = c__ * r__ - b;
08236
08237 /* If eigenvectors are desired, then save rotations. */
08238
08239 if (icompz > 0) {
08240 work[i__] = c__;
08241 work[*n - 1 + i__] = s;
08242 }
08243
08244 /* L120: */
08245 }
08246
08247 /* If eigenvectors are desired, then apply saved rotations. */
08248
08249 if (icompz > 0) {
08250 mm = l - m + 1;
08251 slasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &
08252 z___ref(1, m), ldz);
08253 }
08254
08255 d__[l] -= p;
08256 e[lm1] = g;
08257 goto L90;
08258
08259 /* Eigenvalue found. */
08260
08261 L130:
08262 d__[l] = p;
08263
08264 --l;
08265 if (l >= lend) {
08266 goto L90;
08267 }
08268 goto L140;
08269
08270 }
08271
08272 /* Undo scaling if necessary */
08273
08274 L140:
08275 if (iscale == 1) {
08276 i__1 = lendsv - lsv + 1;
08277 slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv],
08278 n, info);
08279 i__1 = lendsv - lsv;
08280 slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n,
08281 info);
08282 } else if (iscale == 2) {
08283 i__1 = lendsv - lsv + 1;
08284 slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv],
08285 n, info);
08286 i__1 = lendsv - lsv;
08287 slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n,
08288 info);
08289 }
08290
08291 /* Check for no convergence to an eigenvalue after a total
08292 of N*MAXIT iterations. */
08293
08294 if (jtot < nmaxit) {
08295 goto L10;
08296 }
08297 i__1 = *n - 1;
08298 for (i__ = 1; i__ <= i__1; ++i__) {
08299 if (e[i__] != 0.f) {
08300 ++(*info);
08301 }
08302 /* L150: */
08303 }
08304 goto L190;
08305
08306 /* Order eigenvalues and eigenvectors. */
08307
08308 L160:
08309 if (icompz == 0) {
08310
08311 /* Use Quick Sort */
08312
08313 slasrt_("I", n, &d__[1], info);
08314
08315 } else {
08316
08317 /* Use Selection Sort to minimize swaps of eigenvectors */
08318
08319 i__1 = *n;
08320 for (ii = 2; ii <= i__1; ++ii) {
08321 i__ = ii - 1;
08322 k = i__;
08323 p = d__[i__];
08324 i__2 = *n;
08325 for (j = ii; j <= i__2; ++j) {
08326 if (d__[j] < p) {
08327 k = j;
08328 p = d__[j];
08329 }
08330 /* L170: */
08331 }
08332 if (k != i__) {
08333 d__[k] = d__[i__];
08334 d__[i__] = p;
08335 sswap_(n, &z___ref(1, i__), &c__1, &z___ref(1, k), &c__1);
08336 }
08337 /* L180: */
08338 }
08339 }
08340
08341 L190:
08342 return 0;
08343
08344 /* End of SSTEQR */
08345
08346 } /* ssteqr_ */
|
|
||||||||||||||||||||
|
Definition at line 8354 of file lapackblas.cpp. References c__0, c__1, dabs, doublereal, integer, r_sign(), real, slae2_(), slamch_(), slanst_(), slapy2_(), slascl_(), slasrt_(), sqrt(), and xerbla_(). Referenced by sstedc_(), sstevd_(), and ssyev_(). 08355 {
08356 /* -- LAPACK routine (version 3.0) --
08357 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
08358 Courant Institute, Argonne National Lab, and Rice University
08359 June 30, 1999
08360
08361
08362 Purpose
08363 =======
08364
08365 SSTERF computes all eigenvalues of a symmetric tridiagonal matrix
08366 using the Pal-Walker-Kahan variant of the QL or QR algorithm.
08367
08368 Arguments
08369 =========
08370
08371 N (input) INTEGER
08372 The order of the matrix. N >= 0.
08373
08374 D (input/output) REAL array, dimension (N)
08375 On entry, the n diagonal elements of the tridiagonal matrix.
08376 On exit, if INFO = 0, the eigenvalues in ascending order.
08377
08378 E (input/output) REAL array, dimension (N-1)
08379 On entry, the (n-1) subdiagonal elements of the tridiagonal
08380 matrix.
08381 On exit, E has been destroyed.
08382
08383 INFO (output) INTEGER
08384 = 0: successful exit
08385 < 0: if INFO = -i, the i-th argument had an illegal value
08386 > 0: the algorithm failed to find all of the eigenvalues in
08387 a total of 30*N iterations; if INFO = i, then i
08388 elements of E have not converged to zero.
08389
08390 =====================================================================
08391
08392
08393 Test the input parameters.
08394
08395 Parameter adjustments */
08396 /* Table of constant values */
08397 static integer c__0 = 0;
08398 static integer c__1 = 1;
08399 static real c_b32 = 1.f;
08400
08401 /* System generated locals */
08402 integer i__1;
08403 real r__1, r__2, r__3;
08404 /* Builtin functions */
08405 // double sqrt(doublereal), r_sign(real *, real *);
08406 double r_sign(real *, real *);
08407 /* Local variables */
08408 static real oldc;
08409 static integer lend, jtot;
08410 extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *)
08411 ;
08412 static real c__;
08413 static integer i__, l, m;
08414 static real p, gamma, r__, s, alpha, sigma, anorm;
08415 static integer l1;
08416 static real bb;
08417 extern doublereal slapy2_(real *, real *);
08418 static integer iscale;
08419 static real oldgam;
08420 extern doublereal slamch_(const char *);
08421 static real safmin;
08422 extern /* Subroutine */ int xerbla_(const char *, integer *);
08423 static real safmax;
08424 extern /* Subroutine */ int slascl_(const char *, integer *, integer *, real *,
08425 real *, integer *, integer *, real *, integer *, integer *);
08426 static integer lendsv;
08427 static real ssfmin;
08428 static integer nmaxit;
08429 static real ssfmax;
08430 extern doublereal slanst_(const char *, integer *, real *, real *);
08431 extern /* Subroutine */ int slasrt_(const char *, integer *, real *, integer *);
08432 static real rt1, rt2, eps, rte;
08433 static integer lsv;
08434 static real eps2;
08435
08436
08437 --e;
08438 --d__;
08439
08440 /* Function Body */
08441 *info = 0;
08442
08443 /* Quick return if possible */
08444
08445 if (*n < 0) {
08446 *info = -1;
08447 i__1 = -(*info);
08448 xerbla_("SSTERF", &i__1);
08449 return 0;
08450 }
08451 if (*n <= 1) {
08452 return 0;
08453 }
08454
08455 /* Determine the unit roundoff for this environment. */
08456
08457 eps = slamch_("E");
08458 /* Computing 2nd power */
08459 r__1 = eps;
08460 eps2 = r__1 * r__1;
08461 safmin = slamch_("S");
08462 safmax = 1.f / safmin;
08463 ssfmax = sqrt(safmax) / 3.f;
08464 ssfmin = sqrt(safmin) / eps2;
08465
08466 /* Compute the eigenvalues of the tridiagonal matrix. */
08467
08468 nmaxit = *n * 30;
08469 sigma = 0.f;
08470 jtot = 0;
08471
08472 /* Determine where the matrix splits and choose QL or QR iteration
08473 for each block, according to whether top or bottom diagonal
08474 element is smaller. */
08475
08476 l1 = 1;
08477
08478 L10:
08479 if (l1 > *n) {
08480 goto L170;
08481 }
08482 if (l1 > 1) {
08483 e[l1 - 1] = 0.f;
08484 }
08485 i__1 = *n - 1;
08486 for (m = l1; m <= i__1; ++m) {
08487 if ((r__3 = e[m], dabs(r__3)) <= sqrt((r__1 = d__[m], dabs(r__1))) *
08488 sqrt((r__2 = d__[m + 1], dabs(r__2))) * eps) {
08489 e[m] = 0.f;
08490 goto L30;
08491 }
08492 /* L20: */
08493 }
08494 m = *n;
08495
08496 L30:
08497 l = l1;
08498 lsv = l;
08499 lend = m;
08500 lendsv = lend;
08501 l1 = m + 1;
08502 if (lend == l) {
08503 goto L10;
08504 }
08505
08506 /* Scale submatrix in rows and columns L to LEND */
08507
08508 i__1 = lend - l + 1;
08509 anorm = slanst_("I", &i__1, &d__[l], &e[l]);
08510 iscale = 0;
08511 if (anorm > ssfmax) {
08512 iscale = 1;
08513 i__1 = lend - l + 1;
08514 slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n,
08515 info);
08516 i__1 = lend - l;
08517 slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n,
08518 info);
08519 } else if (anorm < ssfmin) {
08520 iscale = 2;
08521 i__1 = lend - l + 1;
08522 slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n,
08523 info);
08524 i__1 = lend - l;
08525 slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n,
08526 info);
08527 }
08528
08529 i__1 = lend - 1;
08530 for (i__ = l; i__ <= i__1; ++i__) {
08531 /* Computing 2nd power */
08532 r__1 = e[i__];
08533 e[i__] = r__1 * r__1;
08534 /* L40: */
08535 }
08536
08537 /* Choose between QL and QR iteration */
08538
08539 if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) {
08540 lend = lsv;
08541 l = lendsv;
08542 }
08543
08544 if (lend >= l) {
08545
08546 /* QL Iteration
08547
08548 Look for small subdiagonal element. */
08549
08550 L50:
08551 if (l != lend) {
08552 i__1 = lend - 1;
08553 for (m = l; m <= i__1; ++m) {
08554 if ((r__2 = e[m], dabs(r__2)) <= eps2 * (r__1 = d__[m] * d__[
08555 m + 1], dabs(r__1))) {
08556 goto L70;
08557 }
08558 /* L60: */
08559 }
08560 }
08561 m = lend;
08562
08563 L70:
08564 if (m < lend) {
08565 e[m] = 0.f;
08566 }
08567 p = d__[l];
08568 if (m == l) {
08569 goto L90;
08570 }
08571
08572 /* If remaining matrix is 2 by 2, use SLAE2 to compute its
08573 eigenvalues. */
08574
08575 if (m == l + 1) {
08576 rte = sqrt(e[l]);
08577 slae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2);
08578 d__[l] = rt1;
08579 d__[l + 1] = rt2;
08580 e[l] = 0.f;
08581 l += 2;
08582 if (l <= lend) {
08583 goto L50;
08584 }
08585 goto L150;
08586 }
08587
08588 if (jtot == nmaxit) {
08589 goto L150;
08590 }
08591 ++jtot;
08592
08593 /* Form shift. */
08594
08595 rte = sqrt(e[l]);
08596 sigma = (d__[l + 1] - p) / (rte * 2.f);
08597 r__ = slapy2_(&sigma, &c_b32);
08598 sigma = p - rte / (sigma + r_sign(&r__, &sigma));
08599
08600 c__ = 1.f;
08601 s = 0.f;
08602 gamma = d__[m] - sigma;
08603 p = gamma * gamma;
08604
08605 /* Inner loop */
08606
08607 i__1 = l;
08608 for (i__ = m - 1; i__ >= i__1; --i__) {
08609 bb = e[i__];
08610 r__ = p + bb;
08611 if (i__ != m - 1) {
08612 e[i__ + 1] = s * r__;
08613 }
08614 oldc = c__;
08615 c__ = p / r__;
08616 s = bb / r__;
08617 oldgam = gamma;
08618 alpha = d__[i__];
08619 gamma = c__ * (alpha - sigma) - s * oldgam;
08620 d__[i__ + 1] = oldgam + (alpha - gamma);
08621 if (c__ != 0.f) {
08622 p = gamma * gamma / c__;
08623 } else {
08624 p = oldc * bb;
08625 }
08626 /* L80: */
08627 }
08628
08629 e[l] = s * p;
08630 d__[l] = sigma + gamma;
08631 goto L50;
08632
08633 /* Eigenvalue found. */
08634
08635 L90:
08636 d__[l] = p;
08637
08638 ++l;
08639 if (l <= lend) {
08640 goto L50;
08641 }
08642 goto L150;
08643
08644 } else {
08645
08646 /* QR Iteration
08647
08648 Look for small superdiagonal element. */
08649
08650 L100:
08651 i__1 = lend + 1;
08652 for (m = l; m >= i__1; --m) {
08653 if ((r__2 = e[m - 1], dabs(r__2)) <= eps2 * (r__1 = d__[m] * d__[
08654 m - 1], dabs(r__1))) {
08655 goto L120;
08656 }
08657 /* L110: */
08658 }
08659 m = lend;
08660
08661 L120:
08662 if (m > lend) {
08663 e[m - 1] = 0.f;
08664 }
08665 p = d__[l];
08666 if (m == l) {
08667 goto L140;
08668 }
08669
08670 /* If remaining matrix is 2 by 2, use SLAE2 to compute its
08671 eigenvalues. */
08672
08673 if (m == l - 1) {
08674 rte = sqrt(e[l - 1]);
08675 slae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2);
08676 d__[l] = rt1;
08677 d__[l - 1] = rt2;
08678 e[l - 1] = 0.f;
08679 l += -2;
08680 if (l >= lend) {
08681 goto L100;
08682 }
08683 goto L150;
08684 }
08685
08686 if (jtot == nmaxit) {
08687 goto L150;
08688 }
08689 ++jtot;
08690
08691 /* Form shift. */
08692
08693 rte = sqrt(e[l - 1]);
08694 sigma = (d__[l - 1] - p) / (rte * 2.f);
08695 r__ = slapy2_(&sigma, &c_b32);
08696 sigma = p - rte / (sigma + r_sign(&r__, &sigma));
08697
08698 c__ = 1.f;
08699 s = 0.f;
08700 gamma = d__[m] - sigma;
08701 p = gamma * gamma;
08702
08703 /* Inner loop */
08704
08705 i__1 = l - 1;
08706 for (i__ = m; i__ <= i__1; ++i__) {
08707 bb = e[i__];
08708 r__ = p + bb;
08709 if (i__ != m) {
08710 e[i__ - 1] = s * r__;
08711 }
08712 oldc = c__;
08713 c__ = p / r__;
08714 s = bb / r__;
08715 oldgam = gamma;
08716 alpha = d__[i__ + 1];
08717 gamma = c__ * (alpha - sigma) - s * oldgam;
08718 d__[i__] = oldgam + (alpha - gamma);
08719 if (c__ != 0.f) {
08720 p = gamma * gamma / c__;
08721 } else {
08722 p = oldc * bb;
08723 }
08724 /* L130: */
08725 }
08726
08727 e[l - 1] = s * p;
08728 d__[l] = sigma + gamma;
08729 goto L100;
08730
08731 /* Eigenvalue found. */
08732
08733 L140:
08734 d__[l] = p;
08735
08736 --l;
08737 if (l >= lend) {
08738 goto L100;
08739 }
08740 goto L150;
08741
08742 }
08743
08744 /* Undo scaling if necessary */
08745
08746 L150:
08747 if (iscale == 1) {
08748 i__1 = lendsv - lsv + 1;
08749 slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv],
08750 n, info);
08751 }
08752 if (iscale == 2) {
08753 i__1 = lendsv - lsv + 1;
08754 slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv],
08755 n, info);
08756 }
08757
08758 /* Check for no convergence to an eigenvalue after a total
08759 of N*MAXIT iterations. */
08760
08761 if (jtot < nmaxit) {
08762 goto L10;
08763 }
08764 i__1 = *n - 1;
08765 for (i__ = 1; i__ <= i__1; ++i__) {
08766 if (e[i__] != 0.f) {
08767 ++(*info);
08768 }
08769 /* L160: */
08770 }
08771 goto L180;
08772
08773 /* Sort eigenvalues in increasing order. */
08774
08775 L170:
08776 slasrt_("I", n, &d__[1], info);
08777
08778 L180:
08779 return 0;
08780
08781 /* End of SSTERF */
08782
08783 } /* ssterf_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 11683 of file lapackblas.cpp. References c__1, doublereal, integer, logical, lsame_(), real, slamch_(), slanst_(), sqrt(), sscal_(), sstedc_(), ssterf_(), xerbla_(), and z___ref. Referenced by EMAN::PCAlarge::analyze(), EMAN::PCA::dopca_lan(), and EMAN::PCA::dopca_ooc(). 11686 {
11687 /* -- LAPACK driver routine (version 3.0) --
11688 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
11689 Courant Institute, Argonne National Lab, and Rice University
11690 June 30, 1999
11691
11692
11693 Purpose
11694 =======
11695
11696 SSTEVD computes all eigenvalues and, optionally, eigenvectors of a
11697 real symmetric tridiagonal matrix. If eigenvectors are desired, it
11698 uses a divide and conquer algorithm.
11699
11700 The divide and conquer algorithm makes very mild assumptions about
11701 floating point arithmetic. It will work on machines with a guard
11702 digit in add/subtract, or on those binary machines without guard
11703 digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
11704 Cray-2. It could conceivably fail on hexadecimal or decimal machines
11705 without guard digits, but we know of none.
11706
11707 Arguments
11708 =========
11709
11710 JOBZ (input) CHARACTER*1
11711 = 'N': Compute eigenvalues only;
11712 = 'V': Compute eigenvalues and eigenvectors.
11713
11714 N (input) INTEGER
11715 The order of the matrix. N >= 0.
11716
11717 D (input/output) REAL array, dimension (N)
11718 On entry, the n diagonal elements of the tridiagonal matrix
11719 A.
11720 On exit, if INFO = 0, the eigenvalues in ascending order.
11721
11722 E (input/output) REAL array, dimension (N)
11723 On entry, the (n-1) subdiagonal elements of the tridiagonal
11724 matrix A, stored in elements 1 to N-1 of E; E(N) need not
11725 be set, but is used by the routine.
11726 On exit, the contents of E are destroyed.
11727
11728 Z (output) REAL array, dimension (LDZ, N)
11729 If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
11730 eigenvectors of the matrix A, with the i-th column of Z
11731 holding the eigenvector associated with D(i).
11732 If JOBZ = 'N', then Z is not referenced.
11733
11734 LDZ (input) INTEGER
11735 The leading dimension of the array Z. LDZ >= 1, and if
11736 JOBZ = 'V', LDZ >= max(1,N).
11737
11738 WORK (workspace/output) REAL array,
11739 dimension (LWORK)
11740 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
11741
11742 LWORK (input) INTEGER
11743 The dimension of the array WORK.
11744 If JOBZ = 'N' or N <= 1 then LWORK must be at least 1.
11745 If JOBZ = 'V' and N > 1 then LWORK must be at least
11746 ( 1 + 4*N + N**2 ).
11747
11748 If LWORK = -1, then a workspace query is assumed; the routine
11749 only calculates the optimal size of the WORK array, returns
11750 this value as the first entry of the WORK array, and no error
11751 message related to LWORK is issued by XERBLA.
11752
11753 IWORK (workspace/output) INTEGER array, dimension (LIWORK)
11754 On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
11755
11756 LIWORK (input) INTEGER
11757 The dimension of the array IWORK.
11758 If JOBZ = 'N' or N <= 1 then LIWORK must be at least 1.
11759 If JOBZ = 'V' and N > 1 then LIWORK must be at least 3+5*N.
11760
11761 If LIWORK = -1, then a workspace query is assumed; the
11762 routine only calculates the optimal size of the IWORK array,
11763 returns this value as the first entry of the IWORK array, and
11764 no error message related to LIWORK is issued by XERBLA.
11765
11766 INFO (output) INTEGER
11767 = 0: successful exit
11768 < 0: if INFO = -i, the i-th argument had an illegal value
11769 > 0: if INFO = i, the algorithm failed to converge; i
11770 off-diagonal elements of E did not converge to zero.
11771
11772 =====================================================================
11773
11774
11775 Test the input parameters.
11776
11777 Parameter adjustments */
11778 /* Table of constant values */
11779 static integer c__1 = 1;
11780
11781 /* System generated locals */
11782 integer z_dim1, z_offset, i__1;
11783 real r__1;
11784 /* Builtin functions */
11785 // double sqrt(doublereal);
11786 /* Local variables */
11787 static real rmin, rmax, tnrm, sigma;
11788 extern logical lsame_(const char *, const char *);
11789 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
11790 static integer lwmin;
11791 static logical wantz;
11792 static integer iscale;
11793 extern doublereal slamch_(const char *);
11794 static real safmin;
11795 extern /* Subroutine */ int xerbla_(const char *, integer *);
11796 static real bignum;
11797 extern /* Subroutine */ int sstedc_(const char *, integer *, real *, real *,
11798 real *, integer *, real *, integer *, integer *, integer *,
11799 integer *);
11800 static integer liwmin;
11801 extern doublereal slanst_(const char *, integer *, real *, real *);
11802 extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *);
11803 static real smlnum;
11804 static logical lquery;
11805 static real eps;
11806 #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
11807
11808
11809 --d__;
11810 --e;
11811 z_dim1 = *ldz;
11812 z_offset = 1 + z_dim1 * 1;
11813 z__ -= z_offset;
11814 --work;
11815 --iwork;
11816
11817 /* Function Body */
11818 wantz = lsame_(jobz, "V");
11819 lquery = *lwork == -1 || *liwork == -1;
11820
11821 *info = 0;
11822 liwmin = 1;
11823 lwmin = 1;
11824 if (*n > 1 && wantz) {
11825 /* Computing 2nd power */
11826 i__1 = *n;
11827 lwmin = (*n << 2) + 1 + i__1 * i__1;
11828 liwmin = *n * 5 + 3;
11829 }
11830
11831 if (! (wantz || lsame_(jobz, "N"))) {
11832 *info = -1;
11833 } else if (*n < 0) {
11834 *info = -2;
11835 } else if (*ldz < 1 || wantz && *ldz < *n) {
11836 *info = -6;
11837 } else if (*lwork < lwmin && ! lquery) {
11838 *info = -8;
11839 } else if (*liwork < liwmin && ! lquery) {
11840 *info = -10;
11841 }
11842
11843 if (*info == 0) {
11844 work[1] = (real) lwmin;
11845 iwork[1] = liwmin;
11846 }
11847
11848 if (*info != 0) {
11849 i__1 = -(*info);
11850 xerbla_("SSTEVD", &i__1);
11851 return 0;
11852 } else if (lquery) {
11853 return 0;
11854 }
11855
11856 /* Quick return if possible */
11857
11858 if (*n == 0) {
11859 return 0;
11860 }
11861
11862 if (*n == 1) {
11863 if (wantz) {
11864 z___ref(1, 1) = 1.f;
11865 }
11866 return 0;
11867 }
11868
11869 /* Get machine constants. */
11870
11871 safmin = slamch_("Safe minimum");
11872 eps = slamch_("Precision");
11873 smlnum = safmin / eps;
11874 bignum = 1.f / smlnum;
11875 rmin = sqrt(smlnum);
11876 rmax = sqrt(bignum);
11877
11878 /* Scale matrix to allowable range, if necessary. */
11879
11880 iscale = 0;
11881 tnrm = slanst_("M", n, &d__[1], &e[1]);
11882 if (tnrm > 0.f && tnrm < rmin) {
11883 iscale = 1;
11884 sigma = rmin / tnrm;
11885 } else if (tnrm > rmax) {
11886 iscale = 1;
11887 sigma = rmax / tnrm;
11888 }
11889 if (iscale == 1) {
11890 sscal_(n, &sigma, &d__[1], &c__1);
11891 i__1 = *n - 1;
11892 sscal_(&i__1, &sigma, &e[1], &c__1);
11893 }
11894
11895 /* For eigenvalues only, call SSTERF. For eigenvalues and
11896 eigenvectors, call SSTEDC. */
11897
11898 if (! wantz) {
11899 ssterf_(n, &d__[1], &e[1], info);
11900 } else {
11901 sstedc_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], lwork,
11902 &iwork[1], liwork, info);
11903 }
11904
11905 /* If matrix was scaled, then rescale eigenvalues appropriately. */
11906
11907 if (iscale == 1) {
11908 r__1 = 1.f / sigma;
11909 sscal_(n, &r__1, &d__[1], &c__1);
11910 }
11911
11912 work[1] = (real) lwmin;
11913 iwork[1] = liwmin;
11914
11915 return 0;
11916
11917 /* End of SSTEVD */
11918
11919 } /* sstevd_ */
|
|
||||||||||||||||||||||||
|
Definition at line 8788 of file lapackblas.cpp. Referenced by sbdsqr_(), sstedc_(), and ssteqr_(). 08790 {
08791 /* System generated locals */
08792 integer i__1;
08793 /* Local variables */
08794 static integer i__, m;
08795 static real stemp;
08796 static integer ix, iy, mp1;
08797 /* interchanges two vectors.
08798 uses unrolled loops for increments equal to 1.
08799 jack dongarra, linpack, 3/11/78.
08800 modified 12/3/93, array(1) declarations changed to array(*)
08801 Parameter adjustments */
08802 --sy;
08803 --sx;
08804 /* Function Body */
08805 if (*n <= 0) {
08806 return 0;
08807 }
08808 if (*incx == 1 && *incy == 1) {
08809 goto L20;
08810 }
08811 /* code for unequal increments or equal increments not equal
08812 to 1 */
08813 ix = 1;
08814 iy = 1;
08815 if (*incx < 0) {
08816 ix = (-(*n) + 1) * *incx + 1;
08817 }
08818 if (*incy < 0) {
08819 iy = (-(*n) + 1) * *incy + 1;
08820 }
08821 i__1 = *n;
08822 for (i__ = 1; i__ <= i__1; ++i__) {
08823 stemp = sx[ix];
08824 sx[ix] = sy[iy];
08825 sy[iy] = stemp;
08826 ix += *incx;
08827 iy += *incy;
08828 /* L10: */
08829 }
08830 return 0;
08831 /* code for both increments equal to 1
08832 clean-up loop */
08833 L20:
08834 m = *n % 3;
08835 if (m == 0) {
08836 goto L40;
08837 }
08838 i__1 = m;
08839 for (i__ = 1; i__ <= i__1; ++i__) {
08840 stemp = sx[i__];
08841 sx[i__] = sy[i__];
08842 sy[i__] = stemp;
08843 /* L30: */
08844 }
08845 if (*n < 3) {
08846 return 0;
08847 }
08848 L40:
08849 mp1 = m + 1;
08850 i__1 = *n;
08851 for (i__ = mp1; i__ <= i__1; i__ += 3) {
08852 stemp = sx[i__];
08853 sx[i__] = sy[i__];
08854 sy[i__] = stemp;
08855 stemp = sx[i__ + 1];
08856 sx[i__ + 1] = sy[i__ + 1];
08857 sy[i__ + 1] = stemp;
08858 stemp = sx[i__ + 2];
08859 sx[i__ + 2] = sy[i__ + 2];
08860 sy[i__ + 2] = stemp;
08861 /* L50: */
08862 }
08863 return 0;
08864 } /* sswap_ */
|
|
||||||||||||||||||||||||||||||||||||||||
|
Definition at line 8869 of file lapackblas.cpp. References a_ref, c__0, c__1, c_n1, doublereal, f2cmax, ftnlen, ilaenv_(), integer, logical, lsame_(), real, slamch_(), slansy_(), slascl_(), sorgtr_(), sqrt(), sscal_(), ssteqr_(), ssterf_(), ssytrd_(), and xerbla_(). Referenced by EMAN::Util::coveig(). 08871 {
08872 /* -- LAPACK driver routine (version 3.0) --
08873 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
08874 Courant Institute, Argonne National Lab, and Rice University
08875 June 30, 1999
08876
08877
08878 Purpose
08879 =======
08880
08881 SSYEV computes all eigenvalues and, optionally, eigenvectors of a
08882 real symmetric matrix A.
08883
08884 Arguments
08885 =========
08886
08887 JOBZ (input) CHARACTER*1
08888 = 'N': Compute eigenvalues only;
08889 = 'V': Compute eigenvalues and eigenvectors.
08890
08891 UPLO (input) CHARACTER*1
08892 = 'U': Upper triangle of A is stored;
08893 = 'L': Lower triangle of A is stored.
08894
08895 N (input) INTEGER
08896 The order of the matrix A. N >= 0.
08897
08898 A (input/output) REAL array, dimension (LDA, N)
08899 On entry, the symmetric matrix A. If UPLO = 'U', the
08900 leading N-by-N upper triangular part of A contains the
08901 upper triangular part of the matrix A. If UPLO = 'L',
08902 the leading N-by-N lower triangular part of A contains
08903 the lower triangular part of the matrix A.
08904 On exit, if JOBZ = 'V', then if INFO = 0, A contains the
08905 orthonormal eigenvectors of the matrix A.
08906 If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
08907 or the upper triangle (if UPLO='U') of A, including the
08908 diagonal, is destroyed.
08909
08910 LDA (input) INTEGER
08911 The leading dimension of the array A. LDA >= f2cmax(1,N).
08912
08913 W (output) REAL array, dimension (N)
08914 If INFO = 0, the eigenvalues in ascending order.
08915
08916 WORK (workspace/output) REAL array, dimension (LWORK)
08917 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
08918
08919 LWORK (input) INTEGER
08920 The length of the array WORK. LWORK >= f2cmax(1,3*N-1).
08921 For optimal efficiency, LWORK >= (NB+2)*N,
08922 where NB is the blocksize for SSYTRD returned by ILAENV.
08923
08924 If LWORK = -1, then a workspace query is assumed; the routine
08925 only calculates the optimal size of the WORK array, returns
08926 this value as the first entry of the WORK array, and no error
08927 message related to LWORK is issued by XERBLA.
08928
08929 INFO (output) INTEGER
08930 = 0: successful exit
08931 < 0: if INFO = -i, the i-th argument had an illegal value
08932 > 0: if INFO = i, the algorithm failed to converge; i
08933 off-diagonal elements of an intermediate tridiagonal
08934 form did not converge to zero.
08935
08936 =====================================================================
08937
08938
08939 Test the input parameters.
08940
08941 Parameter adjustments */
08942 /* Table of constant values */
08943 static integer c__1 = 1;
08944 static integer c_n1 = -1;
08945 static integer c__0 = 0;
08946 static real c_b17 = 1.f;
08947
08948 /* System generated locals */
08949 integer a_dim1, a_offset, i__1, i__2;
08950 real r__1;
08951 /* Builtin functions */
08952 // double sqrt(doublereal);
08953 /* Local variables */
08954 static integer inde;
08955 static real anrm;
08956 static integer imax;
08957 static real rmin, rmax;
08958 static integer lopt;
08959 static real sigma;
08960 extern logical lsame_(const char *, const char *);
08961 static integer iinfo;
08962 extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
08963 static logical lower, wantz;
08964 static integer nb, iscale;
08965 extern doublereal slamch_(const char *);
08966 static real safmin;
08967 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *,
08968 integer *, integer *, ftnlen, ftnlen);
08969 extern /* Subroutine */ int xerbla_(const char *, integer *);
08970 static real bignum;
08971 extern /* Subroutine */ int slascl_(const char *, integer *, integer *, real *,
08972 real *, integer *, integer *, real *, integer *, integer *);
08973 static integer indtau, indwrk;
08974 extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *);
08975 extern doublereal slansy_(const char *, char *, integer *, real *, integer *,
08976 real *);
08977 static integer llwork;
08978 static real smlnum;
08979 static integer lwkopt;
08980 static logical lquery;
08981 extern /* Subroutine */ int sorgtr_(char *, integer *, real *, integer *,
08982 real *, real *, integer *, integer *), ssteqr_(const char *,
08983 integer *, real *, real *, real *, integer *, real *, integer *), ssytrd_(char *, integer *, real *, integer *, real *,
08984 real *, real *, real *, integer *, integer *);
08985 static real eps;
08986 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
08987
08988
08989 a_dim1 = *lda;
08990 a_offset = 1 + a_dim1 * 1;
08991 a -= a_offset;
08992 --w;
08993 --work;
08994
08995 /* Function Body */
08996 wantz = lsame_(jobz, "V");
08997 lower = lsame_(uplo, "L");
08998 lquery = *lwork == -1;
08999
09000 *info = 0;
09001 if (! (wantz || lsame_(jobz, "N"))) {
09002 *info = -1;
09003 } else if (! (lower || lsame_(uplo, "U"))) {
09004 *info = -2;
09005 } else if (*n < 0) {
09006 *info = -3;
09007 } else if (*lda < f2cmax(1,*n)) {
09008 *info = -5;
09009 } else /* if(complicated condition) */ {
09010 /* Computing MAX */
09011 i__1 = 1, i__2 = *n * 3 - 1;
09012 if (*lwork < f2cmax(i__1,i__2) && ! lquery) {
09013 *info = -8;
09014 }
09015 }
09016
09017 if (*info == 0) {
09018 nb = ilaenv_(&c__1, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
09019 (ftnlen)1);
09020 /* Computing MAX */
09021 i__1 = 1, i__2 = (nb + 2) * *n;
09022 lwkopt = f2cmax(i__1,i__2);
09023 work[1] = (real) lwkopt;
09024 }
09025
09026 if (*info != 0) {
09027 i__1 = -(*info);
09028 xerbla_("SSYEV ", &i__1);
09029 return 0;
09030 } else if (lquery) {
09031 return 0;
09032 }
09033
09034 /* Quick return if possible */
09035
09036 if (*n == 0) {
09037 work[1] = 1.f;
09038 return 0;
09039 }
09040
09041 if (*n == 1) {
09042 w[1] = a_ref(1, 1);
09043 work[1] = 3.f;
09044 if (wantz) {
09045 a_ref(1, 1) = 1.f;
09046 }
09047 return 0;
09048 }
09049
09050 /* Get machine constants. */
09051
09052 safmin = slamch_("Safe minimum");
09053 eps = slamch_("Precision");
09054 smlnum = safmin / eps;
09055 bignum = 1.f / smlnum;
09056 rmin = sqrt(smlnum);
09057 rmax = sqrt(bignum);
09058
09059 /* Scale matrix to allowable range, if necessary. */
09060
09061 anrm = slansy_("M", uplo, n, &a[a_offset], lda, &work[1]);
09062 iscale = 0;
09063 if (anrm > 0.f && anrm < rmin) {
09064 iscale = 1;
09065 sigma = rmin / anrm;
09066 } else if (anrm > rmax) {
09067 iscale = 1;
09068 sigma = rmax / anrm;
09069 }
09070 if (iscale == 1) {
09071 slascl_(uplo, &c__0, &c__0, &c_b17, &sigma, n, n, &a[a_offset], lda,
09072 info);
09073 }
09074
09075 /* Call SSYTRD to reduce symmetric matrix to tridiagonal form. */
09076
09077 inde = 1;
09078 indtau = inde + *n;
09079 indwrk = indtau + *n;
09080 llwork = *lwork - indwrk + 1;
09081 ssytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], &
09082 work[indwrk], &llwork, &iinfo);
09083 lopt = static_cast<integer>( (*n << 1) + work[indwrk] );
09084
09085 /* For eigenvalues only, call SSTERF. For eigenvectors, first call
09086 SORGTR to generate the orthogonal matrix, then call SSTEQR. */
09087
09088 if (! wantz) {
09089 ssterf_(n, &w[1], &work[inde], info);
09090 } else {
09091 sorgtr_(uplo, n, &a[a_offset], lda, &work[indtau], &work[indwrk], &
09092 llwork, &iinfo);
09093 ssteqr_(jobz, n, &w[1], &work[inde], &a[a_offset], lda, &work[indtau],
09094 info);
09095 }
09096
09097 /* If matrix was scaled, then rescale eigenvalues appropriately. */
09098
09099 if (iscale == 1) {
09100 if (*info == 0) {
09101 imax = *n;
09102 } else {
09103 imax = *info - 1;
09104 }
09105 r__1 = 1.f / sigma;
09106 sscal_(&imax, &r__1, &w[1], &c__1);
09107 }
09108
09109 /* Set WORK(1) to optimal workspace size. */
09110
09111 work[1] = (real) lwkopt;
09112
09113 return 0;
09114
09115 /* End of SSYEV */
09116
09117 } /* ssyev_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 9125 of file lapackblas.cpp. References a_ref, f2cmax, integer, logical, lsame_(), real, x, xerbla_(), and y. Referenced by slatrd_(), and ssytd2_(). 09128 {
09129 /* System generated locals */
09130 integer a_dim1, a_offset, i__1, i__2;
09131 /* Local variables */
09132 static integer info;
09133 static real temp1, temp2;
09134 static integer i__, j;
09135 extern logical lsame_(const char *, const char *);
09136 static integer ix, iy, jx, jy, kx, ky;
09137 extern /* Subroutine */ int xerbla_(const char *, integer *);
09138 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
09139 /* Purpose
09140 =======
09141 SSYMV performs the matrix-vector operation
09142 y := alpha*A*x + beta*y,
09143 where alpha and beta are scalars, x and y are n element vectors and
09144 A is an n by n symmetric matrix.
09145 Parameters
09146 ==========
09147 UPLO - CHARACTER*1.
09148 On entry, UPLO specifies whether the upper or lower
09149 triangular part of the array A is to be referenced as
09150 follows:
09151 UPLO = 'U' or 'u' Only the upper triangular part of A
09152 is to be referenced.
09153 UPLO = 'L' or 'l' Only the lower triangular part of A
09154 is to be referenced.
09155 Unchanged on exit.
09156 N - INTEGER.
09157 On entry, N specifies the order of the matrix A.
09158 N must be at least zero.
09159 Unchanged on exit.
09160 ALPHA - REAL .
09161 On entry, ALPHA specifies the scalar alpha.
09162 Unchanged on exit.
09163 A - REAL array of DIMENSION ( LDA, n ).
09164 Before entry with UPLO = 'U' or 'u', the leading n by n
09165 upper triangular part of the array A must contain the upper
09166 triangular part of the symmetric matrix and the strictly
09167 lower triangular part of A is not referenced.
09168 Before entry with UPLO = 'L' or 'l', the leading n by n
09169 lower triangular part of the array A must contain the lower
09170 triangular part of the symmetric matrix and the strictly
09171 upper triangular part of A is not referenced.
09172 Unchanged on exit.
09173 LDA - INTEGER.
09174 On entry, LDA specifies the first dimension of A as declared
09175 in the calling (sub) program. LDA must be at least
09176 f2cmax( 1, n ).
09177 Unchanged on exit.
09178 X - REAL array of dimension at least
09179 ( 1 + ( n - 1 )*abs( INCX ) ).
09180 Before entry, the incremented array X must contain the n
09181 element vector x.
09182 Unchanged on exit.
09183 INCX - INTEGER.
09184 On entry, INCX specifies the increment for the elements of
09185 X. INCX must not be zero.
09186 Unchanged on exit.
09187 BETA - REAL .
09188 On entry, BETA specifies the scalar beta. When BETA is
09189 supplied as zero then Y need not be set on input.
09190 Unchanged on exit.
09191 Y - REAL array of dimension at least
09192 ( 1 + ( n - 1 )*abs( INCY ) ).
09193 Before entry, the incremented array Y must contain the n
09194 element vector y. On exit, Y is overwritten by the updated
09195 vector y.
09196 INCY - INTEGER.
09197 On entry, INCY specifies the increment for the elements of
09198 Y. INCY must not be zero.
09199 Unchanged on exit.
09200 Level 2 Blas routine.
09201 -- Written on 22-October-1986.
09202 Jack Dongarra, Argonne National Lab.
09203 Jeremy Du Croz, Nag Central Office.
09204 Sven Hammarling, Nag Central Office.
09205 Richard Hanson, Sandia National Labs.
09206 Test the input parameters.
09207 Parameter adjustments */
09208 a_dim1 = *lda;
09209 a_offset = 1 + a_dim1 * 1;
09210 a -= a_offset;
09211 --x;
09212 --y;
09213 /* Function Body */
09214 info = 0;
09215 if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
09216 info = 1;
09217 } else if (*n < 0) {
09218 info = 2;
09219 } else if (*lda < f2cmax(1,*n)) {
09220 info = 5;
09221 } else if (*incx == 0) {
09222 info = 7;
09223 } else if (*incy == 0) {
09224 info = 10;
09225 }
09226 if (info != 0) {
09227 xerbla_("SSYMV ", &info);
09228 return 0;
09229 }
09230 /* Quick return if possible. */
09231 if (*n == 0 || *alpha == 0.f && *beta == 1.f) {
09232 return 0;
09233 }
09234 /* Set up the start points in X and Y. */
09235 if (*incx > 0) {
09236 kx = 1;
09237 } else {
09238 kx = 1 - (*n - 1) * *incx;
09239 }
09240 if (*incy > 0) {
09241 ky = 1;
09242 } else {
09243 ky = 1 - (*n - 1) * *incy;
09244 }
09245 /* Start the operations. In this version the elements of A are
09246 accessed sequentially with one pass through the triangular part
09247 of A.
09248 First form y := beta*y. */
09249 if (*beta != 1.f) {
09250 if (*incy == 1) {
09251 if (*beta == 0.f) {
09252 i__1 = *n;
09253 for (i__ = 1; i__ <= i__1; ++i__) {
09254 y[i__] = 0.f;
09255 /* L10: */
09256 }
09257 } else {
09258 i__1 = *n;
09259 for (i__ = 1; i__ <= i__1; ++i__) {
09260 y[i__] = *beta * y[i__];
09261 /* L20: */
09262 }
09263 }
09264 } else {
09265 iy = ky;
09266 if (*beta == 0.f) {
09267 i__1 = *n;
09268 for (i__ = 1; i__ <= i__1; ++i__) {
09269 y[iy] = 0.f;
09270 iy += *incy;
09271 /* L30: */
09272 }
09273 } else {
09274 i__1 = *n;
09275 for (i__ = 1; i__ <= i__1; ++i__) {
09276 y[iy] = *beta * y[iy];
09277 iy += *incy;
09278 /* L40: */
09279 }
09280 }
09281 }
09282 }
09283 if (*alpha == 0.f) {
09284 return 0;
09285 }
09286 if (lsame_(uplo, "U")) {
09287 /* Form y when A is stored in upper triangle. */
09288 if (*incx == 1 && *incy == 1) {
09289 i__1 = *n;
09290 for (j = 1; j <= i__1; ++j) {
09291 temp1 = *alpha * x[j];
09292 temp2 = 0.f;
09293 i__2 = j - 1;
09294 for (i__ = 1; i__ <= i__2; ++i__) {
09295 y[i__] += temp1 * a_ref(i__, j);
09296 temp2 += a_ref(i__, j) * x[i__];
09297 /* L50: */
09298 }
09299 y[j] = y[j] + temp1 * a_ref(j, j) + *alpha * temp2;
09300 /* L60: */
09301 }
09302 } else {
09303 jx = kx;
09304 jy = ky;
09305 i__1 = *n;
09306 for (j = 1; j <= i__1; ++j) {
09307 temp1 = *alpha * x[jx];
09308 temp2 = 0.f;
09309 ix = kx;
09310 iy = ky;
09311 i__2 = j - 1;
09312 for (i__ = 1; i__ <= i__2; ++i__) {
09313 y[iy] += temp1 * a_ref(i__, j);
09314 temp2 += a_ref(i__, j) * x[ix];
09315 ix += *incx;
09316 iy += *incy;
09317 /* L70: */
09318 }
09319 y[jy] = y[jy] + temp1 * a_ref(j, j) + *alpha * temp2;
09320 jx += *incx;
09321 jy += *incy;
09322 /* L80: */
09323 }
09324 }
09325 } else {
09326 /* Form y when A is stored in lower triangle. */
09327 if (*incx == 1 && *incy == 1) {
09328 i__1 = *n;
09329 for (j = 1; j <= i__1; ++j) {
09330 temp1 = *alpha * x[j];
09331 temp2 = 0.f;
09332 y[j] += temp1 * a_ref(j, j);
09333 i__2 = *n;
09334 for (i__ = j + 1; i__ <= i__2; ++i__) {
09335 y[i__] += temp1 * a_ref(i__, j);
09336 temp2 += a_ref(i__, j) * x[i__];
09337 /* L90: */
09338 }
09339 y[j] += *alpha * temp2;
09340 /* L100: */
09341 }
09342 } else {
09343 jx = kx;
09344 jy = ky;
09345 i__1 = *n;
09346 for (j = 1; j <= i__1; ++j) {
09347 temp1 = *alpha * x[jx];
09348 temp2 = 0.f;
09349 y[jy] += temp1 * a_ref(j, j);
09350 ix = jx;
09351 iy = jy;
09352 i__2 = *n;
09353 for (i__ = j + 1; i__ <= i__2; ++i__) {
09354 ix += *incx;
09355 iy += *incy;
09356 y[iy] += temp1 * a_ref(i__, j);
09357 temp2 += a_ref(i__, j) * x[ix];
09358 /* L110: */
09359 }
09360 y[jy] += *alpha * temp2;
09361 jx += *incx;
09362 jy += *incy;
09363 /* L120: */
09364 }
09365 }
09366 }
09367 return 0;
09368 /* End of SSYMV . */
09369 } /* ssymv_ */
|
|
||||||||||||||||||||||||||||||||||||||||
|
Definition at line 9375 of file lapackblas.cpp. References a_ref, f2cmax, integer, logical, lsame_(), real, x, xerbla_(), and y. Referenced by ssytd2_(). 09377 {
09378 /* System generated locals */
09379 integer a_dim1, a_offset, i__1, i__2;
09380 /* Local variables */
09381 static integer info;
09382 static real temp1, temp2;
09383 static integer i__, j;
09384 extern logical lsame_(const char *, const char *);
09385 static integer ix, iy, jx, jy, kx, ky;
09386 extern /* Subroutine */ int xerbla_(const char *, integer *);
09387 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
09388 /* Purpose
09389 =======
09390 SSYR2 performs the symmetric rank 2 operation
09391 A := alpha*x*y' + alpha*y*x' + A,
09392 where alpha is a scalar, x and y are n element vectors and A is an n
09393 by n symmetric matrix.
09394 Parameters
09395 ==========
09396 UPLO - CHARACTER*1.
09397 On entry, UPLO specifies whether the upper or lower
09398 triangular part of the array A is to be referenced as
09399 follows:
09400 UPLO = 'U' or 'u' Only the upper triangular part of A
09401 is to be referenced.
09402 UPLO = 'L' or 'l' Only the lower triangular part of A
09403 is to be referenced.
09404 Unchanged on exit.
09405 N - INTEGER.
09406 On entry, N specifies the order of the matrix A.
09407 N must be at least zero.
09408 Unchanged on exit.
09409 ALPHA - REAL .
09410 On entry, ALPHA specifies the scalar alpha.
09411 Unchanged on exit.
09412 X - REAL array of dimension at least
09413 ( 1 + ( n - 1 )*abs( INCX ) ).
09414 Before entry, the incremented array X must contain the n
09415 element vector x.
09416 Unchanged on exit.
09417 INCX - INTEGER.
09418 On entry, INCX specifies the increment for the elements of
09419 X. INCX must not be zero.
09420 Unchanged on exit.
09421 Y - REAL array of dimension at least
09422 ( 1 + ( n - 1 )*abs( INCY ) ).
09423 Before entry, the incremented array Y must contain the n
09424 element vector y.
09425 Unchanged on exit.
09426 INCY - INTEGER.
09427 On entry, INCY specifies the increment for the elements of
09428 Y. INCY must not be zero.
09429 Unchanged on exit.
09430 A - REAL array of DIMENSION ( LDA, n ).
09431 Before entry with UPLO = 'U' or 'u', the leading n by n
09432 upper triangular part of the array A must contain the upper
09433 triangular part of the symmetric matrix and the strictly
09434 lower triangular part of A is not referenced. On exit, the
09435 upper triangular part of the array A is overwritten by the
09436 upper triangular part of the updated matrix.
09437 Before entry with UPLO = 'L' or 'l', the leading n by n
09438 lower triangular part of the array A must contain the lower
09439 triangular part of the symmetric matrix and the strictly
09440 upper triangular part of A is not referenced. On exit, the
09441 lower triangular part of the array A is overwritten by the
09442 lower triangular part of the updated matrix.
09443 LDA - INTEGER.
09444 On entry, LDA specifies the first dimension of A as declared
09445 in the calling (sub) program. LDA must be at least
09446 f2cmax( 1, n ).
09447 Unchanged on exit.
09448 Level 2 Blas routine.
09449 -- Written on 22-October-1986.
09450 Jack Dongarra, Argonne National Lab.
09451 Jeremy Du Croz, Nag Central Office.
09452 Sven Hammarling, Nag Central Office.
09453 Richard Hanson, Sandia National Labs.
09454 Test the input parameters.
09455 Parameter adjustments */
09456 --x;
09457 --y;
09458 a_dim1 = *lda;
09459 a_offset = 1 + a_dim1 * 1;
09460 a -= a_offset;
09461 /* Function Body */
09462 info = 0;
09463 if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
09464 info = 1;
09465 } else if (*n < 0) {
09466 info = 2;
09467 } else if (*incx == 0) {
09468 info = 5;
09469 } else if (*incy == 0) {
09470 info = 7;
09471 } else if (*lda < f2cmax(1,*n)) {
09472 info = 9;
09473 }
09474 if (info != 0) {
09475 xerbla_("SSYR2 ", &info);
09476 return 0;
09477 }
09478 /* Quick return if possible. */
09479 if (*n == 0 || *alpha == 0.f) {
09480 return 0;
09481 }
09482 /* Set up the start points in X and Y if the increments are not both
09483 unity. */
09484 if (*incx != 1 || *incy != 1) {
09485 if (*incx > 0) {
09486 kx = 1;
09487 } else {
09488 kx = 1 - (*n - 1) * *incx;
09489 }
09490 if (*incy > 0) {
09491 ky = 1;
09492 } else {
09493 ky = 1 - (*n - 1) * *incy;
09494 }
09495 jx = kx;
09496 jy = ky;
09497 }
09498 /* Start the operations. In this version the elements of A are
09499 accessed sequentially with one pass through the triangular part
09500 of A. */
09501 if (lsame_(uplo, "U")) {
09502 /* Form A when A is stored in the upper triangle. */
09503 if (*incx == 1 && *incy == 1) {
09504 i__1 = *n;
09505 for (j = 1; j <= i__1; ++j) {
09506 if (x[j] != 0.f || y[j] != 0.f) {
09507 temp1 = *alpha * y[j];
09508 temp2 = *alpha * x[j];
09509 i__2 = j;
09510 for (i__ = 1; i__ <= i__2; ++i__) {
09511 a_ref(i__, j) = a_ref(i__, j) + x[i__] * temp1 + y[
09512 i__] * temp2;
09513 /* L10: */
09514 }
09515 }
09516 /* L20: */
09517 }
09518 } else {
09519 i__1 = *n;
09520 for (j = 1; j <= i__1; ++j) {
09521 if (x[jx] != 0.f || y[jy] != 0.f) {
09522 temp1 = *alpha * y[jy];
09523 temp2 = *alpha * x[jx];
09524 ix = kx;
09525 iy = ky;
09526 i__2 = j;
09527 for (i__ = 1; i__ <= i__2; ++i__) {
09528 a_ref(i__, j) = a_ref(i__, j) + x[ix] * temp1 + y[iy]
09529 * temp2;
09530 ix += *incx;
09531 iy += *incy;
09532 /* L30: */
09533 }
09534 }
09535 jx += *incx;
09536 jy += *incy;
09537 /* L40: */
09538 }
09539 }
09540 } else {
09541 /* Form A when A is stored in the lower triangle. */
09542 if (*incx == 1 && *incy == 1) {
09543 i__1 = *n;
09544 for (j = 1; j <= i__1; ++j) {
09545 if (x[j] != 0.f || y[j] != 0.f) {
09546 temp1 = *alpha * y[j];
09547 temp2 = *alpha * x[j];
09548 i__2 = *n;
09549 for (i__ = j; i__ <= i__2; ++i__) {
09550 a_ref(i__, j) = a_ref(i__, j) + x[i__] * temp1 + y[
09551 i__] * temp2;
09552 /* L50: */
09553 }
09554 }
09555 /* L60: */
09556 }
09557 } else {
09558 i__1 = *n;
09559 for (j = 1; j <= i__1; ++j) {
09560 if (x[jx] != 0.f || y[jy] != 0.f) {
09561 temp1 = *alpha * y[jy];
09562 temp2 = *alpha * x[jx];
09563 ix = jx;
09564 iy = jy;
09565 i__2 = *n;
09566 for (i__ = j; i__ <= i__2; ++i__) {
09567 a_ref(i__, j) = a_ref(i__, j) + x[ix] * temp1 + y[iy]
09568 * temp2;
09569 ix += *incx;
09570 iy += *incy;
09571 /* L70: */
09572 }
09573 }
09574 jx += *incx;
09575 jy += *incy;
09576 /* L80: */
09577 }
09578 }
09579 }
09580 return 0;
09581 /* End of SSYR2 . */
09582 } /* ssyr2_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 9588 of file lapackblas.cpp. References a_ref, b, b_ref, c___ref, f2cmax, integer, logical, lsame_(), real, and xerbla_(). Referenced by ssytrd_(). 09591 {
09592 /* System generated locals */
09593 integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
09594 i__3;
09595 /* Local variables */
09596 static integer info;
09597 static real temp1, temp2;
09598 static integer i__, j, l;
09599 extern logical lsame_(const char *, const char *);
09600 static integer nrowa;
09601 static logical upper;
09602 extern /* Subroutine */ int xerbla_(const char *, integer *);
09603 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
09604 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
09605 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
09606 /* Purpose
09607 =======
09608 SSYR2K performs one of the symmetric rank 2k operations
09609 C := alpha*A*B' + alpha*B*A' + beta*C,
09610 or
09611 C := alpha*A'*B + alpha*B'*A + beta*C,
09612 where alpha and beta are scalars, C is an n by n symmetric matrix
09613 and A and B are n by k matrices in the first case and k by n
09614 matrices in the second case.
09615 Parameters
09616 ==========
09617 UPLO - CHARACTER*1.
09618 On entry, UPLO specifies whether the upper or lower
09619 triangular part of the array C is to be referenced as
09620 follows:
09621 UPLO = 'U' or 'u' Only the upper triangular part of C
09622 is to be referenced.
09623 UPLO = 'L' or 'l' Only the lower triangular part of C
09624 is to be referenced.
09625 Unchanged on exit.
09626 TRANS - CHARACTER*1.
09627 On entry, TRANS specifies the operation to be performed as
09628 follows:
09629 TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' +
09630 beta*C.
09631 TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A +
09632 beta*C.
09633 TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A +
09634 beta*C.
09635 Unchanged on exit.
09636 N - INTEGER.
09637 On entry, N specifies the order of the matrix C. N must be
09638 at least zero.
09639 Unchanged on exit.
09640 K - INTEGER.
09641 On entry with TRANS = 'N' or 'n', K specifies the number
09642 of columns of the matrices A and B, and on entry with
09643 TRANS = 'T' or 't' or 'C' or 'c', K specifies the number
09644 of rows of the matrices A and B. K must be at least zero.
09645 Unchanged on exit.
09646 ALPHA - REAL .
09647 On entry, ALPHA specifies the scalar alpha.
09648 Unchanged on exit.
09649 A - REAL array of DIMENSION ( LDA, ka ), where ka is
09650 k when TRANS = 'N' or 'n', and is n otherwise.
09651 Before entry with TRANS = 'N' or 'n', the leading n by k
09652 part of the array A must contain the matrix A, otherwise
09653 the leading k by n part of the array A must contain the
09654 matrix A.
09655 Unchanged on exit.
09656 LDA - INTEGER.
09657 On entry, LDA specifies the first dimension of A as declared
09658 in the calling (sub) program. When TRANS = 'N' or 'n'
09659 then LDA must be at least f2cmax( 1, n ), otherwise LDA must
09660 be at least f2cmax( 1, k ).
09661 Unchanged on exit.
09662 B - REAL array of DIMENSION ( LDB, kb ), where kb is
09663 k when TRANS = 'N' or 'n', and is n otherwise.
09664 Before entry with TRANS = 'N' or 'n', the leading n by k
09665 part of the array B must contain the matrix B, otherwise
09666 the leading k by n part of the array B must contain the
09667 matrix B.
09668 Unchanged on exit.
09669 LDB - INTEGER.
09670 On entry, LDB specifies the first dimension of B as declared
09671 in the calling (sub) program. When TRANS = 'N' or 'n'
09672 then LDB must be at least f2cmax( 1, n ), otherwise LDB must
09673 be at least f2cmax( 1, k ).
09674 Unchanged on exit.
09675 BETA - REAL .
09676 On entry, BETA specifies the scalar beta.
09677 Unchanged on exit.
09678 C - REAL array of DIMENSION ( LDC, n ).
09679 Before entry with UPLO = 'U' or 'u', the leading n by n
09680 upper triangular part of the array C must contain the upper
09681 triangular part of the symmetric matrix and the strictly
09682 lower triangular part of C is not referenced. On exit, the
09683 upper triangular part of the array C is overwritten by the
09684 upper triangular part of the updated matrix.
09685 Before entry with UPLO = 'L' or 'l', the leading n by n
09686 lower triangular part of the array C must contain the lower
09687 triangular part of the symmetric matrix and the strictly
09688 upper triangular part of C is not referenced. On exit, the
09689 lower triangular part of the array C is overwritten by the
09690 lower triangular part of the updated matrix.
09691 LDC - INTEGER.
09692 On entry, LDC specifies the first dimension of C as declared
09693 in the calling (sub) program. LDC must be at least
09694 f2cmax( 1, n ).
09695 Unchanged on exit.
09696 Level 3 Blas routine.
09697 -- Written on 8-February-1989.
09698 Jack Dongarra, Argonne National Laboratory.
09699 Iain Duff, AERE Harwell.
09700 Jeremy Du Croz, Numerical Algorithms Group Ltd.
09701 Sven Hammarling, Numerical Algorithms Group Ltd.
09702 Test the input parameters.
09703 Parameter adjustments */
09704 a_dim1 = *lda;
09705 a_offset = 1 + a_dim1 * 1;
09706 a -= a_offset;
09707 b_dim1 = *ldb;
09708 b_offset = 1 + b_dim1 * 1;
09709 b -= b_offset;
09710 c_dim1 = *ldc;
09711 c_offset = 1 + c_dim1 * 1;
09712 c__ -= c_offset;
09713 /* Function Body */
09714 if (lsame_(trans, "N")) {
09715 nrowa = *n;
09716 } else {
09717 nrowa = *k;
09718 }
09719 upper = lsame_(uplo, "U");
09720 info = 0;
09721 if (! upper && ! lsame_(uplo, "L")) {
09722 info = 1;
09723 } else if (! lsame_(trans, "N") && ! lsame_(trans,
09724 "T") && ! lsame_(trans, "C")) {
09725 info = 2;
09726 } else if (*n < 0) {
09727 info = 3;
09728 } else if (*k < 0) {
09729 info = 4;
09730 } else if (*lda < f2cmax(1,nrowa)) {
09731 info = 7;
09732 } else if (*ldb < f2cmax(1,nrowa)) {
09733 info = 9;
09734 } else if (*ldc < f2cmax(1,*n)) {
09735 info = 12;
09736 }
09737 if (info != 0) {
09738 xerbla_("SSYR2K", &info);
09739 return 0;
09740 }
09741 /* Quick return if possible. */
09742 if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) {
09743 return 0;
09744 }
09745 /* And when alpha.eq.zero. */
09746 if (*alpha == 0.f) {
09747 if (upper) {
09748 if (*beta == 0.f) {
09749 i__1 = *n;
09750 for (j = 1; j <= i__1; ++j) {
09751 i__2 = j;
09752 for (i__ = 1; i__ <= i__2; ++i__) {
09753 c___ref(i__, j) = 0.f;
09754 /* L10: */
09755 }
09756 /* L20: */
09757 }
09758 } else {
09759 i__1 = *n;
09760 for (j = 1; j <= i__1; ++j) {
09761 i__2 = j;
09762 for (i__ = 1; i__ <= i__2; ++i__) {
09763 c___ref(i__, j) = *beta * c___ref(i__, j);
09764 /* L30: */
09765 }
09766 /* L40: */
09767 }
09768 }
09769 } else {
09770 if (*beta == 0.f) {
09771 i__1 = *n;
09772 for (j = 1; j <= i__1; ++j) {
09773 i__2 = *n;
09774 for (i__ = j; i__ <= i__2; ++i__) {
09775 c___ref(i__, j) = 0.f;
09776 /* L50: */
09777 }
09778 /* L60: */
09779 }
09780 } else {
09781 i__1 = *n;
09782 for (j = 1; j <= i__1; ++j) {
09783 i__2 = *n;
09784 for (i__ = j; i__ <= i__2; ++i__) {
09785 c___ref(i__, j) = *beta * c___ref(i__, j);
09786 /* L70: */
09787 }
09788 /* L80: */
09789 }
09790 }
09791 }
09792 return 0;
09793 }
09794 /* Start the operations. */
09795 if (lsame_(trans, "N")) {
09796 /* Form C := alpha*A*B' + alpha*B*A' + C. */
09797 if (upper) {
09798 i__1 = *n;
09799 for (j = 1; j <= i__1; ++j) {
09800 if (*beta == 0.f) {
09801 i__2 = j;
09802 for (i__ = 1; i__ <= i__2; ++i__) {
09803 c___ref(i__, j) = 0.f;
09804 /* L90: */
09805 }
09806 } else if (*beta != 1.f) {
09807 i__2 = j;
09808 for (i__ = 1; i__ <= i__2; ++i__) {
09809 c___ref(i__, j) = *beta * c___ref(i__, j);
09810 /* L100: */
09811 }
09812 }
09813 i__2 = *k;
09814 for (l = 1; l <= i__2; ++l) {
09815 if (a_ref(j, l) != 0.f || b_ref(j, l) != 0.f) {
09816 temp1 = *alpha * b_ref(j, l);
09817 temp2 = *alpha * a_ref(j, l);
09818 i__3 = j;
09819 for (i__ = 1; i__ <= i__3; ++i__) {
09820 c___ref(i__, j) = c___ref(i__, j) + a_ref(i__, l)
09821 * temp1 + b_ref(i__, l) * temp2;
09822 /* L110: */
09823 }
09824 }
09825 /* L120: */
09826 }
09827 /* L130: */
09828 }
09829 } else {
09830 i__1 = *n;
09831 for (j = 1; j <= i__1; ++j) {
09832 if (*beta == 0.f) {
09833 i__2 = *n;
09834 for (i__ = j; i__ <= i__2; ++i__) {
09835 c___ref(i__, j) = 0.f;
09836 /* L140: */
09837 }
09838 } else if (*beta != 1.f) {
09839 i__2 = *n;
09840 for (i__ = j; i__ <= i__2; ++i__) {
09841 c___ref(i__, j) = *beta * c___ref(i__, j);
09842 /* L150: */
09843 }
09844 }
09845 i__2 = *k;
09846 for (l = 1; l <= i__2; ++l) {
09847 if (a_ref(j, l) != 0.f || b_ref(j, l) != 0.f) {
09848 temp1 = *alpha * b_ref(j, l);
09849 temp2 = *alpha * a_ref(j, l);
09850 i__3 = *n;
09851 for (i__ = j; i__ <= i__3; ++i__) {
09852 c___ref(i__, j) = c___ref(i__, j) + a_ref(i__, l)
09853 * temp1 + b_ref(i__, l) * temp2;
09854 /* L160: */
09855 }
09856 }
09857 /* L170: */
09858 }
09859 /* L180: */
09860 }
09861 }
09862 } else {
09863 /* Form C := alpha*A'*B + alpha*B'*A + C. */
09864 if (upper) {
09865 i__1 = *n;
09866 for (j = 1; j <= i__1; ++j) {
09867 i__2 = j;
09868 for (i__ = 1; i__ <= i__2; ++i__) {
09869 temp1 = 0.f;
09870 temp2 = 0.f;
09871 i__3 = *k;
09872 for (l = 1; l <= i__3; ++l) {
09873 temp1 += a_ref(l, i__) * b_ref(l, j);
09874 temp2 += b_ref(l, i__) * a_ref(l, j);
09875 /* L190: */
09876 }
09877 if (*beta == 0.f) {
09878 c___ref(i__, j) = *alpha * temp1 + *alpha * temp2;
09879 } else {
09880 c___ref(i__, j) = *beta * c___ref(i__, j) + *alpha *
09881 temp1 + *alpha * temp2;
09882 }
09883 /* L200: */
09884 }
09885 /* L210: */
09886 }
09887 } else {
09888 i__1 = *n;
09889 for (j = 1; j <= i__1; ++j) {
09890 i__2 = *n;
09891 for (i__ = j; i__ <= i__2; ++i__) {
09892 temp1 = 0.f;
09893 temp2 = 0.f;
09894 i__3 = *k;
09895 for (l = 1; l <= i__3; ++l) {
09896 temp1 += a_ref(l, i__) * b_ref(l, j);
09897 temp2 += b_ref(l, i__) * a_ref(l, j);
09898 /* L220: */
09899 }
09900 if (*beta == 0.f) {
09901 c___ref(i__, j) = *alpha * temp1 + *alpha * temp2;
09902 } else {
09903 c___ref(i__, j) = *beta * c___ref(i__, j) + *alpha *
09904 temp1 + *alpha * temp2;
09905 }
09906 /* L230: */
09907 }
09908 /* L240: */
09909 }
09910 }
09911 }
09912 return 0;
09913 /* End of SSYR2K. */
09914 } /* ssyr2k_ */
|
|
||||||||||||||||||||||||||||||||||||
|
Definition at line 9922 of file lapackblas.cpp. References a_ref, c__1, doublereal, f2cmax, f2cmin, integer, logical, lsame_(), real, saxpy_(), sdot_(), slarfg_(), ssymv_(), ssyr2_(), and xerbla_(). Referenced by ssytrd_(). 09924 {
09925 /* -- LAPACK routine (version 3.0) --
09926 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
09927 Courant Institute, Argonne National Lab, and Rice University
09928 October 31, 1992
09929
09930
09931 Purpose
09932 =======
09933
09934 SSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal
09935 form T by an orthogonal similarity transformation: Q' * A * Q = T.
09936
09937 Arguments
09938 =========
09939
09940 UPLO (input) CHARACTER*1
09941 Specifies whether the upper or lower triangular part of the
09942 symmetric matrix A is stored:
09943 = 'U': Upper triangular
09944 = 'L': Lower triangular
09945
09946 N (input) INTEGER
09947 The order of the matrix A. N >= 0.
09948
09949 A (input/output) REAL array, dimension (LDA,N)
09950 On entry, the symmetric matrix A. If UPLO = 'U', the leading
09951 n-by-n upper triangular part of A contains the upper
09952 triangular part of the matrix A, and the strictly lower
09953 triangular part of A is not referenced. If UPLO = 'L', the
09954 leading n-by-n lower triangular part of A contains the lower
09955 triangular part of the matrix A, and the strictly upper
09956 triangular part of A is not referenced.
09957 On exit, if UPLO = 'U', the diagonal and first superdiagonal
09958 of A are overwritten by the corresponding elements of the
09959 tridiagonal matrix T, and the elements above the first
09960 superdiagonal, with the array TAU, represent the orthogonal
09961 matrix Q as a product of elementary reflectors; if UPLO
09962 = 'L', the diagonal and first subdiagonal of A are over-
09963 written by the corresponding elements of the tridiagonal
09964 matrix T, and the elements below the first subdiagonal, with
09965 the array TAU, represent the orthogonal matrix Q as a product
09966 of elementary reflectors. See Further Details.
09967
09968 LDA (input) INTEGER
09969 The leading dimension of the array A. LDA >= f2cmax(1,N).
09970
09971 D (output) REAL array, dimension (N)
09972 The diagonal elements of the tridiagonal matrix T:
09973 D(i) = A(i,i).
09974
09975 E (output) REAL array, dimension (N-1)
09976 The off-diagonal elements of the tridiagonal matrix T:
09977 E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
09978
09979 TAU (output) REAL array, dimension (N-1)
09980 The scalar factors of the elementary reflectors (see Further
09981 Details).
09982
09983 INFO (output) INTEGER
09984 = 0: successful exit
09985 < 0: if INFO = -i, the i-th argument had an illegal value.
09986
09987 Further Details
09988 ===============
09989
09990 If UPLO = 'U', the matrix Q is represented as a product of elementary
09991 reflectors
09992
09993 Q = H(n-1) . . . H(2) H(1).
09994
09995 Each H(i) has the form
09996
09997 H(i) = I - tau * v * v'
09998
09999 where tau is a real scalar, and v is a real vector with
10000 v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
10001 A(1:i-1,i+1), and tau in TAU(i).
10002
10003 If UPLO = 'L', the matrix Q is represented as a product of elementary
10004 reflectors
10005
10006 Q = H(1) H(2) . . . H(n-1).
10007
10008 Each H(i) has the form
10009
10010 H(i) = I - tau * v * v'
10011
10012 where tau is a real scalar, and v is a real vector with
10013 v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
10014 and tau in TAU(i).
10015
10016 The contents of A on exit are illustrated by the following examples
10017 with n = 5:
10018
10019 if UPLO = 'U': if UPLO = 'L':
10020
10021 ( d e v2 v3 v4 ) ( d )
10022 ( d e v3 v4 ) ( e d )
10023 ( d e v4 ) ( v1 e d )
10024 ( d e ) ( v1 v2 e d )
10025 ( d ) ( v1 v2 v3 e d )
10026
10027 where d and e denote diagonal and off-diagonal elements of T, and vi
10028 denotes an element of the vector defining H(i).
10029
10030 =====================================================================
10031
10032
10033 Test the input parameters
10034
10035 Parameter adjustments */
10036 /* Table of constant values */
10037 static integer c__1 = 1;
10038 static real c_b8 = 0.f;
10039 static real c_b14 = -1.f;
10040
10041 /* System generated locals */
10042 integer a_dim1, a_offset, i__1, i__2, i__3;
10043 /* Local variables */
10044 static real taui;
10045 extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
10046 static integer i__;
10047 extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *,
10048 integer *, real *, integer *, real *, integer *);
10049 static real alpha;
10050 extern logical lsame_(const char *, const char *);
10051 static logical upper;
10052 extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *,
10053 real *, integer *), ssymv_(const char *, integer *, real *, real *,
10054 integer *, real *, integer *, real *, real *, integer *),
10055 xerbla_(const char *, integer *), slarfg_(integer *, real *,
10056 real *, integer *, real *);
10057 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
10058
10059
10060 a_dim1 = *lda;
10061 a_offset = 1 + a_dim1 * 1;
10062 a -= a_offset;
10063 --d__;
10064 --e;
10065 --tau;
10066
10067 /* Function Body */
10068 *info = 0;
10069 upper = lsame_(uplo, "U");
10070 if (! upper && ! lsame_(uplo, "L")) {
10071 *info = -1;
10072 } else if (*n < 0) {
10073 *info = -2;
10074 } else if (*lda < f2cmax(1,*n)) {
10075 *info = -4;
10076 }
10077 if (*info != 0) {
10078 i__1 = -(*info);
10079 xerbla_("SSYTD2", &i__1);
10080 return 0;
10081 }
10082
10083 /* Quick return if possible */
10084
10085 if (*n <= 0) {
10086 return 0;
10087 }
10088
10089 if (upper) {
10090
10091 /* Reduce the upper triangle of A */
10092
10093 for (i__ = *n - 1; i__ >= 1; --i__) {
10094
10095 /* Generate elementary reflector H(i) = I - tau * v * v'
10096 to annihilate A(1:i-1,i+1) */
10097
10098 slarfg_(&i__, &a_ref(i__, i__ + 1), &a_ref(1, i__ + 1), &c__1, &
10099 taui);
10100 e[i__] = a_ref(i__, i__ + 1);
10101
10102 if (taui != 0.f) {
10103
10104 /* Apply H(i) from both sides to A(1:i,1:i) */
10105
10106 a_ref(i__, i__ + 1) = 1.f;
10107
10108 /* Compute x := tau * A * v storing x in TAU(1:i) */
10109
10110 ssymv_(uplo, &i__, &taui, &a[a_offset], lda, &a_ref(1, i__ +
10111 1), &c__1, &c_b8, &tau[1], &c__1);
10112
10113 /* Compute w := x - 1/2 * tau * (x'*v) * v */
10114
10115 alpha = taui * -.5f * sdot_(&i__, &tau[1], &c__1, &a_ref(1,
10116 i__ + 1), &c__1);
10117 saxpy_(&i__, &alpha, &a_ref(1, i__ + 1), &c__1, &tau[1], &
10118 c__1);
10119
10120 /* Apply the transformation as a rank-2 update:
10121 A := A - v * w' - w * v' */
10122
10123 ssyr2_(uplo, &i__, &c_b14, &a_ref(1, i__ + 1), &c__1, &tau[1],
10124 &c__1, &a[a_offset], lda);
10125
10126 a_ref(i__, i__ + 1) = e[i__];
10127 }
10128 d__[i__ + 1] = a_ref(i__ + 1, i__ + 1);
10129 tau[i__] = taui;
10130 /* L10: */
10131 }
10132 d__[1] = a_ref(1, 1);
10133 } else {
10134
10135 /* Reduce the lower triangle of A */
10136
10137 i__1 = *n - 1;
10138 for (i__ = 1; i__ <= i__1; ++i__) {
10139
10140 /* Generate elementary reflector H(i) = I - tau * v * v'
10141 to annihilate A(i+2:n,i)
10142
10143 Computing MIN */
10144 i__2 = i__ + 2;
10145 i__3 = *n - i__;
10146 slarfg_(&i__3, &a_ref(i__ + 1, i__), &a_ref(f2cmin(i__2,*n), i__), &
10147 c__1, &taui);
10148 e[i__] = a_ref(i__ + 1, i__);
10149
10150 if (taui != 0.f) {
10151
10152 /* Apply H(i) from both sides to A(i+1:n,i+1:n) */
10153
10154 a_ref(i__ + 1, i__) = 1.f;
10155
10156 /* Compute x := tau * A * v storing y in TAU(i:n-1) */
10157
10158 i__2 = *n - i__;
10159 ssymv_(uplo, &i__2, &taui, &a_ref(i__ + 1, i__ + 1), lda, &
10160 a_ref(i__ + 1, i__), &c__1, &c_b8, &tau[i__], &c__1);
10161
10162 /* Compute w := x - 1/2 * tau * (x'*v) * v */
10163
10164 i__2 = *n - i__;
10165 alpha = taui * -.5f * sdot_(&i__2, &tau[i__], &c__1, &a_ref(
10166 i__ + 1, i__), &c__1);
10167 i__2 = *n - i__;
10168 saxpy_(&i__2, &alpha, &a_ref(i__ + 1, i__), &c__1, &tau[i__],
10169 &c__1);
10170
10171 /* Apply the transformation as a rank-2 update:
10172 A := A - v * w' - w * v' */
10173
10174 i__2 = *n - i__;
10175 ssyr2_(uplo, &i__2, &c_b14, &a_ref(i__ + 1, i__), &c__1, &tau[
10176 i__], &c__1, &a_ref(i__ + 1, i__ + 1), lda)
10177 ;
10178
10179 a_ref(i__ + 1, i__) = e[i__];
10180 }
10181 d__[i__] = a_ref(i__, i__);
10182 tau[i__] = taui;
10183 /* L20: */
10184 }
10185 d__[*n] = a_ref(*n, *n);
10186 }
10187
10188 return 0;
10189
10190 /* End of SSYTD2 */
10191
10192 } /* ssytd2_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 10200 of file lapackblas.cpp. References a_ref, c__1, c__2, c__3, c_n1, f2cmax, ftnlen, ilaenv_(), integer, logical, lsame_(), nx, real, slatrd_(), ssyr2k_(), ssytd2_(), and xerbla_(). Referenced by ssyev_(). 10203 {
10204 /* -- LAPACK routine (version 3.0) --
10205 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
10206 Courant Institute, Argonne National Lab, and Rice University
10207 June 30, 1999
10208
10209
10210 Purpose
10211 =======
10212
10213 SSYTRD reduces a real symmetric matrix A to real symmetric
10214 tridiagonal form T by an orthogonal similarity transformation:
10215 Q**T * A * Q = T.
10216
10217 Arguments
10218 =========
10219
10220 UPLO (input) CHARACTER*1
10221 = 'U': Upper triangle of A is stored;
10222 = 'L': Lower triangle of A is stored.
10223
10224 N (input) INTEGER
10225 The order of the matrix A. N >= 0.
10226
10227 A (input/output) REAL array, dimension (LDA,N)
10228 On entry, the symmetric matrix A. If UPLO = 'U', the leading
10229 N-by-N upper triangular part of A contains the upper
10230 triangular part of the matrix A, and the strictly lower
10231 triangular part of A is not referenced. If UPLO = 'L', the
10232 leading N-by-N lower triangular part of A contains the lower
10233 triangular part of the matrix A, and the strictly upper
10234 triangular part of A is not referenced.
10235 On exit, if UPLO = 'U', the diagonal and first superdiagonal
10236 of A are overwritten by the corresponding elements of the
10237 tridiagonal matrix T, and the elements above the first
10238 superdiagonal, with the array TAU, represent the orthogonal
10239 matrix Q as a product of elementary reflectors; if UPLO
10240 = 'L', the diagonal and first subdiagonal of A are over-
10241 written by the corresponding elements of the tridiagonal
10242 matrix T, and the elements below the first subdiagonal, with
10243 the array TAU, represent the orthogonal matrix Q as a product
10244 of elementary reflectors. See Further Details.
10245
10246 LDA (input) INTEGER
10247 The leading dimension of the array A. LDA >= f2cmax(1,N).
10248
10249 D (output) REAL array, dimension (N)
10250 The diagonal elements of the tridiagonal matrix T:
10251 D(i) = A(i,i).
10252
10253 E (output) REAL array, dimension (N-1)
10254 The off-diagonal elements of the tridiagonal matrix T:
10255 E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
10256
10257 TAU (output) REAL array, dimension (N-1)
10258 The scalar factors of the elementary reflectors (see Further
10259 Details).
10260
10261 WORK (workspace/output) REAL array, dimension (LWORK)
10262 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
10263
10264 LWORK (input) INTEGER
10265 The dimension of the array WORK. LWORK >= 1.
10266 For optimum performance LWORK >= N*NB, where NB is the
10267 optimal blocksize.
10268
10269 If LWORK = -1, then a workspace query is assumed; the routine
10270 only calculates the optimal size of the WORK array, returns
10271 this value as the first entry of the WORK array, and no error
10272 message related to LWORK is issued by XERBLA.
10273
10274 INFO (output) INTEGER
10275 = 0: successful exit
10276 < 0: if INFO = -i, the i-th argument had an illegal value
10277
10278 Further Details
10279 ===============
10280
10281 If UPLO = 'U', the matrix Q is represented as a product of elementary
10282 reflectors
10283
10284 Q = H(n-1) . . . H(2) H(1).
10285
10286 Each H(i) has the form
10287
10288 H(i) = I - tau * v * v'
10289
10290 where tau is a real scalar, and v is a real vector with
10291 v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
10292 A(1:i-1,i+1), and tau in TAU(i).
10293
10294 If UPLO = 'L', the matrix Q is represented as a product of elementary
10295 reflectors
10296
10297 Q = H(1) H(2) . . . H(n-1).
10298
10299 Each H(i) has the form
10300
10301 H(i) = I - tau * v * v'
10302
10303 where tau is a real scalar, and v is a real vector with
10304 v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
10305 and tau in TAU(i).
10306
10307 The contents of A on exit are illustrated by the following examples
10308 with n = 5:
10309
10310 if UPLO = 'U': if UPLO = 'L':
10311
10312 ( d e v2 v3 v4 ) ( d )
10313 ( d e v3 v4 ) ( e d )
10314 ( d e v4 ) ( v1 e d )
10315 ( d e ) ( v1 v2 e d )
10316 ( d ) ( v1 v2 v3 e d )
10317
10318 where d and e denote diagonal and off-diagonal elements of T, and vi
10319 denotes an element of the vector defining H(i).
10320
10321 =====================================================================
10322
10323
10324 Test the input parameters
10325
10326 Parameter adjustments */
10327 /* Table of constant values */
10328 static integer c__1 = 1;
10329 static integer c_n1 = -1;
10330 static integer c__3 = 3;
10331 static integer c__2 = 2;
10332 static real c_b22 = -1.f;
10333 static real c_b23 = 1.f;
10334
10335 /* System generated locals */
10336 integer a_dim1, a_offset, i__1, i__2, i__3;
10337 /* Local variables */
10338 static integer i__, j;
10339 extern logical lsame_(const char *, const char *);
10340 static integer nbmin, iinfo;
10341 static logical upper;
10342 static integer nb, kk;
10343 extern /* Subroutine */ int ssytd2_(char *, integer *, real *, integer *,
10344 real *, real *, real *, integer *), ssyr2k_(char *, const char *
10345 , integer *, integer *, real *, real *, integer *, real *,
10346 integer *, real *, real *, integer *);
10347 static integer nx;
10348 extern /* Subroutine */ int xerbla_(const char *, integer *);
10349 extern integer ilaenv_(integer *, const char *, const char *, integer *, integer *,
10350 integer *, integer *, ftnlen, ftnlen);
10351 extern /* Subroutine */ int slatrd_(char *, integer *, integer *, real *,
10352 integer *, real *, real *, real *, integer *);
10353 static integer ldwork, lwkopt;
10354 static logical lquery;
10355 static integer iws;
10356 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
10357
10358
10359 a_dim1 = *lda;
10360 a_offset = 1 + a_dim1 * 1;
10361 a -= a_offset;
10362 --d__;
10363 --e;
10364 --tau;
10365 --work;
10366
10367 /* Function Body */
10368 *info = 0;
10369 upper = lsame_(uplo, "U");
10370 lquery = *lwork == -1;
10371 if (! upper && ! lsame_(uplo, "L")) {
10372 *info = -1;
10373 } else if (*n < 0) {
10374 *info = -2;
10375 } else if (*lda < f2cmax(1,*n)) {
10376 *info = -4;
10377 } else if (*lwork < 1 && ! lquery) {
10378 *info = -9;
10379 }
10380
10381 if (*info == 0) {
10382
10383 /* Determine the block size. */
10384
10385 nb = ilaenv_(&c__1, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
10386 (ftnlen)1);
10387 lwkopt = *n * nb;
10388 work[1] = (real) lwkopt;
10389 }
10390
10391 if (*info != 0) {
10392 i__1 = -(*info);
10393 xerbla_("SSYTRD", &i__1);
10394 return 0;
10395 } else if (lquery) {
10396 return 0;
10397 }
10398
10399 /* Quick return if possible */
10400
10401 if (*n == 0) {
10402 work[1] = 1.f;
10403 return 0;
10404 }
10405
10406 nx = *n;
10407 iws = 1;
10408 if (nb > 1 && nb < *n) {
10409
10410 /* Determine when to cross over from blocked to unblocked code
10411 (last block is always handled by unblocked code).
10412
10413 Computing MAX */
10414 i__1 = nb, i__2 = ilaenv_(&c__3, "SSYTRD", uplo, n, &c_n1, &c_n1, &
10415 c_n1, (ftnlen)6, (ftnlen)1);
10416 nx = f2cmax(i__1,i__2);
10417 if (nx < *n) {
10418
10419 /* Determine if workspace is large enough for blocked code. */
10420
10421 ldwork = *n;
10422 iws = ldwork * nb;
10423 if (*lwork < iws) {
10424
10425 /* Not enough workspace to use optimal NB: determine the
10426 minimum value of NB, and reduce NB or force use of
10427 unblocked code by setting NX = N.
10428
10429 Computing MAX */
10430 i__1 = *lwork / ldwork;
10431 nb = f2cmax(i__1,1);
10432 nbmin = ilaenv_(&c__2, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1,
10433 (ftnlen)6, (ftnlen)1);
10434 if (nb < nbmin) {
10435 nx = *n;
10436 }
10437 }
10438 } else {
10439 nx = *n;
10440 }
10441 } else {
10442 nb = 1;
10443 }
10444
10445 if (upper) {
10446
10447 /* Reduce the upper triangle of A.
10448 Columns 1:kk are handled by the unblocked method. */
10449
10450 kk = *n - (*n - nx + nb - 1) / nb * nb;
10451 i__1 = kk + 1;
10452 i__2 = -nb;
10453 for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
10454 i__2) {
10455
10456 /* Reduce columns i:i+nb-1 to tridiagonal form and form the
10457 matrix W which is needed to update the unreduced part of
10458 the matrix */
10459
10460 i__3 = i__ + nb - 1;
10461 slatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], &
10462 work[1], &ldwork);
10463
10464 /* Update the unreduced submatrix A(1:i-1,1:i-1), using an
10465 update of the form: A := A - V*W' - W*V' */
10466
10467 i__3 = i__ - 1;
10468 ssyr2k_(uplo, "No transpose", &i__3, &nb, &c_b22, &a_ref(1, i__),
10469 lda, &work[1], &ldwork, &c_b23, &a[a_offset], lda);
10470
10471 /* Copy superdiagonal elements back into A, and diagonal
10472 elements into D */
10473
10474 i__3 = i__ + nb - 1;
10475 for (j = i__; j <= i__3; ++j) {
10476 a_ref(j - 1, j) = e[j - 1];
10477 d__[j] = a_ref(j, j);
10478 /* L10: */
10479 }
10480 /* L20: */
10481 }
10482
10483 /* Use unblocked code to reduce the last or only block */
10484
10485 ssytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo);
10486 } else {
10487
10488 /* Reduce the lower triangle of A */
10489
10490 i__2 = *n - nx;
10491 i__1 = nb;
10492 for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
10493
10494 /* Reduce columns i:i+nb-1 to tridiagonal form and form the
10495 matrix W which is needed to update the unreduced part of
10496 the matrix */
10497
10498 i__3 = *n - i__ + 1;
10499 slatrd_(uplo, &i__3, &nb, &a_ref(i__, i__), lda, &e[i__], &tau[
10500 i__], &work[1], &ldwork);
10501
10502 /* Update the unreduced submatrix A(i+ib:n,i+ib:n), using
10503 an update of the form: A := A - V*W' - W*V' */
10504
10505 i__3 = *n - i__ - nb + 1;
10506 ssyr2k_(uplo, "No transpose", &i__3, &nb, &c_b22, &a_ref(i__ + nb,
10507 i__), lda, &work[nb + 1], &ldwork, &c_b23, &a_ref(i__ +
10508 nb, i__ + nb), lda);
10509
10510 /* Copy subdiagonal elements back into A, and diagonal
10511 elements into D */
10512
10513 i__3 = i__ + nb - 1;
10514 for (j = i__; j <= i__3; ++j) {
10515 a_ref(j + 1, j) = e[j];
10516 d__[j] = a_ref(j, j);
10517 /* L30: */
10518 }
10519 /* L40: */
10520 }
10521
10522 /* Use unblocked code to reduce the last or only block */
10523
10524 i__1 = *n - i__ + 1;
10525 ssytd2_(uplo, &i__1, &a_ref(i__, i__), lda, &d__[i__], &e[i__], &tau[
10526 i__], &iinfo);
10527 }
10528
10529 work[1] = (real) lwkopt;
10530 return 0;
10531
10532 /* End of SSYTRD */
10533
10534 } /* ssytrd_ */
|
|
||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 10542 of file lapackblas.cpp. References a_ref, b, b_ref, diag, f2cmax, integer, logical, lsame_(), real, and xerbla_(). Referenced by slarfb_(). 10545 {
10546 /* System generated locals */
10547 integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
10548 /* Local variables */
10549 static integer info;
10550 static real temp;
10551 static integer i__, j, k;
10552 static logical lside;
10553 extern logical lsame_(const char *, const char *);
10554 static integer nrowa;
10555 static logical upper;
10556 extern /* Subroutine */ int xerbla_(const char *, integer *);
10557 static logical nounit;
10558 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
10559 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
10560 /* Purpose
10561 =======
10562 STRMM performs one of the matrix-matrix operations
10563 B := alpha*op( A )*B, or B := alpha*B*op( A ),
10564 where alpha is a scalar, B is an m by n matrix, A is a unit, or
10565 non-unit, upper or lower triangular matrix and op( A ) is one of
10566 op( A ) = A or op( A ) = A'.
10567 Parameters
10568 ==========
10569 SIDE - CHARACTER*1.
10570 On entry, SIDE specifies whether op( A ) multiplies B from
10571 the left or right as follows:
10572 SIDE = 'L' or 'l' B := alpha*op( A )*B.
10573 SIDE = 'R' or 'r' B := alpha*B*op( A ).
10574 Unchanged on exit.
10575 UPLO - CHARACTER*1.
10576 On entry, UPLO specifies whether the matrix A is an upper or
10577 lower triangular matrix as follows:
10578 UPLO = 'U' or 'u' A is an upper triangular matrix.
10579 UPLO = 'L' or 'l' A is a lower triangular matrix.
10580 Unchanged on exit.
10581 TRANSA - CHARACTER*1.
10582 On entry, TRANSA specifies the form of op( A ) to be used in
10583 the matrix multiplication as follows:
10584 TRANSA = 'N' or 'n' op( A ) = A.
10585 TRANSA = 'T' or 't' op( A ) = A'.
10586 TRANSA = 'C' or 'c' op( A ) = A'.
10587 Unchanged on exit.
10588 DIAG - CHARACTER*1.
10589 On entry, DIAG specifies whether or not A is unit triangular
10590 as follows:
10591 DIAG = 'U' or 'u' A is assumed to be unit triangular.
10592 DIAG = 'N' or 'n' A is not assumed to be unit
10593 triangular.
10594 Unchanged on exit.
10595 M - INTEGER.
10596 On entry, M specifies the number of rows of B. M must be at
10597 least zero.
10598 Unchanged on exit.
10599 N - INTEGER.
10600 On entry, N specifies the number of columns of B. N must be
10601 at least zero.
10602 Unchanged on exit.
10603 ALPHA - REAL .
10604 On entry, ALPHA specifies the scalar alpha. When alpha is
10605 zero then A is not referenced and B need not be set before
10606 entry.
10607 Unchanged on exit.
10608 A - REAL array of DIMENSION ( LDA, k ), where k is m
10609 when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'.
10610 Before entry with UPLO = 'U' or 'u', the leading k by k
10611 upper triangular part of the array A must contain the upper
10612 triangular matrix and the strictly lower triangular part of
10613 A is not referenced.
10614 Before entry with UPLO = 'L' or 'l', the leading k by k
10615 lower triangular part of the array A must contain the lower
10616 triangular matrix and the strictly upper triangular part of
10617 A is not referenced.
10618 Note that when DIAG = 'U' or 'u', the diagonal elements of
10619 A are not referenced either, but are assumed to be unity.
10620 Unchanged on exit.
10621 LDA - INTEGER.
10622 On entry, LDA specifies the first dimension of A as declared
10623 in the calling (sub) program. When SIDE = 'L' or 'l' then
10624 LDA must be at least f2cmax( 1, m ), when SIDE = 'R' or 'r'
10625 then LDA must be at least f2cmax( 1, n ).
10626 Unchanged on exit.
10627 B - REAL array of DIMENSION ( LDB, n ).
10628 Before entry, the leading m by n part of the array B must
10629 contain the matrix B, and on exit is overwritten by the
10630 transformed matrix.
10631 LDB - INTEGER.
10632 On entry, LDB specifies the first dimension of B as declared
10633 in the calling (sub) program. LDB must be at least
10634 f2cmax( 1, m ).
10635 Unchanged on exit.
10636 Level 3 Blas routine.
10637 -- Written on 8-February-1989.
10638 Jack Dongarra, Argonne National Laboratory.
10639 Iain Duff, AERE Harwell.
10640 Jeremy Du Croz, Numerical Algorithms Group Ltd.
10641 Sven Hammarling, Numerical Algorithms Group Ltd.
10642 Test the input parameters.
10643 Parameter adjustments */
10644 a_dim1 = *lda;
10645 a_offset = 1 + a_dim1 * 1;
10646 a -= a_offset;
10647 b_dim1 = *ldb;
10648 b_offset = 1 + b_dim1 * 1;
10649 b -= b_offset;
10650 /* Function Body */
10651 lside = lsame_(side, "L");
10652 if (lside) {
10653 nrowa = *m;
10654 } else {
10655 nrowa = *n;
10656 }
10657 nounit = lsame_(diag, "N");
10658 upper = lsame_(uplo, "U");
10659 info = 0;
10660 if (! lside && ! lsame_(side, "R")) {
10661 info = 1;
10662 } else if (! upper && ! lsame_(uplo, "L")) {
10663 info = 2;
10664 } else if (! lsame_(transa, "N") && ! lsame_(transa,
10665 "T") && ! lsame_(transa, "C")) {
10666 info = 3;
10667 } else if (! lsame_(diag, "U") && ! lsame_(diag,
10668 "N")) {
10669 info = 4;
10670 } else if (*m < 0) {
10671 info = 5;
10672 } else if (*n < 0) {
10673 info = 6;
10674 } else if (*lda < f2cmax(1,nrowa)) {
10675 info = 9;
10676 } else if (*ldb < f2cmax(1,*m)) {
10677 info = 11;
10678 }
10679 if (info != 0) {
10680 xerbla_("STRMM ", &info);
10681 return 0;
10682 }
10683 /* Quick return if possible. */
10684 if (*n == 0) {
10685 return 0;
10686 }
10687 /* And when alpha.eq.zero. */
10688 if (*alpha == 0.f) {
10689 i__1 = *n;
10690 for (j = 1; j <= i__1; ++j) {
10691 i__2 = *m;
10692 for (i__ = 1; i__ <= i__2; ++i__) {
10693 b_ref(i__, j) = 0.f;
10694 /* L10: */
10695 }
10696 /* L20: */
10697 }
10698 return 0;
10699 }
10700 /* Start the operations. */
10701 if (lside) {
10702 if (lsame_(transa, "N")) {
10703 /* Form B := alpha*A*B. */
10704 if (upper) {
10705 i__1 = *n;
10706 for (j = 1; j <= i__1; ++j) {
10707 i__2 = *m;
10708 for (k = 1; k <= i__2; ++k) {
10709 if (b_ref(k, j) != 0.f) {
10710 temp = *alpha * b_ref(k, j);
10711 i__3 = k - 1;
10712 for (i__ = 1; i__ <= i__3; ++i__) {
10713 b_ref(i__, j) = b_ref(i__, j) + temp * a_ref(
10714 i__, k);
10715 /* L30: */
10716 }
10717 if (nounit) {
10718 temp *= a_ref(k, k);
10719 }
10720 b_ref(k, j) = temp;
10721 }
10722 /* L40: */
10723 }
10724 /* L50: */
10725 }
10726 } else {
10727 i__1 = *n;
10728 for (j = 1; j <= i__1; ++j) {
10729 for (k = *m; k >= 1; --k) {
10730 if (b_ref(k, j) != 0.f) {
10731 temp = *alpha * b_ref(k, j);
10732 b_ref(k, j) = temp;
10733 if (nounit) {
10734 b_ref(k, j) = b_ref(k, j) * a_ref(k, k);
10735 }
10736 i__2 = *m;
10737 for (i__ = k + 1; i__ <= i__2; ++i__) {
10738 b_ref(i__, j) = b_ref(i__, j) + temp * a_ref(
10739 i__, k);
10740 /* L60: */
10741 }
10742 }
10743 /* L70: */
10744 }
10745 /* L80: */
10746 }
10747 }
10748 } else {
10749 /* Form B := alpha*A'*B. */
10750 if (upper) {
10751 i__1 = *n;
10752 for (j = 1; j <= i__1; ++j) {
10753 for (i__ = *m; i__ >= 1; --i__) {
10754 temp = b_ref(i__, j);
10755 if (nounit) {
10756 temp *= a_ref(i__, i__);
10757 }
10758 i__2 = i__ - 1;
10759 for (k = 1; k <= i__2; ++k) {
10760 temp += a_ref(k, i__) * b_ref(k, j);
10761 /* L90: */
10762 }
10763 b_ref(i__, j) = *alpha * temp;
10764 /* L100: */
10765 }
10766 /* L110: */
10767 }
10768 } else {
10769 i__1 = *n;
10770 for (j = 1; j <= i__1; ++j) {
10771 i__2 = *m;
10772 for (i__ = 1; i__ <= i__2; ++i__) {
10773 temp = b_ref(i__, j);
10774 if (nounit) {
10775 temp *= a_ref(i__, i__);
10776 }
10777 i__3 = *m;
10778 for (k = i__ + 1; k <= i__3; ++k) {
10779 temp += a_ref(k, i__) * b_ref(k, j);
10780 /* L120: */
10781 }
10782 b_ref(i__, j) = *alpha * temp;
10783 /* L130: */
10784 }
10785 /* L140: */
10786 }
10787 }
10788 }
10789 } else {
10790 if (lsame_(transa, "N")) {
10791 /* Form B := alpha*B*A. */
10792 if (upper) {
10793 for (j = *n; j >= 1; --j) {
10794 temp = *alpha;
10795 if (nounit) {
10796 temp *= a_ref(j, j);
10797 }
10798 i__1 = *m;
10799 for (i__ = 1; i__ <= i__1; ++i__) {
10800 b_ref(i__, j) = temp * b_ref(i__, j);
10801 /* L150: */
10802 }
10803 i__1 = j - 1;
10804 for (k = 1; k <= i__1; ++k) {
10805 if (a_ref(k, j) != 0.f) {
10806 temp = *alpha * a_ref(k, j);
10807 i__2 = *m;
10808 for (i__ = 1; i__ <= i__2; ++i__) {
10809 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
10810 i__, k);
10811 /* L160: */
10812 }
10813 }
10814 /* L170: */
10815 }
10816 /* L180: */
10817 }
10818 } else {
10819 i__1 = *n;
10820 for (j = 1; j <= i__1; ++j) {
10821 temp = *alpha;
10822 if (nounit) {
10823 temp *= a_ref(j, j);
10824 }
10825 i__2 = *m;
10826 for (i__ = 1; i__ <= i__2; ++i__) {
10827 b_ref(i__, j) = temp * b_ref(i__, j);
10828 /* L190: */
10829 }
10830 i__2 = *n;
10831 for (k = j + 1; k <= i__2; ++k) {
10832 if (a_ref(k, j) != 0.f) {
10833 temp = *alpha * a_ref(k, j);
10834 i__3 = *m;
10835 for (i__ = 1; i__ <= i__3; ++i__) {
10836 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
10837 i__, k);
10838 /* L200: */
10839 }
10840 }
10841 /* L210: */
10842 }
10843 /* L220: */
10844 }
10845 }
10846 } else {
10847 /* Form B := alpha*B*A'. */
10848 if (upper) {
10849 i__1 = *n;
10850 for (k = 1; k <= i__1; ++k) {
10851 i__2 = k - 1;
10852 for (j = 1; j <= i__2; ++j) {
10853 if (a_ref(j, k) != 0.f) {
10854 temp = *alpha * a_ref(j, k);
10855 i__3 = *m;
10856 for (i__ = 1; i__ <= i__3; ++i__) {
10857 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
10858 i__, k);
10859 /* L230: */
10860 }
10861 }
10862 /* L240: */
10863 }
10864 temp = *alpha;
10865 if (nounit) {
10866 temp *= a_ref(k, k);
10867 }
10868 if (temp != 1.f) {
10869 i__2 = *m;
10870 for (i__ = 1; i__ <= i__2; ++i__) {
10871 b_ref(i__, k) = temp * b_ref(i__, k);
10872 /* L250: */
10873 }
10874 }
10875 /* L260: */
10876 }
10877 } else {
10878 for (k = *n; k >= 1; --k) {
10879 i__1 = *n;
10880 for (j = k + 1; j <= i__1; ++j) {
10881 if (a_ref(j, k) != 0.f) {
10882 temp = *alpha * a_ref(j, k);
10883 i__2 = *m;
10884 for (i__ = 1; i__ <= i__2; ++i__) {
10885 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
10886 i__, k);
10887 /* L270: */
10888 }
10889 }
10890 /* L280: */
10891 }
10892 temp = *alpha;
10893 if (nounit) {
10894 temp *= a_ref(k, k);
10895 }
10896 if (temp != 1.f) {
10897 i__1 = *m;
10898 for (i__ = 1; i__ <= i__1; ++i__) {
10899 b_ref(i__, k) = temp * b_ref(i__, k);
10900 /* L290: */
10901 }
10902 }
10903 /* L300: */
10904 }
10905 }
10906 }
10907 }
10908 return 0;
10909 /* End of STRMM . */
10910 } /* strmm_ */
|
|
||||||||||||||||||||||||||||||||||||
|
Definition at line 10917 of file lapackblas.cpp. References a_ref, diag, f2cmax, integer, logical, lsame_(), real, x, and xerbla_(). Referenced by slarft_(). 10919 {
10920 /* System generated locals */
10921 integer a_dim1, a_offset, i__1, i__2;
10922 /* Local variables */
10923 static integer info;
10924 static real temp;
10925 static integer i__, j;
10926 extern logical lsame_(const char *, const char *);
10927 static integer ix, jx, kx;
10928 extern /* Subroutine */ int xerbla_(const char *, integer *);
10929 static logical nounit;
10930 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
10931 /* Purpose
10932 =======
10933 STRMV performs one of the matrix-vector operations
10934 x := A*x, or x := A'*x,
10935 where x is an n element vector and A is an n by n unit, or non-unit,
10936 upper or lower triangular matrix.
10937 Parameters
10938 ==========
10939 UPLO - CHARACTER*1.
10940 On entry, UPLO specifies whether the matrix is an upper or
10941 lower triangular matrix as follows:
10942 UPLO = 'U' or 'u' A is an upper triangular matrix.
10943 UPLO = 'L' or 'l' A is a lower triangular matrix.
10944 Unchanged on exit.
10945 TRANS - CHARACTER*1.
10946 On entry, TRANS specifies the operation to be performed as
10947 follows:
10948 TRANS = 'N' or 'n' x := A*x.
10949 TRANS = 'T' or 't' x := A'*x.
10950 TRANS = 'C' or 'c' x := A'*x.
10951 Unchanged on exit.
10952 DIAG - CHARACTER*1.
10953 On entry, DIAG specifies whether or not A is unit
10954 triangular as follows:
10955 DIAG = 'U' or 'u' A is assumed to be unit triangular.
10956 DIAG = 'N' or 'n' A is not assumed to be unit
10957 triangular.
10958 Unchanged on exit.
10959 N - INTEGER.
10960 On entry, N specifies the order of the matrix A.
10961 N must be at least zero.
10962 Unchanged on exit.
10963 A - REAL array of DIMENSION ( LDA, n ).
10964 Before entry with UPLO = 'U' or 'u', the leading n by n
10965 upper triangular part of the array A must contain the upper
10966 triangular matrix and the strictly lower triangular part of
10967 A is not referenced.
10968 Before entry with UPLO = 'L' or 'l', the leading n by n
10969 lower triangular part of the array A must contain the lower
10970 triangular matrix and the strictly upper triangular part of
10971 A is not referenced.
10972 Note that when DIAG = 'U' or 'u', the diagonal elements of
10973 A are not referenced either, but are assumed to be unity.
10974 Unchanged on exit.
10975 LDA - INTEGER.
10976 On entry, LDA specifies the first dimension of A as declared
10977 in the calling (sub) program. LDA must be at least
10978 f2cmax( 1, n ).
10979 Unchanged on exit.
10980 X - REAL array of dimension at least
10981 ( 1 + ( n - 1 )*abs( INCX ) ).
10982 Before entry, the incremented array X must contain the n
10983 element vector x. On exit, X is overwritten with the
10984 tranformed vector x.
10985 INCX - INTEGER.
10986 On entry, INCX specifies the increment for the elements of
10987 X. INCX must not be zero.
10988 Unchanged on exit.
10989 Level 2 Blas routine.
10990 -- Written on 22-October-1986.
10991 Jack Dongarra, Argonne National Lab.
10992 Jeremy Du Croz, Nag Central Office.
10993 Sven Hammarling, Nag Central Office.
10994 Richard Hanson, Sandia National Labs.
10995 Test the input parameters.
10996 Parameter adjustments */
10997 a_dim1 = *lda;
10998 a_offset = 1 + a_dim1 * 1;
10999 a -= a_offset;
11000 --x;
11001 /* Function Body */
11002 info = 0;
11003 if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
11004 info = 1;
11005 } else if (! lsame_(trans, "N") && ! lsame_(trans,
11006 "T") && ! lsame_(trans, "C")) {
11007 info = 2;
11008 } else if (! lsame_(diag, "U") && ! lsame_(diag,
11009 "N")) {
11010 info = 3;
11011 } else if (*n < 0) {
11012 info = 4;
11013 } else if (*lda < f2cmax(1,*n)) {
11014 info = 6;
11015 } else if (*incx == 0) {
11016 info = 8;
11017 }
11018 if (info != 0) {
11019 xerbla_("STRMV ", &info);
11020 return 0;
11021 }
11022 /* Quick return if possible. */
11023 if (*n == 0) {
11024 return 0;
11025 }
11026 nounit = lsame_(diag, "N");
11027 /* Set up the start point in X if the increment is not unity. This
11028 will be ( N - 1 )*INCX too small for descending loops. */
11029 if (*incx <= 0) {
11030 kx = 1 - (*n - 1) * *incx;
11031 } else if (*incx != 1) {
11032 kx = 1;
11033 }
11034 /* Start the operations. In this version the elements of A are
11035 accessed sequentially with one pass through A. */
11036 if (lsame_(trans, "N")) {
11037 /* Form x := A*x. */
11038 if (lsame_(uplo, "U")) {
11039 if (*incx == 1) {
11040 i__1 = *n;
11041 for (j = 1; j <= i__1; ++j) {
11042 if (x[j] != 0.f) {
11043 temp = x[j];
11044 i__2 = j - 1;
11045 for (i__ = 1; i__ <= i__2; ++i__) {
11046 x[i__] += temp * a_ref(i__, j);
11047 /* L10: */
11048 }
11049 if (nounit) {
11050 x[j] *= a_ref(j, j);
11051 }
11052 }
11053 /* L20: */
11054 }
11055 } else {
11056 jx = kx;
11057 i__1 = *n;
11058 for (j = 1; j <= i__1; ++j) {
11059 if (x[jx] != 0.f) {
11060 temp = x[jx];
11061 ix = kx;
11062 i__2 = j - 1;
11063 for (i__ = 1; i__ <= i__2; ++i__) {
11064 x[ix] += temp * a_ref(i__, j);
11065 ix += *incx;
11066 /* L30: */
11067 }
11068 if (nounit) {
11069 x[jx] *= a_ref(j, j);
11070 }
11071 }
11072 jx += *incx;
11073 /* L40: */
11074 }
11075 }
11076 } else {
11077 if (*incx == 1) {
11078 for (j = *n; j >= 1; --j) {
11079 if (x[j] != 0.f) {
11080 temp = x[j];
11081 i__1 = j + 1;
11082 for (i__ = *n; i__ >= i__1; --i__) {
11083 x[i__] += temp * a_ref(i__, j);
11084 /* L50: */
11085 }
11086 if (nounit) {
11087 x[j] *= a_ref(j, j);
11088 }
11089 }
11090 /* L60: */
11091 }
11092 } else {
11093 kx += (*n - 1) * *incx;
11094 jx = kx;
11095 for (j = *n; j >= 1; --j) {
11096 if (x[jx] != 0.f) {
11097 temp = x[jx];
11098 ix = kx;
11099 i__1 = j + 1;
11100 for (i__ = *n; i__ >= i__1; --i__) {
11101 x[ix] += temp * a_ref(i__, j);
11102 ix -= *incx;
11103 /* L70: */
11104 }
11105 if (nounit) {
11106 x[jx] *= a_ref(j, j);
11107 }
11108 }
11109 jx -= *incx;
11110 /* L80: */
11111 }
11112 }
11113 }
11114 } else {
11115 /* Form x := A'*x. */
11116 if (lsame_(uplo, "U")) {
11117 if (*incx == 1) {
11118 for (j = *n; j >= 1; --j) {
11119 temp = x[j];
11120 if (nounit) {
11121 temp *= a_ref(j, j);
11122 }
11123 for (i__ = j - 1; i__ >= 1; --i__) {
11124 temp += a_ref(i__, j) * x[i__];
11125 /* L90: */
11126 }
11127 x[j] = temp;
11128 /* L100: */
11129 }
11130 } else {
11131 jx = kx + (*n - 1) * *incx;
11132 for (j = *n; j >= 1; --j) {
11133 temp = x[jx];
11134 ix = jx;
11135 if (nounit) {
11136 temp *= a_ref(j, j);
11137 }
11138 for (i__ = j - 1; i__ >= 1; --i__) {
11139 ix -= *incx;
11140 temp += a_ref(i__, j) * x[ix];
11141 /* L110: */
11142 }
11143 x[jx] = temp;
11144 jx -= *incx;
11145 /* L120: */
11146 }
11147 }
11148 } else {
11149 if (*incx == 1) {
11150 i__1 = *n;
11151 for (j = 1; j <= i__1; ++j) {
11152 temp = x[j];
11153 if (nounit) {
11154 temp *= a_ref(j, j);
11155 }
11156 i__2 = *n;
11157 for (i__ = j + 1; i__ <= i__2; ++i__) {
11158 temp += a_ref(i__, j) * x[i__];
11159 /* L130: */
11160 }
11161 x[j] = temp;
11162 /* L140: */
11163 }
11164 } else {
11165 jx = kx;
11166 i__1 = *n;
11167 for (j = 1; j <= i__1; ++j) {
11168 temp = x[jx];
11169 ix = jx;
11170 if (nounit) {
11171 temp *= a_ref(j, j);
11172 }
11173 i__2 = *n;
11174 for (i__ = j + 1; i__ <= i__2; ++i__) {
11175 ix += *incx;
11176 temp += a_ref(i__, j) * x[ix];
11177 /* L150: */
11178 }
11179 x[jx] = temp;
11180 jx += *incx;
11181 /* L160: */
11182 }
11183 }
11184 }
11185 }
11186 return 0;
11187 /* End of STRMV . */
11188 } /* strmv_ */
|
|
||||||||||||
|
Definition at line 11194 of file lapackblas.cpp. Referenced by sbdsqr_(), sgebd2_(), sgebrd_(), sgelq2_(), sgelqf_(), sgemm_(), sgemv_(), sgeqr2_(), sgeqrf_(), sger_(), sgesvd_(), slaed0_(), slaed1_(), slaed2_(), slaed3_(), slaed7_(), slaed8_(), slaed9_(), slaeda_(), slascl_(), slasq1_(), slasq2_(), slasr_(), slasrt_(), sorg2l_(), sorg2r_(), sorgbr_(), sorgl2_(), sorglq_(), sorgql_(), sorgqr_(), sorgtr_(), sorm2r_(), sormbr_(), sorml2_(), sormlq_(), sormqr_(), sstedc_(), ssteqr_(), ssterf_(), sstevd_(), ssyev_(), ssymv_(), ssyr2_(), ssyr2k_(), ssytd2_(), ssytrd_(), strmm_(), and strmv_(). 11195 {
11196 /* -- LAPACK auxiliary routine (version 2.0) --
11197 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
11198 Courant Institute, Argonne National Lab, and Rice University
11199 September 30, 1994
11200
11201
11202 Purpose
11203 =======
11204
11205 XERBLA is an error handler for the LAPACK routines.
11206 It is called by an LAPACK routine if an input parameter has an
11207 invalid value. A message is printed and execution stops.
11208
11209 Installers may consider modifying the STOP statement in order to
11210 call system-specific exception-handling facilities.
11211
11212 Arguments
11213 =========
11214
11215 SRNAME (input) CHARACTER*6
11216 The name of the routine which called XERBLA.
11217
11218 INFO (input) INTEGER
11219 The position of the invalid parameter in the parameter list
11220
11221 of the calling routine.
11222
11223 =====================================================================
11224 */
11225
11226 printf("** On entry to %6s, parameter number %2i had an illegal value\n",
11227 srname, *info);
11228
11229 /* End of XERBLA */
11230
11231 return 0;
11232 } /* xerbla_ */
|
|
|
Definition at line 16370 of file lapackblas.cpp. Referenced by ilaenv_(), sgesvd_(), slaed0_(), slasq1_(), sstedc_(), ssteqr_(), ssterf_(), and ssyev_(). |
|
|
Definition at line 13567 of file lapackblas.cpp. Referenced by ilaenv_(), sbdsqr_(), sgebd2_(), sgebrd_(), sgelqf_(), sgeqr2_(), sgeqrf_(), sgesvd_(), slabrd_(), slaed0_(), slaed1_(), slaed2_(), slaed3_(), slaed7_(), slaed8_(), slaed9_(), slaeda_(), slange_(), slanst_(), slansy_(), slarf_(), slarfb_(), slarft_(), slasq1_(), slasq2_(), slatrd_(), sorg2l_(), sorg2r_(), sorgbr_(), sorglq_(), sorgql_(), sorgqr_(), sorgtr_(), sorm2r_(), sormbr_(), sormlq_(), sormqr_(), sstedc_(), ssteqr_(), ssterf_(), sstevd_(), ssyev_(), ssytd2_(), and ssytrd_(). |
|
|
Definition at line 25498 of file lapackblas.cpp. Referenced by slasq2_(). |
|
|
Definition at line 25501 of file lapackblas.cpp. Referenced by slasq2_(). |
|
|
Definition at line 16371 of file lapackblas.cpp. Referenced by sgebrd_(), sgelqf_(), sgeqrf_(), sgesvd_(), slaed0_(), slaed7_(), slaeda_(), slasq1_(), slasq2_(), sorglq_(), sorgql_(), sorgqr_(), sormbr_(), sormlq_(), sormqr_(), sstedc_(), ssteqr_(), and ssytrd_(). |
|
|
Definition at line 25499 of file lapackblas.cpp. Referenced by sgebrd_(), sgelqf_(), sgeqrf_(), slasq2_(), sorglq_(), sorgql_(), sorgqr_(), and ssytrd_(). |
|
|
Definition at line 25500 of file lapackblas.cpp. Referenced by slasq2_(). |
|
|
Definition at line 16369 of file lapackblas.cpp. Referenced by sgesvd_(). |
|
|
Definition at line 22949 of file lapackblas.cpp. Referenced by sbdsqr_(). |
|
|
Definition at line 13566 of file lapackblas.cpp. |
|
|
Definition at line 16374 of file lapackblas.cpp. Referenced by sgesvd_(). |
|
|
Definition at line 16375 of file lapackblas.cpp. Referenced by sgesvd_(). |
|
|
Definition at line 22951 of file lapackblas.cpp. Referenced by sbdsqr_(). |
|
|
Definition at line 22952 of file lapackblas.cpp. Referenced by sbdsqr_(). |
|
|
Definition at line 16373 of file lapackblas.cpp. Referenced by sgebrd_(), sgelqf_(), sgeqrf_(), sgesvd_(), slaed1_(), slaed7_(), sorgbr_(), sorglq_(), sorgql_(), sorgqr_(), sorgtr_(), sormbr_(), sormlq_(), sormqr_(), ssyev_(), and ssytrd_(). |
1.3.9.1