Actual source code: invit.c
slepc-3.12.2 2020-01-13
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/dsimpl.h>
12: #include <slepcblaslapack.h>
14: struct HRtr
15: {
16: PetscScalar *data;
17: PetscInt m;
18: PetscInt idx[2];
19: PetscInt n[2];
20: PetscScalar tau[2];
21: PetscReal alpha;
22: PetscReal cs;
23: PetscReal sn;
24: PetscInt type;
25: };
27: /*
28: Generates a hyperbolic rotation
29: if x1*x1 - x2*x2 != 0
30: r = sqrt(|x1*x1 - x2*x2|)
31: c = x1/r s = x2/r
33: | c -s||x1| |d*r|
34: |-s c||x2| = | 0 |
35: where d = 1 for type==1 and -1 for type==2
36: Returns the condition number of the reduction
37: */
38: static PetscErrorCode HRGen(PetscReal x1,PetscReal x2,PetscInt *type,PetscReal *c,PetscReal *s,PetscReal *r,PetscReal *cond)
39: {
40: PetscReal t,n2,xa,xb;
41: PetscInt type_;
44: if (x2==0.0) {
45: *r = PetscAbsReal(x1); *c = (x1>=0.0)?1.0:-1.0; *s = 0.0;
46: if (type) *type = 1;
47: return(0);
48: }
49: if (PetscAbsReal(x1) == PetscAbsReal(x2)) {
50: /* hyperbolic rotation doesn't exist */
51: *c = *s = *r = 0.0;
52: if (type) *type = 0;
53: *cond = PETSC_MAX_REAL;
54: return(0);
55: }
57: if (PetscAbsReal(x1)>PetscAbsReal(x2)) {
58: xa = x1; xb = x2; type_ = 1;
59: } else {
60: xa = x2; xb = x1; type_ = 2;
61: }
62: t = xb/xa;
63: n2 = PetscAbsReal(1 - t*t);
64: *r = PetscSqrtReal(n2)*PetscAbsReal(xa);
65: *c = x1/(*r);
66: *s = x2/(*r);
67: if (type_ == 2) *r *= -1;
68: if (type) *type = type_;
69: if (cond) *cond = (PetscAbsReal(*c) + PetscAbsReal(*s))/PetscAbsReal(PetscAbsReal(*c) - PetscAbsReal(*s));
70: return(0);
71: }
73: /*
74: |c s|
75: Applies an hyperbolic rotator |s c|
76: |c s|
77: [x1 x2]|s c|
78: */
79: static PetscErrorCode HRApply(PetscInt n,PetscScalar *x1,PetscInt inc1,PetscScalar *x2,PetscInt inc2,PetscReal c,PetscReal s)
80: {
81: PetscInt i;
82: PetscReal t;
83: PetscScalar tmp;
86: if (PetscAbsReal(c)>PetscAbsReal(s)) { /* Type I */
87: t = s/c;
88: for (i=0;i<n;i++) {
89: x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
90: x2[i*inc2] = t*x1[i*inc1] + x2[i*inc2]/c;
91: }
92: } else { /* Type II */
93: t = c/s;
94: for (i=0;i<n;i++) {
95: tmp = x1[i*inc1];
96: x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
97: x2[i*inc2] = t*x1[i*inc1] + tmp/s;
98: }
99: }
100: return(0);
101: }
103: /*
104: Reduction to tridiagonal-diagonal form (see F. Tisseur, SIMAX 26(1), 2004).
106: Input:
107: A symmetric (only lower triangular part is referred)
108: s vector +1 and -1 (signature matrix)
109: Output:
110: d,e
111: s
112: Q s-orthogonal matrix with Q^T*A*Q = T (symmetric tridiagonal matrix)
113: */
114: static PetscErrorCode TridiagDiag_HHR(PetscInt n,PetscScalar *A,PetscInt lda,PetscReal *s,PetscScalar* Q,PetscInt ldq,PetscBool flip,PetscReal *d,PetscReal *e,PetscInt *perm_,PetscScalar *work,PetscReal *rwork,PetscBLASInt *iwork)
115: {
116: #if defined(SLEPC_MISSING_LAPACK_LARFG) || defined(SLEPC_MISSING_LAPACK_LARF)
118: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
119: #else
121: PetscInt i,j,k,*ii,*jj,i0=0,ik=0,tmp,type;
122: PetscInt nwu=0;
123: PetscReal *ss,cond=1.0,cs,sn,r;
124: PetscScalar tau,t,*AA;
125: PetscBLASInt n0,n1,ni,inc=1,m,n_,lda_,ldq_,*perm;
126: PetscBool breakdown = PETSC_TRUE;
129: if (n<3) {
130: if (n==1) Q[0]=1;
131: if (n==2) {
132: Q[0] = Q[1+ldq] = 1;
133: Q[1] = Q[ldq] = 0;
134: }
135: return(0);
136: }
137: PetscBLASIntCast(lda,&lda_);
138: PetscBLASIntCast(n,&n_);
139: PetscBLASIntCast(ldq,&ldq_);
140: ss = rwork;
141: perm = iwork;
142: AA = work;
143: for (i=0;i<n;i++) {
144: PetscArraycpy(AA+i*n,A+i*lda,n);
145: }
146: nwu += n*n;
147: k=0;
148: while (breakdown && k<n) {
149: breakdown = PETSC_FALSE;
150: /* Classify (and flip) A and s according to sign */
151: if (flip) {
152: for (i=0;i<n;i++) {
153: perm[i] = n-1-perm_[i];
154: if (perm[i]==0) i0 = i;
155: if (perm[i]==k) ik = i;
156: }
157: } else {
158: for (i=0;i<n;i++) {
159: perm[i] = perm_[i];
160: if (perm[i]==0) i0 = i;
161: if (perm[i]==k) ik = i;
162: }
163: }
164: perm[ik] = 0;
165: perm[i0] = k;
166: i=1;
167: while (i<n-1 && s[perm[i-1]]==s[perm[0]]) {
168: if (s[perm[i]]!=s[perm[0]]) {
169: j=i+1;
170: while (j<n-1 && s[perm[j]]!=s[perm[0]])j++;
171: tmp = perm[i]; perm[i] = perm[j]; perm[j] = tmp;
172: }
173: i++;
174: }
175: for (i=0;i<n;i++) {
176: ss[i] = s[perm[i]];
177: }
178: if (flip) {
179: ii = &j;
180: jj = &i;
181: } else {
182: ii = &i;
183: jj = &j;
184: }
185: for (i=0;i<n;i++)
186: for (j=0;j<n;j++)
187: A[i+j*lda] = AA[perm[*ii]+perm[*jj]*n];
188: /* Initialize Q */
189: for (i=0;i<n;i++) {
190: PetscArrayzero(Q+i*ldq,n);
191: Q[perm[i]+i*ldq] = 1.0;
192: }
193: for (ni=1;ni<n && ss[ni]==ss[0]; ni++);
194: n0 = ni-1;
195: n1 = n_-ni;
196: for (j=0;j<n-2;j++) {
197: PetscBLASIntCast(n-j-1,&m);
198: /* Forming and applying reflectors */
199: if (n0 > 1) {
200: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,A+ni-n0+j*lda,A+ni-n0+j*lda+1,&inc,&tau));
201: /* Apply reflector */
202: if (PetscAbsScalar(tau) != 0.0) {
203: t=*(A+ni-n0+j*lda); *(A+ni-n0+j*lda)=1.0;
204: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n0,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
205: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
206: /* Update Q */
207: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0,A+ni-n0+j*lda,&inc,&tau,Q+(j+1)*ldq,&ldq_,work+nwu));
208: *(A+ni-n0+j*lda) = t;
209: for (i=1;i<n0;i++) {
210: *(A+ni-n0+j*lda+i) = 0.0; *(A+j+(ni-n0+i)*lda) = 0.0;
211: }
212: *(A+j+(ni-n0)*lda) = *(A+ni-n0+j*lda);
213: }
214: }
215: if (n1 > 1) {
216: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1,A+n-n1+j*lda,A+n-n1+j*lda+1,&inc,&tau));
217: /* Apply reflector */
218: if (PetscAbsScalar(tau) != 0.0) {
219: t=*(A+n-n1+j*lda); *(A+n-n1+j*lda)=1.0;
220: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n1,A+n-n1+j*lda,&inc,&tau,A+j+1+(n-n1)*lda,&lda_,work+nwu));
221: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1,&m,A+n-n1+j*lda,&inc,&tau,A+n-n1+(j+1)*lda,&lda_,work+nwu));
222: /* Update Q */
223: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1,A+n-n1+j*lda,&inc,&tau,Q+(n-n1)*ldq,&ldq_,work+nwu));
224: *(A+n-n1+j*lda) = t;
225: for (i=1;i<n1;i++) {
226: *(A+n-n1+i+j*lda) = 0.0; *(A+j+(n-n1+i)*lda) = 0.0;
227: }
228: *(A+j+(n-n1)*lda) = *(A+n-n1+j*lda);
229: }
230: }
231: /* Hyperbolic rotation */
232: if (n0 > 0 && n1 > 0) {
233: HRGen(PetscRealPart(A[ni-n0+j*lda]),PetscRealPart(A[n-n1+j*lda]),&type,&cs,&sn,&r,&cond);
234: /* Check condition number */
235: if (cond > 1.0/(10*PETSC_SQRT_MACHINE_EPSILON)) {
236: breakdown = PETSC_TRUE;
237: k++;
238: if (k==n || flip) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Breakdown in construction of hyperbolic transformation");
239: break;
240: }
241: A[ni-n0+j*lda] = r; A[n-n1+j*lda] = 0.0;
242: A[j+(ni-n0)*lda] = r; A[j+(n-n1)*lda] = 0.0;
243: /* Apply to A */
244: HRApply(m,A+j+1+(ni-n0)*lda,1,A+j+1+(n-n1)*lda,1,cs,-sn);
245: HRApply(m,A+ni-n0+(j+1)*lda,lda,A+n-n1+(j+1)*lda,lda,cs,-sn);
247: /* Update Q */
248: HRApply(n,Q+(ni-n0)*ldq,1,Q+(n-n1)*ldq,1,cs,-sn);
249: if (type==2) {
250: ss[ni-n0] = -ss[ni-n0]; ss[n-n1] = -ss[n-n1];
251: n0++;ni++;n1--;
252: }
253: }
254: if (n0>0) n0--;
255: else n1--;
256: }
257: }
259: /* flip matrices */
260: if (flip) {
261: for (i=0;i<n-1;i++) {
262: d[i] = PetscRealPart(A[n-i-1+(n-i-1)*lda]);
263: e[i] = PetscRealPart(A[n-i-1+(n-i-2)*lda]);
264: s[i] = ss[n-i-1];
265: }
266: s[n-1] = ss[0];
267: d[n-1] = PetscRealPart(A[0]);
268: for (i=0;i<n;i++) {
269: ierr=PetscArraycpy(work+i*n,Q+i*ldq,n);
270: }
271: for (i=0;i<n;i++)
272: for (j=0;j<n;j++)
273: Q[i+j*ldq] = work[i+(n-j-1)*n];
274: } else {
275: for (i=0;i<n-1;i++) {
276: d[i] = PetscRealPart(A[i+i*lda]);
277: e[i] = PetscRealPart(A[i+1+i*lda]);
278: s[i] = ss[i];
279: }
280: s[n-1] = ss[n-1];
281: d[n-1] = PetscRealPart(A[n-1 + (n-1)*lda]);
282: }
283: return(0);
284: #endif
285: }
287: static PetscErrorCode MadeHRtr(PetscInt sz,PetscInt n,PetscInt idx0,PetscInt n0,PetscInt idx1,PetscInt n1,struct HRtr *tr1,struct HRtr *tr2,PetscReal *ncond,PetscScalar *work)
288: {
289: #if defined(SLEPC_MISSING_LAPACK_LARFG) || defined(SLEPC_MISSING_LAPACK_LARF)
291: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
292: #else
294: PetscScalar *x,*y;
295: PetscReal ncond2=1.0;
296: PetscBLASInt n0_,n1_,inc=1;
299: /* Hyperbolic transformation to make zeros in x */
300: x = tr1->data;
301: tr1->n[0] = n0;
302: tr1->n[1] = n1;
303: tr1->idx[0] = idx0;
304: tr1->idx[1] = idx1;
305: PetscBLASIntCast(tr1->n[0],&n0_);
306: PetscBLASIntCast(tr1->n[1],&n1_);
307: if (tr1->n[0] > 1) {
308: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,x+tr1->idx[0],x+tr1->idx[0]+1,&inc,tr1->tau));
309: }
310: if (tr1->n[1]> 1) {
311: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,x+tr1->idx[1],x+tr1->idx[1]+1,&inc,tr1->tau+1));
312: }
313: if (tr1->idx[0]<tr1->idx[1]) {
314: HRGen(PetscRealPart(x[tr1->idx[0]]),PetscRealPart(x[tr1->idx[1]]),&(tr1->type),&(tr1->cs),&(tr1->sn),&(tr1->alpha),ncond);
315: } else {
316: tr1->alpha = PetscRealPart(x[tr1->idx[0]]);
317: *ncond = 1.0;
318: }
319: if (sz==2) {
320: y = tr2->data;
321: /* Apply first transformation to second column */
322: if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
323: x[tr1->idx[0]] = 1.0;
324: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&inc,x+tr1->idx[0],&inc,tr1->tau,y+tr1->idx[0],&n0_,work));
325: }
326: if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
327: x[tr1->idx[1]] = 1.0;
328: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&inc,x+tr1->idx[1],&inc,tr1->tau+1,y+tr1->idx[1],&n1_,work));
329: }
330: if (tr1->idx[0]<tr1->idx[1]) {
331: HRApply(1,y+tr1->idx[0],1,y+tr1->idx[1],1,tr1->cs,-tr1->sn);
332: }
333: tr2->n[0] = tr1->n[0];
334: tr2->n[1] = tr1->n[1];
335: tr2->idx[0] = tr1->idx[0];
336: tr2->idx[1] = tr1->idx[1];
337: if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
338: tr2->idx[1]++; tr2->n[1]--; tr2->n[0]++;
339: }
340: if (tr2->n[0]>0) {
341: tr2->n[0]--; tr2->idx[0]++;
342: if (tr2->n[1]==0) tr2->idx[1] = tr2->idx[0];
343: } else {
344: tr2->n[1]--; tr2->idx[1]++; tr2->idx[0] = tr2->idx[1];
345: }
346: /* Hyperbolic transformation to make zeros in y */
347: PetscBLASIntCast(tr2->n[0],&n0_);
348: PetscBLASIntCast(tr2->n[1],&n1_);
349: if (tr2->n[0] > 1) {
350: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,y+tr2->idx[0],y+tr2->idx[0]+1,&inc,tr2->tau));
351: }
352: if (tr2->n[1]> 1) {
353: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,y+tr2->idx[1],y+tr2->idx[1]+1,&inc,tr2->tau+1));
354: }
355: if (tr2->idx[0]<tr2->idx[1]) {
356: HRGen(PetscRealPart(y[tr2->idx[0]]),PetscRealPart(y[tr2->idx[1]]),&(tr2->type),&(tr2->cs),&(tr2->sn),&(tr2->alpha),&ncond2);
357: } else {
358: tr2->alpha = PetscRealPart(y[tr2->idx[0]]);
359: ncond2 = 1.0;
360: }
361: if (ncond2>*ncond) *ncond = ncond2;
362: }
363: return(0);
364: #endif
365: }
367: /*
368: Auxiliary function to try perform one iteration of hr routine,
369: checking condition number. If it is < tolD, apply the
370: transformation to H and R, if not, ok=false and it do nothing
371: tolE, tolerance to exchange complex pairs to improve conditioning
372: */
373: static PetscErrorCode TryHRIt(PetscInt n,PetscInt j,PetscInt sz,PetscScalar *H,PetscInt ldh,PetscScalar *R,PetscInt ldr,PetscReal *s,PetscBool *exg,PetscBool *ok,PetscInt *n0,PetscInt *n1,PetscInt *idx0,PetscInt *idx1,PetscReal *cond,PetscScalar *work)
374: {
375: #if defined(SLEPC_MISSING_LAPACK_LARF)
377: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARF - Lapack routine is unavailable");
378: #else
380: struct HRtr *tr1,*tr2,tr1_t,tr2_t,tr1_te,tr2_te;
381: PetscScalar *x,*y;
382: PetscReal ncond,ncond_e;
383: PetscInt nwu=0,i,d=1;
384: PetscBLASInt n0_,n1_,inc=1,mh,mr,n_,ldr_,ldh_;
385: PetscReal tolD = 1e+5;
388: if (cond) *cond = 1.0;
389: PetscBLASIntCast(n,&n_);
390: PetscBLASIntCast(ldr,&ldr_);
391: PetscBLASIntCast(ldh,&ldh_);
392: x = work+nwu;
393: nwu += n;
394: PetscArraycpy(x,R+j*ldr,n);
395: *exg = PETSC_FALSE;
396: *ok = PETSC_TRUE;
397: tr1_t.data = x;
398: if (sz==1) {
399: /* Hyperbolic transformation to make zeros in x */
400: MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,NULL,&ncond,work+nwu);
401: /* Check condition number to single column*/
402: if (ncond>tolD) *ok = PETSC_FALSE;
403: tr1 = &tr1_t;
404: tr2 = &tr2_t;
405: } else {
406: y = work+nwu;
407: nwu += n;
408: PetscArraycpy(y,R+(j+1)*ldr,n);
409: tr2_t.data = y;
410: MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,&tr2_t,&ncond,work+nwu);
411: /* Computing hyperbolic transformations also for exchanged vectors */
412: tr1_te.data = work+nwu;
413: nwu += n;
414: PetscArraycpy(tr1_te.data,R+(j+1)*ldr,n);
415: tr2_te.data = work+nwu;
416: nwu += n;
417: PetscArraycpy(tr2_te.data,R+j*ldr,n);
418: MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_te,&tr2_te,&ncond_e,work+nwu);
419: if (ncond > d*ncond_e) {
420: *exg = PETSC_TRUE;
421: tr1 = &tr1_te;
422: tr2 = &tr2_te;
423: ncond = ncond_e;
424: } else {
425: tr1 = &tr1_t;
426: tr2 = &tr2_t;
427: }
428: if (ncond>tolD) *ok = PETSC_FALSE;
429: }
430: if (*ok) {
431: /* Everything is OK, apply transformations to R and H */
432: /* First column */
433: if (cond && *cond<ncond) *cond = ncond;
434: x = tr1->data;
435: PetscBLASIntCast(tr1->n[0],&n0_);
436: PetscBLASIntCast(tr1->n[1],&n1_);
437: PetscBLASIntCast(n-j-sz,&mr);
438: if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
439: x[tr1->idx[0]] = 1.0;
440: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,x+tr1->idx[0],&inc,tr1->tau,R+(j+sz)*ldr+tr1->idx[0],&ldr_,work+nwu));
441: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,x+tr1->idx[0],&inc,tr1->tau,H+(tr1->idx[0])*ldh,&ldh_,work+nwu));
442: }
443: if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
444: x[tr1->idx[1]] = 1.0;
445: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,x+tr1->idx[1],&inc,tr1->tau+1,R+(j+sz)*ldr+tr1->idx[1],&ldr_,work+nwu));
446: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,x+tr1->idx[1],&inc,tr1->tau+1,H+(tr1->idx[1])*ldh,&ldh_,work+nwu));
447: }
448: if (tr1->idx[0]<tr1->idx[1]) {
449: HRApply(mr,R+(j+sz)*ldr+tr1->idx[0],ldr,R+(j+sz)*ldr+tr1->idx[1],ldr,tr1->cs,-tr1->sn);
450: if (tr1->type==1) {
451: HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,tr1->cs,tr1->sn);
452: } else {
453: HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,-tr1->cs,-tr1->sn);
454: s[tr1->idx[0]] = -s[tr1->idx[0]];
455: s[tr1->idx[1]] = -s[tr1->idx[1]];
456: }
457: }
458: for (i=0;i<tr1->idx[0];i++) *(R+j*ldr+i) = x[i];
459: for (i=tr1->idx[0]+1;i<n;i++) *(R+j*ldr+i) = 0.0;
460: *(R+j*ldr+tr1->idx[0]) = tr1->alpha;
461: if (sz==2) {
462: y = tr2->data;
463: /* Second column */
464: PetscBLASIntCast(tr2->n[0],&n0_);
465: PetscBLASIntCast(tr2->n[1],&n1_);
466: PetscBLASIntCast(n-j-sz,&mr);
467: PetscBLASIntCast(n-tr2->idx[0],&mh);
468: if (tr2->n[0] > 1 && PetscAbsScalar(tr2->tau[0])!=0.0) {
469: y[tr2->idx[0]] = 1.0;
470: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,y+tr2->idx[0],&inc,tr2->tau,R+(j+2)*ldr+tr2->idx[0],&ldr_,work+nwu));
471: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,y+tr2->idx[0],&inc,tr2->tau,H+(tr2->idx[0])*ldh,&ldh_,work+nwu));
472: }
473: if (tr2->n[1] > 1 && PetscAbsScalar(tr2->tau[1])!=0.0) {
474: y[tr2->idx[1]] = 1.0;
475: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,y+tr2->idx[1],&inc,tr2->tau+1,R+(j+2)*ldr+tr2->idx[1],&ldr_,work+nwu));
476: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,y+tr2->idx[1],&inc,tr2->tau+1,H+(tr2->idx[1])*ldh,&ldh_,work+nwu));
477: }
478: if (tr2->idx[0]<tr2->idx[1]) {
479: HRApply(mr,R+(j+2)*ldr+tr2->idx[0],ldr,R+(j+2)*ldr+tr2->idx[1],ldr,tr2->cs,-tr2->sn);
480: if (tr2->type==1) {
481: HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,tr2->cs,tr2->sn);
482: } else {
483: HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,-tr2->cs,-tr2->sn);
484: s[tr2->idx[0]] = -s[tr2->idx[0]];
485: s[tr2->idx[1]] = -s[tr2->idx[1]];
486: }
487: }
488: for (i=0;i<tr2->idx[0]-1;i++) *(R+(j+1)*ldr+i) = y[i];
489: *(R+(j+1)*ldr+tr2->idx[0]-1) = y[tr2->idx[0]-1];
490: for (i=tr2->idx[0]+1;i<n;i++) *(R+(j+1)*ldr+i) = 0.0;
491: *(R+(j+1)*ldr+tr2->idx[0]) = tr2->alpha;
492: *n0 = tr2->n[0];
493: *n1 = tr2->n[1];
494: *idx0 = tr2->idx[0];
495: *idx1 = tr2->idx[1];
496: if (tr2->idx[0]<tr2->idx[1] && tr2->type==2) {
497: (*idx1)++; (*n1)--; (*n0)++;
498: }
499: } else {
500: *n0 = tr1->n[0];
501: *n1 = tr1->n[1];
502: *idx0 = tr1->idx[0];
503: *idx1 = tr1->idx[1];
504: if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
505: (*idx1)++; (*n1)--; (*n0)++;
506: }
507: }
508: if (*n0>0) {
509: (*n0)--; (*idx0)++;
510: if (*n1==0) *idx1 = *idx0;
511: } else {
512: (*n1)--; (*idx1)++; *idx0 = *idx1;
513: }
514: }
515: return(0);
516: #endif
517: }
519: /*
520: compute V = HR whit H s-orthogonal and R upper triangular
521: */
522: static PetscErrorCode PseudoOrthog_HR(PetscInt *nv,PetscScalar *V,PetscInt ldv,PetscReal *s,PetscScalar *R,PetscInt ldr,PetscBLASInt *perm,PetscBLASInt *cmplxEig,PetscBool *breakdown,PetscScalar *work)
523: {
525: PetscInt i,j,n,n0,n1,np,idx0,idx1,sz=1,k=0,t1,t2,nwu=0;
526: PetscScalar *col1,*col2;
527: PetscBool exg=PETSC_FALSE,ok=PETSC_FALSE;
530: n = *nv;
531: col1 = work+nwu;
532: nwu += n;
533: col2 = work+nwu;
534: nwu += n;
535: /* Sort R and s according to sing(s) */
536: np = 0;
537: for (i=0;i<n;i++) if (s[i]>0) np++;
538: if (s[0]>0) n1 = np;
539: else n1 = n-np;
540: n0 = 0;
541: for (i=0;i<n;i++) {
542: if (s[i]==s[0]) {
543: s[n0] = s[0];
544: perm[n0++] = i;
545: } else perm[n1++] = i;
546: }
547: for (i=n0;i<n;i++) s[i] = -s[0];
548: n1 -= n0;
549: idx0 = 0;
550: idx1 = n0;
551: if (idx1==n) idx1=idx0;
552: for (i=0;i<n;i++) {
553: for (j=0;j<n;j++) R[j*ldr+i] = V[j*ldv+perm[i]];
554: }
555: /* Initialize H */
556: for (i=0;i<n;i++) {
557: PetscArrayzero(V+i*ldv,n);
558: V[perm[i]+i*ldv] = 1.0;
559: }
560: for (i=0;i<n;i++) perm[i] = i;
561: j = 0;
562: while (j<n-k) {
563: if (cmplxEig[j]==0) sz=1;
564: else sz=2;
565: TryHRIt(n,j,sz,V,ldv,R,ldr,s,&exg,&ok,&n0,&n1,&idx0,&idx1,NULL,work+nwu);
566: if (ok) {
567: if (exg) cmplxEig[j] = -cmplxEig[j];
568: j = j+sz;
569: } else { /* to be discarded */
570: k = k+1;
571: if (cmplxEig[j]==0) {
572: if (j<n) {
573: t1 = perm[j];
574: for (i=j;i<n-1;i++) perm[i] = perm[i+1];
575: perm[n-1] = t1;
576: t1 = cmplxEig[j];
577: for (i=j;i<n-1;i++) cmplxEig[i] = cmplxEig[i+1];
578: cmplxEig[n-1] = t1;
579: PetscArraycpy(col1,R+j*ldr,n*sizeof(PetscScalar));
580: for (i=j;i<n-1;i++) {
581: PetscArraycpy(R+i*ldr,R+(i+1)*ldr,n*sizeof(PetscScalar));
582: }
583: PetscArraycpy(R+(n-1)*ldr,col1,n*sizeof(PetscScalar));
584: }
585: } else {
586: k = k+1;
587: if (j<n-1) {
588: t1 = perm[j]; t2 = perm[j+1];
589: for (i=j;i<n-2;i++) perm[i] = perm[i+2];
590: perm[n-2] = t1; perm[n-1] = t2;
591: t1 = cmplxEig[j]; t2 = cmplxEig[j+1];
592: for (i=j;i<n-2;i++) cmplxEig[i] = cmplxEig[i+2];
593: cmplxEig[n-2] = t1; cmplxEig[n-1] = t2;
594: PetscArraycpy(col1,R+j*ldr,n);
595: PetscArraycpy(col2,R+(j+1)*ldr,n);
596: for (i=j;i<n-2;i++) {
597: PetscArraycpy(R+i*ldr,R+(i+2)*ldr,n);
598: }
599: PetscArraycpy(R+(n-2)*ldr,col1,n);
600: PetscArraycpy(R+(n-1)*ldr,col2,n);
601: }
602: }
603: }
604: }
605: if (k!=0) {
606: if (breakdown) *breakdown = PETSC_TRUE;
607: *nv = n-k;
608: }
609: return(0);
610: }
612: PetscErrorCode DSGHIEPOrthogEigenv(DS ds,DSMatType mat,PetscScalar *wr,PetscScalar *wi,PetscBool accum)
613: {
615: PetscInt lws,nwus=0,nwui=0,lwi;
616: PetscInt off,n,nv,ld,i,ldr,l;
617: PetscScalar *W,*X,*R,*ts,zeroS=0.0,oneS=1.0;
618: PetscReal *s,vi,vr,tr,*d,*e;
619: PetscBLASInt ld_,n_,nv_,*perm,*cmplxEig;
622: l = ds->l;
623: n = ds->n-l;
624: PetscBLASIntCast(n,&n_);
625: ld = ds->ld;
626: PetscBLASIntCast(ld,&ld_);
627: off = l*ld+l;
628: s = ds->rmat[DS_MAT_D];
629: if (!ds->compact) {
630: for (i=l;i<ds->n;i++) s[i] = PetscRealPart(*(ds->mat[DS_MAT_B]+i*ld+i));
631: }
632: lws = n*n+7*n;
633: lwi = 2*n;
634: DSAllocateWork_Private(ds,lws,0,lwi);
635: R = ds->work+nwus;
636: nwus += n*n;
637: ldr = n;
638: perm = ds->iwork + nwui;
639: nwui += n;
640: cmplxEig = ds->iwork+nwui;
641: X = ds->mat[mat];
642: for (i=0;i<n;i++) {
643: #if defined(PETSC_USE_COMPLEX)
644: vi = PetscImaginaryPart(wr[l+i]);
645: #else
646: vi = PetscRealPart(wi[l+i]);
647: #endif
648: if (vi!=0) {
649: cmplxEig[i] = 1;
650: cmplxEig[i+1] = 2;
651: i++;
652: } else cmplxEig[i] = 0;
653: }
654: nv = n;
656: /* Perform HR decomposition */
657: /* Hyperbolic rotators */
658: PseudoOrthog_HR(&nv,X+off,ld,s+l,R,ldr,perm,cmplxEig,NULL,ds->work+nwus);
659: /* Sort wr,wi perm */
660: ts = ds->work+nwus;
661: PetscArraycpy(ts,wr+l,n);
662: for (i=0;i<n;i++) wr[i+l] = ts[perm[i]];
663: #if !defined(PETSC_USE_COMPLEX)
664: PetscArraycpy(ts,wi+l,n);
665: for (i=0;i<n;i++) wi[i+l] = ts[perm[i]];
666: #endif
667: /* Projected Matrix */
668: PetscArrayzero(ds->rmat[DS_MAT_T]+2*ld,ld);
669: d = ds->rmat[DS_MAT_T];
670: e = d+ld;
671: for (i=0;i<nv;i++) {
672: if (cmplxEig[i]==0) { /* Real */
673: d[l+i] = PetscRealPart(wr[l+i]*s[l+i]);
674: e[l+i] = 0.0;
675: } else {
676: vr = PetscRealPart(wr[l+i]);
677: #if defined(PETSC_USE_COMPLEX)
678: vi = PetscImaginaryPart(wr[l+i]);
679: #else
680: vi = PetscRealPart(wi[l+i]);
681: #endif
682: if (cmplxEig[i]==-1) vi = -vi;
683: tr = PetscRealPart((R[i+(i+1)*ldr]/R[i+i*ldr]))*vi;
684: d[l+i] = (vr-tr)*s[l+i];
685: d[l+i+1] = (vr+tr)*s[l+i+1];
686: e[l+i] = PetscRealPart(s[l+i]*(R[(i+1)+(i+1)*ldr]/R[i+i*ldr])*vi);
687: e[l+i+1] = 0.0;
688: i++;
689: }
690: }
691: /* accumulate previous Q */
692: if (accum) {
693: PetscBLASIntCast(nv,&nv_);
694: DSAllocateMat_Private(ds,DS_MAT_W);
695: W = ds->mat[DS_MAT_W];
696: DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
697: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&oneS,W+off,&ld_,X+off,&ld_,&zeroS,ds->mat[DS_MAT_Q]+off,&ld_));
698: } else {
699: PetscArrayzero(ds->mat[DS_MAT_Q],ld*ld);
700: for (i=0;i<ds->l;i++) *(ds->mat[DS_MAT_Q]+i+i*ld) = 1.0;
701: for (i=0;i<n;i++) { PetscArraycpy(ds->mat[DS_MAT_Q]+off+i*ld,X+off+i*ld,n); }
702: }
703: ds->t = nv+l;
704: if (!ds->compact) { DSSwitchFormat_GHIEP(ds,PETSC_FALSE); }
705: return(0);
706: }
708: /*
709: Reduce to tridiagonal-diagonal pair by means of TridiagDiag_HHR.
710: */
711: PetscErrorCode DSIntermediate_GHIEP(DS ds)
712: {
714: PetscInt i,ld,off;
715: PetscInt nwall,nwallr,nwalli;
716: PetscScalar *A,*B,*Q;
717: PetscReal *d,*e,*s;
720: ld = ds->ld;
721: A = ds->mat[DS_MAT_A];
722: B = ds->mat[DS_MAT_B];
723: Q = ds->mat[DS_MAT_Q];
724: d = ds->rmat[DS_MAT_T];
725: e = ds->rmat[DS_MAT_T]+ld;
726: s = ds->rmat[DS_MAT_D];
727: off = ds->l+ds->l*ld;
728: PetscArrayzero(Q,ld*ld);
729: nwall = ld*ld+ld;
730: nwallr = ld;
731: nwalli = ld;
732: DSAllocateWork_Private(ds,nwall,nwallr,nwalli);
733: for (i=0;i<ds->n;i++) Q[i+i*ld]=1.0;
734: for (i=0;i<ds->n-ds->l;i++) *(ds->perm+i)=i;
735: if (ds->compact) {
736: if (ds->state < DS_STATE_INTERMEDIATE) {
737: DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
738: TridiagDiag_HHR(ds->k-ds->l+1,A+off,ld,s+ds->l,Q+off,ld,PETSC_TRUE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork);
739: ds->k = ds->l;
740: PetscArrayzero(d+2*ld+ds->l,ds->n-ds->l);
741: }
742: } else {
743: if (ds->state < DS_STATE_INTERMEDIATE) {
744: for (i=0;i<ds->n;i++) s[i] = PetscRealPart(B[i+i*ld]);
745: TridiagDiag_HHR(ds->n-ds->l,A+off,ld,s+ds->l,Q+off,ld,PETSC_FALSE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork);
746: PetscArrayzero(d+2*ld,ds->n);
747: ds->k = ds->l;
748: DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
749: } else {
750: DSSwitchFormat_GHIEP(ds,PETSC_TRUE);
751: }
752: }
753: return(0);
754: }