Actual source code: bvcontour.c
slepc-3.19.0 2023-03-31
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: BV developer functions needed in contour integral methods
12: */
14: #include <slepc/private/bvimpl.h>
15: #include <slepcblaslapack.h>
17: #define p_id(i) (i*subcomm->n + subcomm->color)
19: /*@
20: BVScatter - Scatters the columns of a BV to another BV created in a
21: subcommunicator.
23: Collective
25: Input Parameters:
26: + Vin - input basis vectors (defined on the whole communicator)
27: . scat - VecScatter object that contains the info for the communication
28: - xdup - an auxiliary vector
30: Output Parameter:
31: . Vout - output basis vectors (defined on the subcommunicator)
33: Notes:
34: Currently implemented as a loop for each the active column, where each
35: column is scattered independently. The vector xdup is defined on the
36: contiguous parent communicator and have enough space to store one
37: duplicate of the original vector per each subcommunicator.
39: Level: developer
41: .seealso: BVGetColumn()
42: @*/
43: PetscErrorCode BVScatter(BV Vin,BV Vout,VecScatter scat,Vec xdup)
44: {
45: PetscInt i;
46: Vec v;
47: const PetscScalar *array;
49: PetscFunctionBegin;
54: for (i=Vin->l;i<Vin->k;i++) {
55: PetscCall(BVGetColumn(Vin,i,&v));
56: PetscCall(VecScatterBegin(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
57: PetscCall(VecScatterEnd(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
58: PetscCall(BVRestoreColumn(Vin,i,&v));
59: PetscCall(VecGetArrayRead(xdup,&array));
60: PetscCall(VecPlaceArray(Vout->t,array));
61: PetscCall(BVInsertVec(Vout,i,Vout->t));
62: PetscCall(VecResetArray(Vout->t));
63: PetscCall(VecRestoreArrayRead(xdup,&array));
64: }
65: PetscFunctionReturn(PETSC_SUCCESS);
66: }
68: /*@
69: BVSumQuadrature - Computes the sum of terms required in the quadrature
70: rule to approximate the contour integral.
72: Collective
74: Input Parameters:
75: + Y - input basis vectors
76: . M - number of moments
77: . L - block size
78: . L_max - maximum block size
79: . w - quadrature weights
80: . zn - normalized quadrature points
81: . scat - (optional) VecScatter object to communicate between subcommunicators
82: . subcomm - subcommunicator layout
83: . npoints - number of points to process by the subcommunicator
84: - useconj - whether conjugate points can be used or not
86: Output Parameter:
87: . S - output basis vectors
89: Notes:
90: This is a generalization of BVMult(). The resulting matrix S consists of M
91: panels of L columns, and the following formula is computed for each panel
92: S_k = sum_j w_j*zn_j^k*Y_j, where Y_j is the j-th panel of Y containing
93: the result of solving T(z_j)^{-1}*X for each integration point j. L_max is
94: the width of the panels in Y.
96: When using subcommunicators, Y is stored in the subcommunicators for a subset
97: of integration points. In that case, the computation is done in the subcomm
98: and then scattered to the whole communicator in S using the VecScatter scat.
99: The value npoints is the number of points to be processed in this subcomm
100: and the flag useconj indicates whether symmetric points can be reused.
102: Level: developer
104: .seealso: BVMult(), BVScatter(), BVDotQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
105: @*/
106: PetscErrorCode BVSumQuadrature(BV S,BV Y,PetscInt M,PetscInt L,PetscInt L_max,PetscScalar *w,PetscScalar *zn,VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
107: {
108: PetscInt i,j,k,nloc;
109: Vec v,sj;
110: PetscScalar *ppk,*pv,one=1.0;
112: PetscFunctionBegin;
117: PetscCall(BVGetSizes(Y,&nloc,NULL,NULL));
118: PetscCall(PetscMalloc1(npoints,&ppk));
119: for (i=0;i<npoints;i++) ppk[i] = 1.0;
120: PetscCall(BVCreateVec(Y,&v));
121: for (k=0;k<M;k++) {
122: for (j=0;j<L;j++) {
123: PetscCall(VecSet(v,0.0));
124: for (i=0;i<npoints;i++) {
125: PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
126: PetscCall(BVMultVec(Y,ppk[i]*w[p_id(i)],1.0,v,&one));
127: }
128: if (PetscUnlikely(useconj)) {
129: PetscCall(VecGetArray(v,&pv));
130: for (i=0;i<nloc;i++) pv[i] = 2.0*PetscRealPart(pv[i]);
131: PetscCall(VecRestoreArray(v,&pv));
132: }
133: PetscCall(BVGetColumn(S,k*L+j,&sj));
134: if (PetscUnlikely(scat)) {
135: PetscCall(VecScatterBegin(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
136: PetscCall(VecScatterEnd(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
137: } else PetscCall(VecCopy(v,sj));
138: PetscCall(BVRestoreColumn(S,k*L+j,&sj));
139: }
140: for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
141: }
142: PetscCall(PetscFree(ppk));
143: PetscCall(VecDestroy(&v));
144: PetscFunctionReturn(PETSC_SUCCESS);
145: }
147: /*@
148: BVDotQuadrature - Computes the projection terms required in the quadrature
149: rule to approximate the contour integral.
151: Collective
153: Input Parameters:
154: + Y - first basis vectors
155: . V - second basis vectors
156: . M - number of moments
157: . L - block size
158: . L_max - maximum block size
159: . w - quadrature weights
160: . zn - normalized quadrature points
161: . subcomm - subcommunicator layout
162: . npoints - number of points to process by the subcommunicator
163: - useconj - whether conjugate points can be used or not
165: Output Parameter:
166: . Mu - computed result
168: Notes:
169: This is a generalization of BVDot(). The resulting matrix Mu consists of M
170: blocks of size LxL (placed horizontally), each of them computed as
171: Mu_k = sum_j w_j*zn_j^k*V'*Y_j, where Y_j is the j-th panel of Y containing
172: the result of solving T(z_j)^{-1}*X for each integration point j. L_max is
173: the width of the panels in Y.
175: When using subcommunicators, Y is stored in the subcommunicators for a subset
176: of integration points. In that case, the computation is done in the subcomm
177: and then the final result is combined via reduction.
178: The value npoints is the number of points to be processed in this subcomm
179: and the flag useconj indicates whether symmetric points can be reused.
181: Level: developer
183: .seealso: BVDot(), BVScatter(), BVSumQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
184: @*/
185: PetscErrorCode BVDotQuadrature(BV Y,BV V,PetscScalar *Mu,PetscInt M,PetscInt L,PetscInt L_max,PetscScalar *w,PetscScalar *zn,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
186: {
187: PetscMPIInt sub_size,count;
188: PetscInt i,j,k,s;
189: PetscScalar *temp,*temp2,*ppk,alp;
190: Mat H;
191: const PetscScalar *pH;
192: MPI_Comm child,parent;
194: PetscFunctionBegin;
198: PetscCall(PetscSubcommGetChild(subcomm,&child));
199: PetscCallMPI(MPI_Comm_size(child,&sub_size));
200: PetscCall(PetscMalloc3(npoints*L*(L+1),&temp,2*M*L*L,&temp2,npoints,&ppk));
201: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,L,L_max*npoints,NULL,&H));
202: PetscCall(PetscArrayzero(temp2,2*M*L*L));
203: PetscCall(BVSetActiveColumns(Y,0,L_max*npoints));
204: PetscCall(BVSetActiveColumns(V,0,L));
205: PetscCall(BVDot(Y,V,H));
206: PetscCall(MatDenseGetArrayRead(H,&pH));
207: for (i=0;i<npoints;i++) {
208: for (j=0;j<L;j++) {
209: for (k=0;k<L;k++) {
210: temp[k+j*L+i*L*L] = pH[k+j*L+i*L*L_max];
211: }
212: }
213: }
214: PetscCall(MatDenseRestoreArrayRead(H,&pH));
215: for (i=0;i<npoints;i++) ppk[i] = 1;
216: for (k=0;k<2*M;k++) {
217: for (j=0;j<L;j++) {
218: for (i=0;i<npoints;i++) {
219: alp = ppk[i]*w[p_id(i)];
220: for (s=0;s<L;s++) {
221: if (!useconj) temp2[s+(j+k*L)*L] += alp*temp[s+(j+i*L)*L];
222: else temp2[s+(j+k*L)*L] += 2.0*PetscRealPart(alp*temp[s+(j+i*L)*L]);
223: }
224: }
225: }
226: for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
227: }
228: for (i=0;i<2*M*L*L;i++) temp2[i] /= sub_size;
229: PetscCall(PetscMPIIntCast(2*M*L*L,&count));
230: PetscCall(PetscSubcommGetParent(subcomm,&parent));
231: PetscCall(MPIU_Allreduce(temp2,Mu,count,MPIU_SCALAR,MPIU_SUM,parent));
232: PetscCall(PetscFree3(temp,temp2,ppk));
233: PetscCall(MatDestroy(&H));
234: PetscFunctionReturn(PETSC_SUCCESS);
235: }
237: /*@
238: BVTraceQuadrature - Computes an estimate of the number of eigenvalues
239: inside a region via quantities computed in the quadrature rule of
240: contour integral methods.
242: Collective
244: Input Parameters:
245: + Y - first basis vectors
246: . V - second basis vectors
247: . L - block size
248: . L_max - maximum block size
249: . w - quadrature weights
250: . scat - (optional) VecScatter object to communicate between subcommunicators
251: . subcomm - subcommunicator layout
252: . npoints - number of points to process by the subcommunicator
253: - useconj - whether conjugate points can be used or not
255: Output Parameter:
256: . est_eig - estimated eigenvalue count
258: Notes:
259: This function returns an estimation of the number of eigenvalues in the
260: region, computed as trace(V'*S_0), where S_0 is the first panel of S
261: computed by BVSumQuadrature().
263: When using subcommunicators, Y is stored in the subcommunicators for a subset
264: of integration points. In that case, the computation is done in the subcomm
265: and then scattered to the whole communicator in S using the VecScatter scat.
266: The value npoints is the number of points to be processed in this subcomm
267: and the flag useconj indicates whether symmetric points can be reused.
269: Level: developer
271: .seealso: BVScatter(), BVDotQuadrature(), BVSumQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
272: @*/
273: PetscErrorCode BVTraceQuadrature(BV Y,BV V,PetscInt L,PetscInt L_max,PetscScalar *w,VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj,PetscReal *est_eig)
274: {
275: PetscInt i,j;
276: Vec y,yall,vj;
277: PetscScalar dot,sum=0.0,one=1.0;
279: PetscFunctionBegin;
284: PetscCall(BVCreateVec(Y,&y));
285: PetscCall(BVCreateVec(V,&yall));
286: for (j=0;j<L;j++) {
287: PetscCall(VecSet(y,0.0));
288: for (i=0;i<npoints;i++) {
289: PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
290: PetscCall(BVMultVec(Y,w[p_id(i)],1.0,y,&one));
291: }
292: PetscCall(BVGetColumn(V,j,&vj));
293: if (scat) {
294: PetscCall(VecScatterBegin(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
295: PetscCall(VecScatterEnd(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
296: PetscCall(VecDot(vj,yall,&dot));
297: } else PetscCall(VecDot(vj,y,&dot));
298: PetscCall(BVRestoreColumn(V,j,&vj));
299: if (useconj) sum += 2.0*PetscRealPart(dot);
300: else sum += dot;
301: }
302: *est_eig = PetscAbsScalar(sum)/(PetscReal)L;
303: PetscCall(VecDestroy(&y));
304: PetscCall(VecDestroy(&yall));
305: PetscFunctionReturn(PETSC_SUCCESS);
306: }
308: PetscErrorCode BVSVDAndRank_Refine(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
309: {
310: PetscInt i,j,k,ml=S->k;
311: PetscMPIInt len;
312: PetscScalar *work,*B,*tempB,*sarray,*Q1,*Q2,*temp2,alpha=1.0,beta=0.0;
313: PetscBLASInt l,m,n,lda,ldu,ldvt,lwork,info,ldb,ldc;
314: #if defined(PETSC_USE_COMPLEX)
315: PetscReal *rwork;
316: #endif
318: PetscFunctionBegin;
319: PetscCall(BVGetArray(S,&sarray));
320: PetscCall(PetscMalloc6(ml*ml,&temp2,S->n*ml,&Q1,S->n*ml,&Q2,ml*ml,&B,ml*ml,&tempB,5*ml,&work));
321: #if defined(PETSC_USE_COMPLEX)
322: PetscCall(PetscMalloc1(5*ml,&rwork));
323: #endif
324: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
326: PetscCall(PetscArrayzero(B,ml*ml));
327: for (i=0;i<ml;i++) B[i*ml+i]=1;
329: for (k=0;k<2;k++) {
330: PetscCall(PetscBLASIntCast(S->n,&m));
331: PetscCall(PetscBLASIntCast(ml,&l));
332: n = l; lda = m; ldb = m; ldc = l;
333: if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,sarray,&lda,sarray,&ldb,&beta,pA,&ldc));
334: else PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,Q1,&lda,Q1,&ldb,&beta,pA,&ldc));
335: PetscCall(PetscArrayzero(temp2,ml*ml));
336: PetscCall(PetscMPIIntCast(ml*ml,&len));
337: PetscCall(MPIU_Allreduce(pA,temp2,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)S)));
339: PetscCall(PetscBLASIntCast(ml,&m));
340: n = m; lda = m; lwork = 5*m, ldu = 1; ldvt = 1;
341: #if defined(PETSC_USE_COMPLEX)
342: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
343: #else
344: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
345: #endif
346: SlepcCheckLapackInfo("gesvd",info);
348: PetscCall(PetscBLASIntCast(S->n,&l));
349: PetscCall(PetscBLASIntCast(ml,&n));
350: m = n; lda = l; ldb = m; ldc = l;
351: if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,sarray,&lda,temp2,&ldb,&beta,Q1,&ldc));
352: else PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,Q1,&lda,temp2,&ldb,&beta,Q2,&ldc));
354: PetscCall(PetscBLASIntCast(ml,&l));
355: m = l; n = l; lda = l; ldb = m; ldc = l;
356: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,B,&lda,temp2,&ldb,&beta,tempB,&ldc));
357: for (i=0;i<ml;i++) {
358: sigma[i] = PetscSqrtReal(sigma[i]);
359: for (j=0;j<S->n;j++) {
360: if (k%2) Q2[j+i*S->n] /= sigma[i];
361: else Q1[j+i*S->n] /= sigma[i];
362: }
363: for (j=0;j<ml;j++) B[j+i*ml] = tempB[j+i*ml]*sigma[i];
364: }
365: }
367: PetscCall(PetscBLASIntCast(ml,&m));
368: n = m; lda = m; ldu=1; ldvt=1;
369: #if defined(PETSC_USE_COMPLEX)
370: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
371: #else
372: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
373: #endif
374: SlepcCheckLapackInfo("gesvd",info);
376: PetscCall(PetscBLASIntCast(S->n,&l));
377: PetscCall(PetscBLASIntCast(ml,&n));
378: m = n; lda = l; ldb = m; ldc = l;
379: if (k%2) PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q1,&lda,B,&ldb,&beta,sarray,&ldc));
380: else PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q2,&lda,B,&ldb,&beta,sarray,&ldc));
382: PetscCall(PetscFPTrapPop());
383: PetscCall(BVRestoreArray(S,&sarray));
385: if (rank) {
386: (*rank) = 0;
387: for (i=0;i<ml;i++) {
388: if (sigma[i]/PetscMax(sigma[0],1.0)>delta) (*rank)++;
389: }
390: }
391: PetscCall(PetscFree6(temp2,Q1,Q2,B,tempB,work));
392: #if defined(PETSC_USE_COMPLEX)
393: PetscCall(PetscFree(rwork));
394: #endif
395: PetscFunctionReturn(PETSC_SUCCESS);
396: }
398: PetscErrorCode BVSVDAndRank_QR(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
399: {
400: PetscInt i,n,ml=S->k;
401: PetscBLASInt m,lda,lwork,info;
402: PetscScalar *work;
403: PetscReal *rwork;
404: Mat A;
405: Vec v;
407: PetscFunctionBegin;
408: /* Compute QR factorizaton of S */
409: PetscCall(BVGetSizes(S,NULL,&n,NULL));
410: n = PetscMin(n,ml);
411: PetscCall(BVSetActiveColumns(S,0,n));
412: PetscCall(PetscArrayzero(pA,ml*n));
413: PetscCall(MatCreateDense(PETSC_COMM_SELF,n,n,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
414: PetscCall(BVOrthogonalize(S,A));
415: if (n<ml) {
416: /* the rest of the factorization */
417: for (i=n;i<ml;i++) {
418: PetscCall(BVGetColumn(S,i,&v));
419: PetscCall(BVOrthogonalizeVec(S,v,pA+i*n,NULL,NULL));
420: PetscCall(BVRestoreColumn(S,i,&v));
421: }
422: }
423: PetscCall(PetscBLASIntCast(n,&lda));
424: PetscCall(PetscBLASIntCast(ml,&m));
425: PetscCall(PetscMalloc2(5*ml,&work,5*ml,&rwork));
426: lwork = 5*m;
427: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
428: #if !defined (PETSC_USE_COMPLEX)
429: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,&info));
430: #else
431: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,rwork,&info));
432: #endif
433: SlepcCheckLapackInfo("gesvd",info);
434: PetscCall(PetscFPTrapPop());
435: *rank = 0;
436: for (i=0;i<n;i++) {
437: if (sigma[i]/PetscMax(sigma[0],1)>delta) (*rank)++;
438: }
439: /* n first columns of A have the left singular vectors */
440: PetscCall(BVMultInPlace(S,A,0,*rank));
441: PetscCall(PetscFree2(work,rwork));
442: PetscCall(MatDestroy(&A));
443: PetscFunctionReturn(PETSC_SUCCESS);
444: }
446: PetscErrorCode BVSVDAndRank_QR_CAA(BV S,PetscInt M,PetscInt L,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
447: {
448: PetscInt i,j,n,ml=S->k;
449: PetscBLASInt m,k_,lda,lwork,info;
450: PetscScalar *work,*T,*U,*R,sone=1.0,zero=0.0;
451: PetscReal *rwork;
452: Mat A;
454: PetscFunctionBegin;
455: /* Compute QR factorizaton of S */
456: PetscCall(BVGetSizes(S,NULL,&n,NULL));
457: PetscCheck(n>=ml,PetscObjectComm((PetscObject)S),PETSC_ERR_SUP,"The QR_CAA method does not support problem size n < m*L");
458: PetscCall(BVSetActiveColumns(S,0,ml));
459: PetscCall(PetscArrayzero(pA,ml*ml));
460: PetscCall(MatCreateDense(PETSC_COMM_SELF,ml,ml,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
461: PetscCall(BVOrthogonalize(S,A));
462: PetscCall(MatDestroy(&A));
464: /* SVD of first (M-1)*L diagonal block */
465: PetscCall(PetscBLASIntCast((M-1)*L,&m));
466: PetscCall(PetscMalloc5(m*m,&T,m*m,&R,m*m,&U,5*ml,&work,5*ml,&rwork));
467: for (j=0;j<m;j++) PetscCall(PetscArraycpy(R+j*m,pA+j*ml,m));
468: lwork = 5*m;
469: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
470: #if !defined (PETSC_USE_COMPLEX)
471: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,&info));
472: #else
473: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,rwork,&info));
474: #endif
475: SlepcCheckLapackInfo("gesvd",info);
476: PetscCall(PetscFPTrapPop());
477: *rank = 0;
478: for (i=0;i<m;i++) {
479: if (sigma[i]/PetscMax(sigma[0],1)>delta) (*rank)++;
480: }
481: PetscCall(MatCreateDense(PETSC_COMM_SELF,m,m,PETSC_DECIDE,PETSC_DECIDE,U,&A));
482: PetscCall(BVSetActiveColumns(S,0,m));
483: PetscCall(BVMultInPlace(S,A,0,*rank));
484: PetscCall(MatDestroy(&A));
485: /* Projected linear system */
486: /* m first columns of A have the right singular vectors */
487: PetscCall(PetscBLASIntCast(*rank,&k_));
488: PetscCall(PetscBLASIntCast(ml,&lda));
489: PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&m,&k_,&m,&sone,pA+L*lda,&lda,R,&m,&zero,T,&m));
490: PetscCall(PetscArrayzero(pA,ml*ml));
491: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&m,&sone,U,&m,T,&m,&zero,pA,&k_));
492: for (j=0;j<k_;j++) for (i=0;i<k_;i++) pA[j*k_+i] /= sigma[j];
493: PetscCall(PetscFree5(T,R,U,work,rwork));
494: PetscFunctionReturn(PETSC_SUCCESS);
495: }
497: /*@
498: BVSVDAndRank - Compute the SVD (left singular vectors only, and singular
499: values) and determine the numerical rank according to a tolerance.
501: Collective
503: Input Parameters:
504: + S - the basis vectors
505: . m - the moment degree
506: . l - the block size
507: . delta - the tolerance used to determine the rank
508: - meth - the method to be used
510: Output Parameters:
511: + A - workspace, on output contains relevant values in the CAA method
512: . sigma - computed singular values
513: - rank - estimated rank (optional)
515: Notes:
516: This function computes [U,Sigma,V] = svd(S) and replaces S with U.
517: The current implementation computes this via S'*S, and it may include
518: some kind of iterative refinement to improve accuracy in some cases.
520: The parameters m and l refer to the moment and block size of contour
521: integral methods. All columns up to m*l are modified, and the active
522: columns are set to 0..m*l.
524: The method is one of BV_SVD_METHOD_REFINE, BV_SVD_METHOD_QR, BV_SVD_METHOD_QR_CAA.
526: The A workspace should be m*l*m*l in size.
528: Once the decomposition is computed, the numerical rank is estimated
529: by counting the number of singular values that are larger than the
530: tolerance delta, relative to the first singular value.
532: Level: developer
534: .seealso: BVSetActiveColumns()
535: @*/
536: PetscErrorCode BVSVDAndRank(BV S,PetscInt m,PetscInt l,PetscReal delta,BVSVDMethod meth,PetscScalar *A,PetscReal *sigma,PetscInt *rank)
537: {
538: PetscFunctionBegin;
548: PetscCall(PetscLogEventBegin(BV_SVDAndRank,S,0,0,0));
549: PetscCall(BVSetActiveColumns(S,0,m*l));
550: switch (meth) {
551: case BV_SVD_METHOD_REFINE:
552: PetscCall(BVSVDAndRank_Refine(S,delta,A,sigma,rank));
553: break;
554: case BV_SVD_METHOD_QR:
555: PetscCall(BVSVDAndRank_QR(S,delta,A,sigma,rank));
556: break;
557: case BV_SVD_METHOD_QR_CAA:
558: PetscCall(BVSVDAndRank_QR_CAA(S,m,l,delta,A,sigma,rank));
559: break;
560: }
561: PetscCall(PetscLogEventEnd(BV_SVDAndRank,S,0,0,0));
562: PetscFunctionReturn(PETSC_SUCCESS);
563: }
565: /*@
566: BVCISSResizeBases - Resize the bases involved in CISS solvers when the L grows.
568: Logically Collective
570: Input Parameters:
571: + S - basis of L*M columns
572: . V - basis of L columns (may be associated to subcommunicators)
573: . Y - basis of npoints*L columns
574: . Lold - old value of L
575: . Lnew - new value of L
576: . M - the moment size
577: - npoints - number of integration points
579: Level: developer
581: .seealso: BVResize()
582: @*/
583: PetscErrorCode BVCISSResizeBases(BV S,BV V,BV Y,PetscInt Lold,PetscInt Lnew,PetscInt M,PetscInt npoints)
584: {
585: PetscInt i,j;
587: PetscFunctionBegin;
596: PetscCall(BVResize(S,Lnew*M,PETSC_FALSE));
597: PetscCall(BVResize(V,Lnew,PETSC_TRUE));
598: PetscCall(BVResize(Y,Lnew*npoints,PETSC_TRUE));
599: /* columns of Y are interleaved */
600: for (i=npoints-1;i>=0;i--) {
601: for (j=Lold-1;j>=0;j--) PetscCall(BVCopyColumn(Y,i*Lold+j,i*Lnew+j));
602: }
603: PetscFunctionReturn(PETSC_SUCCESS);
604: }