Actual source code: nepdefl.c
slepc-3.19.0 2023-03-31
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/nepimpl.h>
12: #include <slepcblaslapack.h>
13: #include "nepdefl.h"
15: PetscErrorCode NEPDeflationGetInvariantPair(NEP_EXT_OP extop,BV *X,Mat *H)
16: {
17: PetscFunctionBegin;
18: if (X) *X = extop->X;
19: if (H) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,extop->szd+1,extop->szd+1,extop->H,H));
20: PetscFunctionReturn(PETSC_SUCCESS);
21: }
23: PetscErrorCode NEPDeflationExtendInvariantPair(NEP_EXT_OP extop,Vec u,PetscScalar lambda,PetscInt k)
24: {
25: Vec uu;
26: PetscInt ld,i;
27: PetscMPIInt np;
28: PetscReal norm;
30: PetscFunctionBegin;
31: PetscCall(BVGetColumn(extop->X,k,&uu));
32: ld = extop->szd+1;
33: PetscCall(NEPDeflationCopyToExtendedVec(extop,uu,extop->H+k*ld,u,PETSC_TRUE));
34: PetscCall(BVRestoreColumn(extop->X,k,&uu));
35: PetscCall(BVNormColumn(extop->X,k,NORM_2,&norm));
36: PetscCall(BVScaleColumn(extop->X,k,1.0/norm));
37: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)u),&np));
38: for (i=0;i<k;i++) extop->H[k*ld+i] *= PetscSqrtReal(np)/norm;
39: extop->H[k*(ld+1)] = lambda;
40: PetscFunctionReturn(PETSC_SUCCESS);
41: }
43: PetscErrorCode NEPDeflationExtractEigenpair(NEP_EXT_OP extop,PetscInt k,Vec u,PetscScalar lambda,DS ds)
44: {
45: Mat A,H;
46: PetscInt ldh=extop->szd+1,ldds,k1=k+1;
47: PetscScalar *eigr,*eigi,*t,*Z;
48: Vec x;
50: PetscFunctionBegin;
51: PetscCall(NEPDeflationExtendInvariantPair(extop,u,lambda,k));
52: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k1,k1,extop->H,&H));
53: PetscCall(MatDenseSetLDA(H,ldh));
54: PetscCall(PetscCalloc3(k1,&eigr,k1,&eigi,extop->szd,&t));
55: PetscCall(DSReset(ds));
56: PetscCall(DSSetType(ds,DSNHEP));
57: PetscCall(DSAllocate(ds,ldh));
58: PetscCall(DSGetLeadingDimension(ds,&ldds));
59: PetscCall(DSSetDimensions(ds,k1,0,k1));
60: PetscCall(DSGetMat(ds,DS_MAT_A,&A));
61: PetscCall(MatCopy(H,A,SAME_NONZERO_PATTERN));
62: PetscCall(DSRestoreMat(ds,DS_MAT_A,&A));
63: PetscCall(MatDestroy(&H));
64: PetscCall(DSSolve(ds,eigr,eigi));
65: PetscCall(DSVectors(ds,DS_MAT_X,&k,NULL));
66: PetscCall(DSGetArray(ds,DS_MAT_X,&Z));
67: PetscCall(BVMultColumn(extop->X,1.0,Z[k*ldds+k],k,Z+k*ldds));
68: PetscCall(DSRestoreArray(ds,DS_MAT_X,&Z));
69: PetscCall(BVGetColumn(extop->X,k,&x));
70: PetscCall(NEPDeflationCopyToExtendedVec(extop,x,t,u,PETSC_FALSE));
71: PetscCall(BVRestoreColumn(extop->X,k,&x));
72: PetscCall(PetscFree3(eigr,eigi,t));
73: PetscFunctionReturn(PETSC_SUCCESS);
74: }
76: PetscErrorCode NEPDeflationCopyToExtendedVec(NEP_EXT_OP extop,Vec v,PetscScalar *a,Vec vex,PetscBool back)
77: {
78: PetscMPIInt np,rk,count;
79: PetscScalar *array1,*array2;
80: PetscInt nloc;
82: PetscFunctionBegin;
83: if (extop->szd) {
84: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)vex),&rk));
85: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)vex),&np));
86: PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
87: if (v) {
88: PetscCall(VecGetArray(v,&array1));
89: PetscCall(VecGetArray(vex,&array2));
90: if (back) PetscCall(PetscArraycpy(array1,array2,nloc));
91: else PetscCall(PetscArraycpy(array2,array1,nloc));
92: PetscCall(VecRestoreArray(v,&array1));
93: PetscCall(VecRestoreArray(vex,&array2));
94: }
95: if (a) {
96: PetscCall(VecGetArray(vex,&array2));
97: if (back) {
98: PetscCall(PetscArraycpy(a,array2+nloc,extop->szd));
99: PetscCall(PetscMPIIntCast(extop->szd,&count));
100: PetscCallMPI(MPI_Bcast(a,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex)));
101: } else {
102: PetscCall(PetscArraycpy(array2+nloc,a,extop->szd));
103: PetscCall(PetscMPIIntCast(extop->szd,&count));
104: PetscCallMPI(MPI_Bcast(array2+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex)));
105: }
106: PetscCall(VecRestoreArray(vex,&array2));
107: }
108: } else {
109: if (back) PetscCall(VecCopy(vex,v));
110: else PetscCall(VecCopy(v,vex));
111: }
112: PetscFunctionReturn(PETSC_SUCCESS);
113: }
115: PetscErrorCode NEPDeflationCreateVec(NEP_EXT_OP extop,Vec *v)
116: {
117: PetscInt nloc;
118: Vec u;
119: VecType type;
121: PetscFunctionBegin;
122: if (extop->szd) {
123: PetscCall(BVGetColumn(extop->nep->V,0,&u));
124: PetscCall(VecGetType(u,&type));
125: PetscCall(BVRestoreColumn(extop->nep->V,0,&u));
126: PetscCall(VecCreate(PetscObjectComm((PetscObject)extop->nep),v));
127: PetscCall(VecSetType(*v,type));
128: PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
129: nloc += extop->szd;
130: PetscCall(VecSetSizes(*v,nloc,PETSC_DECIDE));
131: } else PetscCall(BVCreateVec(extop->nep->V,v));
132: PetscFunctionReturn(PETSC_SUCCESS);
133: }
135: PetscErrorCode NEPDeflationCreateBV(NEP_EXT_OP extop,PetscInt sz,BV *V)
136: {
137: PetscInt nloc;
138: BVType type;
139: BVOrthogType otype;
140: BVOrthogRefineType oref;
141: PetscReal oeta;
142: BVOrthogBlockType oblock;
143: NEP nep=extop->nep;
145: PetscFunctionBegin;
146: if (extop->szd) {
147: PetscCall(BVGetSizes(nep->V,&nloc,NULL,NULL));
148: PetscCall(BVCreate(PetscObjectComm((PetscObject)nep),V));
149: PetscCall(BVSetSizes(*V,nloc+extop->szd,PETSC_DECIDE,sz));
150: PetscCall(BVGetType(nep->V,&type));
151: PetscCall(BVSetType(*V,type));
152: PetscCall(BVGetOrthogonalization(nep->V,&otype,&oref,&oeta,&oblock));
153: PetscCall(BVSetOrthogonalization(*V,otype,oref,oeta,oblock));
154: PetscCall(PetscObjectStateIncrease((PetscObject)*V));
155: } else PetscCall(BVDuplicateResize(nep->V,sz,V));
156: PetscFunctionReturn(PETSC_SUCCESS);
157: }
159: PetscErrorCode NEPDeflationSetRandomVec(NEP_EXT_OP extop,Vec v)
160: {
161: PetscInt n,next,i;
162: PetscRandom rand;
163: PetscScalar *array;
164: PetscMPIInt nn,np;
166: PetscFunctionBegin;
167: PetscCall(BVGetRandomContext(extop->nep->V,&rand));
168: PetscCall(VecSetRandom(v,rand));
169: if (extop->szd) {
170: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v),&np));
171: PetscCall(BVGetSizes(extop->nep->V,&n,NULL,NULL));
172: PetscCall(VecGetLocalSize(v,&next));
173: PetscCall(VecGetArray(v,&array));
174: for (i=n+extop->n;i<next;i++) array[i] = 0.0;
175: for (i=n;i<n+extop->n;i++) array[i] /= PetscSqrtReal(np);
176: PetscCall(PetscMPIIntCast(extop->n,&nn));
177: PetscCallMPI(MPI_Bcast(array+n,nn,MPIU_SCALAR,0,PetscObjectComm((PetscObject)v)));
178: PetscCall(VecRestoreArray(v,&array));
179: }
180: PetscFunctionReturn(PETSC_SUCCESS);
181: }
183: static PetscErrorCode NEPDeflationEvaluateBasisMat(NEP_EXT_OP extop,PetscInt idx,PetscBool hat,PetscScalar *bval,PetscScalar *Hj,PetscScalar *Hjprev)
184: {
185: PetscInt i,k,n=extop->n,ldhj=extop->szd,ldh=extop->szd+1;
186: PetscScalar sone=1.0,zero=0.0;
187: PetscBLASInt ldh_,ldhj_,n_;
189: PetscFunctionBegin;
190: i = (idx<0)?extop->szd*extop->szd*(-idx):extop->szd*extop->szd;
191: PetscCall(PetscArrayzero(Hj,i));
192: PetscCall(PetscBLASIntCast(ldhj+1,&ldh_));
193: PetscCall(PetscBLASIntCast(ldhj,&ldhj_));
194: PetscCall(PetscBLASIntCast(n,&n_));
195: if (idx<1) {
196: if (!hat) for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 1.0;
197: else for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 0.0;
198: } else {
199: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[idx-1];
200: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hjprev,&ldhj_,&zero,Hj,&ldhj_));
201: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[idx-1];
202: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[idx-1];
203: }
204: if (idx<0) {
205: idx = -idx;
206: for (k=1;k<idx;k++) {
207: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[k-1];
208: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hj+(k-1)*ldhj*ldhj,&ldhj_,&zero,Hj+k*ldhj*ldhj,&ldhj_));
209: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[k-1];
210: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[k-1];
211: }
212: }
213: PetscFunctionReturn(PETSC_SUCCESS);
214: }
216: PetscErrorCode NEPDeflationLocking(NEP_EXT_OP extop,Vec u,PetscScalar lambda)
217: {
218: PetscInt i;
220: PetscFunctionBegin;
221: PetscCall(NEPDeflationExtendInvariantPair(extop,u,lambda,extop->n));
222: extop->n++;
223: PetscCall(BVSetActiveColumns(extop->X,0,extop->n));
224: if (extop->n <= extop->szd) {
225: /* update XpX */
226: PetscCall(BVDotColumn(extop->X,extop->n-1,extop->XpX+(extop->n-1)*extop->szd));
227: extop->XpX[(extop->n-1)*(1+extop->szd)] = 1.0;
228: for (i=0;i<extop->n-1;i++) extop->XpX[i*extop->szd+extop->n-1] = PetscConj(extop->XpX[(extop->n-1)*extop->szd+i]);
229: /* determine minimality index */
230: extop->midx = PetscMin(extop->max_midx,extop->n);
231: /* polynominal basis coefficients */
232: for (i=0;i<extop->midx;i++) extop->bc[i] = extop->nep->target;
233: /* evaluate the polynomial basis in H */
234: PetscCall(NEPDeflationEvaluateBasisMat(extop,-extop->midx,PETSC_FALSE,NULL,extop->Hj,NULL));
235: }
236: PetscFunctionReturn(PETSC_SUCCESS);
237: }
239: static PetscErrorCode NEPDeflationEvaluateHatFunction(NEP_EXT_OP extop, PetscInt idx,PetscScalar lambda,PetscScalar *y,PetscScalar *hfj,PetscScalar *hfjp,PetscInt ld)
240: {
241: PetscInt i,j,k,off,ini,fin,sz,ldh,n=extop->n;
242: Mat A,B;
243: PetscScalar *array;
244: const PetscScalar *barray;
246: PetscFunctionBegin;
247: if (idx<0) {ini = 0; fin = extop->nep->nt;}
248: else {ini = idx; fin = idx+1;}
249: if (y) sz = hfjp?n+2:n+1;
250: else sz = hfjp?3*n:2*n;
251: ldh = extop->szd+1;
252: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&A));
253: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&B));
254: PetscCall(MatDenseGetArray(A,&array));
255: for (j=0;j<n;j++)
256: for (i=0;i<n;i++) array[j*sz+i] = extop->H[j*ldh+i];
257: PetscCall(MatDenseRestoreArrayWrite(A,&array));
258: if (y) {
259: PetscCall(MatDenseGetArray(A,&array));
260: array[extop->n*(sz+1)] = lambda;
261: if (hfjp) { array[(n+1)*sz+n] = 1.0; array[(n+1)*sz+n+1] = lambda;}
262: for (i=0;i<n;i++) array[n*sz+i] = y[i];
263: PetscCall(MatDenseRestoreArrayWrite(A,&array));
264: for (j=ini;j<fin;j++) {
265: PetscCall(FNEvaluateFunctionMat(extop->nep->f[j],A,B));
266: PetscCall(MatDenseGetArrayRead(B,&barray));
267: for (i=0;i<n;i++) hfj[j*ld+i] = barray[n*sz+i];
268: if (hfjp) for (i=0;i<n;i++) hfjp[j*ld+i] = barray[(n+1)*sz+i];
269: PetscCall(MatDenseRestoreArrayRead(B,&barray));
270: }
271: } else {
272: off = idx<0?ld*n:0;
273: PetscCall(MatDenseGetArray(A,&array));
274: for (k=0;k<n;k++) {
275: array[(n+k)*sz+k] = 1.0;
276: array[(n+k)*sz+n+k] = lambda;
277: }
278: if (hfjp) for (k=0;k<n;k++) {
279: array[(2*n+k)*sz+n+k] = 1.0;
280: array[(2*n+k)*sz+2*n+k] = lambda;
281: }
282: PetscCall(MatDenseRestoreArray(A,&array));
283: for (j=ini;j<fin;j++) {
284: PetscCall(FNEvaluateFunctionMat(extop->nep->f[j],A,B));
285: PetscCall(MatDenseGetArrayRead(B,&barray));
286: for (i=0;i<n;i++) for (k=0;k<n;k++) hfj[j*off+i*ld+k] = barray[n*sz+i*sz+k];
287: if (hfjp) for (k=0;k<n;k++) for (i=0;i<n;i++) hfjp[j*off+i*ld+k] = barray[2*n*sz+i*sz+k];
288: PetscCall(MatDenseRestoreArrayRead(B,&barray));
289: }
290: }
291: PetscCall(MatDestroy(&A));
292: PetscCall(MatDestroy(&B));
293: PetscFunctionReturn(PETSC_SUCCESS);
294: }
296: static PetscErrorCode MatMult_NEPDeflation(Mat M,Vec x,Vec y)
297: {
298: NEP_DEF_MATSHELL *matctx;
299: NEP_EXT_OP extop;
300: Vec x1,y1;
301: PetscScalar *yy,sone=1.0,zero=0.0;
302: const PetscScalar *xx;
303: PetscInt nloc,i;
304: PetscMPIInt np;
305: PetscBLASInt n_,one=1,szd_;
307: PetscFunctionBegin;
308: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&np));
309: PetscCall(MatShellGetContext(M,&matctx));
310: extop = matctx->extop;
311: if (extop->ref) PetscCall(VecZeroEntries(y));
312: if (extop->szd) {
313: x1 = matctx->w[0]; y1 = matctx->w[1];
314: PetscCall(VecGetArrayRead(x,&xx));
315: PetscCall(VecPlaceArray(x1,xx));
316: PetscCall(VecGetArray(y,&yy));
317: PetscCall(VecPlaceArray(y1,yy));
318: PetscCall(MatMult(matctx->T,x1,y1));
319: if (!extop->ref && extop->n) {
320: PetscCall(VecGetLocalSize(x1,&nloc));
321: /* copy for avoiding warning of constant array xx */
322: for (i=0;i<extop->n;i++) matctx->work[i] = xx[nloc+i]*PetscSqrtReal(np);
323: PetscCall(BVMultVec(matctx->U,1.0,1.0,y1,matctx->work));
324: PetscCall(BVDotVec(extop->X,x1,matctx->work));
325: PetscCall(PetscBLASIntCast(extop->n,&n_));
326: PetscCall(PetscBLASIntCast(extop->szd,&szd_));
327: PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->A,&szd_,matctx->work,&one,&zero,yy+nloc,&one));
328: PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->B,&szd_,xx+nloc,&one,&sone,yy+nloc,&one));
329: for (i=0;i<extop->n;i++) yy[nloc+i] /= PetscSqrtReal(np);
330: }
331: PetscCall(VecResetArray(x1));
332: PetscCall(VecRestoreArrayRead(x,&xx));
333: PetscCall(VecResetArray(y1));
334: PetscCall(VecRestoreArray(y,&yy));
335: } else PetscCall(MatMult(matctx->T,x,y));
336: PetscFunctionReturn(PETSC_SUCCESS);
337: }
339: static PetscErrorCode MatCreateVecs_NEPDeflation(Mat M,Vec *right,Vec *left)
340: {
341: NEP_DEF_MATSHELL *matctx;
343: PetscFunctionBegin;
344: PetscCall(MatShellGetContext(M,&matctx));
345: if (right) PetscCall(VecDuplicate(matctx->w[0],right));
346: if (left) PetscCall(VecDuplicate(matctx->w[0],left));
347: PetscFunctionReturn(PETSC_SUCCESS);
348: }
350: static PetscErrorCode MatDestroy_NEPDeflation(Mat M)
351: {
352: NEP_DEF_MATSHELL *matctx;
354: PetscFunctionBegin;
355: PetscCall(MatShellGetContext(M,&matctx));
356: if (matctx->extop->szd) {
357: PetscCall(BVDestroy(&matctx->U));
358: PetscCall(PetscFree2(matctx->hfj,matctx->work));
359: PetscCall(PetscFree2(matctx->A,matctx->B));
360: PetscCall(VecDestroy(&matctx->w[0]));
361: PetscCall(VecDestroy(&matctx->w[1]));
362: }
363: if (matctx->P != matctx->T) PetscCall(MatDestroy(&matctx->P));
364: PetscCall(MatDestroy(&matctx->T));
365: PetscCall(PetscFree(matctx));
366: PetscFunctionReturn(PETSC_SUCCESS);
367: }
369: static PetscErrorCode NEPDeflationEvaluateBasis(NEP_EXT_OP extop,PetscScalar lambda,PetscInt n,PetscScalar *val,PetscBool jacobian)
370: {
371: PetscScalar p;
372: PetscInt i;
374: PetscFunctionBegin;
375: if (!jacobian) {
376: val[0] = 1.0;
377: for (i=1;i<extop->n;i++) val[i] = val[i-1]*(lambda-extop->bc[i-1]);
378: } else {
379: val[0] = 0.0;
380: p = 1.0;
381: for (i=1;i<extop->n;i++) {
382: val[i] = val[i-1]*(lambda-extop->bc[i-1])+p;
383: p *= (lambda-extop->bc[i-1]);
384: }
385: }
386: PetscFunctionReturn(PETSC_SUCCESS);
387: }
389: static PetscErrorCode NEPDeflationComputeShellMat(NEP_EXT_OP extop,PetscScalar lambda,PetscBool jacobian,Mat *M)
390: {
391: NEP_DEF_MATSHELL *matctx,*matctxC;
392: PetscInt nloc,mloc,n=extop->n,j,i,szd=extop->szd,ldh=szd+1,k;
393: Mat F,Mshell,Mcomp;
394: PetscBool ini=PETSC_FALSE;
395: PetscScalar *hf,*hfj,*hfjp,sone=1.0,*hH,*hHprev,*pts,*B,*A,*Hj=extop->Hj,*basisv,zero=0.0;
396: PetscBLASInt n_,info,szd_;
398: PetscFunctionBegin;
399: if (!M) Mshell = jacobian?extop->MJ:extop->MF;
400: else Mshell = *M;
401: Mcomp = jacobian?extop->MF:extop->MJ;
402: if (!Mshell) {
403: ini = PETSC_TRUE;
404: PetscCall(PetscNew(&matctx));
405: PetscCall(MatGetLocalSize(extop->nep->function,&mloc,&nloc));
406: nloc += szd; mloc += szd;
407: PetscCall(MatCreateShell(PetscObjectComm((PetscObject)extop->nep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&Mshell));
408: PetscCall(MatShellSetOperation(Mshell,MATOP_MULT,(void(*)(void))MatMult_NEPDeflation));
409: PetscCall(MatShellSetOperation(Mshell,MATOP_CREATE_VECS,(void(*)(void))MatCreateVecs_NEPDeflation));
410: PetscCall(MatShellSetOperation(Mshell,MATOP_DESTROY,(void(*)(void))MatDestroy_NEPDeflation));
411: matctx->nep = extop->nep;
412: matctx->extop = extop;
413: if (!M) {
414: if (jacobian) { matctx->jacob = PETSC_TRUE; matctx->T = extop->nep->jacobian; extop->MJ = Mshell; }
415: else { matctx->jacob = PETSC_FALSE; matctx->T = extop->nep->function; extop->MF = Mshell; }
416: PetscCall(PetscObjectReference((PetscObject)matctx->T));
417: if (!jacobian) {
418: if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) {
419: matctx->P = extop->nep->function_pre;
420: PetscCall(PetscObjectReference((PetscObject)matctx->P));
421: } else matctx->P = matctx->T;
422: }
423: } else {
424: matctx->jacob = jacobian;
425: PetscCall(MatDuplicate(jacobian?extop->nep->jacobian:extop->nep->function,MAT_DO_NOT_COPY_VALUES,&matctx->T));
426: *M = Mshell;
427: if (!jacobian) {
428: if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) PetscCall(MatDuplicate(extop->nep->function_pre,MAT_DO_NOT_COPY_VALUES,&matctx->P));
429: else matctx->P = matctx->T;
430: }
431: }
432: if (szd) {
433: PetscCall(BVCreateVec(extop->nep->V,matctx->w));
434: PetscCall(VecDuplicate(matctx->w[0],matctx->w+1));
435: PetscCall(BVDuplicateResize(extop->nep->V,szd,&matctx->U));
436: PetscCall(PetscMalloc2(extop->simpU?2*(szd)*(szd):2*(szd)*(szd)*extop->nep->nt,&matctx->hfj,szd,&matctx->work));
437: PetscCall(PetscMalloc2(szd*szd,&matctx->A,szd*szd,&matctx->B));
438: }
439: } else PetscCall(MatShellGetContext(Mshell,&matctx));
440: if (ini || matctx->theta != lambda || matctx->n != extop->n) {
441: if (ini || matctx->theta != lambda) {
442: if (jacobian) PetscCall(NEPComputeJacobian(extop->nep,lambda,matctx->T));
443: else PetscCall(NEPComputeFunction(extop->nep,lambda,matctx->T,matctx->P));
444: }
445: if (n) {
446: matctx->hfjset = PETSC_FALSE;
447: if (!extop->simpU) {
448: /* likely hfjp has been already computed */
449: if (Mcomp) {
450: PetscCall(MatShellGetContext(Mcomp,&matctxC));
451: if (matctxC->hfjset && matctxC->theta == lambda && matctxC->n == extop->n) {
452: PetscCall(PetscArraycpy(matctx->hfj,matctxC->hfj,2*extop->szd*extop->szd*extop->nep->nt));
453: matctx->hfjset = PETSC_TRUE;
454: }
455: }
456: hfj = matctx->hfj; hfjp = matctx->hfj+extop->szd*extop->szd*extop->nep->nt;
457: if (!matctx->hfjset) {
458: PetscCall(NEPDeflationEvaluateHatFunction(extop,-1,lambda,NULL,hfj,hfjp,n));
459: matctx->hfjset = PETSC_TRUE;
460: }
461: PetscCall(BVSetActiveColumns(matctx->U,0,n));
462: hf = jacobian?hfjp:hfj;
463: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,hf,&F));
464: PetscCall(BVMatMult(extop->X,extop->nep->A[0],matctx->U));
465: PetscCall(BVMultInPlace(matctx->U,F,0,n));
466: PetscCall(BVSetActiveColumns(extop->W,0,extop->n));
467: for (j=1;j<extop->nep->nt;j++) {
468: PetscCall(BVMatMult(extop->X,extop->nep->A[j],extop->W));
469: PetscCall(MatDensePlaceArray(F,hf+j*n*n));
470: PetscCall(BVMult(matctx->U,1.0,1.0,extop->W,F));
471: PetscCall(MatDenseResetArray(F));
472: }
473: PetscCall(MatDestroy(&F));
474: } else {
475: hfj = matctx->hfj;
476: PetscCall(BVSetActiveColumns(matctx->U,0,n));
477: PetscCall(BVMatMult(extop->X,matctx->T,matctx->U));
478: for (j=0;j<n;j++) {
479: for (i=0;i<n;i++) hfj[j*n+i] = -extop->H[j*ldh+i];
480: hfj[j*(n+1)] += lambda;
481: }
482: PetscCall(PetscBLASIntCast(n,&n_));
483: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
484: PetscCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,hfj,&n_,&info));
485: PetscCall(PetscFPTrapPop());
486: SlepcCheckLapackInfo("trtri",info);
487: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,hfj,&F));
488: PetscCall(BVMultInPlace(matctx->U,F,0,n));
489: if (jacobian) {
490: PetscCall(NEPDeflationComputeFunction(extop,lambda,NULL));
491: PetscCall(MatShellGetContext(extop->MF,&matctxC));
492: PetscCall(BVMult(matctx->U,-1.0,1.0,matctxC->U,F));
493: }
494: PetscCall(MatDestroy(&F));
495: }
496: PetscCall(PetscCalloc3(n,&basisv,szd*szd,&hH,szd*szd,&hHprev));
497: PetscCall(NEPDeflationEvaluateBasis(extop,lambda,n,basisv,jacobian));
498: A = matctx->A;
499: PetscCall(PetscArrayzero(A,szd*szd));
500: if (!jacobian) for (i=0;i<n;i++) A[i*(szd+1)] = 1.0;
501: for (j=0;j<n;j++)
502: for (i=0;i<n;i++)
503: for (k=1;k<extop->midx;k++) A[j*szd+i] += basisv[k]*PetscConj(Hj[k*szd*szd+i*szd+j]);
504: PetscCall(PetscBLASIntCast(n,&n_));
505: PetscCall(PetscBLASIntCast(szd,&szd_));
506: B = matctx->B;
507: PetscCall(PetscArrayzero(B,szd*szd));
508: for (i=1;i<extop->midx;i++) {
509: PetscCall(NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev));
510: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
511: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,B,&szd_));
512: pts = hHprev; hHprev = hH; hH = pts;
513: }
514: PetscCall(PetscFree3(basisv,hH,hHprev));
515: }
516: matctx->theta = lambda;
517: matctx->n = extop->n;
518: }
519: PetscFunctionReturn(PETSC_SUCCESS);
520: }
522: PetscErrorCode NEPDeflationComputeFunction(NEP_EXT_OP extop,PetscScalar lambda,Mat *F)
523: {
524: PetscFunctionBegin;
525: PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,NULL));
526: if (F) *F = extop->MF;
527: PetscFunctionReturn(PETSC_SUCCESS);
528: }
530: PetscErrorCode NEPDeflationComputeJacobian(NEP_EXT_OP extop,PetscScalar lambda,Mat *J)
531: {
532: PetscFunctionBegin;
533: PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_TRUE,NULL));
534: if (J) *J = extop->MJ;
535: PetscFunctionReturn(PETSC_SUCCESS);
536: }
538: PetscErrorCode NEPDeflationSolveSetUp(NEP_EXT_OP extop,PetscScalar lambda)
539: {
540: NEP_DEF_MATSHELL *matctx;
541: NEP_DEF_FUN_SOLVE solve;
542: PetscInt i,j,n=extop->n;
543: Vec u,tu;
544: Mat F;
545: PetscScalar snone=-1.0,sone=1.0;
546: PetscBLASInt n_,szd_,ldh_,*p,info;
547: Mat Mshell;
549: PetscFunctionBegin;
550: solve = extop->solve;
551: if (lambda!=solve->theta || n!=solve->n) {
552: PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,solve->sincf?NULL:&solve->T));
553: Mshell = (solve->sincf)?extop->MF:solve->T;
554: PetscCall(MatShellGetContext(Mshell,&matctx));
555: PetscCall(NEP_KSPSetOperators(solve->ksp,matctx->T,matctx->P));
556: if (!extop->ref && n) {
557: PetscCall(PetscBLASIntCast(n,&n_));
558: PetscCall(PetscBLASIntCast(extop->szd,&szd_));
559: PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
560: if (!extop->simpU) {
561: PetscCall(BVSetActiveColumns(solve->T_1U,0,n));
562: for (i=0;i<n;i++) {
563: PetscCall(BVGetColumn(matctx->U,i,&u));
564: PetscCall(BVGetColumn(solve->T_1U,i,&tu));
565: PetscCall(KSPSolve(solve->ksp,u,tu));
566: PetscCall(BVRestoreColumn(solve->T_1U,i,&tu));
567: PetscCall(BVRestoreColumn(matctx->U,i,&u));
568: }
569: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,solve->work,&F));
570: PetscCall(BVDot(solve->T_1U,extop->X,F));
571: PetscCall(MatDestroy(&F));
572: } else {
573: for (j=0;j<n;j++)
574: for (i=0;i<n;i++) solve->work[j*n+i] = extop->XpX[j*extop->szd+i];
575: for (i=0;i<n;i++) extop->H[i*ldh_+i] -= lambda;
576: PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n_,&n_,&snone,extop->H,&ldh_,solve->work,&n_));
577: for (i=0;i<n;i++) extop->H[i*ldh_+i] += lambda;
578: }
579: PetscCall(PetscArraycpy(solve->M,matctx->B,extop->szd*extop->szd));
580: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,matctx->A,&szd_,solve->work,&n_,&sone,solve->M,&szd_));
581: PetscCall(PetscMalloc1(n,&p));
582: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
583: PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,solve->M,&szd_,p,&info));
584: SlepcCheckLapackInfo("getrf",info);
585: PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,solve->M,&szd_,p,solve->work,&n_,&info));
586: SlepcCheckLapackInfo("getri",info);
587: PetscCall(PetscFPTrapPop());
588: PetscCall(PetscFree(p));
589: }
590: solve->theta = lambda;
591: solve->n = n;
592: }
593: PetscFunctionReturn(PETSC_SUCCESS);
594: }
596: PetscErrorCode NEPDeflationFunctionSolve(NEP_EXT_OP extop,Vec b,Vec x)
597: {
598: Vec b1,x1;
599: PetscScalar *xx,*bb,*x2,*b2,*w,*w2,snone=-1.0,sone=1.0,zero=0.0;
600: NEP_DEF_MATSHELL *matctx;
601: NEP_DEF_FUN_SOLVE solve=extop->solve;
602: PetscBLASInt one=1,szd_,n_,ldh_;
603: PetscInt nloc,i;
604: PetscMPIInt np,count;
606: PetscFunctionBegin;
607: if (extop->ref) PetscCall(VecZeroEntries(x));
608: if (extop->szd) {
609: x1 = solve->w[0]; b1 = solve->w[1];
610: PetscCall(VecGetArray(x,&xx));
611: PetscCall(VecPlaceArray(x1,xx));
612: PetscCall(VecGetArray(b,&bb));
613: PetscCall(VecPlaceArray(b1,bb));
614: } else {
615: b1 = b; x1 = x;
616: }
617: PetscCall(KSPSolve(extop->solve->ksp,b1,x1));
618: if (!extop->ref && extop->n && extop->szd) {
619: PetscCall(PetscBLASIntCast(extop->szd,&szd_));
620: PetscCall(PetscBLASIntCast(extop->n,&n_));
621: PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
622: PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
623: PetscCall(PetscMalloc2(extop->n,&b2,extop->n,&x2));
624: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)b),&np));
625: for (i=0;i<extop->n;i++) b2[i] = bb[nloc+i]*PetscSqrtReal(np);
626: w = solve->work; w2 = solve->work+extop->n;
627: PetscCall(MatShellGetContext(solve->sincf?extop->MF:solve->T,&matctx));
628: PetscCall(PetscArraycpy(w2,b2,extop->n));
629: PetscCall(BVDotVec(extop->X,x1,w));
630: PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&snone,matctx->A,&szd_,w,&one,&sone,w2,&one));
631: PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,solve->M,&szd_,w2,&one,&zero,x2,&one));
632: if (extop->simpU) {
633: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] -= solve->theta;
634: for (i=0;i<extop->n;i++) w[i] = x2[i];
635: PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&one,&snone,extop->H,&ldh_,w,&n_));
636: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] += solve->theta;
637: PetscCall(BVMultVec(extop->X,-1.0,1.0,x1,w));
638: } else PetscCall(BVMultVec(solve->T_1U,-1.0,1.0,x1,x2));
639: for (i=0;i<extop->n;i++) xx[i+nloc] = x2[i]/PetscSqrtReal(np);
640: PetscCall(PetscMPIIntCast(extop->n,&count));
641: PetscCallMPI(MPI_Bcast(xx+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)b)));
642: }
643: if (extop->szd) {
644: PetscCall(VecResetArray(x1));
645: PetscCall(VecRestoreArray(x,&xx));
646: PetscCall(VecResetArray(b1));
647: PetscCall(VecRestoreArray(b,&bb));
648: if (!extop->ref && extop->n) PetscCall(PetscFree2(b2,x2));
649: }
650: PetscFunctionReturn(PETSC_SUCCESS);
651: }
653: PetscErrorCode NEPDeflationSetRefine(NEP_EXT_OP extop,PetscBool ref)
654: {
655: PetscFunctionBegin;
656: extop->ref = ref;
657: PetscFunctionReturn(PETSC_SUCCESS);
658: }
660: PetscErrorCode NEPDeflationReset(NEP_EXT_OP extop)
661: {
662: PetscInt j;
663: NEP_DEF_FUN_SOLVE solve;
665: PetscFunctionBegin;
666: if (!extop) PetscFunctionReturn(PETSC_SUCCESS);
667: PetscCall(PetscFree(extop->H));
668: PetscCall(BVDestroy(&extop->X));
669: if (extop->szd) {
670: PetscCall(VecDestroy(&extop->w));
671: PetscCall(PetscFree3(extop->Hj,extop->XpX,extop->bc));
672: PetscCall(BVDestroy(&extop->W));
673: }
674: PetscCall(MatDestroy(&extop->MF));
675: PetscCall(MatDestroy(&extop->MJ));
676: if (extop->solve) {
677: solve = extop->solve;
678: if (extop->szd) {
679: if (!extop->simpU) PetscCall(BVDestroy(&solve->T_1U));
680: PetscCall(PetscFree2(solve->M,solve->work));
681: PetscCall(VecDestroy(&solve->w[0]));
682: PetscCall(VecDestroy(&solve->w[1]));
683: }
684: if (!solve->sincf) PetscCall(MatDestroy(&solve->T));
685: PetscCall(PetscFree(extop->solve));
686: }
687: if (extop->proj) {
688: if (extop->szd) {
689: for (j=0;j<extop->nep->nt;j++) PetscCall(MatDestroy(&extop->proj->V1pApX[j]));
690: PetscCall(MatDestroy(&extop->proj->XpV1));
691: PetscCall(PetscFree3(extop->proj->V2,extop->proj->V1pApX,extop->proj->work));
692: PetscCall(VecDestroy(&extop->proj->w));
693: PetscCall(BVDestroy(&extop->proj->V1));
694: }
695: PetscCall(PetscFree(extop->proj));
696: }
697: PetscCall(PetscFree(extop));
698: PetscFunctionReturn(PETSC_SUCCESS);
699: }
701: PetscErrorCode NEPDeflationInitialize(NEP nep,BV X,KSP ksp,PetscBool sincfun,PetscInt sz,NEP_EXT_OP *extop)
702: {
703: NEP_EXT_OP op;
704: NEP_DEF_FUN_SOLVE solve;
705: PetscInt szd;
706: Vec x;
708: PetscFunctionBegin;
709: PetscCall(NEPDeflationReset(*extop));
710: PetscCall(PetscNew(&op));
711: *extop = op;
712: op->nep = nep;
713: op->n = 0;
714: op->szd = szd = sz-1;
715: op->max_midx = PetscMin(MAX_MINIDX,szd);
716: op->X = X;
717: if (!X) PetscCall(BVDuplicateResize(nep->V,sz,&op->X));
718: else PetscCall(PetscObjectReference((PetscObject)X));
719: PetscCall(PetscCalloc1(sz*sz,&(op)->H));
720: if (op->szd) {
721: PetscCall(BVGetColumn(op->X,0,&x));
722: PetscCall(VecDuplicate(x,&op->w));
723: PetscCall(BVRestoreColumn(op->X,0,&x));
724: op->simpU = PETSC_FALSE;
725: if (nep->fui==NEP_USER_INTERFACE_SPLIT) {
726: /* undocumented option to use the simple expression for U = T*X*inv(lambda-H) */
727: PetscCall(PetscOptionsGetBool(NULL,NULL,"-nep_deflation_simpleu",&op->simpU,NULL));
728: } else {
729: op->simpU = PETSC_TRUE;
730: }
731: PetscCall(PetscCalloc3(szd*szd*op->max_midx,&(op)->Hj,szd*szd,&(op)->XpX,szd,&op->bc));
732: PetscCall(BVDuplicateResize(op->X,op->szd,&op->W));
733: }
734: if (ksp) {
735: PetscCall(PetscNew(&solve));
736: op->solve = solve;
737: solve->ksp = ksp;
738: solve->sincf = sincfun;
739: solve->n = -1;
740: if (op->szd) {
741: if (!op->simpU) PetscCall(BVDuplicateResize(nep->V,szd,&solve->T_1U));
742: PetscCall(PetscMalloc2(szd*szd,&solve->M,2*szd*szd,&solve->work));
743: PetscCall(BVCreateVec(nep->V,&solve->w[0]));
744: PetscCall(VecDuplicate(solve->w[0],&solve->w[1]));
745: }
746: }
747: PetscFunctionReturn(PETSC_SUCCESS);
748: }
750: PetscErrorCode NEPDeflationDSNEPComputeMatrix(DS ds,PetscScalar lambda,PetscBool deriv,DSMatType mat,void *ctx)
751: {
752: Mat A,Ei;
753: PetscScalar *T,*w1,*w2,*w=NULL,*ww,*hH,*hHprev,*pts;
754: PetscScalar alpha,alpha2,*AB,sone=1.0,zero=0.0,*basisv,s;
755: const PetscScalar *E;
756: PetscInt i,ldds,nwork=0,szd,nv,j,k,n;
757: PetscBLASInt inc=1,nv_,ldds_,dim_,szdk,szd_,n_,ldh_;
758: PetscMPIInt np;
759: NEP_DEF_PROJECT proj=(NEP_DEF_PROJECT)ctx;
760: NEP_EXT_OP extop=proj->extop;
761: NEP nep=extop->nep;
763: PetscFunctionBegin;
764: PetscCall(DSGetDimensions(ds,&nv,NULL,NULL,NULL));
765: PetscCall(DSGetLeadingDimension(ds,&ldds));
766: PetscCall(DSGetMat(ds,mat,&A));
767: PetscCall(MatZeroEntries(A));
768: /* mat = V1^*T(lambda)V1 */
769: for (i=0;i<nep->nt;i++) {
770: if (deriv) PetscCall(FNEvaluateDerivative(nep->f[i],lambda,&alpha));
771: else PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha));
772: PetscCall(DSGetMat(ds,DSMatExtra[i],&Ei));
773: PetscCall(MatAXPY(A,alpha,Ei,SAME_NONZERO_PATTERN));
774: PetscCall(DSRestoreMat(ds,DSMatExtra[i],&Ei));
775: }
776: PetscCall(DSRestoreMat(ds,mat,&A));
777: if (!extop->ref && extop->n) {
778: PetscCall(DSGetArray(ds,mat,&T));
779: n = extop->n;
780: szd = extop->szd;
781: PetscCall(PetscArrayzero(proj->work,proj->lwork));
782: PetscCall(PetscBLASIntCast(nv,&nv_));
783: PetscCall(PetscBLASIntCast(n,&n_));
784: PetscCall(PetscBLASIntCast(ldds,&ldds_));
785: PetscCall(PetscBLASIntCast(szd,&szd_));
786: PetscCall(PetscBLASIntCast(proj->dim,&dim_));
787: PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
788: w1 = proj->work; w2 = proj->work+proj->dim*proj->dim;
789: nwork += 2*proj->dim*proj->dim;
791: /* mat = mat + V1^*U(lambda)V2 */
792: for (i=0;i<nep->nt;i++) {
793: if (extop->simpU) {
794: if (deriv) PetscCall(FNEvaluateDerivative(nep->f[i],lambda,&alpha));
795: else PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha));
796: ww = w1; w = w2;
797: PetscCall(PetscArraycpy(ww,proj->V2,szd*nv));
798: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np));
799: for (j=0;j<szd*nv;j++) ww[j] *= PetscSqrtReal(np);
800: for (j=0;j<n;j++) extop->H[j*ldh_+j] -= lambda;
801: alpha = -alpha;
802: PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha,extop->H,&ldh_,ww,&szd_));
803: if (deriv) {
804: PetscCall(PetscBLASIntCast(szd*nv,&szdk));
805: PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha2));
806: PetscCall(PetscArraycpy(w,proj->V2,szd*nv));
807: for (j=0;j<szd*nv;j++) w[j] *= PetscSqrtReal(np);
808: alpha2 = -alpha2;
809: PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
810: alpha2 = 1.0;
811: PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
812: PetscCallBLAS("BLASaxpy",BLASaxpy_(&szdk,&sone,w,&inc,ww,&inc));
813: }
814: for (j=0;j<n;j++) extop->H[j*ldh_+j] += lambda;
815: } else {
816: PetscCall(NEPDeflationEvaluateHatFunction(extop,i,lambda,NULL,w1,w2,szd));
817: w = deriv?w2:w1; ww = deriv?w1:w2;
818: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np));
819: s = PetscSqrtReal(np);
820: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&s,w,&szd_,proj->V2,&szd_,&zero,ww,&szd_));
821: }
822: PetscCall(MatDenseGetArrayRead(proj->V1pApX[i],&E));
823: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&nv_,&nv_,&n_,&sone,E,&dim_,ww,&szd_,&sone,T,&ldds_));
824: PetscCall(MatDenseRestoreArrayRead(proj->V1pApX[i],&E));
825: }
827: /* mat = mat + V2^*A(lambda)V1 */
828: basisv = proj->work+nwork; nwork += szd;
829: hH = proj->work+nwork; nwork += szd*szd;
830: hHprev = proj->work+nwork; nwork += szd*szd;
831: AB = proj->work+nwork;
832: PetscCall(NEPDeflationEvaluateBasis(extop,lambda,n,basisv,deriv));
833: if (!deriv) for (i=0;i<n;i++) AB[i*(szd+1)] = 1.0;
834: for (j=0;j<n;j++)
835: for (i=0;i<n;i++)
836: for (k=1;k<extop->midx;k++) AB[j*szd+i] += basisv[k]*PetscConj(extop->Hj[k*szd*szd+i*szd+j]);
837: PetscCall(MatDenseGetArrayRead(proj->XpV1,&E));
838: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,E,&szd_,&zero,w,&szd_));
839: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
840: PetscCall(MatDenseRestoreArrayRead(proj->XpV1,&E));
842: /* mat = mat + V2^*B(lambda)V2 */
843: PetscCall(PetscArrayzero(AB,szd*szd));
844: for (i=1;i<extop->midx;i++) {
845: PetscCall(NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev));
846: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
847: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,AB,&szd_));
848: pts = hHprev; hHprev = hH; hH = pts;
849: }
850: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,proj->V2,&szd_,&zero,w,&szd_));
851: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
852: PetscCall(DSRestoreArray(ds,mat,&T));
853: }
854: PetscFunctionReturn(PETSC_SUCCESS);
855: }
857: PetscErrorCode NEPDeflationProjectOperator(NEP_EXT_OP extop,BV Vext,DS ds,PetscInt j0,PetscInt j1)
858: {
859: PetscInt k,j,n=extop->n,dim;
860: Vec v,ve;
861: BV V1;
862: Mat G;
863: NEP nep=extop->nep;
864: NEP_DEF_PROJECT proj;
866: PetscFunctionBegin;
867: NEPCheckSplit(extop->nep,1);
868: proj = extop->proj;
869: if (!proj) {
870: /* Initialize the projection data structure */
871: PetscCall(PetscNew(&proj));
872: extop->proj = proj;
873: proj->extop = extop;
874: PetscCall(BVGetSizes(Vext,NULL,NULL,&dim));
875: proj->dim = dim;
876: if (extop->szd) {
877: proj->lwork = 3*dim*dim+2*extop->szd*extop->szd+extop->szd;
878: PetscCall(PetscMalloc3(dim*extop->szd,&proj->V2,nep->nt,&proj->V1pApX,proj->lwork,&proj->work));
879: for (j=0;j<nep->nt;j++) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,proj->dim,extop->szd,NULL,&proj->V1pApX[j]));
880: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,extop->szd,proj->dim,NULL,&proj->XpV1));
881: PetscCall(BVCreateVec(extop->X,&proj->w));
882: PetscCall(BVDuplicateResize(extop->X,proj->dim,&proj->V1));
883: }
884: PetscCall(DSNEPSetComputeMatrixFunction(ds,NEPDeflationDSNEPComputeMatrix,(void*)proj));
885: }
887: /* Split Vext in V1 and V2 */
888: if (extop->szd) {
889: for (j=j0;j<j1;j++) {
890: PetscCall(BVGetColumn(Vext,j,&ve));
891: PetscCall(BVGetColumn(proj->V1,j,&v));
892: PetscCall(NEPDeflationCopyToExtendedVec(extop,v,proj->V2+j*extop->szd,ve,PETSC_TRUE));
893: PetscCall(BVRestoreColumn(proj->V1,j,&v));
894: PetscCall(BVRestoreColumn(Vext,j,&ve));
895: }
896: V1 = proj->V1;
897: } else V1 = Vext;
899: /* Compute matrices V1^* A_i V1 */
900: PetscCall(BVSetActiveColumns(V1,j0,j1));
901: for (k=0;k<nep->nt;k++) {
902: PetscCall(DSGetMat(ds,DSMatExtra[k],&G));
903: PetscCall(BVMatProject(V1,nep->A[k],V1,G));
904: PetscCall(DSRestoreMat(ds,DSMatExtra[k],&G));
905: }
907: if (extop->n) {
908: if (extop->szd) {
909: /* Compute matrices V1^* A_i X and V1^* X */
910: PetscCall(BVSetActiveColumns(extop->W,0,n));
911: for (k=0;k<nep->nt;k++) {
912: PetscCall(BVMatMult(extop->X,nep->A[k],extop->W));
913: PetscCall(BVDot(extop->W,V1,proj->V1pApX[k]));
914: }
915: PetscCall(BVDot(V1,extop->X,proj->XpV1));
916: }
917: }
918: PetscFunctionReturn(PETSC_SUCCESS);
919: }