Actual source code: cycliccuda.cu
slepc-3.19.0 2023-03-31
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: SLEPc singular value solver: "cyclic" (CUDA implementation)
12: */
13: #include <slepc/private/svdimpl.h>
14: #include "../src/svd/impls/cyclic/cyclic.h"
16: PetscErrorCode MatMult_Cyclic_CUDA(Mat B,Vec x,Vec y)
17: {
18: SVD_CYCLIC_SHELL *ctx;
19: const PetscScalar *d_px;
20: PetscScalar *d_py;
21: PetscInt m;
23: PetscFunctionBegin;
24: PetscCall(MatShellGetContext(B,&ctx));
25: PetscCall(MatGetLocalSize(ctx->A,&m,NULL));
26: PetscCall(VecCUDAGetArrayRead(x,&d_px));
27: PetscCall(VecCUDAGetArrayWrite(y,&d_py));
28: PetscCall(VecCUDAPlaceArray(ctx->x1,d_px));
29: PetscCall(VecCUDAPlaceArray(ctx->x2,d_px+m));
30: PetscCall(VecCUDAPlaceArray(ctx->y1,d_py));
31: PetscCall(VecCUDAPlaceArray(ctx->y2,d_py+m));
32: PetscCall(MatMult(ctx->A,ctx->x2,ctx->y1));
33: PetscCall(MatMult(ctx->AT,ctx->x1,ctx->y2));
34: PetscCall(VecCUDAResetArray(ctx->x1));
35: PetscCall(VecCUDAResetArray(ctx->x2));
36: PetscCall(VecCUDAResetArray(ctx->y1));
37: PetscCall(VecCUDAResetArray(ctx->y2));
38: PetscCall(VecCUDARestoreArrayRead(x,&d_px));
39: PetscCall(VecCUDARestoreArrayWrite(y,&d_py));
40: PetscFunctionReturn(PETSC_SUCCESS);
41: }
43: PetscErrorCode MatMult_ECross_CUDA(Mat B,Vec x,Vec y)
44: {
45: SVD_CYCLIC_SHELL *ctx;
46: const PetscScalar *d_px;
47: PetscScalar *d_py;
48: PetscInt mn,m,n;
50: PetscFunctionBegin;
51: PetscCall(MatShellGetContext(B,&ctx));
52: PetscCall(MatGetLocalSize(ctx->A,NULL,&n));
53: PetscCall(VecGetLocalSize(y,&mn));
54: m = mn-n;
55: PetscCall(VecCUDAGetArrayRead(x,&d_px));
56: PetscCall(VecCUDAGetArrayWrite(y,&d_py));
57: PetscCall(VecCUDAPlaceArray(ctx->x1,d_px));
58: PetscCall(VecCUDAPlaceArray(ctx->x2,d_px+m));
59: PetscCall(VecCUDAPlaceArray(ctx->y1,d_py));
60: PetscCall(VecCUDAPlaceArray(ctx->y2,d_py+m));
61: PetscCall(VecCopy(ctx->x1,ctx->y1));
62: PetscCall(MatMult(ctx->A,ctx->x2,ctx->w));
63: PetscCall(MatMult(ctx->AT,ctx->w,ctx->y2));
64: PetscCall(VecCUDAResetArray(ctx->x1));
65: PetscCall(VecCUDAResetArray(ctx->x2));
66: PetscCall(VecCUDAResetArray(ctx->y1));
67: PetscCall(VecCUDAResetArray(ctx->y2));
68: PetscCall(VecCUDARestoreArrayRead(x,&d_px));
69: PetscCall(VecCUDARestoreArrayWrite(y,&d_py));
70: PetscFunctionReturn(PETSC_SUCCESS);
71: }