      AII(1:n*n,1:3*n) = AI(1:n*n,1:3*n,i)
      WI(1:3*n,1:3*n+1,i) = matmul(transpose(AII(1:n*n,1:3*n)),&
                                   ZI(1:n*n,1:3*n+1,i))
   end do
   call mpi_barrier(mpi_comm_world,ierr)
!  Gather each processor's WI contributions to the root.
   call mpi_gather(WI(1,1,bn),3*n*(3*n+1)*(en-bn+1),mpi_real,&
                   WI,3*n*(3*n+1)*(en-bn+1),mpi_real,0,&
                   mpi_comm_world,ierr)
   if (my_rank.eq.0) then
      Ahat(1:3*n,1:3*n) = Ahat(1:3*n,1:3*n)-&
                          WI(1:3*n,1:3*n,1)-WI(1:3*n,1:3*n,2)-&
                          WI(1:3*n,1:3*n,3)-WI(1:3*n,1:3*n,4)
      dhat(1:3*n) = dhat(1:3*n)-&
                    WI(1:3*n,1+3*n,1)-WI(1:3*n,1+3*n,2)-&
                    WI(1:3*n,1+3*n,3)-WI(1:3*n,1+3*n,4)
!     Solve the Schur complement system via GE.
      call gespd(Ahat(1:3*n,1:3*n),dhat(1:3*n),xO(1:3*n),3*n,1)
   end if
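!  The reduced problem solved above is the Schur complement system
!     (Ahat - sum_i WI(:,1:3*n,i)) xO = dhat - sum_i WI(:,3*n+1,i),
!  where, assuming ZI(:,:,i) holds A_i^{-1}[A_{i0} d_i] from the
!  preceding step, WI(:,:,i) = A_{i0}^T ZI(:,:,i) is processor i's
!  contribution to both the interface matrix and the right side.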
   call mpi_bcast(xO,3*n,mpi_real,0,mpi_comm_world,ierr)
!  Concurrently solve for the big blocks.
   do i = bn,en
!     Update the right side for the interface values, then solve the
!     i-th big block with cgssor3 (SSOR preconditioned conjugate gradient).
      dI(1:n*n,i) = AI(1:n*n,3*n+1,i)-&
                    matmul(AI(1:n*n,1:3*n,i),xO(1:3*n))
!     call gespd(AA,dI(1:n*n,i),xI(1:n*n,i),n*n,1)
      call cgssor3(dI(1:n*n,i),&
                   xI(1:n*n,i),n*n,1,n)
   end do
   call mpi_barrier(mpi_comm_world,ierr)
!  Gather the big block solutions to the root.
   call mpi_gather(xI(1,bn),n*n*(en-bn+1),mpi_real,&
                   xI,n*n*(en-bn+1),mpi_real,0,&
                   mpi_comm_world,ierr)
   call mpi_barrier(mpi_comm_world,ierr)
   if (my_rank.eq.0) then
      t1 = timef()
      print*, t1
      print*, xO(n/2),xO(n+n/2),xO(2*n+n/2)
      print*, xI(n*n/2,1),xI(n*n/2,2),&
              xI(n*n/2,3),xI(n*n/2,4)
   end if
   call mpi_finalize(ierr)
end program
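An aside on the two mpi_gather calls: they pass overlapping send and receive buffers on the root, which the MPI standard disallows; the portable fix is for the root to pass MPI_IN_PLACE as its send buffer. The following is a minimal, self-contained sketch of that pattern, where the array x, the slice size m, and the four-processor limit are illustrative assumptions, not part of the book's code.

program gather_in_place
   implicit none
   include 'mpif.h'
   integer, parameter :: m = 4       ! entries owned by each processor (assumed)
   real    :: x(16)                  ! room for up to 4 processors times m entries
   integer :: my_rank, p, ierr
   call mpi_init(ierr)
   call mpi_comm_rank(mpi_comm_world,my_rank,ierr)
   call mpi_comm_size(mpi_comm_world,p,ierr)
!  Each processor fills only its own slice of x.
   x(my_rank*m+1:my_rank*m+m) = real(my_rank)
   if (my_rank.eq.0) then
!     The root contributes its slice in place and receives the rest.
      call mpi_gather(mpi_in_place,m,mpi_real,&
                      x,m,mpi_real,0,mpi_comm_world,ierr)
   else
!     Other processors send their slice; the receive arguments are ignored.
      call mpi_gather(x(my_rank*m+1),m,mpi_real,&
                      x,m,mpi_real,0,mpi_comm_world,ierr)
   end if
   if (my_rank.eq.0) print*, x(1:p*m)
   call mpi_finalize(ierr)
end program

With this form the root's own values need not be re-sent, and the gather is correct even though the send and receive regions share storage.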
The code was run for 1, 2 and 4 processors with both the gespd() and cgssor3() subroutines for the four large solves, one $n^2 \times n^2$ block for each $1 \le i \le 4$. The computation times using gespd() were about 14 to 20 times longer than those using cgssor3().
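As a hedged aside, the following standard operation counts, which are estimates and not figures from the text, make the gap unsurprising: for an $N \times N$ system with $N = n^2$,

\[
\text{dense Gaussian elimination} \approx \tfrac{1}{3}N^{3} \ \text{flops},
\qquad
\text{one conjugate gradient iteration} = O(N) \ \text{flops},
\]

so a preconditioned iterative method such as cgssor3() does far less work per big block whenever the number of iterations stays small relative to $N$.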