SCALE-RM
scale_fpm.F90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
10 module scale_fpm
11  !-----------------------------------------------------------------------------
12  !
13  !++ used modules
14  !
15  use mpi
16  use scale_io
17  !-----------------------------------------------------------------------------
18  implicit none
19  !-----------------------------------------------------------------------------
20  !
21  !++ Public procedure
22  !
23  public :: fpm_init
24  public :: fpm_polling
25 
26  !-----------------------------------------------------------------------------
27  !
28  !++ Public parameters & variables
29  !
30  integer, public :: fpm_max_failure = 1
31  integer, public :: fpm_polling_freq = 5
32  logical, public :: fpm_alive
33 
34  !-----------------------------------------------------------------------------
35  !
36  !++ Private procedure
37  !
38 
39  !-----------------------------------------------------------------------------
40  !
41  !++ Private parameters & variables
42  !
43  integer, private, parameter :: fpm_manager_master = 0
44 
45  integer, private :: fpm_universal_comm
46  integer, private :: fpm_unv_myproc
47  integer, private :: fpm_unv_nprocs
48  integer, private :: fpm_global_comm
49  integer, private :: fpm_glb_myproc
50  integer, private :: fpm_glb_nprocs
51  integer, private :: fpm_local_comm
52  integer, private :: fpm_local_master
53  integer, private :: fpm_lcl_myproc
54  integer, private :: fpm_lcl_nprocs
55 
56  integer, private :: fpm_manager_comm
57  integer, private :: fpm_num_member
58 
59  logical, private :: fpm_master
60  logical, private :: fpm_manager
61  logical, private, allocatable :: fpm_running(:)
62  logical, private, allocatable :: fpm_lcl_running(:)
63 
64  !-----------------------------------------------------------------------------
65 contains
66  !-----------------------------------------------------------------------------
67 
68  !-----------------------------------------------------------------------------
70  subroutine fpm_init( &
71  max_failure, &
72  polling_freq, &
73  universal_comm, &
74  global_comm, &
75  local_comm, &
76  num_member, &
77  global_root, &
78  use_fpm )
79  implicit none
80  integer, intent(in) :: max_failure
81  integer, intent(in) :: polling_freq
82  integer, intent(in) :: universal_comm
83  integer, intent(in) :: global_comm
84  integer, intent(in) :: local_comm
85  integer, intent(in) :: num_member
86  integer, intent(in) :: global_root(:)
87  logical, intent(in) :: use_fpm
88 
89  integer, allocatable :: manager_list(:)
90  integer, allocatable :: exclude_list(:)
91  integer :: num_exclude
92  integer :: group_univ
93  integer :: group_manager
94  integer :: i, j, k
95  integer :: ierr
96 
97  !---------------------------------------------------------------------------
98 
99  fpm_alive = use_fpm
100  fpm_master = .false.
101  fpm_manager = .false.
102  fpm_num_member = num_member
103  fpm_max_failure = max_failure
104  fpm_polling_freq = polling_freq
105 
106  if ( fpm_alive ) then
107  fpm_universal_comm = universal_comm
108  call mpi_comm_rank( fpm_universal_comm, fpm_unv_myproc, ierr )
109  call mpi_comm_size( fpm_universal_comm, fpm_unv_nprocs, ierr )
110  fpm_global_comm = global_comm
111  call mpi_comm_rank( fpm_global_comm, fpm_glb_myproc, ierr )
112  call mpi_comm_size( fpm_global_comm, fpm_glb_nprocs, ierr )
113  fpm_local_comm = local_comm
114  fpm_local_master = 0
115  call mpi_comm_rank( fpm_local_comm, fpm_lcl_myproc, ierr )
116  call mpi_comm_size( fpm_local_comm, fpm_lcl_nprocs, ierr )
117 
118  if ( fpm_unv_myproc == fpm_manager_master ) fpm_master = .true.
119  if ( fpm_master ) write(*,*) ''
120  if ( fpm_master ) write(*,*) '*** Failure Procs Manager: available'
121  if ( fpm_master ) write(*,*) '*** Threshold of Failure Procs = ', fpm_max_failure
122  if ( fpm_master ) then
123  if ( fpm_polling_freq > 0 ) then
124  write(*,*) '*** FPM Polling Frequency per DT = ', fpm_polling_freq
125  else
126  write(*,*) '*** FPM: NO Polling'
127  endif
128  endif
129 
130  ! create manager communicator
131  allocate( manager_list(fpm_num_member) )
132  do i=1, fpm_num_member
133  manager_list(i) = global_root(i)
134  if ( fpm_unv_myproc == manager_list(i) ) fpm_manager = .true.
135  enddo
136 
137  num_exclude = fpm_unv_nprocs - fpm_num_member
138  allocate( exclude_list(num_exclude) )
139  j = 1
140  k = 1
141  do i=0, fpm_unv_nprocs-1
142  if ( i == manager_list(j) ) then
143  if ( j < fpm_num_member ) j = j + 1
144  else
145  exclude_list(k) = i
146  if ( k < num_exclude ) k = k + 1
147  endif
148  enddo
149 
150  call mpi_comm_group( fpm_universal_comm, &
151  group_univ, &
152  ierr )
153  call mpi_group_excl( group_univ, &
154  num_exclude, &
155  exclude_list, &
156  group_manager, &
157  ierr )
158  call mpi_comm_create( fpm_universal_comm, &
159  group_manager, &
160  fpm_manager_comm, &
161  ierr )
162 
163  allocate( fpm_running(fpm_num_member) )
164  allocate( fpm_lcl_running(fpm_lcl_nprocs) )
165  fpm_running(:) = .true.
166  fpm_lcl_running(:) = .true.
167  endif
168 
169  end subroutine fpm_init
170 
171  !-----------------------------------------------------------------------------
173  subroutine fpm_polling( &
174  run_stat, &
175  stop_signal )
176  implicit none
177  logical, intent(in ) :: run_stat
178  logical, intent(out) :: stop_signal
179 
180  integer :: sendcounts, recvcounts
181  integer :: failcount
182  integer :: i
183  integer :: ierr
184 
185  logical :: local_stat
186  logical :: sendbuff
187  !---------------------------------------------------------------------------
188 
189  sendcounts = 1
190  recvcounts = 1
191  stop_signal = .false.
192  local_stat = .true.
193 
194  ! participants level
195  sendbuff = run_stat
196  call mpi_gather( sendbuff, &
197  sendcounts, &
198  mpi_logical, &
199  fpm_lcl_running(:), &
200  recvcounts, &
201  mpi_logical, &
202  fpm_local_master, &
203  fpm_local_comm, &
204  ierr )
205 
206  ! manager level
207  !-------------------------------------------<<<
208  if ( fpm_manager ) then
209  do i=1, fpm_lcl_nprocs
210  if ( .NOT. fpm_lcl_running(i) ) then
211  local_stat = .false.
212  exit
213  endif
214  enddo
215 
216  !call MPI_BARRIER(FPM_MANAGER_COMM, ierr)
217  sendbuff = local_stat
218  call mpi_gather( sendbuff, &
219  sendcounts, &
220  mpi_logical, &
221  fpm_running(:), &
222  recvcounts, &
223  mpi_logical, &
224  fpm_manager_master, &
225  fpm_manager_comm, &
226  ierr )
227 
228  ! master level
229  !=======================================<<<
230  if ( fpm_master ) then
231  failcount = 0
232  do i=1, fpm_num_member
233  if ( .NOT. fpm_running(i) ) then
234  failcount = failcount + 1
235  endif
236  enddo
237 
238  if ( failcount >= fpm_max_failure ) then
239  stop_signal = .true.
240  else
241  stop_signal = .false.
242  endif
243  endif
244  !========================================>>>
245  endif
246  !------------------------------------------->>>
247 
248  ! broadcast signal
249  call mpi_bcast( stop_signal, &
250  sendcounts, &
251  mpi_logical, &
252  fpm_manager_master, &
253  fpm_universal_comm, &
254  ierr )
255 
256  end subroutine fpm_polling
257 
258 end module scale_fpm !failure_process_management
259 !-------------------------------------------------------------------------------
scale_fpm::fpm_polling
subroutine, public fpm_polling(run_stat, stop_signal)
Main system of FPM.
Definition: scale_fpm.F90:176
scale_fpm::fpm_max_failure
integer, public fpm_max_failure
Definition: scale_fpm.F90:30
scale_fpm::fpm_init
subroutine, public fpm_init(max_failure, polling_freq, universal_comm, global_comm, local_comm, num_member, global_root, use_fpm)
Initialize FPM.
Definition: scale_fpm.F90:79
scale_io
module STDIO
Definition: scale_io.F90:10
scale_fpm::fpm_polling_freq
integer, public fpm_polling_freq
Definition: scale_fpm.F90:31
scale_fpm::fpm_alive
logical, public fpm_alive
Definition: scale_fpm.F90:32
scale_fpm
module FPM
Definition: scale_fpm.F90:10