SCALE-RM
Functions/Subroutines | Variables
scale_fpm Module Reference

module FPM More...

Functions/Subroutines

subroutine, public fpm_init (max_failure, polling_freq, universal_comm, global_comm, local_comm, num_member, global_root, use_fpm)
 Initialize FPM. More...
 
subroutine, public fpm_polling (run_stat, stop_signal)
 Main system of FPM. More...
 

Variables

integer, public fpm_max_failure = 1
 
integer, public fpm_polling_freq = 5
 
logical, public fpm_alive
 

Detailed Description

module FPM

Description
Failure Process Management module
Author
Team SCALE

Function/Subroutine Documentation

◆ fpm_init()

subroutine, public scale_fpm::fpm_init ( integer, intent(in)  max_failure,
integer, intent(in)  polling_freq,
integer, intent(in)  universal_comm,
integer, intent(in)  global_comm,
integer, intent(in)  local_comm,
integer, intent(in)  num_member,
integer, dimension(:), intent(in)  global_root,
logical, intent(in)  use_fpm 
)

Initialize FPM.

Parameters
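[in] max_failure: failure threshold (fpm_polling sets stop_signal once the number of failed members reaches this value)
[in] polling_freq: polling frequency per time step (0: no polling)
[in] universal_comm: universal communicator
[in] global_comm: global communicator
[in] local_comm: local communicator
[in] num_member: number of members (NUM_BULKJOB)
[in] global_root: root rank of each member's global communicator, given as a rank in the universal communicator; these ranks act as the FPM managers
[in] use_fpm: FPM switch (.true. enables FPM)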

Definition at line 79 of file scale_fpm.F90.

79  implicit none
80  integer, intent(in) :: max_failure
81  integer, intent(in) :: polling_freq
82  integer, intent(in) :: universal_comm
83  integer, intent(in) :: global_comm
84  integer, intent(in) :: local_comm
85  integer, intent(in) :: num_member
86  integer, intent(in) :: global_root(:)
87  logical, intent(in) :: use_fpm
88 
89  integer, allocatable :: manager_list(:)
90  integer, allocatable :: exclude_list(:)
91  integer :: num_exclude
92  integer :: group_univ
93  integer :: group_manager
94  integer :: i, j, k
95  integer :: ierr
96 
97  !---------------------------------------------------------------------------
98 
99  fpm_alive = use_fpm
100  fpm_master = .false.
101  fpm_manager = .false.
102  fpm_num_member = num_member
103  fpm_max_failure = max_failure
104  fpm_polling_freq = polling_freq
105 
106  if ( fpm_alive ) then
107  fpm_universal_comm = universal_comm
108  call mpi_comm_rank( fpm_universal_comm, fpm_unv_myproc, ierr )
109  call mpi_comm_size( fpm_universal_comm, fpm_unv_nprocs, ierr )
110  fpm_global_comm = global_comm
111  call mpi_comm_rank( fpm_global_comm, fpm_glb_myproc, ierr )
112  call mpi_comm_size( fpm_global_comm, fpm_glb_nprocs, ierr )
113  fpm_local_comm = local_comm
114  fpm_local_master = 0
115  call mpi_comm_rank( fpm_local_comm, fpm_lcl_myproc, ierr )
116  call mpi_comm_size( fpm_local_comm, fpm_lcl_nprocs, ierr )
117 
118  if ( fpm_unv_myproc == fpm_manager_master ) fpm_master = .true.
119  if ( fpm_master ) write(*,*) ''
120  if ( fpm_master ) write(*,*) '*** Failure Procs Manager: available'
121  if ( fpm_master ) write(*,*) '*** Threshold of Failure Procs = ', fpm_max_failure
122  if ( fpm_master ) then
123  if ( fpm_polling_freq > 0 ) then
124  write(*,*) '*** FPM Polling Frequency per DT = ', fpm_polling_freq
125  else
126  write(*,*) '*** FPM: NO Polling'
127  endif
128  endif
129 
130  ! create manager communicator
131  allocate( manager_list(fpm_num_member) )
132  do i=1, fpm_num_member
133  manager_list(i) = global_root(i)
134  if ( fpm_unv_myproc == manager_list(i) ) fpm_manager = .true.
135  enddo
136 
137  num_exclude = fpm_unv_nprocs - fpm_num_member
138  allocate( exclude_list(num_exclude) )
139  j = 1
140  k = 1
141  do i=0, fpm_unv_nprocs-1
142  if ( i == manager_list(j) ) then
143  if ( j < fpm_num_member ) j = j + 1
144  else
145  exclude_list(k) = i
146  if ( k < num_exclude ) k = k + 1
147  endif
148  enddo
149 
150  call mpi_comm_group( fpm_universal_comm, &
151  group_univ, &
152  ierr )
153  call mpi_group_excl( group_univ, &
154  num_exclude, &
155  exclude_list, &
156  group_manager, &
157  ierr )
158  call mpi_comm_create( fpm_universal_comm, &
159  group_manager, &
160  fpm_manager_comm, &
161  ierr )
162 
163  allocate( fpm_running(fpm_num_member) )
164  allocate( fpm_lcl_running(fpm_lcl_nprocs) )
165  fpm_running(:) = .true.
166  fpm_lcl_running(:) = .true.
167  endif
168 

References fpm_alive, fpm_max_failure, and fpm_polling_freq.

Referenced by mod_launcher::launcher().

Here is the caller graph for this function:

◆ fpm_polling()

subroutine, public scale_fpm::fpm_polling ( logical, intent(in)  run_stat,
logical, intent(out)  stop_signal 
)

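Main polling routine of FPM: gathers the running status of every process within each member, gathers the per-member status on the master, counts the failed members, and broadcasts the resulting stop signal to all processes.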

Parameters
[in] run_stat: running status of the calling process
[out] stop_signal: exit signal (.true. once the number of failed members reaches fpm_max_failure)

Definition at line 176 of file scale_fpm.F90.

176  implicit none
177  logical, intent(in ) :: run_stat
178  logical, intent(out) :: stop_signal
179 
180  integer :: sendcounts, recvcounts
181  integer :: failcount
182  integer :: i
183  integer :: ierr
184 
185  logical :: local_stat
186  logical :: sendbuff
187  !---------------------------------------------------------------------------
188 
189  sendcounts = 1
190  recvcounts = 1
191  stop_signal = .false.
192  local_stat = .true.
193 
194  ! participants level
195  sendbuff = run_stat
196  call mpi_gather( sendbuff, &
197  sendcounts, &
198  mpi_logical, &
199  fpm_lcl_running(:), &
200  recvcounts, &
201  mpi_logical, &
202  fpm_local_master, &
203  fpm_local_comm, &
204  ierr )
205 
206  ! manager level
207  !-------------------------------------------<<<
208  if ( fpm_manager ) then
209  do i=1, fpm_lcl_nprocs
210  if ( .NOT. fpm_lcl_running(i) ) then
211  local_stat = .false.
212  exit
213  endif
214  enddo
215 
216  !call MPI_BARRIER(FPM_MANAGER_COMM, ierr)
217  sendbuff = local_stat
218  call mpi_gather( sendbuff, &
219  sendcounts, &
220  mpi_logical, &
221  fpm_running(:), &
222  recvcounts, &
223  mpi_logical, &
224  fpm_manager_master, &
225  fpm_manager_comm, &
226  ierr )
227 
228  ! master level
229  !=======================================<<<
230  if ( fpm_master ) then
231  failcount = 0
232  do i=1, fpm_num_member
233  if ( .NOT. fpm_running(i) ) then
234  failcount = failcount + 1
235  endif
236  enddo
237 
238  if ( failcount >= fpm_max_failure ) then
239  stop_signal = .true.
240  else
241  stop_signal = .false.
242  endif
243  endif
244  !========================================>>>
245  endif
246  !------------------------------------------->>>
247 
248  ! broadcast signal
249  call mpi_bcast( stop_signal, &
250  sendcounts, &
251  mpi_logical, &
252  fpm_manager_master, &
253  fpm_universal_comm, &
254  ierr )
255 

References fpm_max_failure.

Referenced by scale_prc::prc_mpifinish(), scale_prc::prc_timereorder(), and mod_rm_driver::rm_driver().

Here is the caller graph for this function:

Variable Documentation

◆ fpm_max_failure

integer, public scale_fpm::fpm_max_failure = 1

Definition at line 30 of file scale_fpm.F90.

30  integer, public :: FPM_MAX_FAILURE = 1

Referenced by fpm_init(), and fpm_polling().

◆ fpm_polling_freq

integer, public scale_fpm::fpm_polling_freq = 5

Definition at line 31 of file scale_fpm.F90.

31  integer, public :: FPM_POLLING_FREQ = 5

Referenced by fpm_init(), and mod_rm_driver::rm_driver().

◆ fpm_alive

logical, public scale_fpm::fpm_alive

Definition at line 32 of file scale_fpm.F90.

32  logical, public :: FPM_alive

Referenced by fpm_init(), scale_prc::prc_mpifinish(), scale_prc::prc_timereorder(), and mod_rm_driver::rm_driver().

scale_fpm::fpm_polling_freq
integer, public fpm_polling_freq
Definition: scale_fpm.F90:31
scale_fpm::fpm_alive
logical, public fpm_alive
Definition: scale_fpm.F90:32