Go to the documentation of this file.
67 logical,
intent(in) :: execute_preprocess
68 logical,
intent(in) :: execute_model
70 integer :: num_bulkjob = 1
71 integer :: num_bulkjob_once = 1
72 logical :: bulkjob = .false.
73 integer :: num_iteration_bulk = 1
74 integer :: bulkjob_start_dirnum = 0
75 logical :: add_bulkjob_path = .false.
76 integer :: num_domain = 1
77 integer :: num_fail_tolerance = 1
78 integer :: freq_fail_check = 5
81 logical :: abort_all_jobs = .false.
82 logical :: log_split = .false.
83 logical :: color_reorder = .true.
84 logical :: failure_prc_manage = .false.
86 namelist / param_launcher / &
92 bulkjob_start_dirnum, &
104 integer :: universal_comm
105 integer :: universal_nprocs
106 integer :: universal_myrank
107 logical :: universal_master
108 character(len=H_LONG) :: universal_cnf_fname
110 integer :: global_comm
111 integer :: global_nprocs
113 integer :: id_bulkjob
115 logical :: use_fpm = .false.
117 integer :: local_comm
119 integer :: intercomm_parent
120 integer :: intercomm_child
121 character(len=5) :: path
137 integer :: ngpus, gpuid
138 ngpus = acc_get_num_devices(acc_device_nvidia)
139 if( universal_master )
write(*,*)
"*** Number of GPUs: ", min(ngpus, universal_nprocs)
140 if ( ngpus > 0 )
then
141 gpuid = mod(universal_myrank, ngpus)
142 call acc_set_device_num(gpuid, acc_device_nvidia)
147 if( universal_master )
write(*,*)
'*** Start Launch System for SCALE-RM'
157 conf_files(1) = universal_cnf_fname
161 read(fid,nml=param_launcher,iostat=ierr)
164 elseif( ierr > 0 )
then
165 if( universal_master )
write(*,*)
'xxx Not appropriate names in namelist PARAM_LAUNCHER. Check!'
171 if ( execute_preprocess &
172 .OR. execute_model )
then
173 if( universal_master )
write(*,*)
"*** Execute preprocess? : ", execute_preprocess
174 if( universal_master )
write(*,*)
"*** Execute model? : ", execute_model
176 if( universal_master )
write(*,*)
'xxx No execution. please check PARAM_LAUNCHER. STOP'
182 bulkjob = bulkjob .or. ( num_bulkjob > 1 )
184 global_nprocs = universal_nprocs
187 if ( num_bulkjob == 1 ) num_iteration_bulk = 1
188 num_bulkjob_once = ceiling( real(num_bulkjob) / num_iteration_bulk )
189 if ( mod(universal_nprocs,num_bulkjob_once) /= 0 )
then
190 if( universal_master )
write(*,*)
'xxx Total Num of Processes must be divisible by NUM_BULKJOB/NUM_ITERATION_BULK. Check!'
191 if( universal_master )
write(*,*)
'xxx Total Num of Processes = ', universal_nprocs
192 if( universal_master )
write(*,*)
'xxx NUM_BULKJOB = ', num_bulkjob
193 if( universal_master )
write(*,*)
'xxx NUM_ITERATION_BULK = ', num_iteration_bulk
194 if( universal_master )
write(*,*)
'xxx NUM_BULKJOB / NUM_ITERATION_BULK = ', num_bulkjob_once
198 global_nprocs = universal_nprocs / num_bulkjob_once
200 if( universal_master )
write(*,
'(1x,A,I5)')
"*** TOTAL # of BULK JOBS = ", num_bulkjob
201 if( universal_master )
write(*,
'(1x,A,I5)')
"*** # of BULK JOB for each iteration = ", num_bulkjob_once
202 if( universal_master )
write(*,
'(1x,A,I5)')
"*** Total # of PROCESS = ", universal_nprocs
203 if( universal_master )
write(*,
'(1x,A,I5)')
"*** # of PROCESS of each JOB = ", global_nprocs
205 if ( bulkjob_start_dirnum < 0 )
then
206 if( universal_master )
write(*,*)
'xxx BULKJOB_START_DIRNUM must >=0'
209 if ( bulkjob_start_dirnum + num_bulkjob -1 > 9999 )
then
210 if( universal_master )
write(*,*)
'xxx BULKJOB_START_DIRNUM + NUM_BULKJOB must <= 9999'
214 if ( failure_prc_manage )
then
215 if( universal_master )
write(*,
'(1x,A)')
"*** Available: Failure Process Management"
217 if ( num_fail_tolerance <= 0 )
then
218 if( universal_master )
write(*,*)
'xxx Num of Failure Processes must be positive number. Check!'
219 if( universal_master )
write(*,*)
'xxx NUM_FAIL_TOLERANCE = ', num_fail_tolerance
223 if ( num_fail_tolerance > num_bulkjob )
then
224 write(*,*)
'xxx NUM_FAIL_TOLERANCE is bigger than # of NUM_BLUKJOB'
225 write(*,*)
' set to be: NUM_FAIL_TOLERANCE <= NUM_BLUKJOB'
229 if ( num_domain > 1 )
then
230 if ( freq_fail_check >= 1 .or. num_fail_tolerance /= num_bulkjob )
then
231 write(*,*)
'xxx Full function of FPM is not available with online nesting.'
232 write(*,*)
' You can use this only to avoid job stop until all members finish.'
233 write(*,*)
' for this purpose, set: FREQ_FAIL_CHECK = 0'
234 write(*,*)
' NUM_FAIL_TOLERANCE == NUM_BULKJOB'
240 prc_bulkjob(1:num_bulkjob) = global_nprocs
243 call prc_mpisplit_bulk( universal_comm, &
250 call prc_global_setup( abort_all_jobs, &
255 if ( num_domain > 1 )
then
256 if( universal_master )
write(*,
'(1x,A,I5)')
"*** TOTAL DOMAIN NUMBER = ", num_domain
257 if( universal_master )
write(*,
'(1x,A,L5)')
"*** Flag of ABORT ALL JOBS = ", abort_all_jobs
261 call prc_mpisplit_nest( global_comm, &
270 call fpm_init( num_fail_tolerance, &
279 call prc_errhandler_setup( use_fpm, universal_master )
283 do itr = 1, num_iteration_bulk
285 if ( id_bulkjob > num_bulkjob )
exit
287 if( universal_master .and. num_iteration_bulk > 1 )
then
289 write(*,*)
'*** BULK ITERATION COUNT : ', itr,
'/', num_iteration_bulk
298 write(path,
'(I4.4,A)') id_bulkjob + bulkjob_start_dirnum,
"/"
303 if ( execute_preprocess )
then
304 call rm_prep( local_comm, &
305 conf_files(id_domain), &
310 if ( execute_model )
then
311 call rm_driver( local_comm, &
312 conf_files(id_domain), &
317 id_bulkjob = id_bulkjob + num_bulkjob_once
324 if( universal_master )
write(*,*)
'*** End Launch System for SCALE-RM'
subroutine, public prc_abort
Abort Process.
subroutine, public prc_universal_setup(comm, nprocs, myrank, ismaster)
setup MPI in universal communicator
subroutine, public prc_mpisplit_bulk(ORG_COMM_WORLD, NUM_BULKJOB, PRC_BULKJOB, debug, SUB_COMM_WORLD, ID_BULKJOB)
MPI Communicator Split (bulk job)
subroutine, public prc_global_setup(abortall, comm)
setup MPI in global communicator
subroutine, public io_set_universalrank(myrank, jobid, domainid)
Put for error log.
subroutine, public prc_mpisplit_nest(ORG_COMM_WORLD, NUM_DOMAIN, PRC_DOMAIN, debug, color_reorder, SUB_COMM_WORLD, ID_DOMAIN)
MPI Communicator Split (nesting)
subroutine, public launcher(EXECUTE_PREPROCESS, EXECUTE_MODEL)
subroutine, public prc_mpistart(comm)
Start MPI.
subroutine, public rm_driver(comm_world, cnf_fname, path, add_path)
Setup.
subroutine, public fpm_init(max_failure, polling_freq, universal_comm, global_comm, local_comm, num_member, global_root, use_fpm)
Initialize FPM.
integer, parameter, public prc_comm_null
subroutine, public prc_errhandler_setup(use_fpm, master)
Setup MPI error handler.
integer, dimension(prc_domain_nlim), public prc_global_root
root processes in global members
integer, parameter, public prc_domain_nlim
max depth of domains
module Launcher(a launcher of main routine)
module SCALE-RM (a main routine of regional model)
subroutine, public rm_prep(comm_world, cnf_fname, path, add_path)
Setup.
subroutine, public prc_mpifinish
Stop MPI peacefully.
character(len=h_long) function, public io_arg_getfname(is_master, allow_noconf)
get config filename from argument
integer function, public io_cnf_open(fname, is_master)
open config file