SCALE-RM
scale_rdma.c
Go to the documentation of this file.
1 /*
2  * rdma.c
3  *
4  * Created on: 2012/01/16
5  * Author: ohno
6  */
7 
8 #include <stdlib.h>
9 #include "rdma.h"
10 #include <mpi.h>
11 #include <mpi-ext.h>
12 
/* Neighbour direction indices. They index remote_sf[], sending[] and the
 * second dimension of rvar[][]; BEARING_CNT is the number of directions. */
14 #define WEST 0
15 #define NORTH 1
16 #define EAST 2
17 #define SOUTH 3
18 #define BEARING_CNT 4
19 
/* Number of entries allocated for memid[], lvar[] and rvar[] by the setup routine. */
20 int32_t COMM_vsize_max ;
21 
/* Array extents (IA, JA, KA), halo widths and index bounds; all are copied
 * from the setup routine's arguments. */
22 int32_t IA, JA, KA ;
23 int32_t IHALO, JHALO;
24 int32_t IS, IE, JS, JE ;
/* Byte offset of element (Z,X,Y) in an array of var_t in which Z varies
 * fastest, X has stride KA and Y has stride KA*IA. */
25 #define offset(Z,X,Y) (sizeof(var_t)*((Z)+(X)*(KA)+(Y)*(KA)*(IA)))
/* NOTE(review): listing lines 27 and 29 are absent here; the index at the end
 * of this file places datasize_NS/datasize_WE (size_t) at line 27 and
 * RANK_W/RANK_N/RANK_E/RANK_S (int32_t) at line 29. */
26 
28 
30 
/* Tags passed to FJMPI_Rdma_put. The completion-polling loops in the put
 * routines only react to RDMA_TAG_TAIL. No use of RDMA_TAG_NUM_MAX is visible
 * in this listing. */
31 #define RDMA_TAG_NUM_MAX 15
32 #define RDMA_TAG 0
33 #define RDMA_TAG_TAIL 1
34 
/* Next memory id to hand to FJMPI_Rdma_reg_mem; incremented after each registration. */
35 int32_t memid_cnt;
36 
/* Per-variable tables indexed by variable id:
 *   memid[v]      memory id used when registering variable v
 *   lvar[v]       value returned by FJMPI_Rdma_reg_mem for variable v
 *   rvar[v][dir]  value returned by FJMPI_Rdma_get_remote_addr for the neighbour in direction dir */
37 int32_t *memid ;
38 uint64_t *lvar ;
39 uint64_t **rvar ;
40 
41 
/* Status flags: a zero-initialised array of BEARING_CNT*FLAG_CNT sf_t values
 * that the setup routine registers for RDMA under memory id memid_sf.
 * local_sf is the registration result; remote_sf[dir] is the matching address
 * obtained for each neighbour (0 when the neighbour is MPI_PROC_NULL). */
42 typedef uint64_t sf_t ;
43 volatile sf_t *status_flag;
44 int32_t memid_sf;
45 uint64_t local_sf;
46 uint64_t *remote_sf;
/* Flag kinds. REMOTE_PUT_DONE and LOCAL_RECV_DONE share the value 3, so they
 * name the same slot of status_flag. */
47 #define LOCAL_RECV_READY 0
48 #define REMOTE_RECV_READY 1
49 #define LOCAL_PUT_DONE 2
50 #define REMOTE_PUT_DONE 3
51 #define LOCAL_RECV_DONE 3
52 #define FLAG_CNT 4
/* sidx: index into status_flag, laid out flag-major (all directions of flag 0,
 * then flag 1, ...). soffset: the same position expressed in bytes. */
53 #define sidx(DIR,FLAG) ((DIR)+(BEARING_CNT)*(FLAG))
54 #define soffset(DIR,FLAG) (sizeof(sf_t)*sidx(DIR,FLAG))
55 
/* sending[dir] is set to TRUE/FALSE by the put routines and tested by their
 * completion loops. */
56 int64_t *sending;
57 #define FALSE 0
58 #define TRUE 1
59 
60 
/* Value compared against entries of status_flag in the put routines; set to 0
 * by the setup routine. No use of RDMA_PUT_ID_MAX is visible in this listing. */
61 volatile sf_t rdma_put_id ;
62 #define RDMA_PUT_ID_MAX 0xFFFFFFFF
63 
64 
/* FJMPI_Rdma_put flag sets per destination direction (local NIC, remote NIC,
 * path). The local NIC chosen here is the one whose completion queue the put
 * routines poll for that direction (north: NIC0, west: NIC1, south: NIC2,
 * east: NIC3). */
65 #define FJMPI_RDMA_PUT_FLAGS_TO_NORTH (FJMPI_RDMA_LOCAL_NIC0 | FJMPI_RDMA_REMOTE_NIC2 | FJMPI_RDMA_PATH0 )
66 #define FJMPI_RDMA_PUT_FLAGS_TO_SOUTH (FJMPI_RDMA_LOCAL_NIC2 | FJMPI_RDMA_REMOTE_NIC0 | FJMPI_RDMA_PATH0 )
67 #define FJMPI_RDMA_PUT_FLAGS_TO_EAST (FJMPI_RDMA_LOCAL_NIC3 | FJMPI_RDMA_REMOTE_NIC1 | FJMPI_RDMA_PATH0 )
68 #define FJMPI_RDMA_PUT_FLAGS_TO_WEST (FJMPI_RDMA_LOCAL_NIC1 | FJMPI_RDMA_REMOTE_NIC3 | FJMPI_RDMA_PATH0 )
69 
70 #include <stdio.h>
71 
/*
 * Setup routine (the index at the end of this file names it rdma_setup_).
 * NOTE(review): listing line 72, which carries the return type, the function
 * name and the opening parenthesis, is absent from this listing.
 *
 * Every argument is a pointer to a single int32_t that is dereferenced once
 * and stored in the file-scope variable of the same name without the _in
 * suffix. The routine then allocates the bookkeeping tables, initialises
 * FJMPI RDMA, registers the status-flag array and looks up each existing
 * neighbour's status-flag address.
 *
 * NOTE(review): none of the malloc/calloc results are checked, and nothing
 * visible in this listing frees them.
 */
73  const int32_t *COMM_vsize_max_in,
74  const int32_t *IA_in,
75  const int32_t *JA_in,
76  const int32_t *KA_in,
77  const int32_t *IHALO_in,
78  const int32_t *JHALO_in,
79  const int32_t *IS_in,
80  const int32_t *IE_in,
81  const int32_t *JS_in,
82  const int32_t *JE_in,
83  const int32_t *RANK_W_in,
84  const int32_t *RANK_N_in,
85  const int32_t *RANK_E_in,
86  const int32_t *RANK_S_in)
87 {
88  int v;
89 
  /* copy the scalar inputs into file-scope state */
90  COMM_vsize_max = *COMM_vsize_max_in ;
91  IA = *IA_in ;
92  JA = *JA_in ;
93  KA = *KA_in ;
94  IHALO = *IHALO_in ;
95  JHALO = *JHALO_in ;
96  IS = *IS_in ;
97  IE = *IE_in ;
98  JS = *JS_in ;
99  JE = *JE_in ;
100  RANK_W = *RANK_W_in ;
101  RANK_N = *RANK_N_in ;
102  RANK_E = *RANK_E_in ;
103  RANK_S = *RANK_S_in ;
104 
  /* bytes moved by one put: (IE-IS+1)*KA elements for north/south,
     IHALO*KA elements for west/east */
105  datasize_NS = sizeof(var_t) * (IE-IS+1) * KA ;
106  datasize_WE = sizeof(var_t) * IHALO * KA ;
107 
108  memid_cnt = 0;
109 
  /* per-variable tables, COMM_vsize_max entries each; every rvar[v] holds one
     slot per direction */
110  memid = (int32_t *) malloc(sizeof(int32_t) * COMM_vsize_max);
111  lvar = (uint64_t *) malloc(sizeof(uint64_t ) * COMM_vsize_max) ;
112  rvar = (uint64_t **) malloc(sizeof(uint64_t *) * COMM_vsize_max) ;
113  for(v=0; v<COMM_vsize_max; v++) rvar[v] = (uint64_t *) malloc(sizeof(uint64_t) * BEARING_CNT) ;
114 
  /* status flags and sending[] start zeroed (calloc); remote_sf is filled below */
115  status_flag = (sf_t *) calloc(BEARING_CNT*FLAG_CNT, sizeof(sf_t)) ;
116  remote_sf = (uint64_t *) malloc(sizeof(uint64_t) * BEARING_CNT) ;
117 
118  sending = (int64_t *) calloc(BEARING_CNT, sizeof(int64_t)) ;
119 
120  rdma_put_id = 0 ;
121 
122  FJMPI_Rdma_init() ;
123 
  /* register the status-flag array; it takes the first memory id (0) */
124  memid_sf = memid_cnt ;
125  local_sf = FJMPI_Rdma_reg_mem(memid_cnt, status_flag, sizeof(sf_t)*BEARING_CNT*FLAG_CNT) ;
126  memid_cnt++ ;
127 
  /* barrier between the local registration and the remote address lookups */
128  MPI_Barrier(MPI_COMM_WORLD) ;
129 
  /* look up each neighbour's status-flag address; 0 when there is no neighbour */
130  if( RANK_W != MPI_PROC_NULL )
131  remote_sf[WEST] = FJMPI_Rdma_get_remote_addr(RANK_W, memid_sf) ;
132  else
133  remote_sf[WEST] = 0 ;
134 
135  if( RANK_N != MPI_PROC_NULL )
136  remote_sf[NORTH] = FJMPI_Rdma_get_remote_addr(RANK_N, memid_sf) ;
137  else
138  remote_sf[NORTH] = 0 ;
139 
140  if( RANK_E != MPI_PROC_NULL )
141  remote_sf[EAST] = FJMPI_Rdma_get_remote_addr(RANK_E, memid_sf) ;
142  else
143  remote_sf[EAST] = 0 ;
144 
145  if( RANK_S != MPI_PROC_NULL )
146  remote_sf[SOUTH] = FJMPI_Rdma_get_remote_addr(RANK_S, memid_sf) ;
147  else
148  remote_sf[SOUTH] = 0 ;
149 
150 }
151 
152 
/*
 * Variable registration routine (the index at the end of this file names it
 * set_rdma_variable_).
 * NOTE(review): listing line 153, which carries the return type, the function
 * name and the opening parenthesis, is absent from this listing.
 *
 * var : start of the array to register; sizeof(var_t)*IA*JA*KA bytes are registered.
 * vid : pointer to the variable id used to index memid[], lvar[] and rvar[].
 *
 * NOTE(review): *vid is not range-checked against COMM_vsize_max, and
 * rvar[*vid][dir] is left unset for directions whose neighbour is MPI_PROC_NULL.
 */
154  const var_t *var,
155  const int32_t *vid )
156 {
157 
  /* register the array under the next free memory id and remember the result */
158  memid[*vid] = memid_cnt ;
159  lvar[*vid] = FJMPI_Rdma_reg_mem(memid_cnt, var, sizeof(var_t)*IA*JA*KA) ;
160  memid_cnt++ ;
161 
162 
  /* barrier between the local registration and the remote address lookups */
163  MPI_Barrier(MPI_COMM_WORLD) ;
164 
  /* fetch the address of the same variable on each existing neighbour */
165  if( RANK_W != MPI_PROC_NULL ) rvar[*vid][WEST] = FJMPI_Rdma_get_remote_addr(RANK_W, memid[*vid]) ;
166  if( RANK_N != MPI_PROC_NULL ) rvar[*vid][NORTH] = FJMPI_Rdma_get_remote_addr(RANK_N, memid[*vid]) ;
167  if( RANK_E != MPI_PROC_NULL ) rvar[*vid][EAST] = FJMPI_Rdma_get_remote_addr(RANK_E, memid[*vid]) ;
168  if( RANK_S != MPI_PROC_NULL ) rvar[*vid][SOUTH] = FJMPI_Rdma_get_remote_addr(RANK_S, memid[*vid]) ;
169 
170 }
171 
172 
/*
 * rdma_put_: for variables *vid .. *vid+*num-1, put slices of the local array
 * into the matching array of each existing neighbour (north, south, west,
 * east) with FJMPI_Rdma_put, then poll the NIC completion queues until no
 * direction is marked in sending[].
 *
 * vid : pointer to the first variable id (index into lvar[] / rvar[]).
 * num : pointer to the number of consecutive variables to send.
 *
 * Per direction: JHALO puts of datasize_NS bytes for north/south, and one put
 * of datasize_WE bytes for each j in [JS-1, JE) for west/east.
 *
 * NOTE(review): several listing lines inside this function are absent (for
 * example 178, 181-184, 188-189, 219-220, 224-225, 238, 355-358), including
 * put arguments, the second condition of each direction and the end of the
 * "send data" do-loop. They are not reconstructed here.
 */
173 void rdma_put_(const int32_t *vid, const int32_t *num)
174 {
175  struct FJMPI_Rdma_cq cq ;
176  int j , v;
177 
179 
180  /* set status_flag(recv) */
  /* one sf_t-sized, strongly ordered put per existing neighbour */
185 
186  if( RANK_S != MPI_PROC_NULL )
187  FJMPI_Rdma_put(RANK_S, RDMA_TAG,
190  sizeof(sf_t),
191  FJMPI_RDMA_PUT_FLAGS_TO_SOUTH | FJMPI_RDMA_STRONG_ORDER ) ;
192 
193  if( RANK_N != MPI_PROC_NULL )
194  FJMPI_Rdma_put(RANK_N, RDMA_TAG,
197  sizeof(sf_t),
198  FJMPI_RDMA_PUT_FLAGS_TO_NORTH | FJMPI_RDMA_STRONG_ORDER ) ;
199 
200  if( RANK_E != MPI_PROC_NULL )
201  FJMPI_Rdma_put(RANK_E, RDMA_TAG,
204  sizeof(sf_t),
205  FJMPI_RDMA_PUT_FLAGS_TO_EAST | FJMPI_RDMA_STRONG_ORDER ) ;
206 
207  if( RANK_W != MPI_PROC_NULL )
208  FJMPI_Rdma_put(RANK_W, RDMA_TAG,
211  sizeof(sf_t),
212  FJMPI_RDMA_PUT_FLAGS_TO_WEST | FJMPI_RDMA_STRONG_ORDER ) ;
213 
214  /* send data */
215  do {
216  // to north
  /* no northern neighbour: nothing will be in flight in this direction */
217  if( status_flag[sidx(NORTH,LOCAL_PUT_DONE)] != rdma_put_id && RANK_N == MPI_PROC_NULL )
218  {
221  sending[NORTH] = FALSE ;
222  }
223 
226  {
  /* mark the direction as in flight; cleared by the completion loop below */
227  sending[NORTH] = TRUE ;
228 
229  // put_data
230  for(v=0; v<*num; v++)
231  {
232  for(j=0; j<JHALO; j++)
233  {
234  FJMPI_Rdma_put(RANK_N, RDMA_TAG,
235  rvar[*vid+v][NORTH]+offset(0,IS-1,j+JE),
236  lvar[*vid+v]+offset(0,IS-1,j+JS-1),
237  datasize_NS,
239  }
240  }
241 
242  // set status_flag(send)
  /* NOTE(review): the flag puts in this routine are issued with RDMA_TAG, while
     the completion loop only clears sending[] on RDMA_TAG_TAIL (rdma_put8_ tags
     its flag puts RDMA_TAG_TAIL). Lines are missing from this listing, so
     confirm against the full source before changing this. */
244  FJMPI_Rdma_put(RANK_N, RDMA_TAG,
247  sizeof(sf_t),
248  FJMPI_RDMA_PUT_FLAGS_TO_NORTH | FJMPI_RDMA_STRONG_ORDER ) ;
249  }
250 
251  // to south
252  if( status_flag[sidx(SOUTH,LOCAL_PUT_DONE)] != rdma_put_id && RANK_S == MPI_PROC_NULL )
253  {
256  sending[SOUTH] = FALSE;
257  }
258 
261  {
262  sending[SOUTH] = TRUE;
263 
264  // put_data
265  for(v=0; v<*num; v++)
266  {
267  for(j=0; j<JHALO; j++)
268  {
269  FJMPI_Rdma_put(RANK_S, RDMA_TAG,
270  rvar[*vid+v][SOUTH]+offset(0,IS-1,j+JS-JHALO-1),
271  lvar[*vid+v]+offset(0,IS-1,j+JE-JHALO),
272  datasize_NS,
274  }
275  }
276 
277  // set status_flag(send)
279  FJMPI_Rdma_put(RANK_S, RDMA_TAG,
282  sizeof(sf_t),
283  FJMPI_RDMA_PUT_FLAGS_TO_SOUTH | FJMPI_RDMA_STRONG_ORDER ) ;
284  }
285 
286  // to west
287  if( status_flag[sidx(WEST,LOCAL_PUT_DONE)] != rdma_put_id && RANK_W == MPI_PROC_NULL )
288  {
291  sending[WEST] = FALSE;
292  }
293 
296  {
297  sending[WEST] = TRUE;
298 
299  // put_data
300  for(v=0; v<*num; v++)
301  {
302  for(j=JS-1; j<JE; j++)
303  {
304  FJMPI_Rdma_put(RANK_W, RDMA_TAG,
305  rvar[*vid+v][WEST]+offset(0,IE,j),
306  lvar[*vid+v]+offset(0,IS-1,j),
307  datasize_WE,
309  }
310  }
311 
312  // set status_flag(send)
314  FJMPI_Rdma_put(RANK_W, RDMA_TAG,
317  sizeof(sf_t),
318  FJMPI_RDMA_PUT_FLAGS_TO_WEST | FJMPI_RDMA_STRONG_ORDER ) ;
319  }
320 
321  // to east
322  if( status_flag[sidx(EAST,LOCAL_PUT_DONE)] != rdma_put_id && RANK_E == MPI_PROC_NULL )
323  {
326  sending[EAST] = FALSE;
327  }
328 
331  {
  /* Mark EAST as in flight, like the other three directions. The previous
     text indexed sending[] with TRUE (== 1 == NORTH) and stored FALSE, which
     cleared the NORTH marker and left EAST untracked by the completion loop. */
332  sending[EAST] = TRUE;
333 
334  // put_data
335  for(v=0; v<*num; v++)
336  {
337  for(j=JS-1; j<JE; j++)
338  {
339  FJMPI_Rdma_put(RANK_E, RDMA_TAG,
340  rvar[*vid+v][EAST]+offset(0,IS-IHALO-1,j),
341  lvar[*vid+v]+offset(0,IE-IHALO,j),
342  datasize_WE,
344  }
345  }
346 
347  // set status_flag(send)
349  FJMPI_Rdma_put(RANK_E, RDMA_TAG,
352  sizeof(sf_t),
353  FJMPI_RDMA_PUT_FLAGS_TO_EAST | FJMPI_RDMA_STRONG_ORDER ) ;
354  }
359 
360  /* completion check (put) */
  /* drain each direction's local NIC completion queue; a notice tagged
     RDMA_TAG_TAIL clears that direction's sending[] marker */
361  do {
362  // to west
363  if( RANK_W != MPI_PROC_NULL ) {
364  while(FJMPI_Rdma_poll_cq(FJMPI_RDMA_NIC1, &cq) == FJMPI_RDMA_NOTICE ) {
365  if(cq.tag == RDMA_TAG_TAIL) sending[WEST] = FALSE ;
366  }
367  }
368  // to east
369  if( RANK_E != MPI_PROC_NULL ) {
370  while(FJMPI_Rdma_poll_cq(FJMPI_RDMA_NIC3, &cq) == FJMPI_RDMA_NOTICE ) {
371  if(cq.tag == RDMA_TAG_TAIL) sending[EAST] = FALSE ;
372  }
373  }
374  // to north
375  if( RANK_N != MPI_PROC_NULL ) {
376  while(FJMPI_Rdma_poll_cq(FJMPI_RDMA_NIC0, &cq) == FJMPI_RDMA_NOTICE ) {
377  if(cq.tag == RDMA_TAG_TAIL) sending[NORTH] = FALSE ;
378  }
379  }
380  // to south
381  if( RANK_S != MPI_PROC_NULL ) {
382  while(FJMPI_Rdma_poll_cq(FJMPI_RDMA_NIC2, &cq) == FJMPI_RDMA_NOTICE ) {
383  if(cq.tag == RDMA_TAG_TAIL) sending[SOUTH] = FALSE ;
384  }
385  }
386  } while(sending[WEST] || sending[NORTH] || sending[EAST] || sending[SOUTH] ) ;
387 
388  /* completion check (recv) */
393 
394 }
395 
/*
 * rdma_put8_: two-stage variant of rdma_put_ for variables *vid .. *vid+*num-1.
 *
 * Stage 1 issues the same north/south/west/east data puts as rdma_put_.
 * Stage 2 additionally puts rows j in [0, JHALO) (when a northern neighbour
 * exists) and j in [JE, JE+JHALO) (when a southern neighbour exists) to the
 * west and east neighbours; the original comments describe this as sending
 * data "from North and South to West and East".
 *
 * The west/east flag puts tagged RDMA_TAG_TAIL are issued only after stage 2,
 * then the NIC completion queues are polled until no direction is marked in
 * sending[].
 *
 * vid : pointer to the first variable id (index into lvar[] / rvar[]).
 * num : pointer to the number of consecutive variables to send.
 *
 * NOTE(review): several listing lines inside this function are absent (for
 * example 401, 404-407, 442, 446-447, 460, 534, 563, 566-569, 575-576,
 * 709-710), including put arguments and the end of the "send data" do-loop.
 */
396 void rdma_put8_(const int32_t *vid, const int32_t *num)
397 {
398  struct FJMPI_Rdma_cq cq ;
399  int j , v;
400 
402 
403  /* set status_flag(recv) */
  /* one sf_t-sized, strongly ordered put per existing neighbour */
408 
409  if( RANK_S != MPI_PROC_NULL )
410  FJMPI_Rdma_put(RANK_S, RDMA_TAG,
413  sizeof(sf_t),
414  FJMPI_RDMA_PUT_FLAGS_TO_SOUTH | FJMPI_RDMA_STRONG_ORDER ) ;
415 
416  if( RANK_N != MPI_PROC_NULL )
417  FJMPI_Rdma_put(RANK_N, RDMA_TAG,
420  sizeof(sf_t),
421  FJMPI_RDMA_PUT_FLAGS_TO_NORTH | FJMPI_RDMA_STRONG_ORDER ) ;
422 
423  if( RANK_E != MPI_PROC_NULL )
424  FJMPI_Rdma_put(RANK_E, RDMA_TAG,
427  sizeof(sf_t),
428  FJMPI_RDMA_PUT_FLAGS_TO_EAST | FJMPI_RDMA_STRONG_ORDER ) ;
429 
430  if( RANK_W != MPI_PROC_NULL )
431  FJMPI_Rdma_put(RANK_W, RDMA_TAG,
434  sizeof(sf_t),
435  FJMPI_RDMA_PUT_FLAGS_TO_WEST | FJMPI_RDMA_STRONG_ORDER ) ;
436 
437  /* send data */
438  do {
439  // to north
  /* no northern neighbour: nothing will be in flight in this direction */
440  if( status_flag[sidx(NORTH,LOCAL_PUT_DONE)] != rdma_put_id && RANK_N == MPI_PROC_NULL )
441  {
443  sending[NORTH] = FALSE ;
444  }
445 
448  {
  /* mark the direction as in flight; cleared by the completion loop below */
449  sending[NORTH] = TRUE ;
450 
451  // put_data
452  for(v=0; v<*num; v++)
453  {
454  for(j=0; j<JHALO; j++)
455  {
456  FJMPI_Rdma_put(RANK_N, RDMA_TAG,
457  rvar[*vid+v][NORTH]+offset(0,IS-1,j+JE),
458  lvar[*vid+v]+offset(0,IS-1,j+JS-1),
459  datasize_NS,
461 
462  }
463  }
464 
465  // set status_flag(send)
  /* tagged RDMA_TAG_TAIL so the completion loop can recognise the last put */
467  FJMPI_Rdma_put(RANK_N, RDMA_TAG_TAIL,
470  sizeof(sf_t),
471  FJMPI_RDMA_PUT_FLAGS_TO_NORTH | FJMPI_RDMA_STRONG_ORDER ) ;
472  }
473 
474  // to south
475  if( status_flag[sidx(SOUTH,LOCAL_PUT_DONE)] != rdma_put_id && RANK_S == MPI_PROC_NULL )
476  {
478  sending[SOUTH] = FALSE;
479  }
480 
483  {
484  sending[SOUTH] = TRUE;
485 
486  // put_data
487  for(v=0; v<*num; v++)
488  {
489  for(j=0; j<JHALO; j++)
490  {
491  FJMPI_Rdma_put(RANK_S, RDMA_TAG,
492  rvar[*vid+v][SOUTH]+offset(0,IS-1,j+JS-JHALO-1),
493  lvar[*vid+v]+offset(0,IS-1,j+JE-JHALO),
494  datasize_NS,
496  }
497  }
498 
499  // set status_flag(send)
501  FJMPI_Rdma_put(RANK_S, RDMA_TAG_TAIL,
504  sizeof(sf_t),
505  FJMPI_RDMA_PUT_FLAGS_TO_SOUTH | FJMPI_RDMA_STRONG_ORDER ) ;
506  }
507 
508  // to west
509  if( status_flag[sidx(WEST,LOCAL_PUT_DONE)] != rdma_put_id && RANK_W == MPI_PROC_NULL )
510  {
512  sending[WEST] = FALSE ;
513  }
514 
517  {
518  sending[WEST] = TRUE ;
519 
520  // put_data
521  for(v=0; v<*num; v++)
522  {
523  for(j=JS-1; j<JE; j++)
524  {
525  FJMPI_Rdma_put(RANK_W, RDMA_TAG,
526  rvar[*vid+v][WEST]+offset(0,IE,j),
527  lvar[*vid+v]+offset(0,IS-1,j),
528  datasize_WE,
530  }
531  }
532 
533  // set status_flag(send)
  /* no flag put is visible here; the west RDMA_TAG_TAIL put appears after stage 2 */
535  }
536 
537  // to east
538  if( status_flag[sidx(EAST,LOCAL_PUT_DONE)] != rdma_put_id && RANK_E == MPI_PROC_NULL )
539  {
541  sending[EAST] = FALSE ;
542  }
543 
546  {
547  sending[EAST] = TRUE ;
548 
549  // put_data
550  for(v=0; v<*num; v++)
551  {
552  for(j=JS-1; j<JE; j++)
553  {
554  FJMPI_Rdma_put(RANK_E, RDMA_TAG,
555  rvar[*vid+v][EAST]+offset(0,IS-IHALO-1,j),
556  lvar[*vid+v]+offset(0,IE-IHALO,j),
557  datasize_WE,
559  }
560  }
561 
562  // set status_flag(send)
  /* no flag put is visible here; the east RDMA_TAG_TAIL put appears after stage 2 */
564  }
565 
570 
571  /* completion check (recv from NORTH and SOUTH) */
  /* with no neighbour there is nothing to receive: mark the receive as done */
572  if( RANK_N == MPI_PROC_NULL ) status_flag[sidx(NORTH, LOCAL_RECV_DONE)] = rdma_put_id ;
573  if( RANK_S == MPI_PROC_NULL ) status_flag[sidx(SOUTH, LOCAL_RECV_DONE)] = rdma_put_id ;
574 
577 
578  /* send data from North and South to West and East */
  /* the three branches per source differ only in which of the west/east
     neighbours exist; the put arguments are the same in each */
579  for(v=0; v<*num; v++)
580  {
581  // data from north
582  if( RANK_N != MPI_PROC_NULL ) {
583  if( RANK_W != MPI_PROC_NULL && RANK_E != MPI_PROC_NULL ) {
584  for(j=0; j<JHALO; j++)
585  {
586  FJMPI_Rdma_put(RANK_W, RDMA_TAG,
587  rvar[*vid+v][WEST]+offset(0,IE,j),
588  lvar[*vid+v]+offset(0,IS-1,j),
589  datasize_WE,
591  FJMPI_Rdma_put(RANK_E, RDMA_TAG,
592  rvar[*vid+v][EAST]+offset(0,IS-IHALO-1,j),
593  lvar[*vid+v]+offset(0,IE-IHALO,j),
594  datasize_WE,
596  }
597  }
598  else if( RANK_W != MPI_PROC_NULL ) {
599  for(j=0; j<JHALO; j++)
600  {
601  FJMPI_Rdma_put(RANK_W, RDMA_TAG,
602  rvar[*vid+v][WEST]+offset(0,IE,j),
603  lvar[*vid+v]+offset(0,IS-1,j),
604  datasize_WE,
606  }
607  }
608  else if( RANK_E != MPI_PROC_NULL ) {
609  for(j=0; j<JHALO; j++)
610  {
611  FJMPI_Rdma_put(RANK_E, RDMA_TAG,
612  rvar[*vid+v][EAST]+offset(0,IS-IHALO-1,j),
613  lvar[*vid+v]+offset(0,IE-IHALO,j),
614  datasize_WE,
616  }
617  }
618  }
619 
620  // data from south
621  if( RANK_S != MPI_PROC_NULL ) {
622  if( RANK_W != MPI_PROC_NULL && RANK_E != MPI_PROC_NULL ) {
623  for(j=JE; j<JE+JHALO; j++)
624  {
625  FJMPI_Rdma_put(RANK_W, RDMA_TAG,
626  rvar[*vid+v][WEST]+offset(0,IE,j),
627  lvar[*vid+v]+offset(0,IS-1,j),
628  datasize_WE,
630  FJMPI_Rdma_put(RANK_E, RDMA_TAG,
631  rvar[*vid+v][EAST]+offset(0,IS-IHALO-1,j),
632  lvar[*vid+v]+offset(0,IE-IHALO,j),
633  datasize_WE,
635  }
636  }
637  else if( RANK_W != MPI_PROC_NULL ) {
638  for(j=JE; j<JE+JHALO; j++)
639  {
640  FJMPI_Rdma_put(RANK_W, RDMA_TAG,
641  rvar[*vid+v][WEST]+offset(0,IE,j),
642  lvar[*vid+v]+offset(0,IS-1,j),
643  datasize_WE,
645  }
646  }
647  else if( RANK_E != MPI_PROC_NULL ) {
648  for(j=JE; j<JE+JHALO; j++)
649  {
650  FJMPI_Rdma_put(RANK_E, RDMA_TAG,
651  rvar[*vid+v][EAST]+offset(0,IS-IHALO-1,j),
652  lvar[*vid+v]+offset(0,IE-IHALO,j),
653  datasize_WE,
655  }
656  }
657  }
658  }
659 
  /* final west/east flag puts, tagged RDMA_TAG_TAIL for the completion loop */
660  if( RANK_W != MPI_PROC_NULL ) {
661  FJMPI_Rdma_put(RANK_W, RDMA_TAG_TAIL,
664  sizeof(sf_t),
665  FJMPI_RDMA_PUT_FLAGS_TO_WEST | FJMPI_RDMA_STRONG_ORDER ) ;
666  }
667 
668  if( RANK_E != MPI_PROC_NULL ) {
669  FJMPI_Rdma_put(RANK_E, RDMA_TAG_TAIL,
672  sizeof(sf_t),
673  FJMPI_RDMA_PUT_FLAGS_TO_EAST | FJMPI_RDMA_STRONG_ORDER ) ;
674  }
675 
676  /* completion check (put) */
  /* drain each direction's local NIC completion queue; a notice tagged
     RDMA_TAG_TAIL clears that direction's sending[] marker */
677  do {
678  // to west
679  if( RANK_W != MPI_PROC_NULL ) {
680  while(FJMPI_Rdma_poll_cq(FJMPI_RDMA_NIC1, &cq) == FJMPI_RDMA_NOTICE ) {
681  if(cq.tag == RDMA_TAG_TAIL) sending[WEST] = FALSE ;
682  }
683  }
684  // to east
685  if( RANK_E != MPI_PROC_NULL ) {
686  while(FJMPI_Rdma_poll_cq(FJMPI_RDMA_NIC3, &cq) == FJMPI_RDMA_NOTICE ) {
687  if(cq.tag == RDMA_TAG_TAIL) sending[EAST] = FALSE ;
688  }
689  }
690  // to north
691  if( RANK_N != MPI_PROC_NULL ) {
692  while(FJMPI_Rdma_poll_cq(FJMPI_RDMA_NIC0, &cq) == FJMPI_RDMA_NOTICE ) {
693  if(cq.tag == RDMA_TAG_TAIL) sending[NORTH] = FALSE ;
694  }
695  }
696  // to south
697  if( RANK_S != MPI_PROC_NULL ) {
698  while(FJMPI_Rdma_poll_cq(FJMPI_RDMA_NIC2, &cq) == FJMPI_RDMA_NOTICE ) {
699  if(cq.tag == RDMA_TAG_TAIL) sending[SOUTH] = FALSE ;
700  }
701  }
702  } while(sending[WEST] || sending[NORTH] || sending[EAST] || sending[SOUTH] ) ;
703 
704  /* completion check (recv from WEST and EAST) */
  /* with no neighbour there is nothing to receive: mark the receive as done */
705  if( RANK_W == MPI_PROC_NULL ) status_flag[sidx(WEST, LOCAL_RECV_DONE)] = rdma_put_id ;
706  if( RANK_E == MPI_PROC_NULL ) status_flag[sidx(EAST, LOCAL_RECV_DONE)] = rdma_put_id ;
707 
708 
711 }
712 
713 
volatile sf_t * status_flag
Definition: scale_rdma.c:43
int32_t RANK_N
Definition: scale_rdma.c:29
#define FALSE
Definition: scale_rdma.c:57
#define BEARING_CNT
Definition: scale_rdma.c:18
#define LOCAL_RECV_READY
Definition: scale_rdma.c:47
#define REMOTE_RECV_READY
Definition: scale_rdma.c:48
#define SOUTH
Definition: scale_rdma.c:17
#define LOCAL_RECV_DONE
Definition: scale_rdma.c:51
void set_rdma_variable_(const var_t *var, const int32_t *vid)
Definition: scale_rdma.c:153
void rdma_put_(const int32_t *vid, const int32_t *num)
Definition: scale_rdma.c:173
uint64_t local_sf
Definition: scale_rdma.c:45
int32_t memid_sf
Definition: scale_rdma.c:44
int32_t RANK_S
Definition: scale_rdma.c:29
volatile sf_t rdma_put_id
Definition: scale_rdma.c:61
int32_t JE
Definition: scale_rdma.c:24
int32_t RANK_W
Definition: scale_rdma.c:29
#define EAST
Definition: scale_rdma.c:16
#define soffset(DIR, FLAG)
Definition: scale_rdma.c:54
#define REMOTE_PUT_DONE
Definition: scale_rdma.c:50
size_t datasize_NS
Definition: scale_rdma.c:27
#define TRUE
Definition: scale_rdma.c:58
#define WEST
Definition: scale_rdma.c:14
size_t datasize_WE
Definition: scale_rdma.c:27
#define NORTH
Definition: scale_rdma.c:15
int32_t IA
Definition: scale_rdma.c:22
int32_t RANK_E
Definition: scale_rdma.c:29
void rdma_put8_(const int32_t *vid, const int32_t *num)
Definition: scale_rdma.c:396
uint64_t sf_t
Definition: scale_rdma.c:42
int32_t JA
Definition: scale_rdma.c:22
#define RDMA_TAG
Definition: scale_rdma.c:32
int32_t memid_cnt
Definition: scale_rdma.c:35
#define LOCAL_PUT_DONE
Definition: scale_rdma.c:49
#define FLAG_CNT
Definition: scale_rdma.c:52
int32_t JS
Definition: scale_rdma.c:24
#define FJMPI_RDMA_PUT_FLAGS_TO_NORTH
Definition: scale_rdma.c:65
real(rp), dimension(:,:,:), allocatable, public v
#define FJMPI_RDMA_PUT_FLAGS_TO_SOUTH
Definition: scale_rdma.c:66
#define RDMA_PUT_ID_MAX
Definition: scale_rdma.c:62
void rdma_setup_(const int32_t *COMM_vsize_max_in, const int32_t *IA_in, const int32_t *JA_in, const int32_t *KA_in, const int32_t *IHALO_in, const int32_t *JHALO_in, const int32_t *IS_in, const int32_t *IE_in, const int32_t *JS_in, const int32_t *JE_in, const int32_t *RANK_W_in, const int32_t *RANK_N_in, const int32_t *RANK_E_in, const int32_t *RANK_S_in)
Definition: scale_rdma.c:72
#define offset(Z, X, Y)
Definition: scale_rdma.c:25
int32_t IHALO
Definition: scale_rdma.c:23
int32_t COMM_vsize_max
Definition: scale_rdma.c:20
uint64_t * remote_sf
Definition: scale_rdma.c:46
int32_t IE
Definition: scale_rdma.c:24
uint64_t ** rvar
Definition: scale_rdma.c:39
int32_t JHALO
Definition: scale_rdma.c:23
#define sidx(DIR, FLAG)
Definition: scale_rdma.c:53
uint64_t * lvar
Definition: scale_rdma.c:38
#define FJMPI_RDMA_PUT_FLAGS_TO_WEST
Definition: scale_rdma.c:68
#define RDMA_TAG_TAIL
Definition: scale_rdma.c:33
int64_t * sending
Definition: scale_rdma.c:56
int32_t IS
Definition: scale_rdma.c:24
int32_t KA
Definition: scale_rdma.c:22
int32_t * memid
Definition: scale_rdma.c:37
#define FJMPI_RDMA_PUT_FLAGS_TO_EAST
Definition: scale_rdma.c:67