wrfv2_fire/external/RSL/RSL/rsl_remap_state.c

   1 /***********************************************************************
   2
   3                               COPYRIGHT
   4
   5      The following is a notice of limited availability of the code and
   6      Government license and disclaimer which must be included in the
   7      prologue of the code and in all source listings of the code.
   8
   9      Copyright notice
  10        (c) 1977  University of Chicago
  11
  12      Permission is hereby granted to use, reproduce, prepare
  13      derivative works, and to redistribute to others at no charge.  If
  14      you distribute a copy or copies of the Software, or you modify a
  15      copy or copies of the Software or any portion of it, thus forming
  16      a work based on the Software and make and/or distribute copies of
  17      such work, you must meet the following conditions:
  18
  19           a) If you make a copy of the Software (modified or verbatim)
  20              it must include the copyright notice and Government
  21              license and disclaimer.
  22
  23           b) You must cause the modified Software to carry prominent
  24              notices stating that you changed specified portions of
  25              the Software.
  26
  27      This software was authored by:
  28
  29      Argonne National Laboratory
  30      J. Michalakes: (630) 252-6646; email: michalak@mcs.anl.gov
  31      Mathematics and Computer Science Division
  32      Argonne National Laboratory, Argonne, IL  60439
  33
  34      ARGONNE NATIONAL LABORATORY (ANL), WITH FACILITIES IN THE STATES
  35      OF ILLINOIS AND IDAHO, IS OWNED BY THE UNITED STATES GOVERNMENT,
  36      AND OPERATED BY THE UNIVERSITY OF CHICAGO UNDER PROVISION OF A
  37      CONTRACT WITH THE DEPARTMENT OF ENERGY.
  38
  39                       GOVERNMENT LICENSE AND DISCLAIMER
  40
  41      This computer code material was prepared, in part, as an account
  42      of work sponsored by an agency of the United States Government.
  43      The Government is granted for itself and others acting on its
  44      behalf a paid-up, nonexclusive, irrevocable worldwide license in
  45      this data to reproduce, prepare derivative works, distribute
  46      copies to the public, perform publicly and display publicly, and
  47      to permit others to do so.  NEITHER THE UNITED STATES GOVERNMENT
  48      NOR ANY AGENCY THEREOF, NOR THE UNIVERSITY OF CHICAGO, NOR ANY OF
  49      THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
  50      ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
  51      COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS,
  52      PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD
  53      NOT INFRINGE PRIVATELY OWNED RIGHTS.
  54
  55 ***************************************************************************/
  56
  57 /* #define NOPACK   */
  58 /* #define NOUNPACK */
  59 /*
  60    rsl_remap_state
  61
  62    Called to move partitions in memory and between processors
  63    after a domain is re-decomposed.  Assumes that a state vector
  64    has been associated with a domain.  If not, returns with warning.
  65
  66  */
  67 #include <stdio.h>
  68 #include <stdlib.h>
  69 #include "rsl.h"
  70
  71 extern rsl_list_t *point_move_receives[] ;      /* decomp.c */
  72 extern rsl_list_t *point_move_sends[] ;
  73
  74 int debuggal_pack = 0 ;
  75
  76 static int sendsize[RSL_MAXPROC] ;
  77 static int recvsize[RSL_MAXPROC] ;
  78 static int recvtag[RSL_MAXPROC] ;
  79 static int recvnpts[RSL_MAXPROC] ;
  80
  81 /*@
  82   RSL_REMAP_STATE --- Use previously defined state vectors to remap grid points.
  83
  84   Notes:
  85   This routine is called at the point in a remapping at which two state vectors
  86   --- one for the source data structures and one for the destination
  87   data structures of a domain --- have
  88   been defined for RSL using RSL_DESCRIBE_STATE.  Remapping can be done in-place
  89   (both state vectors may refer to the same data structures).  Also, a
  90   new decomposition should have been given to RSL using RSL_FDECOMPOSE.
  91
  92   When this routine is called, RSL computes the difference between the
  93   old and new mappings and constructs a schedule of points that need
  94   to be moved between processors.  Using the schedule, it packs data
  95   for moving grid points into messages, exchanges the messages between
  96   processors, then unpacks the messages into their new locations.
  97   Points may be moved in a processor's memory, even if the
  98   points are not communicated (this allows RSL to make room for
  99   an influx of points in the subdomain, if necessary).  RSL bases
 100   packing and unpacking on the state vectors that have been previously
 101   defined.
 102
 103   On return, old domain data structures may be discarded (assuming the
 104   remapping has not been done in-place), and computation may resume.
 105   All subsequent horizontal iteration, stencil-exchange, and
 106   broadcast-merge communications will be over the new mapping.
 107   The stencils and broadcast merges will automatically reconfigure
 108   themselves the first time they are used on the new mapping.  However,
 109   it is crucial that the loop macros (LoopMacros.m4) be re-initialized
 110   by an execution of RSL_INIT_RUNVARS, before iteration is begun.
 111   If the program is not using the loop macros and instead handling iteration
 112   explicitely, a new call to RSL_GET_RUN_INFO or RSL_GET_RUN_INFOP is
 113   required.  Iteration using the column-callable routines RSL_COMPUTE_CELLS
 114   and RSL_COMPUTE_MASK does not need to be re-initialized.
 115
 116   Example:
 117 $ C Construct state vector for current mapping.
 118 $   dcp(1) = rsl_northsouth ; dcp(2) = rsl_eastwest ; dcp(3) = rsl_notdecomposed
 119 $   gl(1)  = d%m            ; gl(2)  = d%n          ; gl(3)  = d%nlev
 120 $   call rsl_create_message(ms)
 121 $   call rsl_build_message(ms,rsl_real,d%psa,size(shape(d%psa)),
 122 $                          dcp,gl,shape(d%psa))
 123 $   call rsl_build_message(ms,rsl_real,d%ua,size(shape(d%ua)),
 124 $                          dcp,gl,shape(d%ua))
 125 $   call rsl_build_message(ms,rsl_real,d%va,size(shape(d%va)),
 126 $                          dcp,gl,shape(d%va))
 127 $   . . .
 128 $   call rsl_describe_state(did,ms)
 129 $ C
 130 $ C New decomposition.
 131 $   retval=rsl_fdecompose(did,mapping,p_lt,p_ln,timers,mloc,nloc)
 132 $ C
 133 $   if (retval .eq. 0 ) then
 134 $ C
 135 $ C Construct state vector for new mapping and associate with
 136 $ C newly allocated data structures.
 137 $     call allocate_domain(tmp,did,tmp%m,tmp%n,tmp%nlev,mloc,nloc)
 138 $     call rsl_create_message(ms)
 139 $     call rsl_build_message(ms,rsl_real,tmp%ua,size(shape(tmp%psa)),
 140 $                            dcp,gl,shape(tmp%psa))
 141 $     . . .
 142 $     call rsl_describe_state(did,ms)
 143 $ C
 144 $ C Effect the remapping
 145 $     call rsl_remap_state(did)
 146
 147 BREAKTHEEXAMPLECODE
 148
 149   This example is from the dynamic load balancing code in MM90,
 150   the Fortran90 implementation of the Penn State/NCAR MM5.
 151
 152   A state vector --- an RSL message definition that contains a list of
 153   all the fields that make up the state for a grid-column in the old
 154   decomposition --- is constructed with successive calls to
 155   RSL_BUILD_MESSAGE.  Then, the
 156   domain DID is decomposed using the MM90 routine, MAPPING, passed as a
 157   function to the RSL_FDECOMPOSE.  TIMERS is an MM90 array of timers for
 158   containing per-grid-column performance data that is used by MAPPING.
 159   TIMERS
 160   is passed directly to MAPPING when it is called from within RSL.
 161   RSL_FDECOMPOSE returns 0 for
 162   success if the new mapping improves on the current one.
 163   It also passes back MLOC and NLOC with the
 164   dimensions of arrays that will be needed to hold the arrays of the processor
 165   subdomain under the new decomposition.  Otherwise, the RSL_FDECOMPOSE
 166   returns a non-zero value, indicating that the program should continue
 167   to time step using the old decomposition.
 168
 169   If a new decomposition is adopted, MM90 allocates a new domain
 170   structure, TMP, using MLOC and NLOC.  This will hold the remapped
 171   data.  (RSL permits remapping in place, without resizing memory, but
 172   this places restrictions on how far the remapping algorithm can go in
 173   moving work around).  The code defines a new state vector identical to
 174   the previous one except that it is associated with the new fields of TMP.
 175
 176   The call to RSL_REMAP_STATE effects the remapping.  RSL compares the
 177   old and new mappings and generates the lists of moves between each
 178   processor.  RSL then uses the information in the first state
 179   vector to pack the columns to be moved into messages and sends the
 180   messages between the processors. On arrival, the incoming messages are
 181   unpacked using information from the second state vector.  When the call
 182   returns TMP contains the remapped state data. The program then
 183   uses pointer assignments (not shown) to swap old and new data into the
 184   D domain, then the old data structures are deallocated.  In the end, D
 185   points to the structure with the remapped data and the model resumes
 186   time stepping on the domain under the new mapping.
 187
 188   See also:
 189   RSL_DESCRIBE_STATE, RSL_FDECOMPOSE, LoopMacros.m4
 190 @*/
 191
 192 RSL_REMAP_STATE ( d_p )
 193   int_p d_p ;   /* (I) RSL domain descriptor. */
 194 {
 195   message_desc_t *old, *new ;
 196   rsl_domain_info_t * dinfo ;
 197   rsl_list_t *lp ;
 198   int m1, m2, msize, size, npts, curs ;
 199   int mtype, mdest ;
 200   int isaved, jsaved ;
 201   char *pbuf ;
 202   int P ;
 203   int d ;
 204   int i, id, ig, jg, d1 ;
 205
 206   d = *d_p ;
 207
 208   RSL_TEST_ERR( d < 0 || d >= RSL_MAXDOMAINS,
 209     "rsl_remap_state: bad domain descriptor\n") ;
 210   dinfo = &(domain_info[d]) ;
 211   RSL_TEST_ERR(dinfo->valid != RSL_VALID,
 212     "rsl_remap_state: descriptor is not for a valid domain\n") ;
 213
 214
 215   /* get old and new state vectors */
 216
 217   RSL_TEST_ERR((old = dinfo->old_state_vect) == NULL,
 218         "no state message previously associated with domain" );
 219   RSL_TEST_ERR((new = dinfo->new_state_vect) == NULL,
 220         "no state message associated with domain" ) ;
 221
 222   /* figure out size and post a recieve for each processor in the receive list */
 223   m1 = message_size( new ) ;
 224   m2 = message_size( old ) ;
 225   if ( m1 != m2 )
 226   {
 227     sprintf(mess,
 228     "old (%d) and new (%d) state vectors cannot be different sizes.",m2,m1) ;
 229     RSL_TEST_ERR(1,mess) ;
 230   }
 231   msize = m1 ;
 232
 233   for ( P = 0 ; P < rsl_nproc_all ; P++ )
 234   {
 235     size = 0 ;
 236     npts = 0 ;
 237     for ( lp = point_move_receives[P] ; lp != NULL ; lp = lp->next )
 238     {
 239 #ifdef crayx1
 240       size += msize + 2*sizeof(int) ;  /* size plus int for ig and jg */
 241 #else
 242       size += msize + 2*sizeof(short) ;  /* size plus shorts for ig and jg */
 243 #endif
 244       npts++ ;
 245     }
 246     if ( size > 0 )
 247     {
 248       if ( rsl_c_comp2phys_proc(P) != rsl_myproc )
 249       {
 250         pbuf = buffer_for_proc( P, size, RSL_RECVBUF ) ;
 251         mtype = MTYPE_FROMTO( MSG_REDISTCOM,
 252                               rsl_c_comp2phys_proc(P),
 253                               rsl_myproc ) ;
 254         RSL_RECVBEGIN( pbuf, size, mtype ) ;
 255         recvsize[P] = size ;
 256         recvtag[P] = mtype ;
 257         recvnpts[P] = npts ;
 258       }
 259       else
 260       {
 261         recvsize[P] = size ;
 262         recvtag[P] = mtype ;
 263         recvnpts[P] = npts ;
 264       }
 265     }
 266     else
 267     {
 268       recvsize[P] = 0 ;
 269       recvnpts[P] = 0 ;
 270       recvtag[P] = RSL_INVALID ;
 271     }
 272   }
 273
 274   isaved = dinfo->ilocaloffset ;
 275   jsaved = dinfo->jlocaloffset ;
 276   dinfo->ilocaloffset = dinfo->old_ilocaloffset ;
 277   dinfo->jlocaloffset = dinfo->old_jlocaloffset ;
 278
 279   debuggal_pack = 0 ;
 280
 281   /* pack and send messages to each processor in the send list --
 282      buffer the ones for me */
 283   for ( P = 0 ; P < rsl_nproc_all ; P++ )
 284   {
 285     size = 0 ;
 286     npts = 0 ;
 287     for ( lp = point_move_sends[P] ; lp != NULL ; lp = lp->next )
 288     {
 289 #ifdef crayx1
 290       size += msize + 2*sizeof(int) ;
 291 #else
 292       size += msize + 2*sizeof(short) ;
 293 #endif
 294       npts++ ;
 295     }
 296     if ( size > 0 )
 297     {
 298       pbuf = buffer_for_proc( P, size, RSL_SENDBUF ) ;
 299     }
 300     curs = 0 ;
 301     for ( lp = point_move_sends[P] ; lp != NULL ; lp = lp->next )
 302     {
 303 #ifdef crayx1
 304       bcopy( &(lp->info1), &(pbuf[curs]), sizeof(int)) ;        /* point id */
 305       curs += sizeof(int) ;
 306       bcopy( &(lp->info2), &(pbuf[curs]), sizeof(int)) ;        /* point id */
 307       curs += sizeof(int) ;
 308 #else
 309       bcopy( &(lp->info1), &(pbuf[curs]), sizeof(short)) ;      /* point id */
 310       curs += sizeof(short) ;
 311       bcopy( &(lp->info2), &(pbuf[curs]), sizeof(short)) ;      /* point id */
 312       curs += sizeof(short) ;
 313 #endif
 314       ig = lp->info1 ;
 315       jg = lp->info2 ;
 316 #ifndef NOPACK
 317       pack_message( old, pbuf, &curs, d, ig, jg ) ;
 318 #else
 319       curs = size ;
 320 #endif
 321       RSL_TEST_ERR(curs > size, "Buffer overflow") ;
 322     }
 323
 324     if ( curs > 0 )
 325     {
 326       if ( rsl_myproc != rsl_c_comp2phys_proc(P) )
 327       {
 328         mtype = MTYPE_FROMTO( MSG_REDISTCOM,
 329                               rsl_myproc,
 330                               rsl_c_comp2phys_proc(P) ) ;
 331         mdest = rsl_c_comp2phys_proc (P) ;
 332         RSL_SEND( pbuf, curs, mtype, mdest ) ;
 333       }
 334       else
 335       {
 336         recvsize[P] = curs ;
 337         recvnpts[P] = npts ;
 338       }
 339     }
 340   }
 341   dinfo->ilocaloffset = isaved ;
 342   dinfo->jlocaloffset = jsaved ;
 343
 344   /* receive points from other processors and unpack in new position */
 345   for ( P = 0 ; P < rsl_nproc_all ; P++ )
 346   {
 347     curs = 0 ;
 348     if ( recvsize[P] > 0 )
 349     {
 350       if ( rsl_c_comp2phys_proc( P ) != rsl_myproc )
 351       {
 352         RSL_RECVEND( recvtag[P] ) ;
 353         pbuf = buffer_for_proc( P, recvsize[P], RSL_RECVBUF ) ;
 354       }
 355       else
 356       {
 357         pbuf = buffer_for_proc( P, recvsize[P], RSL_SENDBUF ) ;
 358       }
 359       for ( i = 0 ; i < recvnpts[P] ; i++ )
 360       {
 361 #ifdef crayx1
 362         int id ;
 363
 364         bcopy( &(pbuf[curs]), &id, sizeof(int)) ;       /* point id */
 365         curs += sizeof(int) ;
 366         ig = id ;
 367
 368         bcopy( &(pbuf[curs]), &id, sizeof(int)) ;       /* point id */
 369         curs += sizeof(int) ;
 370 #else
 371         short id ;
 372
 373         bcopy( &(pbuf[curs]), &id, sizeof(short)) ;     /* point id */
 374         curs += sizeof(short) ;
 375         ig = id ;
 376
 377         bcopy( &(pbuf[curs]), &id, sizeof(short)) ;     /* point id */
 378         curs += sizeof(short) ;
 379 #endif
 380         jg = id ;
 381
 382 #ifndef NOUNPACK
 383         unpack_message( new, pbuf, &curs, d, ig, jg ) ;
 384 #endif
 385       }
 386     }
 387   }
 388
 389   buffer_for_proc( rsl_c_phys2comp_proc(rsl_myproc), 0, RSL_FREEBUF ) ;
 390
 391   debuggal_pack = 0 ;
 392
 393 }
 394
 395 /*@
 396   RSL_DESCRIBE_STATE --- Describe a state vector for use in remappping.
 397
 398   Notes:
 399   This routine takes an RSL message, Arg2, and associates it with
 400   the domain specified by Arg1.   The message Arg2 is built using
 401   RSL_BUILD_MESSAGE.  The state vector is then stored internally
 402   within RSL and used in run-time remapping (RSL_REMAP_STATE) for
 403   dynamic load balancing.
 404
 405   RSL keeps a maximum of two state vectors internally, one representing
 406   a new state and one representing an old.  Each call to RSL_DESCRIBE_STATE
 407   installs Arg2 as the new new state vector, pushing the previous
 408   new state vector to the old position.   The previous old state vector
 409   is discarded.  RSL_REMAP_STATE uses the old state vector as it's
 410   guide for packing source data into messages
 411   for remapping.   The new state vector is used for unpacking into
 412   destination data structures.  See RSL_REMAP_STATE for a code example.
 413
 414   See also:
 415   RSL_REMAP_STATE, RSL_BUILD_MESSAGES
 416
 417
 418 @*/
 419
 420 RSL_DESCRIBE_STATE ( d_p, message )
 421   int_p d_p ;
 422   int_p message ;
 423 {
 424   int d ;
 425   int mh ;
 426   rsl_domain_info_t * dinfo ;
 427   message_desc_t *msg ;
 428
 429   d = *d_p ;
 430
 431   RSL_TEST_ERR( d < 0 || d >= RSL_MAXDOMAINS,
 432     "rsl_describe_state: bad domain descriptor\n") ;
 433   dinfo = &(domain_info[d]) ;
 434   RSL_TEST_ERR(dinfo->valid != RSL_VALID,
 435     "rsl_describe_state: descriptor is not for a valid domain\n") ;
 436
 437   mh = *message ;
 438   RSL_TEST_ERR( mh != RSL_INVALID && (mh < 0 || mh >=RSL_MAXDESCRIPTORS),
 439 "rsl_describe_state: bad message handle in list,\n  must be either valid message or RSL_INVALID") ;
 440   dinfo->old_state_vect = dinfo->new_state_vect ;
 441   if ( mh != RSL_INVALID )
 442   {
 443     RSL_TEST_ERR((msg = (message_desc_t *) mh_descriptors[ mh ])==NULL,
 444        "rsl_describe_state: handle does not describe an active message") ;
 445     RSL_TEST_ERR( msg->tag != MESSAGE_DESC,
 446        "rsl_describe_state: handle given in message list is not for an rsl mesage def" ) ;
 447     dinfo->new_state_vect = msg ;
 448   }
 449   else
 450   {
 451     dinfo->new_state_vect = NULL ;
 452   }
 453
 454 /* If there was not an old state, make the old state the dup of the new state */
 455   if ( dinfo->old_state_vect == NULL )
 456   {
 457     dinfo->old_state_vect = dinfo->new_state_vect ;
 458   }
 459
 460   release_mh_descriptor( &mh ) ;
 461
 462 }
 463
 464 void *
 465 myloc( x )
 466   void * x ;
 467 {
 468   return( x ) ;
 469 }
 470