src/libs/ck-libs/ParFUM-Iterators/ParFUM_Iterators.cc

   1 /**
   2
   3   @file
   4   @brief Implementation of the non-iterator parts of the ParFUM-Iterators layer
   5
   6   @author Isaac Dooley
   7   @author Aaron Becker
   8
   9   @todo add code to generate ghost layers
  10   @todo Support multiple models
  11   @todo Specify element types to be used via input vector in meshModel_Create
  12   */
  13
  14
  15 #include "ParFUM_Iterators.h"
  16 #include "ParFUM_Iterators.decl.h"
  17 #include "ParFUM.h"
  18 #include "ParFUM_internals.h"
  19 #ifdef CUDA
  20     #include <cuda.h>
  21     #include <cuda_runtime.h>
  22 #endif
  23
  24 #include <stack>
  25 #include <sstream>
  26 #include <iostream>
  27
  28 #undef DEBUG
  29 #define DEBUG 0
  30
  31
  32
  33
  34 int tetFaces[] = {0,1,3,  0,2,1,  1,2,3,   0,3,2};
  35 int cohFaces[] = {0,1,2,  3,4,5};
  36
  37
  38 int lib_FP_Type_Size()
  39 {
  40     static const int LIB_FP_TYPE_SIZE = sizeof(FP_TYPE);
  41     return LIB_FP_TYPE_SIZE;
  42 }
  43
  44
  45 void mesh_set_device(MeshModel* m, MeshDevice d)
  46 {
  47   m->target_device = d;
  48 #if CUDA
  49   if(d == DeviceGPU){
  50     CkAssert(m->allocatedForCUDADevice);
  51   }
  52 #endif
  53 }
  54
  55
  56 MeshDevice mesh_target_device(MeshModel* m)
  57 {
  58     return m->target_device;
  59 }
  60
  61
  62 void fillIDHash(MeshModel* model)
  63 {
  64
  65     if(model->nodeIDHash == NULL)
  66         model->nodeIDHash = new CkHashtableT<CkHashtableAdaptorT<int>, int>;
  67
  68     if(model->elemIDHash == NULL)
  69         model->elemIDHash = new CkHashtableT<CkHashtableAdaptorT<int>, int>;
  70
  71     for(int i=0; i<model->node_id_T->size(); ++i){
  72         model->nodeIDHash->put((*model->node_id_T)(i,0)) = i+1;
  73     }
  74     for(int i=0; i<model->elem_id_T->size(); ++i){
  75         model->elemIDHash->put((*model->elem_id_T)(i,0)) = i+1;
  76     }
  77 }
  78
  79
  80 // Set the pointers in the model to point to the data stored by the ParFUM framework.
  81 // If the number of nodes or elements increases, then this function should be called
  82 // because the attribute arrays may have been resized, after which the old pointers
  83 // would be invalid.
  84 void setTableReferences(MeshModel* model, bool recomputeHash)
  85 {
  86     model->ElemConn_T = &((FEM_IndexAttribute*)model->mesh->elem[MESH_ELEMENT_TET4].lookup(FEM_CONN,""))->get();
  87     model->elem_id_T = &((FEM_DataAttribute*)model->mesh->elem[MESH_ELEMENT_TET4].lookup(ATT_ELEM_ID,""))->getInt();
  88     model->n2eConn_T = &((FEM_DataAttribute*)model->mesh->elem[MESH_ELEMENT_TET4].lookup(ATT_ELEM_N2E_CONN, ""))->getInt();
  89     model->node_id_T = &((FEM_DataAttribute*)model->mesh->node.lookup(ATT_NODE_ID,""))->getInt();
  90 #ifdef FP_TYPE_FLOAT
  91     model->coord_T = &((FEM_DataAttribute*)model->mesh->node.lookup(ATT_NODE_COORD, ""))->getFloat();
  92 #else
  93     model->coord_T = &((FEM_DataAttribute*)model->mesh->node.lookup(ATT_NODE_COORD, ""))->getDouble();
  94 #endif
  95     model->ElemData_T = &((FEM_DataAttribute*)model->mesh->elem[MESH_ELEMENT_TET4].lookup(ATT_ELEM_DATA,""))->getChar();
  96     FEM_Entity* ghost = model->mesh->elem[MESH_ELEMENT_TET4].getGhost();
  97     if (ghost) {
  98         model->GhostElemData_T = &((FEM_DataAttribute*)ghost->lookup(ATT_ELEM_DATA,""))->getChar();
  99     }
 100
 101     model->NodeData_T = &((FEM_DataAttribute*)model->mesh->node.lookup(ATT_NODE_DATA,""))->getChar();
 102     ghost = model->mesh->node.getGhost();
 103     if (ghost) {
 104         model->GhostNodeData_T = &((FEM_DataAttribute*)ghost->lookup(ATT_NODE_DATA,""))->getChar();
 105     }
 106
 107     if(model->nodeIDHash == NULL) {
 108         model->nodeIDHash = new CkHashtableT<CkHashtableAdaptorT<int>, int>;
 109     }
 110
 111     if(model->elemIDHash == NULL) {
 112         model->elemIDHash = new CkHashtableT<CkHashtableAdaptorT<int>, int>;
 113     }
 114
 115     if(recomputeHash) {
 116         fillIDHash(model);
 117     }
 118 }
 119
 120
 121 /** Create a model  before partitioning. Given the number of nodes per element.
 122
 123   After this call, the node data and element data CANNOT be set. They can only
 124   be set in the driver. If the user tries to set the attribute values, the
 125   call will be ignored.
 126
 127 */
 128 MeshModel* meshModel_Create_Init(){
 129     MeshModel* model = new MeshModel;
 130     memset((void*) model, 0, sizeof(MeshModel));
 131
 132     model->nodeIDHash = new CkHashtableT<CkHashtableAdaptorT<int>, int>;
 133     model->elemIDHash = new CkHashtableT<CkHashtableAdaptorT<int>, int>;
 134
 135     // This only uses a single mesh
 136     int which_mesh=FEM_Mesh_default_write();
 137     model->mesh = FEM_Mesh_lookup(which_mesh,"meshModel_Create_Init");
 138
 139     /** @note   Here we allocate the arrays with a single
 140       initial node and element, which are set as
 141       invalid. If no initial elements were provided.
 142       the AllocTable2d's would not ever get allocated,
 143       and insertNode or insertElement would fail.
 144       */
 145     char temp_array[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
 146
 147
 148     // Allocate node id array
 149     FEM_Mesh_data(which_mesh,FEM_NODE,ATT_NODE_ID,temp_array, 0, 1, FEM_INT, 1);
 150     // Allocate node coords
 151 #ifdef FP_TYPE_FLOAT
 152     FEM_Mesh_data(which_mesh,FEM_NODE,ATT_NODE_COORD,temp_array, 0, 1, FEM_FLOAT, 3);
 153 #else
 154     FEM_Mesh_data(which_mesh,FEM_NODE,ATT_NODE_COORD,temp_array, 0, 1, FEM_DOUBLE, 3);
 155 #endif
 156     FEM_Mesh_data(which_mesh,FEM_NODE,FEM_COORD,temp_array, 0, 1, FEM_DOUBLE, 3);  // Needed for shared node regeneration
 157     // Don't allocate the ATT_NODE_DATA array because it will be large
 158
 159     // Allocate element connectivity
 160     FEM_Mesh_data(which_mesh,FEM_ELEM+MESH_ELEMENT_TET4,FEM_CONN,temp_array, 0, 1, FEM_INDEX_0, 4);
 161     FEM_Mesh_data(which_mesh,FEM_ELEM+MESH_ELEMENT_COH3T3,FEM_CONN,temp_array, 0, 1, FEM_INDEX_0, 6);
 162
 163     // Allocate element id array
 164     FEM_Mesh_data(which_mesh,FEM_ELEM+MESH_ELEMENT_TET4,ATT_ELEM_ID,temp_array, 0, 1, FEM_INT, 1);
 165     FEM_Mesh_data(which_mesh,FEM_ELEM+MESH_ELEMENT_COH3T3,ATT_ELEM_ID,temp_array, 0, 1, FEM_INT, 1);
 166
 167
 168     // Don't allocate the ATT_ELEM_DATA array because it will be large
 169
 170     FEM_Mesh_allocate_valid_attr(which_mesh, FEM_NODE);
 171     FEM_Mesh_allocate_valid_attr(which_mesh, FEM_ELEM+MESH_ELEMENT_TET4);
 172     FEM_Mesh_allocate_valid_attr(which_mesh, FEM_ELEM+MESH_ELEMENT_COH3T3);
 173
 174     FEM_set_entity_invalid(which_mesh, FEM_NODE, 0);
 175     FEM_set_entity_invalid(which_mesh, FEM_ELEM+MESH_ELEMENT_TET4, 0);
 176     FEM_set_entity_invalid(which_mesh, FEM_ELEM+MESH_ELEMENT_COH3T3, 0);
 177
 178     // Setup the adjacency lists
 179     setTableReferences(model, true);
 180     return model;
 181 }
 182
 183 /** Get the mesh for use in the driver. It will be partitioned already.
 184
 185   In the driver routine, after getting the model from this function,
 186   the input data file should be reread to fill in the node and element
 187   data values which were not done in init.
 188
 189 */
 190 void meshModel_Create_Driver(MeshDevice target_device, int elem_attr_sz,
 191         int node_attr_sz, int model_attr_sz, void *mAtt, MeshModel &model) {
 192
 193     CkAssert(ATT_NODE_ID != FEM_COORD);
 194     CkAssert(ATT_NODE_DATA != FEM_COORD);
 195
 196     int partition = FEM_My_partition();
 197     if(haveConfigurableCPUGPUMap()){
 198         if(isPartitionCPU(partition))
 199             CkPrintf("partition %d is on CPU\n", partition);
 200         else
 201             CkPrintf("partition %d is on GPU\n", partition);
 202     }
 203
 204
 205     // This only uses a single mesh, so don't create multiple MeshModels of these
 206     CkAssert(elem_attr_sz > 0);
 207     CkAssert(node_attr_sz > 0);
 208     int which_mesh=FEM_Mesh_default_read();
 209
 210     memset((void *) &model, 0, sizeof(MeshModel));
 211
 212     model.target_device = target_device;
 213     model.elem_attr_size = elem_attr_sz;
 214     model.node_attr_size = node_attr_sz;
 215     model.model_attr_size = model_attr_sz;
 216
 217     model.mesh = FEM_Mesh_lookup(which_mesh,"meshModel_Create_Driver");
 218     model.mAtt = mAtt;
 219
 220     model.num_local_elem = model.mesh->elem[MESH_ELEMENT_TET4].size();
 221     model.num_local_node = model.mesh->node.size();
 222
 223     // Allocate user model attributes
 224     FEM_Mesh_become_set(which_mesh);
 225     char* temp_array = (char*) malloc(model.num_local_elem * model.elem_attr_size);
 226     FEM_Mesh_data(which_mesh,FEM_ELEM+MESH_ELEMENT_TET4,ATT_ELEM_DATA,temp_array,0,model.num_local_elem,FEM_BYTE,model.elem_attr_size);
 227     free(temp_array);
 228
 229     temp_array = (char*) malloc(model.num_local_node * model.node_attr_size);
 230     FEM_Mesh_data(which_mesh,FEM_NODE,ATT_NODE_DATA,temp_array,0,model.num_local_node,FEM_BYTE,model.node_attr_size);
 231     free(temp_array);
 232
 233
 234     const int connSize = model.mesh->elem[MESH_ELEMENT_TET4].getConn().width();
 235     temp_array = (char*) malloc(model.num_local_node * connSize);
 236     FEM_Mesh_data(which_mesh,FEM_ELEM+MESH_ELEMENT_TET4,ATT_ELEM_N2E_CONN,temp_array, 0, 1, FEM_INT, connSize);
 237     free(temp_array);
 238
 239     setTableReferences(&model, true);
 240
 241     // Setup the adjacencies
 242     int nodesPerTuple = 3;
 243     int tuplesPerTet = 4;
 244     int tuplesPerCoh = 2;
 245
 246     FEM_Add_elem2face_tuples(which_mesh, MESH_ELEMENT_TET4,  nodesPerTuple, tuplesPerTet, tetFaces);
 247     FEM_Add_elem2face_tuples(which_mesh, MESH_ELEMENT_COH3T3,  nodesPerTuple, tuplesPerCoh, cohFaces);
 248
 249     model.mesh->createNodeElemAdj();
 250     model.mesh->createNodeNodeAdj();
 251     model.mesh->createElemElemAdj();
 252
 253 #if CUDA
 254     int* n2eTable;
 255     /** Create n2e connectivity array and copy to device global memory */
 256     FEM_Mesh_create_node_elem_adjacency(which_mesh);
 257     FEM_Mesh* mesh = FEM_Mesh_lookup(which_mesh, "meshModel_Create_Driver");
 258     FEM_DataAttribute * at = (FEM_DataAttribute*)
 259         model.mesh->elem[MESH_ELEMENT_TET4].lookup(ATT_ELEM_N2E_CONN,"meshModel_Create_Driver");
 260     n2eTable = at->getInt().getData();
 261
 262     FEM_IndexAttribute * iat = (FEM_IndexAttribute*)
 263         model.mesh->elem[MESH_ELEMENT_TET4].lookup(FEM_CONN,"meshModel_Create_Driver");
 264     int* connTable  = iat->get().getData();
 265
 266     int* adjElements;
 267     int size;
 268     for (int i=0; i<model.num_local_node; ++i) {
 269         mesh->n2e_getAll(i, adjElements, size);
 270         for (int j=0; j<size; ++j) {
 271             for (int k=0; k<connSize+1; ++k) {
 272                 if (connTable[connSize*adjElements[j]+k] == i) {
 273                     n2eTable[connSize*adjElements[j]+k] = j;
 274                     break;
 275                 }
 276                 if (k == connSize) {
 277                     CkPrintf("Element %d cannot find node %d in its conn [%d %d %d]\n",
 278                             adjElements[j], i,
 279                             connTable[connSize*adjElements[j]+0],
 280                             connTable[connSize*adjElements[j]+1],
 281                             connTable[connSize*adjElements[j]+2]);
 282                     CkAssert(false);
 283                 }
 284             }
 285         }
 286         delete[] adjElements;
 287     }
 288 #endif
 289
 290     //for (int i=0; i<model->num_local_elem*4; ++i) {
 291     //    printf("%d ", connTable[i]);
 292     //    if ((i+1)%4 == 0) printf("\n");
 293     //}
 294     //printf("\n\n");
 295     //for (int i=0; i<model->num_local_elem*4; ++i) {
 296     //    printf("%d ", n2eTable[i]);
 297     //    if ((i+1)%4 == 0) printf("\n");
 298     //}
 299     FEM_Mesh_become_get(which_mesh);
 300
 301 #if CUDA
 302     if (model.target_device == DeviceGPU) {
 303       allocateModelForCUDADevice(&model);
 304     }
 305 #endif
 306
 307 }
 308
 309
 310
 311 #ifdef CUDA
 312 //  MeshDevice target_device
 313 void allocateModelForCUDADevice(MeshModel* model){
 314
 315   CkPrintf("[%d] allocateModelForCUDADevice\n", CkMyPe() );
 316
 317   if( ! model->allocatedForCUDADevice ) {
 318     model->allocatedForCUDADevice = true;
 319
 320     const int connSize = model->mesh->elem[MESH_ELEMENT_TET4].getConn().width();
 321     const FEM_DataAttribute * at = (FEM_DataAttribute*)  model->mesh->elem[MESH_ELEMENT_TET4].lookup(ATT_ELEM_N2E_CONN,"allocateModelForCUDADevice");
 322     const int* n2eTable  = at->getInt().getData();
 323
 324
 325         int size = model->num_local_elem * connSize *sizeof(int);
 326         cudaError_t err = cudaMalloc((void**)&(model->device_model.n2eConnDevice), size);
 327         if(err == cudaErrorMemoryAllocation){
 328             CkPrintf("[%d] cudaMalloc FAILED with error cudaErrorMemoryAllocation model->device_model.n2eConnDevice in ParFUM_Iterators.cc size=%d: %s\n", CkMyPe(), size, cudaGetErrorString(err));
 329         }else if(err != cudaSuccess){
 330             CkPrintf("[%d] cudaMalloc FAILED model->device_model.n2eConnDevice in ParFUM_Iterators.cc size=%d: %s\n", CkMyPe(), size, cudaGetErrorString(err));
 331             CkAbort("cudaMalloc FAILED");
 332         }
 333         CkAssert(cudaMemcpy(model->device_model.n2eConnDevice,n2eTable,size, cudaMemcpyHostToDevice) == cudaSuccess);
 334
 335
 336         /** copy number/sizes of nodes and elements to device structure */
 337         model->device_model.elem_attr_size =  model->elem_attr_size;
 338         model->device_model.node_attr_size =  model->node_attr_size;
 339         model->device_model.model_attr_size =  model->model_attr_size;
 340         model->device_model.num_local_node = model->num_local_node;
 341         model->device_model.num_local_elem = model->num_local_elem;
 342
 343         /** Copy element Attribute array to device global memory */
 344         {
 345             FEM_DataAttribute * at = (FEM_DataAttribute*) model->mesh->elem[MESH_ELEMENT_TET4].lookup(ATT_ELEM_DATA,"meshModel_Create_Driver");
 346             AllocTable2d<unsigned char> &dataTable  = at->getChar();
 347             unsigned char *ElemData = dataTable.getData();
 348             int size = dataTable.size()*dataTable.width();
 349             assert(size == model->num_local_elem * model->elem_attr_size);
 350             CkAssert(cudaMalloc((void**)&(model->device_model.ElemDataDevice), size) == cudaSuccess);
 351             CkAssert(cudaMemcpy(model->device_model.ElemDataDevice,ElemData,size,
 352                         cudaMemcpyHostToDevice) == cudaSuccess);
 353         }
 354
 355         /** Copy node Attribute array to device global memory */
 356         {
 357             FEM_DataAttribute * at = (FEM_DataAttribute*) model->mesh->node.lookup(ATT_NODE_DATA,"meshModel_Create_Driver");
 358             AllocTable2d<unsigned char> &dataTable  = at->getChar();
 359             unsigned char *NodeData = dataTable.getData();
 360             int size = dataTable.size()*dataTable.width();
 361             assert(size == model->num_local_node * model->node_attr_size);
 362             CkAssert(cudaMalloc((void**)&(model->device_model.NodeDataDevice), size) == cudaSuccess);
 363             CkAssert(cudaMemcpy(model->device_model.NodeDataDevice,NodeData,size,
 364                         cudaMemcpyHostToDevice) == cudaSuccess);
 365         }
 366
 367         /** Copy elem connectivity array to device global memory */
 368         {
 369             FEM_IndexAttribute * at = (FEM_IndexAttribute*) model->mesh->elem[MESH_ELEMENT_TET4].lookup(FEM_CONN,"meshModel_Create_Driver");
 370             AllocTable2d<int> &dataTable  = at->get();
 371             int *data = dataTable.getData();
 372             int size = dataTable.size()*dataTable.width()*sizeof(int);
 373             CkAssert(cudaMalloc((void**)&(model->device_model.ElemConnDevice), size) == cudaSuccess);
 374             CkAssert(cudaMemcpy(model->device_model.ElemConnDevice,data,size,
 375                         cudaMemcpyHostToDevice) == cudaSuccess);
 376         }
 377
 378         /** Copy model Attribute to device global memory */
 379         {
 380             printf("Copying model attribute of size %d\n", model->model_attr_size);
 381             CkAssert(cudaMalloc((void**)&(model->device_model.mAttDevice),
 382                         model->model_attr_size) == cudaSuccess);
 383             CkAssert(cudaMemcpy(model->device_model.mAttDevice,model->mAtt,model->model_attr_size,
 384                         cudaMemcpyHostToDevice) == cudaSuccess);
 385         }
 386     }
 387 }
 388
 389
 390 //  Copy data from GPU and deallocate its memory
 391 void deallocateModelForCUDADevice(MeshModel* model){
 392
 393   CkPrintf("[%d] deallocateModelForCUDADevice\n", CkMyPe() );
 394
 395   if( model->allocatedForCUDADevice ) {
 396     model->allocatedForCUDADevice = false;
 397
 398     const int connSize = model->mesh->elem[MESH_ELEMENT_TET4].getConn().width();
 399     FEM_DataAttribute * at = (FEM_DataAttribute*)  model->mesh->elem[MESH_ELEMENT_TET4].lookup(ATT_ELEM_N2E_CONN,"allocateModelForCUDADevice");
 400     int* n2eTable  = at->getInt().getData();
 401
 402     int size = model->num_local_elem * connSize *sizeof(int);
 403
 404     CkAssert(cudaMemcpy(n2eTable,model->device_model.n2eConnDevice,size, cudaMemcpyDeviceToHost) == cudaSuccess);
 405     CkAssert(cudaFree(model->device_model.n2eConnDevice) == cudaSuccess);
 406
 407     /** Copy element Attribute array from device global memory */
 408     {
 409       FEM_DataAttribute * at = (FEM_DataAttribute*) model->mesh->elem[MESH_ELEMENT_TET4].lookup(ATT_ELEM_DATA,"meshModel_Create_Driver");
 410       AllocTable2d<unsigned char> &dataTable  = at->getChar();
 411       unsigned char *ElemData = dataTable.getData();
 412       int size = dataTable.size()*dataTable.width();
 413       assert(size == model->num_local_elem * model->elem_attr_size);
 414       CkAssert(cudaMemcpy(ElemData,model->device_model.ElemDataDevice,size, cudaMemcpyDeviceToHost) == cudaSuccess);
 415       CkAssert(cudaFree(model->device_model.ElemDataDevice) == cudaSuccess);
 416     }
 417
 418     /** Copy node Attribute array from device global memory */
 419     {
 420       FEM_DataAttribute * at = (FEM_DataAttribute*) model->mesh->node.lookup(ATT_NODE_DATA,"meshModel_Create_Driver");
 421       AllocTable2d<unsigned char> &dataTable  = at->getChar();
 422       unsigned char *NodeData = dataTable.getData();
 423       int size = dataTable.size()*dataTable.width();
 424       assert(size == model->num_local_node * model->node_attr_size);
 425       CkAssert(cudaMemcpy(NodeData,model->device_model.NodeDataDevice,size, cudaMemcpyDeviceToHost) == cudaSuccess);
 426       CkAssert(cudaFree(model->device_model.NodeDataDevice) == cudaSuccess);
 427     }
 428
 429     /** Copy elem connectivity array from device global memory */
 430     {
 431       FEM_IndexAttribute * at = (FEM_IndexAttribute*) model->mesh->elem[MESH_ELEMENT_TET4].lookup(FEM_CONN,"meshModel_Create_Driver");
 432       AllocTable2d<int> &dataTable  = at->get();
 433       int *data = dataTable.getData();
 434       int size = dataTable.size()*dataTable.width()*sizeof(int);
 435       CkAssert(cudaMemcpy(data,model->device_model.ElemConnDevice,size, cudaMemcpyDeviceToHost) == cudaSuccess);
 436       CkAssert(cudaFree(model->device_model.ElemConnDevice) == cudaSuccess);
 437     }
 438
 439     /** Copy model Attribute from device global memory */
 440     {
 441       printf("Copying model attribute of size %d\n", model->model_attr_size);
 442       CkAssert(cudaMemcpy(model->mAtt,model->device_model.mAttDevice,model->model_attr_size, cudaMemcpyDeviceToHost) == cudaSuccess);
 443       CkAssert(cudaFree(model->device_model.mAttDevice) == cudaSuccess);
 444
 445     }
 446   }
 447 }
 448 #endif
 449
 450
 451
 452
 453
 454 /** Copy node attribute array from CUDA device back to the ParFUM attribute */
 455 void mesh_retrieve_node_data(MeshModel* m){
 456 #if CUDA
 457   CkAssert( m->allocatedForCUDADevice);
 458     cudaError_t status = cudaMemcpy(m->NodeData_T->getData(),
 459                 m->device_model.NodeDataDevice,
 460                 m->num_local_node * m->node_attr_size,
 461                 cudaMemcpyDeviceToHost);
 462     CkAssert(status == cudaSuccess);
 463 #endif
 464 }
 465
 466 /** Copy node attribute array to CUDA device from the ParFUM attribute */
 467 void mesh_put_node_data(MeshModel* m){
 468 #if CUDA
 469   CkAssert( m->allocatedForCUDADevice);
 470     cudaError_t status = cudaMemcpy(m->device_model.NodeDataDevice,
 471                 m->NodeData_T->getData(),
 472                 m->num_local_node * m->node_attr_size,
 473                 cudaMemcpyHostToDevice);
 474     CkAssert(status == cudaSuccess);
 475 #endif
 476 }
 477
 478
 479 /** Copy element attribute array from CUDA device back to the ParFUM attribute */
 480 void mesh_retrieve_elem_data(MeshModel* m){
 481 #if CUDA
 482   CkAssert( m->allocatedForCUDADevice);
 483     cudaError_t status = cudaMemcpy(m->ElemData_T->getData(),
 484                 m->device_model.ElemDataDevice,
 485                 m->num_local_elem * m->elem_attr_size,
 486                 cudaMemcpyDeviceToHost);
 487     CkAssert(status == cudaSuccess);
 488 #endif
 489 }
 490
 491
 492 /** Copy elem attribute array to CUDA device from the ParFUM attribute */
 493 void mesh_put_elem_data(MeshModel* m) {
 494 #if CUDA
 495   CkAssert( m->allocatedForCUDADevice);
 496   cudaError_t status = cudaMemcpy(m->device_model.ElemDataDevice,
 497                 m->ElemData_T->getData(),
 498                 m->num_local_elem * m->elem_attr_size,
 499                 cudaMemcpyHostToDevice);
 500     CkAssert(status == cudaSuccess);
 501 #endif
 502 }
 503
 504
 505 /** Copy node and elem attribute arrays to CUDA device from the ParFUM attribute */
 506 void mesh_put_data(MeshModel* m) {
 507 #if CUDA
 508   CkAssert( m->allocatedForCUDADevice);
 509     mesh_put_node_data(m);
 510     mesh_put_elem_data(m);
 511     cudaError_t status = cudaMemcpy(m->device_model.mAttDevice,m->mAtt,m->model_attr_size,
 512                 cudaMemcpyHostToDevice);
 513     CkAssert(status == cudaSuccess);
 514 #endif
 515 }
 516
 517
 518 /** Copy node and elem attribute arrays from CUDA device to the ParFUM attribute */
 519 void mesh_retrieve_data(MeshModel* m) {
 520 #if CUDA
 521   CkAssert( m->allocatedForCUDADevice);
 522     mesh_retrieve_node_data(m);
 523     mesh_retrieve_elem_data(m);
 524     cudaError_t status = cudaMemcpy(m->mAtt,m->device_model.mAttDevice,m->model_attr_size,
 525                 cudaMemcpyDeviceToHost);
 526     CkAssert(status == cudaSuccess);
 527 #endif
 528 }
 529
 530
 531 /** Cleanup a model */
 532 void meshModel_Destroy(MeshModel* m){
 533 #if CUDA
 534     if (m->target_device == DeviceGPU) {
 535       //        CkAssert(cudaFree(m->device_model.mAttDevice) == cudaSuccess);
 536       //        CkAssert(cudaFree(m->device_model.NodeDataDevice) == cudaSuccess);
 537       //        CkAssert(cudaFree(m->device_model.ElemDataDevice) == cudaSuccess);
 538     }
 539 #endif
 540     delete m;
 541 }
 542
 543
 544 MeshNode meshModel_InsertNode(MeshModel* m, double x, double y, double z){
 545     int newNode = FEM_add_node_local(m->mesh,false,false,false);
 546     setTableReferences(m);
 547     (*m->coord_T)(newNode,0)=x;
 548     (*m->coord_T)(newNode,1)=y;
 549     (*m->coord_T)(newNode,2)=z;
 550     return newNode;
 551 }
 552
 553 MeshNode meshModel_InsertNode(MeshModel* m, float x, float y, float z){
 554     int newNode = FEM_add_node_local(m->mesh,false,false,false);
 555     setTableReferences(m);
 556     (*m->coord_T)(newNode,0)=x;
 557     (*m->coord_T)(newNode,1)=y;
 558     (*m->coord_T)(newNode,2)=z;
 559     return newNode;
 560 }
 561
 562
 563 /** Set id of a node
 564   @todo Make this work with ghosts
 565   */
 566 void meshNode_SetId(MeshModel* m, MeshNode n, EntityID id){
 567     CkAssert(n>=0);
 568     (*m->node_id_T)(n,0)=id;
 569     m->nodeIDHash->put(id) = n+1;
 570 }
 571
 572 /** Insert an element */
 573 MeshElement meshModel_InsertElem(MeshModel*m, MeshElementType type, MeshNode* nodes){
 574     CkAssert(type ==  MESH_ELEMENT_TET4 || type == MESH_ELEMENT_TET10);
 575
 576     MeshElement newEl;
 577
 578     if(type==MESH_ELEMENT_TET4){
 579         int conn[4];
 580         conn[0] = nodes[0];
 581         conn[1] = nodes[1];
 582         conn[2] = nodes[2];
 583         conn[3] = nodes[3];
 584         newEl.type = MESH_ELEMENT_TET4;
 585         newEl.id = FEM_add_element_local(m->mesh, conn, 4, type, 0,0);
 586     } else if (type==MESH_ELEMENT_TET10){
 587         int conn[10];
 588         conn[0] = nodes[0];
 589         conn[1] = nodes[1];
 590         conn[2] = nodes[2];
 591         conn[3] = nodes[3];
 592         conn[4] = nodes[4];
 593         conn[5] = nodes[5];
 594         conn[6] = nodes[6];
 595         conn[7] = nodes[7];
 596         conn[8] = nodes[8];
 597         conn[9] = nodes[9];
 598         newEl.type =  MESH_ELEMENT_TET10;
 599         newEl.id = FEM_add_element_local(m->mesh, conn, 10, type, 0, 0);
 600     }
 601
 602     setTableReferences(m);
 603     return newEl;
 604 }
 605
 606 /** Set id of an element
 607   @todo Make this work with ghosts
 608   */
 609 void meshElement_SetId(MeshModel* m, MeshElement e, EntityID id){
 610     CkAssert(e.id>=0);
 611     (*m->elem_id_T)(e.id,0)=id;
 612     m->elemIDHash->put(id) = e.id+1;
 613 }
 614
 615 /** get the number of elements in the mesh */
 616 int meshModel_GetNElem (MeshModel* m){
 617     const int numBulk = m->mesh->elem[MESH_ELEMENT_TET4].count_valid();
 618     const int numCohesive = m->mesh->elem[MESH_ELEMENT_COH3T3].count_valid();
 619     std::cout << " numBulk = " << numBulk << " numCohesive " << numCohesive << std::endl;
 620     return numBulk + numCohesive;
 621 }
 622
 623 /**
 624   @brief Set attribute of a node
 625
 626   The attribute passed in must be a contiguous data structure with size equal to the value node_attr_sz passed into meshModel_Create_Driver() and meshModel_Create_Init()
 627
 628   The supplied attribute will be copied into the ParFUM attribute array "ATT_NODE_DATA. Then ParFUM will own this data. The function meshNode_GetAttrib() will return a pointer to the copy owned by ParFUM. If a single material parameter attribute is used for multiple nodes, each node will get a separate copy of the array. Any subsequent modifications to the data will only be reflected at a single node.
 629
 630   The user is responsible for deallocating parameter d passed into this function.
 631
 632 */
 633 void meshNode_SetAttrib(MeshModel* m, MeshNode n, void* d)
 634 {
 635     if(m->NodeData_T == NULL){
 636         CkPrintf("Ignoring call to meshNode_SetAttrib\n");
 637         return;
 638     } else {
 639         unsigned char* data;
 640         if (n < 0) {
 641             assert(m->GhostNodeData_T);
 642             data = m->GhostNodeData_T->getData();
 643             n = FEM_From_ghost_index(n);
 644         } else {
 645             assert(m->NodeData_T);
 646             data = m->NodeData_T->getData();
 647         }
 648         memcpy(data + n*m->node_attr_size, d, m->node_attr_size);
 649     }
 650 }
 651
 652 /** @brief Set attribute of an element
 653   See meshNode_SetAttrib() for description
 654   */
 655 void meshElement_SetAttrib(MeshModel* m, MeshElement e, void* d){
 656     if(m->ElemData_T == NULL){
 657         CkPrintf("Ignoring call to meshElement_SetAttrib\n");
 658         return;
 659     } else {
 660         unsigned char *data;
 661         if (e.id < 0) {
 662             data = m->GhostElemData_T->getData();
 663             e.id = FEM_From_ghost_index(e.id);
 664         } else {
 665             data = m->ElemData_T->getData();
 666         }
 667         memcpy(data + e.id*m->elem_attr_size, d, m->elem_attr_size);
 668     }
 669 }
 670
 671 /** @brief Get elem attribute
 672   See meshNode_SetAttrib() for description
 673   */
 674 void* meshElement_GetAttrib(MeshModel* m, MeshElement e)
 675 {
 676     if(! m->mesh->elem[e.type].is_valid_any_idx(e.id))
 677         return NULL;
 678     unsigned char *data;
 679     if (FEM_Is_ghost_index(e.id)) {
 680         data = m->GhostElemData_T->getData();
 681         e.id = FEM_From_ghost_index(e.id);
 682     } else {
 683         data = m->ElemData_T->getData();
 684     }
 685     return (data + e.id*m->elem_attr_size);
 686 }
 687
 688 /** @brief Get nodal attribute
 689   See meshNode_SetAttrib() for description
 690   */
 691 void* meshNode_GetAttrib(MeshModel* m, MeshNode n)
 692 {
 693     if(!m->mesh->node.is_valid_any_idx(n))
 694         return NULL;
 695
 696     unsigned char* data;
 697     if (FEM_Is_ghost_index(n)) {
 698         data = m->GhostNodeData_T->getData();
 699         n = FEM_From_ghost_index(n);
 700     } else {
 701         data = m->NodeData_T->getData();
 702     }
 703     return (data + n*m->node_attr_size);
 704 }
 705
 706
 707 /**
 708   Get node via id
 709   */
 710 MeshNode meshModel_GetNodeAtId(MeshModel* m, EntityID id)
 711 {
 712     int hashnode = m->nodeIDHash->get(id)-1;
 713     if (hashnode != -1) return hashnode;
 714
 715     AllocTable2d<int>* ghostNode_id_T = &((FEM_DataAttribute*)m->mesh->
 716             node.getGhost()->lookup(ATT_NODE_ID,""))->getInt();
 717     if(ghostNode_id_T != NULL){
 718         for(int i=0; i<ghostNode_id_T->size(); ++i) {
 719             if((*ghostNode_id_T)(i,0)==id){
 720                 return FEM_To_ghost_index(i);
 721             }
 722         }
 723     }
 724     return -1;
 725 }
 726
 727
 728 /**
 729   Get elem via id
 730 Note: this will currently only work with TET4 elements
 731 */
 732 #ifndef INLINE_GETELEMATID
 733 MeshElement meshModel_GetElemAtId(MeshModel*m,EntityID id)
 734 {
 735     MeshElement e;
 736     e.id = m->elemIDHash->get(id)-1;
 737     e.type = MESH_ELEMENT_TET4;
 738
 739     if (e.id != -1) return e;
 740
 741     AllocTable2d<int>* ghostElem_id_T = &((FEM_DataAttribute*)m->mesh->
 742             elem[MESH_ELEMENT_TET4].getGhost()->lookup(ATT_ELEM_ID,""))->getInt();
 743
 744     if(ghostElem_id_T  != NULL) {
 745         for(int i=0; i<ghostElem_id_T->size(); ++i) {
 746             if((*ghostElem_id_T)(i,0)==id){
 747                 e.id = FEM_To_ghost_index(i);
 748                 e.type = MESH_ELEMENT_TET4;
 749                 return e;
 750             }
 751         }
 752     }
 753
 754     e.id = -1;
 755     e.type = MESH_ELEMENT_TET4;
 756
 757     return e;
 758 }
 759 #endif
 760
 761 MeshNode meshElement_GetNode(MeshModel* m,MeshElement e,int idx){
 762     int node = -1;
 763     if (e.id < 0) {
 764         CkAssert(m->mesh->elem[e.type].getGhost());
 765         const AllocTable2d<int> &conn = ((FEM_Elem*)m->mesh->elem[e.type].getGhost())->getConn();
 766         CkAssert(idx>=0 && idx<conn.width());
 767         node = conn(FEM_From_ghost_index(e.id),idx);
 768     } else {
 769         const AllocTable2d<int> &conn = m->mesh->elem[e.type].getConn();
 770         CkAssert(idx>=0 && idx<conn.width());
 771         node = conn(e.id,idx);
 772     }
 773
 774     return node;
 775 }
 776
 777 int meshNode_GetId(MeshModel* m, MeshNode n){
 778     CkAssert(n>=0);
 779     return (*m->node_id_T)(n,0);
 780 }
 781
 782
 783 /** @todo handle ghost nodes as appropriate */
 784 int meshModel_GetNNodes(MeshModel *model){
 785     return model->mesh->node.count_valid();
 786 }
 787
 788 /** @todo How should we handle meshes with mixed elements? */
 789 int meshElement_GetNNodes(MeshModel* model, MeshElement elem){
 790     return model->mesh->elem[elem.type].getConn().width();
 791 }
 792
 793 /** @todo make sure we are in a getting mesh */
 794 void meshNode_GetPosition(MeshModel*model, MeshNode node,double*x,double*y,double*z){
 795     if (node < 0) {
 796         AllocTable2d<double>* table = &((FEM_DataAttribute*)model->
 797                 mesh->node.getGhost()->lookup(ATT_NODE_COORD,""))->getDouble();
 798         node = FEM_From_ghost_index(node);
 799         *x = (*table)(node,0);
 800         *y = (*table)(node,1);
 801         *z = (*table)(node,2);
 802     } else {
 803         *x = (*model->coord_T)(node,0);
 804         *y = (*model->coord_T)(node,1);
 805         *z = (*model->coord_T)(node,2);
 806     }
 807 }
 808
 809 /** @todo make sure we are in a getting mesh */
 810 void meshNode_GetPosition(MeshModel*model, MeshNode node,float*x,float*y,float*z){
 811     if (node < 0) {
 812         AllocTable2d<float>* table = &((FEM_DataAttribute*)model->
 813                 mesh->node.getGhost()->lookup(ATT_NODE_COORD,""))->getFloat();
 814         node = FEM_From_ghost_index(node);
 815
 816         *x = (*table)(node,0);
 817         *y = (*table)(node,1);
 818         *z = (*table)(node,2);
 819     } else {
 820         *x = (*model->coord_T)(node,0);
 821         *y = (*model->coord_T)(node,1);
 822         *z = (*model->coord_T)(node,2);
 823     }
 824 }
 825
 826 void meshModel_Sync(MeshModel*m){
 827     MPI_Barrier(MPI_COMM_WORLD);
 828 }
 829
 830 /** Test the node and element iterators */
 831 void meshModel_TestIterators(MeshModel*m){
 832     CkAssert(m->mesh->elem[MESH_ELEMENT_TET4].ghost!=NULL);
 833     CkAssert(m->mesh->node.ghost!=NULL);
 834
 835     int expected_elem_count = m->mesh->elem[MESH_ELEMENT_TET4].count_valid() + m->mesh->elem[MESH_ELEMENT_TET4].ghost->count_valid();
 836     int iterated_elem_count = 0;
 837
 838     int expected_node_count = m->mesh->node.count_valid() + m->mesh->node.ghost->count_valid();
 839     int iterated_node_count = 0;
 840
 841     int myId = FEM_My_partition();
 842
 843
 844     MeshNodeItr* itr = meshModel_CreateNodeItr(m);
 845     for(meshNodeItr_Begin(itr);meshNodeItr_IsValid(itr);meshNodeItr_Next(itr)){
 846         iterated_node_count++;
 847         MeshNode node = meshNodeItr_GetCurr(itr);
 848         void* na = meshNode_GetAttrib(m,node);
 849         CkAssert(na != NULL);
 850     }
 851
 852     MeshElemItr* e_itr = meshModel_CreateElemItr(m);
 853     for(meshElemItr_Begin(e_itr);meshElemItr_IsValid(e_itr);meshElemItr_Next(e_itr)){
 854         iterated_elem_count++;
 855         MeshElement elem = meshElemItr_GetCurr(e_itr);
 856         void* ea = meshElement_GetAttrib(m,elem);
 857         CkAssert(ea != NULL);
 858     }
 859
 860     CkAssert(iterated_node_count == expected_node_count);
 861     CkAssert(iterated_elem_count==expected_elem_count);
 862     CkPrintf("Completed Iterator Test!\n");
 863 }
 864
 865
 866 bool meshElement_IsCohesive(MeshModel* m, MeshElement e){
 867     return e.type > MESH_ELEMENT_MIN_COHESIVE;
 868 }
 869
 870
 871 /** currently we only support linear tets for the bulk elements */
 872 int meshFacet_GetNNodes (MeshModel* m, MeshFacet f){
 873     return 6;
 874 }
 875
 876 MeshNode meshFacet_GetNode (MeshModel* m, MeshFacet f, int i){
 877     return f.node[i];
 878 }
 879
 880 MeshElement meshFacet_GetElem (MeshModel* m, MeshFacet f, int i){
 881     return f.elem[i];
 882 }
 883
 884 /** I'm not quite sure the point of this function
 885  * TODO figure out what this is supposed to do
 886  */
 887 bool meshElement_IsValid(MeshModel* m, MeshElement e){
 888     return m->mesh->elem[e.type].is_valid_any_idx(e.id);
 889 }
 890
 891
 892 /** We will use the following to identify the original boundary nodes.
 893  * These are those that are adjacent to a facet that is on the boundary(has one adjacent element).
 894  * Assume vertex=node.
 895  */
 896
 897 bool meshVertex_IsBoundary (MeshModel* m, MeshVertex v){
 898     return m->mesh->node.isBoundary(v);
 899 }
 900
 901
 902 MeshVertex meshNode_GetVertex (MeshModel* m, MeshNode n){
 903     return n;
 904 }
 905
 906
 907 int meshElement_GetId (MeshModel* m, MeshElement e) {
 908     CkAssert(e.id>=0);
 909     return (*m->elem_id_T)(e.id,0);
 910 }
 911
 912 /** Determine if two triangles are the same, but possibly varied under
 913  * rotation or mirroring */
 914 bool areSameTriangle(int a1, int a2, int a3, int b1, int b2, int b3) {
 915     if (a1==b1 && a2==b2 && a3==b3) return true;
 916     if (a1==b2 && a2==b3 && a3==b1) return true;
 917     if (a1==b3 && a2==b1 && a3==b2) return true;
 918     if (a1==b1 && a2==b3 && a3==b2) return true;
 919     if (a1==b2 && a2==b1 && a3==b3) return true;
 920     if (a1==b3 && a2==b2 && a3==b1) return true;
 921
 922     return false;
 923 }
 924
 925
 926 MeshElement meshModel_InsertCohesiveAtFacet (MeshModel* m, MeshElementType etype, MeshFacet f){
 927     MeshElement newCohesiveElement;
 928
 929     CkAssert(etype == MESH_ELEMENT_COH3T3);
 930
 931     const MeshElement firstElement = f.elem[0];
 932     const MeshElement secondElement = f.elem[1];
 933
 934     CkAssert(firstElement.type != MESH_ELEMENT_COH3T3);
 935     CkAssert(secondElement.type != MESH_ELEMENT_COH3T3);
 936
 937     CkAssert(firstElement.id != -1);
 938     CkAssert(secondElement.id != -1);
 939
 940     // Create a new element
 941     int newEl = m->mesh->elem[etype].get_next_invalid(m->mesh);
 942     m->mesh->elem[etype].set_valid(newEl, false);
 943
 944     newCohesiveElement.id = newEl;
 945     newCohesiveElement.type = etype;
 946
 947 #if DEBUG
 948     CkPrintf("/\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\/ \n");
 949     CkPrintf("Inserting cohesive %d of type %d at facet %d,%d,%d\n", newEl, etype,  f.node[0], f.node[1], f.node[2]);
 950 #endif
 951
 952     int conn[6];
 953     conn[0] = f.node[0];
 954     conn[1] = f.node[1];
 955     conn[2] = f.node[2];
 956     conn[3] = f.node[0];
 957     conn[4] = f.node[1];
 958     conn[5] = f.node[2];
 959
 960     /// The lists of elements that can be reached from element on one side of the facet by iterating around each of the three nodes
 961     std::set<MeshElement> reachableFromElement1[3];
 962     std::set<MeshNode> reachableNodeFromElement1[3];
 963     bool canReachSecond[3];
 964
 965
 966     // Examine each node to determine if the node should be split
 967     for(int whichNode = 0; whichNode<3; whichNode++){
 968 #if DEBUG
 969         CkPrintf("--------------------------------\n");
 970         CkPrintf("Determining whether to split node %d\n",  f.node[whichNode]);
 971 #endif
 972
 973         canReachSecond[whichNode]=false;
 974
 975         MeshNode theNode = f.node[whichNode];
 976
 977         // Traverse across the faces to see which elements we can get to from the first element of this facet
 978         std::stack<MeshElement> traverseTheseElements;
 979         CkAssert(firstElement.type != MESH_ELEMENT_COH3T3);
 980         traverseTheseElements.push(firstElement);
 981
 982         while(traverseTheseElements.size()>0 && ! canReachSecond[whichNode]){
 983             MeshElement traversedToElem = traverseTheseElements.top();
 984             traverseTheseElements.pop();
 985
 986             // We should only examine elements that we have not yet already examined
 987             if(reachableFromElement1[whichNode].find(traversedToElem) == reachableFromElement1[whichNode].end()){
 988                 reachableFromElement1[whichNode].insert(traversedToElem);
 989 #if DEBUG
 990                 CkPrintf("Can iterate from first element %d to element %d\n", firstElement.id, traversedToElem.id);
 991 #endif
 992                 // keep track of which nodes the split node would be adjacent to,
 993                 // if we split this node
 994                 for (int elemNode=0; elemNode<4; ++elemNode) {
 995                     int queryNode = m->mesh->e2n_getNode(traversedToElem.id, elemNode, traversedToElem.type);
 996                     if (m->mesh->n2n_exists(theNode, queryNode) &&
 997                             queryNode != f.node[0] &&
 998                             queryNode != f.node[1] &&
 999                             queryNode != f.node[2]) {
1000                         reachableNodeFromElement1[whichNode].insert(queryNode);
1001                     }
1002                 }
1003 #if DEBUG
1004                 //CkPrintf("Examining element %s,%d\n", traversedToElem.type==MESH_ELEMENT_COH3T3?"MESH_ELEMENT_COH3T3":"MESH_ELEMENT_TET4", traversedToElem.id);
1005 #endif
1006
1007                 // Add all elements across this elements face, if they contain whichNode
1008                 for(int face=0;face<4;face++){
1009
1010                     MeshElement neighbor = m->mesh->e2e_getElem(traversedToElem, face);
1011                     // Only traverse to neighboring bulk elements
1012                     if(neighbor.type == MESH_ELEMENT_TET4){
1013 #if DEBUG
1014                         CkPrintf("element %d,%d is adjacent to bulk element %d on face %d\n", traversedToElem.type,traversedToElem.id, neighbor.id, face);
1015 #endif
1016                         if(meshElement_IsValid(m,neighbor)) {
1017                             bool containsTheNode = false;
1018                             for(int i=0;i<4;i++){
1019                                 if(meshElement_GetNode(m,neighbor,i) == theNode){
1020                                     containsTheNode = true;
1021                                 }
1022                             }
1023
1024                             if(containsTheNode){
1025                                 // Don't traverse across the face at which we are inserting the cohesive element
1026                                 if(!areSameTriangle(f.node[0],f.node[1],f.node[2],
1027                                             meshElement_GetNode(m,traversedToElem,tetFaces[face*3+0]),
1028                                             meshElement_GetNode(m,traversedToElem,tetFaces[face*3+1]),
1029                                             meshElement_GetNode(m,traversedToElem,tetFaces[face*3+2]) ) ){
1030
1031                                     // If this element is the second element adjacent to the new cohesive element, we can stop
1032                                     if(neighbor == secondElement){
1033                                         canReachSecond[whichNode] = true;
1034 #if DEBUG
1035                                         CkPrintf("We have traversed to the other side of the facet\n");
1036 #endif
1037                                     } else {
1038                                         // Otherwise, add this element to the set remaining to be examined
1039                                         CkAssert(neighbor.type != MESH_ELEMENT_COH3T3);
1040                                         traverseTheseElements.push(neighbor);
1041 #if DEBUG
1042                                         //CkPrintf("Adding element %d,%d to list\n", neighbor.type, neighbor.id);
1043 #endif
1044                                     }
1045                                 } else {
1046                                     // ignore the element because it is not adjacent to the node we are considering splitting
1047                                 }
1048                             }
1049                         }
1050                     }
1051                 }
1052 #if DEBUG
1053                 //CkPrintf("So far we have traversed through %d elements(%d remaining)\n", reachableFromElement1[whichNode].size(), traverseTheseElements.size() );
1054 #endif
1055             }
1056         }
1057
1058     }
1059
1060 #if DEBUG
1061     CkPrintf("\n");
1062 #endif
1063
1064     // Now do the actual splitting of the nodes
1065     int myChunk = FEM_My_partition();
1066     for(int whichNode = 0; whichNode<3; whichNode++){
1067         if(canReachSecond[whichNode]){
1068 #if DEBUG
1069             CkPrintf("Node %d doesn't need to be split\n", f.node[whichNode]);
1070 #endif
1071             // Do nothing
1072         }else {
1073 #if DEBUG
1074             CkPrintf("Node %d needs to be split\n", f.node[whichNode]);
1075             CkPrintf("There are %d elements that will be reassigned to the new node\n",
1076                     reachableFromElement1[whichNode].size());
1077 #endif
1078
1079             // Create a new node
1080             int newNode = m->mesh->node.get_next_invalid(m->mesh);
1081             m->mesh->node.set_valid(newNode);
1082
1083             // copy its coordinates
1084             // TODO: copy its other data as well
1085             (*m->coord_T)(newNode,0) = (*m->coord_T)(conn[whichNode],0);
1086             (*m->coord_T)(newNode,1) = (*m->coord_T)(conn[whichNode],1);
1087             (*m->coord_T)(newNode,2) = (*m->coord_T)(conn[whichNode],2);
1088
1089 #if DEBUG
1090             CkPrintf("Splitting node %d into %d and %d\n", conn[whichNode], conn[whichNode], newNode);
1091 #endif
1092             // can we use nilesh's idxl aware stuff here?
1093             //FEM_add_node(m->mesh, int* adjacentNodes, int numAdjacentNodes, &myChunk, 1, 0);
1094
1095             // relabel one node in the cohesive element to the new node
1096             conn[whichNode+3] = newNode;
1097
1098             // relabel the appropriate old node in the elements in reachableFromElement1
1099             std::set<MeshElement>::iterator elem;
1100             for (elem = reachableFromElement1[whichNode].begin(); elem != reachableFromElement1[whichNode].end(); ++elem) {
1101                 m->mesh->e2n_replace(elem->id, conn[whichNode], newNode, elem->type);
1102
1103
1104 #if DEBUG
1105                 CkPrintf("replacing node %d with %d in elem %d\n", conn[whichNode], newNode, elem->id);
1106 #endif
1107             }
1108
1109             // fix node-node adjacencies
1110             std::set<MeshNode>::iterator node;
1111             for (node = reachableNodeFromElement1[whichNode].begin(); node != reachableNodeFromElement1[whichNode].end(); ++node) {
1112                 m->mesh->n2n_replace(*node, conn[whichNode], newNode);
1113 #if DEBUG
1114                 CkPrintf("node %d is now adjacent to %d instead of %d\n",
1115                         *node, newNode, conn[whichNode]);
1116 #endif
1117             }
1118         }
1119     }
1120
1121 #if DEBUG
1122     m->mesh->e2e_printAll(firstElement);
1123     m->mesh->e2e_printAll(secondElement);
1124 #endif
1125
1126     // fix elem-elem adjacencies
1127     m->mesh->e2e_replace(firstElement, secondElement, newCohesiveElement);
1128     m->mesh->e2e_replace(secondElement, firstElement, newCohesiveElement);
1129
1130 #if DEBUG
1131     CkPrintf("elements %d and %d were adjacent, now both adjacent to cohesive %d instead\n",
1132             firstElement.id, secondElement.id, newEl);
1133
1134     m->mesh->e2e_printAll(firstElement);
1135     m->mesh->e2e_printAll(secondElement);
1136
1137 #endif
1138
1139     // set cohesive connectivity
1140     m->mesh->elem[newCohesiveElement.type].connIs(newEl,conn);
1141 #if DEBUG
1142     CkPrintf("Setting connectivity of new cohesive %d to [%d %d %d %d %d %d]\n\n",
1143             newEl, conn[0], conn[1], conn[2], conn[3], conn[4], conn[5]);
1144 #endif
1145     return newCohesiveElement;
1146 }
1147
1148
1149 // #define DEBUG1
1150
1151
1152 /// A class responsible for parsing the command line arguments for the PE
1153 /// to extract the format string passed in with +ConfigurableRRMap
1154 class ConfigurableCPUGPUMapLoader {
1155     public:
1156
1157         char *locations;
1158         int objs_per_block;
1159         int numNodes;
1160
1161         /// labels for states used when parsing the ConfigurableRRMap from ARGV
1162         enum loadStatus{
1163             not_loaded,
1164             loaded_found,
1165             loaded_not_found
1166         };
1167
1168         enum loadStatus state;
1169
1170         ConfigurableCPUGPUMapLoader(){
1171             state = not_loaded;
1172             locations = NULL;
1173             objs_per_block = 0;
1174         }
1175
1176         /// load configuration if possible, and return whether a valid configuration exists
1177         bool haveConfiguration() {
1178             if(state == not_loaded) {
1179 #ifdef DEBUG1
1180                 CkPrintf("[%d] loading ConfigurableCPUGPUMap configuration\n", CkMyPe());
1181 #endif
1182                 char **argv=CkGetArgv();
1183                 char *configuration = NULL;
1184                 bool found = CmiGetArgString(argv, "+ConfigurableCPUGPUMap", &configuration);
1185                 if(!found){
1186 #ifdef DEBUG1
1187                     CkPrintf("Couldn't find +ConfigurableCPUGPUMap command line argument\n");
1188 #endif
1189                     state = loaded_not_found;
1190                     return false;
1191                 } else {
1192 #ifdef DEBUG1
1193                     CkPrintf("Found +ConfigurableCPUGPUMap command line argument in %p=\"%s\"\n", configuration, configuration);
1194 #endif
1195                     std::istringstream instream(configuration);
1196                     CkAssert(instream.good());
1197                     // extract first integer
1198                     instream >> objs_per_block;
1199                     instream >> numNodes;
1200                     CkAssert(instream.good());
1201                     CkAssert(objs_per_block > 0);
1202                     locations = new char[objs_per_block];
1203                     for(int i=0;i<objs_per_block;i++){
1204                         CkAssert(instream.good());
1205                         instream >> locations[i];
1206                         //        CkPrintf("location[%d] = '%c'\n", i, locations[i]);
1207                         CkAssert(locations[i] == 'G' || locations[i] == 'C');
1208                     }
1209                     state = loaded_found;
1210                     return true;
1211                 }
1212             } else {
1213 #ifdef DEBUG1
1214                 CkPrintf("[%d] ConfigurableCPUGPUMap has already been loaded\n", CkMyPe());
1215 #endif
1216                 return state == loaded_found;
1217             }
1218         }
1219 };
1220
1221 CkpvDeclare(ConfigurableCPUGPUMapLoader, myConfigGPUCPUMapLoader);
1222
1223 void _initConfigurableCPUGPUMap(){
1224     //  CkPrintf("Initializing CPUGPU Map!\n");
1225     CkpvInitialize(ConfigurableCPUGPUMapLoader, myConfigGPUCPUMapLoader);
1226 }
1227
1228
1229 /// Try to load the command line arguments for ConfigurableRRMap
1230 bool haveConfigurableCPUGPUMap(){
1231     ConfigurableCPUGPUMapLoader &loader =  CkpvAccess(myConfigGPUCPUMapLoader);
1232     return loader.haveConfiguration();
1233 }
1234
1235 int configurableCPUGPUMapNumNodes(){
1236     ConfigurableCPUGPUMapLoader &loader =  CkpvAccess(myConfigGPUCPUMapLoader);
1237     return loader.numNodes;
1238 }
1239
1240
1241 bool isPartitionCPU(int partition){
1242     ConfigurableCPUGPUMapLoader &loader =  CkpvAccess(myConfigGPUCPUMapLoader);
1243     int l = partition % loader.objs_per_block;
1244     return loader.locations[l] == 'C';
1245 }
1246
1247 bool isPartitionGPU(int partition){
1248     return ! isPartitionCPU(partition);
1249 }
1250
1251 #include "ParFUM_Iterators.def.h"