From bff5b2cf01da6bc6b4ff483f1cf97600df452be8 Mon Sep 17 00:00:00 2001 From: Venkatasubrahmanian Narayanan Date: Mon, 28 Jan 2019 14:22:20 -0600 Subject: [PATCH] Bug #2046: TRAM higher-dimensional chare array bugfixes Fixed the implementation of TRAM to properly support higher-dimensional chare arrays. Also made a minor change to the interface file parser to emit code compatible with this implementation. Change-Id: Ica0d87aa65f827ba9d47fa9e9053defa71a0146a --- .../charm++/TRAM/randomAccessArray/randomAccess.C | 6 +- .../charm++/TRAM/randomAccessArray/randomAccess.ci | 8 +- src/libs/ck-libs/NDMeshStreamer/NDMeshStreamer.C | 26 ++++++ src/libs/ck-libs/NDMeshStreamer/NDMeshStreamer.h | 46 ++++++++++- src/xlat-i/xi-Entry.C | 14 +--- tests/charm++/randomTRAM3D/Makefile | 22 +++++ tests/charm++/randomTRAM3D/tram3d.C | 95 ++++++++++++++++++++++ tests/charm++/randomTRAM3D/tram3d.ci | 32 ++++++++ 8 files changed, 226 insertions(+), 23 deletions(-) create mode 100644 tests/charm++/randomTRAM3D/Makefile create mode 100644 tests/charm++/randomTRAM3D/tram3d.C create mode 100644 tests/charm++/randomTRAM3D/tram3d.ci diff --git a/examples/charm++/TRAM/randomAccessArray/randomAccess.C b/examples/charm++/TRAM/randomAccessArray/randomAccess.C index 45dfc2b8f2..149474a8a3 100644 --- a/examples/charm++/TRAM/randomAccessArray/randomAccess.C +++ b/examples/charm++/TRAM/randomAccessArray/randomAccess.C @@ -14,7 +14,7 @@ CmiInt8 localTableSize; // Handle to the test driver (chare) CProxy_TestDriver driverProxy; // Handle to the communication library (group) -CProxy_ArrayMeshStreamer aggregator; // Number of chares per PE int numElementsPerPe; @@ -49,7 +49,7 @@ public: // Instantiate communication library group with a handle to the client aggregator = - CProxy_ArrayMeshStreamer + CProxy_ArrayMeshStreamer ::ckNew(numMsgsBuffered, 2, dims, updater_array, 1); delete args; @@ -139,7 +139,7 @@ public: CmiUInt8 key = HPCC_starts(4 * globalStartmyProc); // Get a pointer to the local communication 
library object // from its proxy handle - ArrayMeshStreamer + ArrayMeshStreamer * localAggregator = aggregator.ckLocalBranch(); // Generate this chare's share of global updates diff --git a/examples/charm++/TRAM/randomAccessArray/randomAccess.ci b/examples/charm++/TRAM/randomAccessArray/randomAccess.ci index 5d3d3552c5..bdca1e52f5 100644 --- a/examples/charm++/TRAM/randomAccessArray/randomAccess.ci +++ b/examples/charm++/TRAM/randomAccessArray/randomAccess.ci @@ -7,7 +7,7 @@ mainmodule randomAccess { // Handle to the test driver (chare) readonly CProxy_TestDriver driverProxy; // Handle to the communication library (group) - readonly CProxy_ArrayMeshStreamer aggregator; // Number of chares per PE readonly int numElementsPerPe; @@ -29,8 +29,8 @@ mainmodule randomAccess { }; // Setup required for the communication library - message MeshStreamerMessage >; - group ArrayMeshStreamer; - group MeshStreamer, SimpleMeshRouter>; + message MeshStreamerMessage >; + group ArrayMeshStreamer; + group MeshStreamer, SimpleMeshRouter>; }; diff --git a/src/libs/ck-libs/NDMeshStreamer/NDMeshStreamer.C b/src/libs/ck-libs/NDMeshStreamer/NDMeshStreamer.C index 278e006627..2537f954d7 100644 --- a/src/libs/ck-libs/NDMeshStreamer/NDMeshStreamer.C +++ b/src/libs/ck-libs/NDMeshStreamer/NDMeshStreamer.C @@ -2,3 +2,29 @@ #include "NDMeshStreamer.h" #include "NDMeshStreamer.def.h" + +//below code initializes the templated static variables from the header +CkArrayIndex1D TramBroadcastInstance::value=TRAM_BROADCAST; + +CkArrayIndex2D TramBroadcastInstance::value=CkArrayIndex2D(TRAM_BROADCAST,TRAM_BROADCAST); + +CkArrayIndex3D TramBroadcastInstance::value=CkArrayIndex3D(TRAM_BROADCAST,TRAM_BROADCAST,TRAM_BROADCAST); + +CkArrayIndex4D TramBroadcastInstance::value=CkArrayIndex4D(TRAM_BROADCAST,TRAM_BROADCAST,TRAM_BROADCAST,TRAM_BROADCAST); + +CkArrayIndex5D TramBroadcastInstance::value=CkArrayIndex5D(TRAM_BROADCAST,TRAM_BROADCAST,TRAM_BROADCAST,TRAM_BROADCAST,TRAM_BROADCAST); + +CkArrayIndex6D 
TramBroadcastInstance::value=CkArrayIndex6D(TRAM_BROADCAST,TRAM_BROADCAST,TRAM_BROADCAST,TRAM_BROADCAST,TRAM_BROADCAST,TRAM_BROADCAST); + +//Below code picks the appropriate TRAM_BROADCAST index value +CkArrayIndex& TramBroadcastInstance::value(int dims) { + switch(dims) { + case 1: return TramBroadcastInstance::value; + case 2: return TramBroadcastInstance::value; + case 3: return TramBroadcastInstance::value; + case 4: return TramBroadcastInstance::value; + case 5: return TramBroadcastInstance::value; + case 6: return TramBroadcastInstance::value; + default: CmiAbort("TRAM only supports 1-6D arrays\n"); + } +}; diff --git a/src/libs/ck-libs/NDMeshStreamer/NDMeshStreamer.h b/src/libs/ck-libs/NDMeshStreamer/NDMeshStreamer.h index 560e0cfa7a..3de3c6c2f8 100644 --- a/src/libs/ck-libs/NDMeshStreamer/NDMeshStreamer.h +++ b/src/libs/ck-libs/NDMeshStreamer/NDMeshStreamer.h @@ -26,6 +26,44 @@ extern void QdCreate(int n); extern void QdProcess(int n); +//below code uses templates to generate appropriate TRAM_BROADCAST array index values +template +struct TramBroadcastInstance; + +template<> +struct TramBroadcastInstance{ + static CkArrayIndex1D value; +}; + +template<> +struct TramBroadcastInstance{ + static CkArrayIndex2D value; +}; + +template<> +struct TramBroadcastInstance{ + static CkArrayIndex3D value; +}; + +template<> +struct TramBroadcastInstance{ + static CkArrayIndex4D value; +}; + +template<> +struct TramBroadcastInstance{ + static CkArrayIndex5D value; +}; + +template<> +struct TramBroadcastInstance{ + static CkArrayIndex6D value; +}; + +template<> +struct TramBroadcastInstance{ + static CkArrayIndex& value(int); +}; template class MeshStreamerMessage : public CMessage_MeshStreamerMessage { @@ -1002,7 +1040,7 @@ private: void localDeliver(const ArrayDataItem& packedDataItem) { itype arrayId = packedDataItem.arrayIndex; - if (arrayId == itype(TRAM_BROADCAST)) { + if (arrayId == TramBroadcastInstance::value(arrayId.dimension)) { localBroadcast(packedDataItem); 
return; } @@ -1211,12 +1249,12 @@ public: void processLocationRequest(itype arrayId, int deliveredToPe, int sourcePe) { int ownerPe = clientArrayMgr_->lastKnown((CkArrayIndex)arrayId); this->thisProxy[deliveredToPe].resendMisdeliveredItems(arrayId, ownerPe); - this->thisProxy[sourcePe].updateLocationAtSource(arrayId, sourcePe); + this->thisProxy[sourcePe].updateLocationAtSource(arrayId, ownerPe); } void resendMisdeliveredItems(itype arrayId, int destinationPe) { - clientLocMgr_->updateLocation(arrayId, destinationPe); + clientLocMgr_->updateLocation(arrayId, clientLocMgr_->lookupID(arrayId),destinationPe); std::vector > &bufferedItems = misdeliveredItems[arrayId]; @@ -1235,7 +1273,7 @@ public: int prevOwner = clientArrayMgr_->lastKnown((CkArrayIndex)arrayId); if (prevOwner != destinationPe) { - clientLocMgr_->updateLocation(arrayId, destinationPe); + clientLocMgr_->updateLocation(arrayId,clientLocMgr_->lookupID(arrayId), destinationPe); // it is possible to also fix destinations of items buffered for arrayId, // but the search could be expensive; instead, with the current code diff --git a/src/xlat-i/xi-Entry.C b/src/xlat-i/xi-Entry.C index 3e741034dd..44a3f7cae3 100644 --- a/src/xlat-i/xi-Entry.C +++ b/src/xlat-i/xi-Entry.C @@ -981,11 +981,7 @@ XStr Entry::aggregatorIndexType() { } else if (container->isArray()) { XStr dim, arrayIndexType; dim << ((Array*)container)->dim(); - if (dim == "1D") { - indexType << "int"; - } else { - indexType << "CkArrayIndex"; - } + indexType << "CkArrayIndex"; } return indexType; } @@ -1088,13 +1084,7 @@ void Entry::genTramDefs(XStr& str) { str << " const CkArrayIndex &myIndex = ckGetIndex();\n" << " " << aggregatorName() << "->insertData<" << (isInline() ? 
"true" : "false") << ">(" << param->param->name; - if (dim == (const char*)"1D") { - str << ", " - << "myIndex.data()[0]);\n}\n"; - } else { - str << ", " - << "myIndex);\n}\n"; - } + str << ", " << "myIndex);\n}\n"; } } diff --git a/tests/charm++/randomTRAM3D/Makefile b/tests/charm++/randomTRAM3D/Makefile new file mode 100644 index 0000000000..37632e009f --- /dev/null +++ b/tests/charm++/randomTRAM3D/Makefile @@ -0,0 +1,22 @@ +-include ../../common.mk +-include ../../../include/conv-mach-opt.mak +CHARMC = ../../../bin/charmc $(OPTS) + +OBJS = tram3d.o + +all: tram3d + +tram3d: tram3d.o + $(CHARMC) $(CHARMCFLAGS) -language charm++ -o tram3d tram3d.o -module NDMeshStreamer + +tram3d.def.h: tram3d.ci + $(CHARMC) $(CHARMCFLAGS) tram3d.ci + +tram3d.o: tram3d.C tram3d.def.h + $(CHARMC) $(CHARMCFLAGS) -c tram3d.C + +test: tram3d + $(call run, ./tram3d +p4 ) + +clean: + rm -f *.o *.decl.h *.def.h tram3d charmrun* diff --git a/tests/charm++/randomTRAM3D/tram3d.C b/tests/charm++/randomTRAM3D/tram3d.C new file mode 100644 index 0000000000..24cf0597ec --- /dev/null +++ b/tests/charm++/randomTRAM3D/tram3d.C @@ -0,0 +1,95 @@ +#include "tram3d.decl.h" +#include +#include +#include +#include +CProxy_main master; //readonly +class main : public CBase_main +{ + CProxy_Test blocks; + int N; + + public: + main(CkArgMsg* args) + { + N = 2; + CkArrayOptions opts; + opts.setBounds(N, N, N); + blocks = CProxy_Test::ckNew(opts); + std::mt19937 engine(37); // arbitrarily selected constant seed for reproducibility + std::uniform_int_distribution<> distro(INT_MIN, INT_MAX); + master = thisProxy; + for (int i = 0; i != N; ++i) + { + for (int j = 0; j != N; ++j) + { + for (int k = 0; k != N; ++k) + { + blocks(i, j, k).insert(distro(engine), N); + } + } + } + blocks.doneInserting(); + blocks.run(); + delete args; + } + void endexec(int val) + { + /* The test has 3 phases: initialization, distribution, and 
+ + In phase 1, the main thread generates a sequence of starter + values for each element of the array, using a well-defined + constant as the initial seed. Each thread stores a series of + N*N*N values generated by incrementing the starter values they + received. + + In phase 2, all the threads redistribute their values by sending + them to elements of the array based on their indices(including + themselves). + + In phase 3, after all the threads have received the new values, + they contribute the minimum of all the values they received into + a sum-reduction. + + The value below is obtained by running the test code without + TRAM enabled, with the same deterministic seed. + */ + if (val != 488803188) + { + CkAbort("Messages not delivered correctly!"); + } + else + { + CkPrintf("The sum of minimal values across chares is %d\n", val); + CkExit(); + } + } +}; +class Test : public CBase_Test +{ + Test_SDAG_CODE + std::vector values; + std::vector recvd; + int N; + int count1, count2, count3; + + public: + Test() {} + Test(int seed, int N) + : values([this, seed, N]() mutable { + std::vector temp; + temp.reserve(N * N * N); + std::generate_n(std::back_inserter(temp), N * N * N, + [seed]() mutable { return seed++; }); + return temp; + }()), + N(N), + count1(0), + count2(0), + count3(0) + { + recvd.reserve(N * N * N); + } +}; +#include "tram3d.def.h" diff --git a/tests/charm++/randomTRAM3D/tram3d.ci b/tests/charm++/randomTRAM3D/tram3d.ci new file mode 100644 index 0000000000..5cb92cc874 --- /dev/null +++ b/tests/charm++/randomTRAM3D/tram3d.ci @@ -0,0 +1,32 @@ +mainmodule tram3d { + mainchare main { + entry main(CkArgMsg*); + entry [reductiontarget] void endexec(int); + }; + readonly CProxy_main master; + array [3D] Test { + entry Test(); + entry Test(int,int); + entry void run() { + for (count1=0;count1!=N;++count1) { + for (count2=0;count2!=N;++count2) { + for (count3=0;count3!=N;++count3) serial { + 
thisProxy(count1,count2,count3).clock(values[count1*N*N+count2*N+count3]); + } + } + } + for (count1=0;count1!=N*N*N;++count1) { + when clock(int j) serial { + recvd.emplace_back(j); + } + } + serial { + auto min_iter=std::min_element(recvd.begin(),recvd.end()); + CkCallback cb(CkReductionTarget(main,endexec),master); + contribute(sizeof(int),&(*min_iter),CkReduction::sum_int,cb); + //addressof and dereference done since iterators != pointers + } + } + entry [aggregate] void clock(int); + }; +}; -- 2.11.4.GIT