From 1f8cf51051de7b3f087ddda30df75dafd6d30296 Mon Sep 17 00:00:00 2001 From: Sam White Date: Mon, 24 Sep 2018 14:20:32 -0500 Subject: [PATCH] AMPI: store predefined ops and types per-process rather than per-rank - Split storage of predefined ops and datatypes from user-defined ones, storing the predefined ones in static const arrays, in order to reduce memory overhead. - Rename CkDDT_MAX_PRIMITIVE_TYPE and CkDDT_MAX_BASIC_TYPE to AMPI_*, and fix definition of BASIC type to include MPI_UB and MPI_LB. Change-Id: I490a4c4d50e4fc1a1fb2c39314baa4b08640aa96 --- src/libs/ck-libs/ampi/ampi.C | 57 ++++++------ src/libs/ck-libs/ampi/ampi.h | 19 ++-- src/libs/ck-libs/ampi/ampiOneSided.C | 20 ++--- src/libs/ck-libs/ampi/ampiimpl.h | 70 ++++++++++----- src/libs/ck-libs/ampi/ddt.C | 114 ++++++++++++++---------- src/libs/ck-libs/ampi/ddt.h | 167 +++++++++++++++++++---------------- 6 files changed, 263 insertions(+), 184 deletions(-) diff --git a/src/libs/ck-libs/ampi/ampi.C b/src/libs/ck-libs/ampi/ampi.C index 5b1752ea52..8023210b86 100644 --- a/src/libs/ck-libs/ampi/ampi.C +++ b/src/libs/ck-libs/ampi/ampi.C @@ -885,6 +885,27 @@ static int AMPI_threadstart_idx = -1; CsvExtern(funcmap*, tcharm_funcmap); #endif +// Predefined datatypes and ops are read-only, so store them only once per process here: +static const std::array ampiPredefinedTypes = CkDDT::createPredefinedTypes(); + +// ampiPredefinedOps is indexed directly by MPI_Op value, so entries must stay in ampi.h order (MPI_MAX .. MPI_NO_OP). +static constexpr std::array ampiPredefinedOps = {{ + MPI_MAX_USER_FN, + MPI_MIN_USER_FN, + MPI_SUM_USER_FN, + MPI_PROD_USER_FN, + MPI_LAND_USER_FN, + MPI_BAND_USER_FN, + MPI_LOR_USER_FN, + MPI_BOR_USER_FN, + MPI_LXOR_USER_FN, + MPI_BXOR_USER_FN, + MPI_MAXLOC_USER_FN, + MPI_MINLOC_USER_FN, + MPI_REPLACE_USER_FN, + MPI_NO_OP_USER_FN +}}; + static void ampiNodeInit() noexcept { #if CMK_TRACE_ENABLED @@ -1073,25 +1092,6 @@ void ampiCreateMain(MPI_MainFn mainFn, const char *name,int nameLen) static CProxy_ampiWorlds ampiWorldsGroup; -void ampiParent::initOps() noexcept -{ - ops.resize(MPI_NO_OP+1); - ops[MPI_MAX] = 
OpStruct(MPI_MAX_USER_FN); - ops[MPI_MIN] = OpStruct(MPI_MIN_USER_FN); - ops[MPI_SUM] = OpStruct(MPI_SUM_USER_FN); - ops[MPI_PROD] = OpStruct(MPI_PROD_USER_FN); - ops[MPI_LAND] = OpStruct(MPI_LAND_USER_FN); - ops[MPI_BAND] = OpStruct(MPI_BAND_USER_FN); - ops[MPI_LOR] = OpStruct(MPI_LOR_USER_FN); - ops[MPI_BOR] = OpStruct(MPI_BOR_USER_FN); - ops[MPI_LXOR] = OpStruct(MPI_LXOR_USER_FN); - ops[MPI_BXOR] = OpStruct(MPI_BXOR_USER_FN); - ops[MPI_MAXLOC] = OpStruct(MPI_MAXLOC_USER_FN); - ops[MPI_MINLOC] = OpStruct(MPI_MINLOC_USER_FN); - ops[MPI_REPLACE] = OpStruct(MPI_REPLACE_USER_FN); - ops[MPI_NO_OP] = OpStruct(MPI_NO_OP_USER_FN); -} - // Create MPI_COMM_SELF from MPI_COMM_WORLD static void createCommSelf() noexcept { STARTUP_DEBUG("ampiInit> creating MPI_COMM_SELF") @@ -1182,12 +1182,12 @@ static ampi *ampiInit(char **argv) noexcept TRACE_BG_ADD_TAG("AMPI_START"); #endif - getAmpiParent()->initOps(); // initialize reduction operations - vector& keyvals = getAmpiParent()->getKeyvals(MPI_COMM_WORLD); - getAmpiParent()->setAttr(MPI_COMM_WORLD, keyvals, MPI_UNIVERSE_SIZE, &_nchunks); + ampiParent* pptr = getAmpiParent(); + vector& keyvals = pptr->getKeyvals(MPI_COMM_WORLD); + pptr->setAttr(MPI_COMM_WORLD, keyvals, MPI_UNIVERSE_SIZE, &_nchunks); ptr->setCommName("MPI_COMM_WORLD"); - getAmpiParent()->ampiInitCallDone = 0; + pptr->ampiInitCallDone = 0; CProxy_ampi cbproxy = ptr->getProxy(); CkCallback cb(CkReductionTarget(ampi, allInitDone), cbproxy[0]); @@ -1231,7 +1231,8 @@ class ampiWorlds : public CBase_ampiWorlds { //-------------------- ampiParent ------------------------- ampiParent::ampiParent(MPI_Comm worldNo_,CProxy_TCharm threads_,int nRanks_) noexcept -:threads(threads_), worldNo(worldNo_), isTmpRProxySet(false), ampiReqs(64, &reqPool) + : threads(threads_), worldNo(worldNo_), isTmpRProxySet(false), ampiReqs(64, &reqPool), + myDDT(ampiPredefinedTypes), predefinedOps(ampiPredefinedOps) { int barrier = 0x1234; STARTUP_DEBUG("ampiParent> starting up") @@ -1258,7 
+1259,9 @@ ampiParent::ampiParent(MPI_Comm worldNo_,CProxy_TCharm threads_,int nRanks_) noe #endif } -ampiParent::ampiParent(CkMigrateMessage *msg) noexcept :CBase_ampiParent(msg) { +ampiParent::ampiParent(CkMigrateMessage *msg) noexcept + : CBase_ampiParent(msg), myDDT(ampiPredefinedTypes), predefinedOps(ampiPredefinedOps) +{ thread=NULL; worldPtr=NULL; @@ -1287,7 +1290,7 @@ void ampiParent::pup(PUP::er &p) noexcept { p|groups; p|winStructList; p|infos; - p|ops; + p|userOps; p|reqPool; ampiReqs.pup(p, &reqPool); @@ -6481,7 +6484,7 @@ AMPI_API_IMPL(int, MPI_Type_free, MPI_Datatype *datatype) if (datatype == nullptr) { return ampiErrhandler("AMPI_Type_free", MPI_ERR_ARG); - } else if (*datatype <= CkDDT_MAX_PRIMITIVE_TYPE) { + } else if (*datatype <= AMPI_MAX_PREDEFINED_TYPE) { return ampiErrhandler("AMPI_Type_free", MPI_ERR_TYPE); } #endif diff --git a/src/libs/ck-libs/ampi/ampi.h b/src/libs/ck-libs/ampi/ampi.h index 303cb520d4..0ff38d8c9c 100644 --- a/src/libs/ck-libs/ampi/ampi.h +++ b/src/libs/ck-libs/ampi/ampi.h @@ -196,15 +196,14 @@ typedef long long int MPI_Offset; #define MPI_UINT64_T 26 #define MPI_AINT 27 #define MPI_COUNT MPI_AINT +#define MPI_LB 28 +#define MPI_UB 29 /* - * CkDDT_MAX_BASIC_TYPE is defined in ddt.h + * AMPI_MAX_BASIC_TYPE is defined in ddt.h * and is tied to the above values, if the above * indexes change or values are added/deleted - * you may need to change CkDDT_MAX_BASIC_TYPE + * you may need to change AMPI_MAX_BASIC_TYPE */ -/* special types */ -#define MPI_LB 28 -#define MPI_UB 29 /* tuple types */ #define MPI_FLOAT_INT 30 #define MPI_DOUBLE_INT 31 @@ -220,10 +219,10 @@ typedef long long int MPI_Offset; #define MPI_DOUBLE_COMPLEX 40 #define MPI_LONG_DOUBLE_COMPLEX 41 /* - * CkDDT_MAX_PRIMITIVE_TYPE is defined in ddt.h + * AMPI_MAX_PREDEFINED_TYPE is defined in ddt.h * and is tied to the above values, if the above * indexes change or values are added/deleted - * you may need to change CkDDT_MAX_PRIMITIVE_TYPE + * you may need to 
change AMPI_MAX_PREDEFINED_TYPE */ #define MPI_ANY_TAG MPI_TAG_UB_VALUE+1 @@ -272,6 +271,12 @@ typedef void (MPI_User_function)(void *invec, void *inoutvec, #define MPI_MINLOC 11 #define MPI_REPLACE 12 #define MPI_NO_OP 13 +/* + * AMPI_MAX_PREDEFINED_OP is defined in ampiimpl.h + * and is tied to the above values, if the above + * indexes change or values are added/deleted + * you may need to change AMPI_MAX_PREDEFINED_OP + */ #define MPI_UNWEIGHTED 0 #define MPI_CART 1 diff --git a/src/libs/ck-libs/ampi/ampiOneSided.C b/src/libs/ck-libs/ampi/ampiOneSided.C index 97b20913cc..f852577a0c 100644 --- a/src/libs/ck-libs/ampi/ampiOneSided.C +++ b/src/libs/ck-libs/ampi/ampiOneSided.C @@ -767,7 +767,7 @@ AMPI_API_IMPL(int, MPI_Put, const void *orgaddr, int orgcnt, MPI_Datatype orgtyp MPI_Aint targdisp, int targcnt, MPI_Datatype targtype, MPI_Win win) { AMPI_API("AMPI_Put"); - if (targtype > CkDDT_MAX_PRIMITIVE_TYPE) {CkAbort("AMPI does not currently support RMA with derived datatypes.");} + if (targtype > AMPI_MAX_PREDEFINED_TYPE) {CkAbort("AMPI does not currently support RMA with derived datatypes.");} handle_MPI_BOTTOM((void*&)orgaddr, orgtype); WinStruct *winStruct = getAmpiParent()->getWinStruct(win); ampi *ptr = getAmpiInstance(winStruct->comm); @@ -784,7 +784,7 @@ AMPI_API_IMPL(int, MPI_Get, void *orgaddr, int orgcnt, MPI_Datatype orgtype, int MPI_Win win) { AMPI_API("AMPI_Get"); - if (targtype > CkDDT_MAX_PRIMITIVE_TYPE) {CkAbort("AMPI does not currently support RMA with derived datatypes.");} + if (targtype > AMPI_MAX_PREDEFINED_TYPE) {CkAbort("AMPI does not currently support RMA with derived datatypes.");} handle_MPI_BOTTOM(orgaddr, orgtype); WinStruct *winStruct = getAmpiParent()->getWinStruct(win); ampi *ptr = getAmpiInstance(winStruct->comm); @@ -807,7 +807,7 @@ AMPI_API_IMPL(int, MPI_Accumulate, const void *orgaddr, int orgcnt, MPI_Datatype MPI_Datatype targtype, MPI_Op op, MPI_Win win) { AMPI_API("AMPI_Accumulate"); - if (targtype > 
CkDDT_MAX_PRIMITIVE_TYPE) {CkAbort("AMPI does not currently support RMA with derived datatypes.");} + if (targtype > AMPI_MAX_PREDEFINED_TYPE) {CkAbort("AMPI does not currently support RMA with derived datatypes.");} handle_MPI_BOTTOM((void*&)orgaddr, orgtype); WinStruct *winStruct = getAmpiParent()->getWinStruct(win); ampi *ptr = getAmpiInstance(winStruct->comm); @@ -828,7 +828,7 @@ AMPI_API_IMPL(int, MPI_Get_accumulate, const void *orgaddr, int orgcnt, MPI_Data MPI_Datatype targtype, MPI_Op op, MPI_Win win) { AMPI_API("AMPI_Get_accumulate"); - if (targtype > CkDDT_MAX_PRIMITIVE_TYPE) {CkAbort("AMPI does not currently support RMA with derived datatypes.");} + if (targtype > AMPI_MAX_PREDEFINED_TYPE) {CkAbort("AMPI does not currently support RMA with derived datatypes.");} handle_MPI_BOTTOM((void*&)orgaddr, orgtype); WinStruct *winStruct = getAmpiParent()->getWinStruct(win); ampi *ptr = getAmpiInstance(winStruct->comm); @@ -849,7 +849,7 @@ AMPI_API_IMPL(int, MPI_Rput, const void *orgaddr, int orgcnt, MPI_Datatype orgty MPI_Request *request) { AMPI_API("AMPI_Rput"); - if (targtype > CkDDT_MAX_PRIMITIVE_TYPE) {CkAbort("AMPI does not currently support RMA with derived datatypes.");} + if (targtype > AMPI_MAX_PREDEFINED_TYPE) {CkAbort("AMPI does not currently support RMA with derived datatypes.");} WinStruct *winStruct = getAmpiParent()->getWinStruct(win); ampi *ptr = getAmpiInstance(winStruct->comm); *request = ptr->postReq(getAmpiParent()->reqPool.newSendReq(orgtype, winStruct->comm, ptr->getDDT(), AMPI_REQ_COMPLETED)); @@ -868,7 +868,7 @@ AMPI_API_IMPL(int, MPI_Rget, void *orgaddr, int orgcnt, MPI_Datatype orgtype, in MPI_Win win, MPI_Request *request) { AMPI_API("AMPI_Rget"); - if (targtype > CkDDT_MAX_PRIMITIVE_TYPE) {CkAbort("AMPI does not currently support RMA with derived datatypes.");} + if (targtype > AMPI_MAX_PREDEFINED_TYPE) {CkAbort("AMPI does not currently support RMA with derived datatypes.");} WinStruct *winStruct = getAmpiParent()->getWinStruct(win); 
ampi *ptr = getAmpiInstance(winStruct->comm); *request = ptr->postReq(getAmpiParent()->reqPool.newSendReq(orgtype, winStruct->comm, ptr->getDDT(), AMPI_REQ_COMPLETED)); @@ -888,7 +888,7 @@ AMPI_API_IMPL(int, MPI_Raccumulate, const void *orgaddr, int orgcnt, MPI_Datatyp MPI_Op op, MPI_Win win, MPI_Request *request) { AMPI_API("AMPI_Raccumulate"); - if (targtype > CkDDT_MAX_PRIMITIVE_TYPE) {CkAbort("AMPI does not currently support RMA with derived datatypes.");} + if (targtype > AMPI_MAX_PREDEFINED_TYPE) {CkAbort("AMPI does not currently support RMA with derived datatypes.");} WinStruct *winStruct = getAmpiParent()->getWinStruct(win); ampi *ptr = getAmpiInstance(winStruct->comm); *request = ptr->postReq(getAmpiParent()->reqPool.newSendReq(orgtype, winStruct->comm, ptr->getDDT(), AMPI_REQ_COMPLETED)); @@ -911,7 +911,7 @@ AMPI_API_IMPL(int, MPI_Rget_accumulate, const void *orgaddr, int orgcnt, MPI_Dat MPI_Request *request) { AMPI_API("AMPI_Rget_accumulate"); - if (targtype > CkDDT_MAX_PRIMITIVE_TYPE) {CkAbort("AMPI does not currently support RMA with derived datatypes.");} + if (targtype > AMPI_MAX_PREDEFINED_TYPE) {CkAbort("AMPI does not currently support RMA with derived datatypes.");} WinStruct *winStruct = getAmpiParent()->getWinStruct(win); ampi *ptr = getAmpiInstance(winStruct->comm); *request = ptr->postReq(getAmpiParent()->reqPool.newSendReq(orgtype, winStruct->comm, ptr->getDDT(), AMPI_REQ_COMPLETED)); @@ -929,7 +929,7 @@ AMPI_API_IMPL(int, MPI_Fetch_and_op, const void *orgaddr, void *resaddr, MPI_Dat { AMPI_API("AMPI_Fetch_and_op"); #if AMPI_ERROR_CHECKING - if (type > CkDDT_MAX_PRIMITIVE_TYPE) + if (type > AMPI_MAX_PREDEFINED_TYPE) { return ampiErrhandler("AMPI_Fetch_and_op", MPI_ERR_UNSUPPORTED_OPERATION); } @@ -952,7 +952,7 @@ AMPI_API_IMPL(int, MPI_Compare_and_swap, const void *orgaddr, const void *compad { AMPI_API("AMPI_Compare_and_swap"); #if AMPI_ERROR_CHECKING - if (type > CkDDT_MAX_PRIMITIVE_TYPE) + if (type > AMPI_MAX_PREDEFINED_TYPE) { return 
ampiErrhandler("AMPI_Compare_and_swap", MPI_ERR_UNSUPPORTED_OPERATION); } diff --git a/src/libs/ck-libs/ampi/ampiimpl.h b/src/libs/ck-libs/ampi/ampiimpl.h index 221394f8f5..f97ecf4923 100644 --- a/src/libs/ck-libs/ampi/ampiimpl.h +++ b/src/libs/ck-libs/ampi/ampiimpl.h @@ -1996,6 +1996,9 @@ PUPmarshall(AmpiSeqQ) inline CProxy_ampi ampiCommStruct::getProxy() const noexcept {return ampiID;} const ampiCommStruct &universeComm2CommStruct(MPI_Comm universeNo) noexcept; +// Max value of a predefined MPI_Op (values defined in ampi.h) +#define AMPI_MAX_PREDEFINED_OP 13 + /* An ampiParent holds all the communicators and the TCharm thread for its children, which are bound to it. @@ -2028,7 +2031,8 @@ class ampiParent final : public CBase_ampiParent { CkPupPtrVec groups; // "Wild" groups that don't have a communicator CkPupPtrVec winStructList; //List of windows for one-sided communication CkPupPtrVec infos; // list of all MPI_Infos - vector ops; // list of all MPI_Ops + const std::array& predefinedOps; // owned by ampiNodeMgr + vector userOps; // list of any user-defined MPI_Ops vector matchedMsgs; // for use with MPI_Mprobe and MPI_Mrecv /* MPI_*_get_attr C binding returns a *pointer* to an integer, @@ -2352,44 +2356,68 @@ class ampiParent final : public CBase_ampiParent { void defineInfoEnv(int nRanks_) noexcept; void defineInfoMigration() noexcept; - void initOps(void) noexcept; + // An 'MPI_Op' is an integer that indexes into either: + // A) an array of predefined ops owned by ampiNodeMgr, or + // B) a vector of user-defined ops owned by ampiParent + // The MPI_Op is compared to AMPI_MAX_PREDEFINED_OP to disambiguate. 
inline int createOp(MPI_User_function *fn, bool isCommutative) noexcept { // Search thru non-predefined op's for any invalidated ones: - for (int i=MPI_NO_OP+1; i MPI_NO_OP) { - // Invalidate op, then free all invalid op's from the back of the op's vector - ops[op].free(); - while (ops.back().isFree()) { - ops.pop_back(); + if (!opIsPredefined(op)) { + // Invalidate op, then free all invalid op's from the back of the userOp's vector + int opIdx = op - 1 - AMPI_MAX_PREDEFINED_OP; + CkAssert(opIdx < userOps.size()); + userOps[opIdx].free(); + while (!userOps.empty() && userOps.back().isFree()) { + userOps.pop_back(); } } } inline bool opIsPredefined(MPI_Op op) const noexcept { - return (op>=MPI_OP_NULL && op<=MPI_NO_OP); + return (op <= AMPI_MAX_PREDEFINED_OP); } inline bool opIsCommutative(MPI_Op op) const noexcept { - CkAssert(op>MPI_OP_NULL && opMPI_OP_NULL && opMPI_OP_NULL && opgetSize(count); - return AmpiOpHeader(ops[op].func, type, count, size); + if (opIsPredefined(op)) { + int size = myDDT.getType(type)->getSize(count); + return AmpiOpHeader(predefinedOps[op], type, count, size); + } + else { + int opIdx = op - 1 - AMPI_MAX_PREDEFINED_OP; + CkAssert(opIdx < userOps.size()); + int size = myDDT.getType(type)->getSize(count); + return AmpiOpHeader(userOps[opIdx].func, type, count, size); + } } inline void applyOp(MPI_Datatype datatype, MPI_Op op, int count, const void* invec, void* inoutvec) const noexcept { // inoutvec[i] = invec[i] op inoutvec[i] diff --git a/src/libs/ck-libs/ampi/ddt.C b/src/libs/ck-libs/ampi/ddt.C index f0be82a606..aacd2d0d31 100644 --- a/src/libs/ck-libs/ampi/ddt.C +++ b/src/libs/ck-libs/ampi/ddt.C @@ -9,37 +9,37 @@ CkDDT::pup(PUP::er &p) noexcept { p|types; if (p.isUnpacking()) { - typeTable.resize(types.size(), nullptr); + userTypeTable.resize(types.size(), nullptr); for (int i=0; ipupType(p, this); + userTypeTable[i]->pupType(p, this); } } } @@ -55,41 +55,43 @@ CkDDT::pup(PUP::er &p) noexcept void CkDDT::freeType(int index) noexcept { - 
CkAssert(types.size() == typeTable.size()); - if (index > CkDDT_MAX_PRIMITIVE_TYPE) { + CkAssert(types.size() == userTypeTable.size()); + if (index > AMPI_MAX_PREDEFINED_TYPE) { + int idx = index - AMPI_MAX_PREDEFINED_TYPE - 1; // Decrement the ref count and free the type if there are no references to it. - if (typeTable[index]->decRefCount() == 0) { + if (userTypeTable[idx]->decRefCount() == 0) { // Remove a reference from this type's base type(s). - if (typeTable[index]->getType() == CkDDT_STRUCT) { - int count = typeTable[index]->getCount(); - vector &baseIndices = static_cast(*typeTable[index]).getBaseIndices(); + if (userTypeTable[idx]->getType() == CkDDT_STRUCT) { + int count = userTypeTable[idx]->getCount(); + vector &baseIndices = static_cast(*userTypeTable[idx]).getBaseIndices(); for (int i=0; igetBaseIndex()); + freeType(userTypeTable[idx]->getBaseIndex()); } // Free non-primitive type - delete typeTable[index]; - typeTable[index] = nullptr; - types[index] = MPI_DATATYPE_NULL; - // Free all NULL types from back of typeTable - while (typeTable.back() == nullptr) { - typeTable.pop_back(); + delete userTypeTable[idx]; + userTypeTable[idx] = nullptr; + types[idx] = MPI_DATATYPE_NULL; + // Free all NULL types from back of userTypeTable + while (!userTypeTable.empty() && userTypeTable.back() == nullptr) { + userTypeTable.pop_back(); CkAssert(types.back() == MPI_DATATYPE_NULL); types.pop_back(); } } } + CkAssert(types.size() == userTypeTable.size()); } CkDDT::~CkDDT() noexcept { - for (int i=0; igetType()) { case CkDDT_CONTIGUOUS: type = new CkDDT_Contiguous(static_cast (*dttype)); + typeClass = CkDDT_CONTIGUOUS; break; case CkDDT_VECTOR: type = new CkDDT_Vector(static_cast (*dttype)); + typeClass = CkDDT_VECTOR; break; case CkDDT_HVECTOR: type = new CkDDT_HVector(static_cast (*dttype)); + typeClass = CkDDT_HVECTOR; break; case CkDDT_INDEXED_BLOCK: type = new CkDDT_Indexed_Block(static_cast (*dttype)); + typeClass = CkDDT_INDEXED_BLOCK; break; case 
CkDDT_HINDEXED_BLOCK: type = new CkDDT_HIndexed_Block(static_cast (*dttype)); + typeClass = CkDDT_HINDEXED_BLOCK; break; case CkDDT_INDEXED: type = new CkDDT_Indexed(static_cast (*dttype)); + typeClass = CkDDT_INDEXED; break; case CkDDT_HINDEXED: type = new CkDDT_HIndexed(static_cast (*dttype)); + typeClass = CkDDT_HINDEXED; break; case CkDDT_STRUCT: type = new CkDDT_Struct(static_cast (*dttype)); + typeClass = CkDDT_STRUCT; break; default: type = new CkDDT_DataType(*dttype); + typeClass = dttype->getType(); break; } - *nIndexNew = insertType(type, types[nIndexOld]); + *nIndexNew = insertType(type, typeClass); } int @@ -180,52 +192,62 @@ CkDDT::createResized(MPI_Datatype oldtype, MPI_Aint lb, MPI_Aint extent, MPI_Dat { CkDDT_DataType *dttype = getType(oldtype); CkDDT_DataType *type; + int typeClass; switch (dttype->getType()) { case CkDDT_CONTIGUOUS: type = new CkDDT_Contiguous(static_cast(*dttype)); type->setSize(lb, extent); + typeClass = CkDDT_CONTIGUOUS; break; case CkDDT_VECTOR: type = new CkDDT_Vector(static_cast(*dttype)); type->setSize(lb, extent); + typeClass = CkDDT_VECTOR; break; case CkDDT_HVECTOR: type = new CkDDT_HVector(static_cast(*dttype)); type->setSize(lb, extent); + typeClass = CkDDT_HVECTOR; break; case CkDDT_INDEXED_BLOCK: type = new CkDDT_Indexed_Block(static_cast(*dttype)); type->setSize(lb, extent); + typeClass = CkDDT_INDEXED_BLOCK; break; case CkDDT_HINDEXED_BLOCK: type = new CkDDT_HIndexed_Block(static_cast(*dttype)); type->setSize(lb, extent); + typeClass = CkDDT_HINDEXED_BLOCK; break; case CkDDT_INDEXED: type = new CkDDT_Indexed(static_cast(*dttype)); type->setSize(lb, extent); + typeClass = CkDDT_INDEXED; break; case CkDDT_HINDEXED: type = new CkDDT_HIndexed(static_cast(*dttype)); type->setSize(lb, extent); + typeClass = CkDDT_HINDEXED; break; case CkDDT_STRUCT: type = new CkDDT_Struct(static_cast(*dttype)); type->setSize(lb, extent); + typeClass = CkDDT_STRUCT; break; default: type = new CkDDT_DataType(*dttype, lb, extent); + 
typeClass = dttype->getType(); break; } - *newType = insertType(type, types[oldtype]); + *newType = insertType(type, typeClass); } void CkDDT::newContiguous(int count, MPI_Datatype oldType, MPI_Datatype *newType) noexcept { - CkDDT_DataType *type = new CkDDT_Contiguous(count, oldType, typeTable[oldType]); + CkDDT_DataType *type = new CkDDT_Contiguous(count, oldType, getType(oldType)); *newType = insertType(type, CkDDT_CONTIGUOUS); } @@ -233,7 +255,7 @@ void CkDDT::newVector(int count, int blocklength, int stride, MPI_Datatype oldType, MPI_Datatype* newType) noexcept { - CkDDT_DataType* type = new CkDDT_Vector(count, blocklength, stride, oldType, typeTable[oldType]); + CkDDT_DataType* type = new CkDDT_Vector(count, blocklength, stride, oldType, getType(oldType)); *newType = insertType(type, CkDDT_VECTOR); } @@ -241,7 +263,7 @@ void CkDDT::newHVector(int count, int blocklength, int stride, MPI_Datatype oldtype, MPI_Datatype* newType) noexcept { - CkDDT_DataType* type = new CkDDT_HVector(count, blocklength, stride, oldtype, typeTable[oldtype]); + CkDDT_DataType* type = new CkDDT_HVector(count, blocklength, stride, oldtype, getType(oldtype)); *newType = insertType(type, CkDDT_HVECTOR); } @@ -252,7 +274,7 @@ CkDDT::newIndexedBlock(int count, int Blocklength, const int *arrDisp, MPI_Datat // Convert arrDisp from an array of int's to an array of MPI_Aint's. This is needed because // MPI_Type_create_indexed_block takes ints and MPI_Type_create_hindexed_block takes MPI_Aint's // and we use HIndexed_Block to represent both of those datatypes internally. 
- CkDDT_DataType* oldtype = typeTable[oldtypeIdx]; + CkDDT_DataType* oldtype = getType(oldtypeIdx); std::vector arrDispBytes(count); for (int i=0; i(arrDisp[i] * oldtype->getExtent()); @@ -267,7 +289,7 @@ CkDDT::newHIndexedBlock(int count, int Blocklength, const MPI_Aint *arrDisp, MPI MPI_Datatype *newType) noexcept { CkDDT_DataType *type = new CkDDT_HIndexed_Block(count, Blocklength, arrDisp, - oldtype, typeTable[oldtype]); + oldtype, getType(oldtype)); *newType = insertType(type, CkDDT_HINDEXED_BLOCK); } @@ -275,7 +297,7 @@ void CkDDT::newIndexed(int count, const int* arrbLength, MPI_Aint* arrDisp, MPI_Datatype oldtypeIdx, MPI_Datatype* newType) noexcept { - CkDDT_DataType* oldtype = typeTable[oldtypeIdx]; + CkDDT_DataType* oldtype = getType(oldtypeIdx); vector dispBytesArr(count); for (int i=0; igetExtent(); @@ -289,7 +311,7 @@ void CkDDT::newHIndexed(int count, const int* arrbLength, const MPI_Aint* arrDisp, MPI_Datatype oldtype, MPI_Datatype* newType) noexcept { - CkDDT_DataType* type = new CkDDT_HIndexed(count, arrbLength, arrDisp, oldtype, typeTable[oldtype]); + CkDDT_DataType* type = new CkDDT_HIndexed(count, arrbLength, arrDisp, oldtype, getType(oldtype)); *newType = insertType(type, CkDDT_HINDEXED); } @@ -1353,13 +1375,15 @@ CkDDT_HIndexed::getNumBasicElements(int bytes) const noexcept } CkDDT_Struct::CkDDT_Struct(int nCount, const int* arrBlock, const MPI_Aint* arrDisp, - const int *bindex, CkDDT_DataType** arrBase) noexcept + const int *bindex, CkDDT_DataType** arrBase, + const char* name/*=nullptr*/) noexcept : CkDDT_DataType(CkDDT_STRUCT, 0, 0, nCount, 0, 0, 0, 0, 0, NULL, 0, 0, 0, 0), arrayBlockLength(nCount), arrayDisplacements(nCount), index(nCount), arrayDataType(nCount) { + if (name != nullptr) setName(name); int saveExtent = 0; for (int i=0; i 0); - if (datatype > CkDDT_MAX_PRIMITIVE_TYPE) { + if (datatype > AMPI_MAX_PREDEFINED_TYPE) { refCount++; } } @@ -194,7 +195,7 @@ class CkDDT_DataType // Callers of this function should always check its 
return // value and free the type only if it returns 0. CkAssert(refCount > 0); - if (datatype > CkDDT_MAX_PRIMITIVE_TYPE) { + if (datatype > AMPI_MAX_PREDEFINED_TYPE) { return --refCount; } return -1; @@ -416,7 +417,7 @@ class CkDDT_Struct final : public CkDDT_DataType ~CkDDT_Struct() override = default; CkDDT_Struct& operator=(const CkDDT_Struct& obj) noexcept; CkDDT_Struct(int count, const int* arrBlock, const MPI_Aint* arrDisp, const int *index, - CkDDT_DataType **type) noexcept; + CkDDT_DataType **type, const char* name=nullptr) noexcept; CkDDT_Struct(const CkDDT_Struct &obj, MPI_Aint _lb, MPI_Aint _extent) noexcept; vector& getBaseIndices() noexcept { return index; } @@ -432,69 +433,82 @@ class CkDDT_Struct final : public CkDDT_DataType }; /* - * This class maintains the table of all datatypes (primitive and derived). - * The first 'CkDDT_MAX_PRIMITIVE_TYPE' entries of the table contain primitive datatypes. + * This class maintains the table of all datatypes (predefined and user-defined). * - * typeTable - holds the CkDDT_DataType object pointers - * types - used to identify which CkDDT_DataType derived class a type object is, for PUP + * predefinedTypeTable - a reference to a const array declared as a static global variable + * (to minimize per-rank memory footprint), which holds the CkDDT_DataType + * object pointers for all predefined types. 
+ * userTypeTable - a vector that holds the CkDDT_DataType object pointers for all user-defined types + * types - used to identify which CkDDT_DataType derived class a type object really is, + * for PUPing the userTypeTable */ class CkDDT { private: - vector typeTable; + const array& predefinedTypeTable; + vector userTypeTable; vector types; - void addBasic(int type) noexcept { - CkAssert(types.size() > type && types[type] == MPI_DATATYPE_NULL); - typeTable[type] = new CkDDT_DataType(type); - types[type] = type; + public: + // static methods used by ampi.C for predefined types creation: + static + void addBasic(array& predefinedTypeTable_, + int type) noexcept + { + CkAssert(type >= 0); + CkAssert(type <= AMPI_MAX_BASIC_TYPE); + CkAssert(type <= AMPI_MAX_PREDEFINED_TYPE); + predefinedTypeTable_[type] = new CkDDT_DataType(type); } - void addStruct(const char* name, int type, int val, int idx, int offset) noexcept { - CkAssert(types.size() > type && types[type] == MPI_DATATYPE_NULL); - const int bLengths[2] = {1, 1}; - MPI_Datatype bTypes[2] = {val, idx}; - CkDDT_DataType* nTypes[2] = {getType(val), getType(idx)}; - MPI_Aint offsets[2] = {0, offset}; - typeTable[type] = new CkDDT_Struct(2, bLengths, offsets, bTypes, nTypes); - typeTable[type]->setName(name); - types[type] = CkDDT_STRUCT; + static + void addStruct(array& predefinedTypeTable_, + const char* name, int type, int val, int idx, int offset) noexcept + { + CkAssert(type > AMPI_MAX_BASIC_TYPE); + CkAssert(type <= AMPI_MAX_PREDEFINED_TYPE); + const int bLengths[2] = {1, 1}; + MPI_Datatype bTypes[2] = {val, idx}; + CkDDT_DataType* nTypes[2] = {const_cast(predefinedTypeTable_[val]), const_cast(predefinedTypeTable_[idx])}; + MPI_Aint offsets[2] = {0, offset}; + predefinedTypeTable_[type] = new CkDDT_Struct(2, bLengths, offsets, bTypes, nTypes, name); } - public: - - CkDDT() noexcept : typeTable(CkDDT_MAX_PRIMITIVE_TYPE+1, nullptr), types(CkDDT_MAX_PRIMITIVE_TYPE+1, MPI_DATATYPE_NULL) + static + const array 
createPredefinedTypes() noexcept { - addBasic(MPI_DOUBLE); - addBasic(MPI_INT); - addBasic(MPI_FLOAT); - addBasic(MPI_LOGICAL); - addBasic(MPI_C_BOOL); - addBasic(MPI_CHAR); - addBasic(MPI_BYTE); - addBasic(MPI_PACKED); - addBasic(MPI_SHORT); - addBasic(MPI_LONG); - addBasic(MPI_UNSIGNED_CHAR); - addBasic(MPI_UNSIGNED_SHORT); - addBasic(MPI_UNSIGNED); - addBasic(MPI_UNSIGNED_LONG); - addBasic(MPI_LONG_DOUBLE); - addBasic(MPI_LONG_LONG_INT); - addBasic(MPI_SIGNED_CHAR); - addBasic(MPI_UNSIGNED_LONG_LONG); - addBasic(MPI_WCHAR); - addBasic(MPI_INT8_T); - addBasic(MPI_INT16_T); - addBasic(MPI_INT32_T); - addBasic(MPI_INT64_T); - addBasic(MPI_UINT8_T); - addBasic(MPI_UINT16_T); - addBasic(MPI_UINT32_T); - addBasic(MPI_UINT64_T); - addBasic(MPI_AINT); - addBasic(MPI_LB); - addBasic(MPI_UB); + array predefinedTypeTable_; + + addBasic(predefinedTypeTable_, MPI_DOUBLE); + addBasic(predefinedTypeTable_, MPI_INT); + addBasic(predefinedTypeTable_, MPI_FLOAT); + addBasic(predefinedTypeTable_, MPI_LOGICAL); + addBasic(predefinedTypeTable_, MPI_C_BOOL); + addBasic(predefinedTypeTable_, MPI_CHAR); + addBasic(predefinedTypeTable_, MPI_BYTE); + addBasic(predefinedTypeTable_, MPI_PACKED); + addBasic(predefinedTypeTable_, MPI_SHORT); + addBasic(predefinedTypeTable_, MPI_LONG); + addBasic(predefinedTypeTable_, MPI_UNSIGNED_CHAR); + addBasic(predefinedTypeTable_, MPI_UNSIGNED_SHORT); + addBasic(predefinedTypeTable_, MPI_UNSIGNED); + addBasic(predefinedTypeTable_, MPI_UNSIGNED_LONG); + addBasic(predefinedTypeTable_, MPI_LONG_DOUBLE); + addBasic(predefinedTypeTable_, MPI_LONG_LONG_INT); + addBasic(predefinedTypeTable_, MPI_SIGNED_CHAR); + addBasic(predefinedTypeTable_, MPI_UNSIGNED_LONG_LONG); + addBasic(predefinedTypeTable_, MPI_WCHAR); + addBasic(predefinedTypeTable_, MPI_INT8_T); + addBasic(predefinedTypeTable_, MPI_INT16_T); + addBasic(predefinedTypeTable_, MPI_INT32_T); + addBasic(predefinedTypeTable_, MPI_INT64_T); + addBasic(predefinedTypeTable_, MPI_UINT8_T); + 
addBasic(predefinedTypeTable_, MPI_UINT16_T); + addBasic(predefinedTypeTable_, MPI_UINT32_T); + addBasic(predefinedTypeTable_, MPI_UINT64_T); + addBasic(predefinedTypeTable_, MPI_AINT); + addBasic(predefinedTypeTable_, MPI_LB); + addBasic(predefinedTypeTable_, MPI_UB); /* * The following types have multiple elements, for serialize to know where to write data @@ -503,46 +517,48 @@ class CkDDT // Contiguous: typedef struct { int val; int idx; } IntInt; - addStruct("MPI_2INT", MPI_2INT, MPI_INT, MPI_INT, offsetof(IntInt, idx)); + addStruct(predefinedTypeTable_, "MPI_2INT", MPI_2INT, MPI_INT, MPI_INT, offsetof(IntInt, idx)); typedef struct { float val; float idx; } FloatFloat; - addStruct("MPI_2FLOAT", MPI_2FLOAT, MPI_FLOAT, MPI_FLOAT, offsetof(FloatFloat, idx)); + addStruct(predefinedTypeTable_, "MPI_2FLOAT", MPI_2FLOAT, MPI_FLOAT, MPI_FLOAT, offsetof(FloatFloat, idx)); typedef struct { double val; double idx; } DoubleDouble; - addStruct("MPI_2DOUBLE", MPI_2DOUBLE, MPI_DOUBLE, MPI_DOUBLE, offsetof(DoubleDouble, idx)); + addStruct(predefinedTypeTable_, "MPI_2DOUBLE", MPI_2DOUBLE, MPI_DOUBLE, MPI_DOUBLE, offsetof(DoubleDouble, idx)); typedef struct { float val; int idx; } FloatInt; - addStruct("MPI_FLOAT_INT", MPI_FLOAT_INT, MPI_FLOAT, MPI_INT, offsetof(FloatInt, idx)); + addStruct(predefinedTypeTable_, "MPI_FLOAT_INT", MPI_FLOAT_INT, MPI_FLOAT, MPI_INT, offsetof(FloatInt, idx)); // Non-contiguous: typedef struct { double val; int idx; } DoubleInt; - addStruct("MPI_DOUBLE_INT", MPI_DOUBLE_INT, MPI_DOUBLE, MPI_INT, offsetof(DoubleInt, idx)); + addStruct(predefinedTypeTable_, "MPI_DOUBLE_INT", MPI_DOUBLE_INT, MPI_DOUBLE, MPI_INT, offsetof(DoubleInt, idx)); typedef struct { long val; int idx; } LongInt; - addStruct("MPI_LONG_INT", MPI_LONG_INT, MPI_LONG, MPI_INT, offsetof(LongInt, idx)); + addStruct(predefinedTypeTable_, "MPI_LONG_INT", MPI_LONG_INT, MPI_LONG, MPI_INT, offsetof(LongInt, idx)); typedef struct { short val; int idx; } ShortInt; - addStruct("MPI_SHORT_INT", 
MPI_SHORT_INT, MPI_SHORT, MPI_INT, offsetof(ShortInt, idx)); + addStruct(predefinedTypeTable_, "MPI_SHORT_INT", MPI_SHORT_INT, MPI_SHORT, MPI_INT, offsetof(ShortInt, idx)); typedef struct { long double val; int idx; } LongdoubleInt; - addStruct("MPI_LONG_DOUBLE_INT", MPI_LONG_DOUBLE_INT, MPI_LONG_DOUBLE, MPI_INT, + addStruct(predefinedTypeTable_, "MPI_LONG_DOUBLE_INT", MPI_LONG_DOUBLE_INT, MPI_LONG_DOUBLE, MPI_INT, offsetof(LongdoubleInt, idx)); // Complex datatypes: typedef struct { float val; float idx; } FloatComplex; - addStruct("MPI_FLOAT_COMPLEX", MPI_FLOAT_COMPLEX, MPI_FLOAT, MPI_FLOAT, + addStruct(predefinedTypeTable_, "MPI_FLOAT_COMPLEX", MPI_FLOAT_COMPLEX, MPI_FLOAT, MPI_FLOAT, offsetof(FloatComplex, idx)); - addStruct("MPI_COMPLEX", MPI_COMPLEX, MPI_FLOAT, MPI_FLOAT, offsetof(FloatComplex, idx)); + addStruct(predefinedTypeTable_, "MPI_COMPLEX", MPI_COMPLEX, MPI_FLOAT, MPI_FLOAT, offsetof(FloatComplex, idx)); typedef struct { double val; double idx; } DoubleComplex; - addStruct("MPI_DOUBLE_COMPLEX", MPI_DOUBLE_COMPLEX, MPI_DOUBLE, MPI_DOUBLE, + addStruct(predefinedTypeTable_, "MPI_DOUBLE_COMPLEX", MPI_DOUBLE_COMPLEX, MPI_DOUBLE, MPI_DOUBLE, offsetof(DoubleComplex, idx)); typedef struct { long double val; long double idx; } LongDoubleComplex; - addStruct("MPI_LONG_DOUBLE_COMPLEX", MPI_LONG_DOUBLE_COMPLEX, MPI_LONG_DOUBLE, MPI_LONG_DOUBLE, + addStruct(predefinedTypeTable_, "MPI_LONG_DOUBLE_COMPLEX", MPI_LONG_DOUBLE_COMPLEX, MPI_LONG_DOUBLE, MPI_LONG_DOUBLE, offsetof(LongDoubleComplex, idx)); + return predefinedTypeTable_; } + CkDDT(const array& predefinedTypeTable_) noexcept : predefinedTypeTable(predefinedTypeTable_) {} CkDDT& operator=(const CkDDT &obj) = default; CkDDT(const CkDDT &obj) = default; ~CkDDT() noexcept; @@ -574,11 +590,14 @@ class CkDDT int array_of_integers[], MPI_Aint array_of_addresses[], int array_of_datatypes[]) noexcept; CkDDT_DataType* getType(int nIndex) const noexcept { - #if CMK_ERROR_CHECKING - if (nIndex < 0 || nIndex > 
typeTable.size()) - CkAbort("AMPI> invalid datatype index passed to getType!"); - #endif - return typeTable[nIndex]; + if (nIndex <= AMPI_MAX_PREDEFINED_TYPE) { + CkAssert(nIndex >= 0); + return const_cast(predefinedTypeTable[nIndex]); + } + else { + CkAssert((nIndex - AMPI_MAX_PREDEFINED_TYPE - 1) < userTypeTable.size()); + return userTypeTable[nIndex - AMPI_MAX_PREDEFINED_TYPE - 1]; + } } bool isContig(int nIndex) const noexcept { return getType(nIndex)->isContig(); } -- 2.11.4.GIT