mySQL 5.0.11 sources for tomato
[tomato.git] / release / src / router / mysql / storage / ndb / src / kernel / blocks / dbdih / DbdihMain.cpp
blob3c293103c261c400e9b7950e2dc3779ba57b4de6
1 /* Copyright (c) 2003-2007 MySQL AB
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
16 #define DBDIH_C
17 #include <ndb_limits.h>
18 #include <ndb_version.h>
19 #include <NdbOut.hpp>
21 #include "Dbdih.hpp"
22 #include "Configuration.hpp"
24 #include <signaldata/BlockCommitOrd.hpp>
25 #include <signaldata/CheckNodeGroups.hpp>
26 #include <signaldata/CreateFrag.hpp>
27 #include <signaldata/CopyActive.hpp>
28 #include <signaldata/CopyFrag.hpp>
29 #include <signaldata/CopyGCIReq.hpp>
30 #include <signaldata/DiAddTab.hpp>
31 #include <signaldata/DictStart.hpp>
32 #include <signaldata/DiGetNodes.hpp>
33 #include <signaldata/DihContinueB.hpp>
34 #include <signaldata/DihSwitchReplica.hpp>
35 #include <signaldata/DumpStateOrd.hpp>
36 #include <signaldata/EmptyLcp.hpp>
37 #include <signaldata/EndTo.hpp>
38 #include <signaldata/EventReport.hpp>
39 #include <signaldata/GCPSave.hpp>
40 #include <signaldata/HotSpareRep.hpp>
41 #include <signaldata/MasterGCP.hpp>
42 #include <signaldata/MasterLCP.hpp>
43 #include <signaldata/NFCompleteRep.hpp>
44 #include <signaldata/NodeFailRep.hpp>
45 #include <signaldata/ReadNodesConf.hpp>
46 #include <signaldata/StartFragReq.hpp>
47 #include <signaldata/StartInfo.hpp>
48 #include <signaldata/StartMe.hpp>
49 #include <signaldata/StartPerm.hpp>
50 #include <signaldata/StartRec.hpp>
51 #include <signaldata/StartTo.hpp>
52 #include <signaldata/StopPerm.hpp>
53 #include <signaldata/StopMe.hpp>
54 #include <signaldata/TestOrd.hpp>
55 #include <signaldata/UpdateTo.hpp>
56 #include <signaldata/WaitGCP.hpp>
57 #include <signaldata/DihStartTab.hpp>
58 #include <signaldata/LCP.hpp>
59 #include <signaldata/SystemError.hpp>
61 #include <signaldata/DropTab.hpp>
62 #include <signaldata/AlterTab.hpp>
63 #include <signaldata/PrepDropTab.hpp>
64 #include <signaldata/SumaImpl.hpp>
65 #include <signaldata/DictTabInfo.hpp>
66 #include <signaldata/CreateFragmentation.hpp>
67 #include <signaldata/LqhFrag.hpp>
68 #include <signaldata/FsOpenReq.hpp>
69 #include <signaldata/DihFragCount.hpp>
70 #include <signaldata/DictLock.hpp>
71 #include <DebuggerNames.hpp>
73 #include <EventLogger.hpp>
74 extern EventLogger g_eventLogger;
// SYSFILE: reinterprets the start of the sysfileData word array as a Sysfile
// record, so that its fields can be accessed as SYSFILE->field.
76 #define SYSFILE ((Sysfile *)&sysfileData[0])
// RETURN_IF_NODE_NOT_ALIVE(node): when checkNodeAlive(node) is false, records
// a jam() trace point and returns from the function in which the macro is
// expanded. The expansion is a bare `if` statement containing `return;`, so it
// can only be used as a statement inside a function returning void.
78 #define RETURN_IF_NODE_NOT_ALIVE(node) \
79 if (!checkNodeAlive((node))) { \
80 jam(); \
81 return; \
82 } \
// RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverIndex, regTOPtr): stores
// takeOverIndex in regTOPtr.i, validates the pointer against takeOverRecord
// with a bound of MAX_NDB_NODES (ptrCheckGuard), and then returns from the
// enclosing function (after jam()) when checkToInterrupted(regTOPtr) is true.
// The macro expands to several statements and modifies regTOPtr, so the
// caller's pointer variable is set up as a side effect.
84 #define RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverIndex, regTOPtr) \
85 regTOPtr.i = takeOverIndex; \
86 ptrCheckGuard(regTOPtr, MAX_NDB_NODES, takeOverRecord); \
87 if (checkToInterrupted(regTOPtr)) { \
88 jam(); \
89 return; \
90 } \
// receiveLoopMacro(sigName, receiveNodeId): removes receiveNodeId from the
// counter named c_<sigName>_Counter and, while that counter's done() is still
// false, returns (after jam()) from the enclosing function. Code following the
// macro therefore runs only once done() reports true.
92 #define receiveLoopMacro(sigName, receiveNodeId)\
93 { \
94 c_##sigName##_Counter.clearWaitingFor(receiveNodeId); \
95 if(c_##sigName##_Counter.done() == false){ \
96 jam(); \
97 return; \
98 } \
// sendLoopMacro(sigName, signalRoutine): calls clearWaitingFor() with no
// argument on c_<sigName>_Counter (presumably clearing every entry), then
// walks the node records starting at cfirstAliveNode and following nextNode
// until RNIL. For each visited node it marks the node as waited-for in the
// counter and calls signalRoutine(signal, nodeId).
// The macro uses a variable named `signal` from the enclosing scope. Because
// it is a do/while loop the body runs at least once; ptrCheckGuard validates
// each index against nodeRecord with a bound of MAX_NDB_NODES.
101 #define sendLoopMacro(sigName, signalRoutine) \
103 c_##sigName##_Counter.clearWaitingFor(); \
104 NodeRecordPtr specNodePtr; \
105 specNodePtr.i = cfirstAliveNode; \
106 do { \
107 jam(); \
108 ptrCheckGuard(specNodePtr, MAX_NDB_NODES, nodeRecord); \
109 c_##sigName##_Counter.setWaitingFor(specNodePtr.i); \
110 signalRoutine(signal, specNodePtr.i); \
111 specNodePtr.i = specNodePtr.p->nextNode; \
112 } while (specNodePtr.i != RNIL); \
115 static
116 Uint32
117 prevLcpNo(Uint32 lcpNo){
118 if(lcpNo == 0)
119 return MAX_LCP_STORED - 1;
120 return lcpNo - 1;
123 static
124 Uint32
125 nextLcpNo(Uint32 lcpNo){
126 lcpNo++;
127 if(lcpNo == MAX_LCP_STORED)
128 return 0;
129 return lcpNo;
// gth(x, y): ndbrequire that x is strictly greater than y, with both values
// compared after a cast to (signed) int. The arguments are not parenthesised
// before the cast, so the cast binds only to the first operand of a compound
// expression; pass simple expressions.
132 #define gth(x, y) ndbrequire(((int)x)>((int)y))
134 void Dbdih::nullRoutine(Signal* signal, Uint32 nodeId)
136 }//Dbdih::nullRoutine()
138 void Dbdih::sendCOPY_GCIREQ(Signal* signal, Uint32 nodeId)
140 ndbrequire(c_copyGCIMaster.m_copyReason != CopyGCIReq::IDLE);
142 const BlockReference ref = calcDihBlockRef(nodeId);
143 const Uint32 wordPerSignal = CopyGCIReq::DATA_SIZE;
144 const Uint32 noOfSignals = ((Sysfile::SYSFILE_SIZE32 + (wordPerSignal - 1)) /
145 wordPerSignal);
147 CopyGCIReq * const copyGCI = (CopyGCIReq *)&signal->theData[0];
148 copyGCI->anyData = nodeId;
149 copyGCI->copyReason = c_copyGCIMaster.m_copyReason;
150 copyGCI->startWord = 0;
152 for(Uint32 i = 0; i < noOfSignals; i++) {
153 jam();
154 { // Do copy
155 const int startWord = copyGCI->startWord;
156 for(Uint32 j = 0; j < wordPerSignal; j++) {
157 copyGCI->data[j] = sysfileData[j+startWord];
158 }//for
160 sendSignal(ref, GSN_COPY_GCIREQ, signal, 25, JBB);
161 copyGCI->startWord += wordPerSignal;
162 }//for
163 }//Dbdih::sendCOPY_GCIREQ()
166 void Dbdih::sendDIH_SWITCH_REPLICA_REQ(Signal* signal, Uint32 nodeId)
168 const BlockReference ref = calcDihBlockRef(nodeId);
169 sendSignal(ref, GSN_DIH_SWITCH_REPLICA_REQ, signal,
170 DihSwitchReplicaReq::SignalLength, JBB);
171 }//Dbdih::sendDIH_SWITCH_REPLICA_REQ()
173 void Dbdih::sendEMPTY_LCP_REQ(Signal* signal, Uint32 nodeId)
175 BlockReference ref = calcLqhBlockRef(nodeId);
176 sendSignal(ref, GSN_EMPTY_LCP_REQ, signal, EmptyLcpReq::SignalLength, JBB);
177 }//Dbdih::sendEMPTY_LCPREQ()
179 void Dbdih::sendEND_TOREQ(Signal* signal, Uint32 nodeId)
181 BlockReference ref = calcDihBlockRef(nodeId);
182 sendSignal(ref, GSN_END_TOREQ, signal, EndToReq::SignalLength, JBB);
183 }//Dbdih::sendEND_TOREQ()
185 void Dbdih::sendGCP_COMMIT(Signal* signal, Uint32 nodeId)
187 BlockReference ref = calcDihBlockRef(nodeId);
188 signal->theData[0] = cownNodeId;
189 signal->theData[1] = cnewgcp;
190 sendSignal(ref, GSN_GCP_COMMIT, signal, 2, JBA);
191 }//Dbdih::sendGCP_COMMIT()
193 void Dbdih::sendGCP_PREPARE(Signal* signal, Uint32 nodeId)
195 BlockReference ref = calcDihBlockRef(nodeId);
196 signal->theData[0] = cownNodeId;
197 signal->theData[1] = cnewgcp;
198 sendSignal(ref, GSN_GCP_PREPARE, signal, 2, JBA);
199 }//Dbdih::sendGCP_PREPARE()
201 void Dbdih::sendGCP_SAVEREQ(Signal* signal, Uint32 nodeId)
203 GCPSaveReq * const saveReq = (GCPSaveReq*)&signal->theData[0];
204 BlockReference ref = calcLqhBlockRef(nodeId);
205 saveReq->dihBlockRef = reference();
206 saveReq->dihPtr = nodeId;
207 saveReq->gci = coldgcp;
208 sendSignal(ref, GSN_GCP_SAVEREQ, signal, GCPSaveReq::SignalLength, JBB);
209 }//Dbdih::sendGCP_SAVEREQ()
211 void Dbdih::sendINCL_NODEREQ(Signal* signal, Uint32 nodeId)
213 BlockReference nodeDihRef = calcDihBlockRef(nodeId);
214 signal->theData[0] = reference();
215 signal->theData[1] = c_nodeStartMaster.startNode;
216 signal->theData[2] = c_nodeStartMaster.failNr;
217 signal->theData[3] = 0;
218 signal->theData[4] = currentgcp;
219 sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBA);
220 }//Dbdih::sendINCL_NODEREQ()
222 void Dbdih::sendMASTER_GCPREQ(Signal* signal, Uint32 nodeId)
224 BlockReference ref = calcDihBlockRef(nodeId);
225 sendSignal(ref, GSN_MASTER_GCPREQ, signal, MasterGCPReq::SignalLength, JBB);
226 }//Dbdih::sendMASTER_GCPREQ()
228 void Dbdih::sendMASTER_LCPREQ(Signal* signal, Uint32 nodeId)
230 BlockReference ref = calcDihBlockRef(nodeId);
231 sendSignal(ref, GSN_MASTER_LCPREQ, signal, MasterLCPReq::SignalLength, JBB);
232 }//Dbdih::sendMASTER_LCPREQ()
234 void Dbdih::sendSTART_INFOREQ(Signal* signal, Uint32 nodeId)
236 const BlockReference ref = calcDihBlockRef(nodeId);
237 sendSignal(ref, GSN_START_INFOREQ, signal, StartInfoReq::SignalLength, JBB);
238 }//sendSTART_INFOREQ()
240 void Dbdih::sendSTART_RECREQ(Signal* signal, Uint32 nodeId)
242 StartRecReq * const req = (StartRecReq*)&signal->theData[0];
243 BlockReference ref = calcLqhBlockRef(nodeId);
244 req->receivingNodeId = nodeId;
245 req->senderRef = reference();
246 req->keepGci = SYSFILE->keepGCI;
247 req->lastCompletedGci = SYSFILE->lastCompletedGCI[nodeId];
248 req->newestGci = SYSFILE->newestRestorableGCI;
249 sendSignal(ref, GSN_START_RECREQ, signal, StartRecReq::SignalLength, JBB);
251 signal->theData[0] = NDB_LE_StartREDOLog;
252 signal->theData[1] = nodeId;
253 signal->theData[2] = SYSFILE->keepGCI;
254 signal->theData[3] = SYSFILE->lastCompletedGCI[nodeId];
255 signal->theData[4] = SYSFILE->newestRestorableGCI;
256 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 5, JBB);
257 }//Dbdih::sendSTART_RECREQ()
259 void Dbdih::sendSTART_TOREQ(Signal* signal, Uint32 nodeId)
261 BlockReference ref = calcDihBlockRef(nodeId);
262 sendSignal(ref, GSN_START_TOREQ, signal, StartToReq::SignalLength, JBB);
263 }//Dbdih::sendSTART_TOREQ()
265 void Dbdih::sendSTOP_ME_REQ(Signal* signal, Uint32 nodeId)
267 if (nodeId != getOwnNodeId()) {
268 jam();
269 const BlockReference ref = calcDihBlockRef(nodeId);
270 sendSignal(ref, GSN_STOP_ME_REQ, signal, StopMeReq::SignalLength, JBB);
271 }//if
272 }//Dbdih::sendSTOP_ME_REQ()
274 void Dbdih::sendTC_CLOPSIZEREQ(Signal* signal, Uint32 nodeId)
276 BlockReference ref = calcTcBlockRef(nodeId);
277 signal->theData[0] = nodeId;
278 signal->theData[1] = reference();
279 sendSignal(ref, GSN_TC_CLOPSIZEREQ, signal, 2, JBB);
280 }//Dbdih::sendTC_CLOPSIZEREQ()
282 void Dbdih::sendTCGETOPSIZEREQ(Signal* signal, Uint32 nodeId)
284 BlockReference ref = calcTcBlockRef(nodeId);
285 signal->theData[0] = nodeId;
286 signal->theData[1] = reference();
287 sendSignal(ref, GSN_TCGETOPSIZEREQ, signal, 2, JBB);
288 }//Dbdih::sendTCGETOPSIZEREQ()
290 void Dbdih::sendUPDATE_TOREQ(Signal* signal, Uint32 nodeId)
292 const BlockReference ref = calcDihBlockRef(nodeId);
293 sendSignal(ref, GSN_UPDATE_TOREQ, signal, UpdateToReq::SignalLength, JBB);
294 }//sendUPDATE_TOREQ()
296 void Dbdih::execCONTINUEB(Signal* signal)
298 jamEntry();
299 switch ((DihContinueB::Type)signal->theData[0]) {
300 case DihContinueB::ZPACK_TABLE_INTO_PAGES:
302 jam();
303 Uint32 tableId = signal->theData[1];
304 packTableIntoPagesLab(signal, tableId);
305 return;
306 break;
308 case DihContinueB::ZPACK_FRAG_INTO_PAGES:
310 RWFragment wf;
311 jam();
312 wf.rwfTabPtr.i = signal->theData[1];
313 ptrCheckGuard(wf.rwfTabPtr, ctabFileSize, tabRecord);
314 wf.fragId = signal->theData[2];
315 wf.pageIndex = signal->theData[3];
316 wf.wordIndex = signal->theData[4];
317 packFragIntoPagesLab(signal, &wf);
318 return;
319 break;
321 case DihContinueB::ZREAD_PAGES_INTO_TABLE:
323 jam();
324 Uint32 tableId = signal->theData[1];
325 readPagesIntoTableLab(signal, tableId);
326 return;
327 break;
329 case DihContinueB::ZREAD_PAGES_INTO_FRAG:
331 RWFragment rf;
332 jam();
333 rf.rwfTabPtr.i = signal->theData[1];
334 ptrCheckGuard(rf.rwfTabPtr, ctabFileSize, tabRecord);
335 rf.fragId = signal->theData[2];
336 rf.pageIndex = signal->theData[3];
337 rf.wordIndex = signal->theData[4];
338 readPagesIntoFragLab(signal, &rf);
339 return;
340 break;
342 case DihContinueB::ZCOPY_TABLE:
344 jam();
345 Uint32 tableId = signal->theData[1];
346 copyTableLab(signal, tableId);
347 return;
349 case DihContinueB::ZCOPY_TABLE_NODE:
351 NodeRecordPtr nodePtr;
352 CopyTableNode ctn;
353 jam();
354 ctn.ctnTabPtr.i = signal->theData[1];
355 ptrCheckGuard(ctn.ctnTabPtr, ctabFileSize, tabRecord);
356 nodePtr.i = signal->theData[2];
357 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
358 ctn.pageIndex = signal->theData[3];
359 ctn.wordIndex = signal->theData[4];
360 ctn.noOfWords = signal->theData[5];
361 copyTableNode(signal, &ctn, nodePtr);
362 return;
364 case DihContinueB::ZSTART_FRAGMENT:
366 jam();
367 Uint32 tableId = signal->theData[1];
368 Uint32 fragId = signal->theData[2];
369 startFragment(signal, tableId, fragId);
370 return;
372 case DihContinueB::ZCOMPLETE_RESTART:
373 jam();
374 completeRestartLab(signal);
375 return;
376 case DihContinueB::ZREAD_TABLE_FROM_PAGES:
378 TabRecordPtr tabPtr;
379 jam();
380 tabPtr.i = signal->theData[1];
381 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
382 readTableFromPagesLab(signal, tabPtr);
383 return;
385 case DihContinueB::ZSR_PHASE2_READ_TABLE:
387 TabRecordPtr tabPtr;
388 jam();
389 tabPtr.i = signal->theData[1];
390 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
391 srPhase2ReadTableLab(signal, tabPtr);
392 return;
394 case DihContinueB::ZCHECK_TC_COUNTER:
395 jam();
396 #ifndef NO_LCP
397 checkTcCounterLab(signal);
398 #endif
399 return;
400 case DihContinueB::ZCALCULATE_KEEP_GCI:
402 jam();
403 Uint32 tableId = signal->theData[1];
404 Uint32 fragId = signal->theData[2];
405 calculateKeepGciLab(signal, tableId, fragId);
406 return;
408 case DihContinueB::ZSTORE_NEW_LCP_ID:
409 jam();
410 storeNewLcpIdLab(signal);
411 return;
412 case DihContinueB::ZTABLE_UPDATE:
414 TabRecordPtr tabPtr;
415 jam();
416 tabPtr.i = signal->theData[1];
417 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
418 tableUpdateLab(signal, tabPtr);
419 return;
421 case DihContinueB::ZCHECK_LCP_COMPLETED:
423 jam();
424 checkLcpCompletedLab(signal);
425 return;
427 case DihContinueB::ZINIT_LCP:
429 jam();
430 Uint32 senderRef = signal->theData[1];
431 Uint32 tableId = signal->theData[2];
432 initLcpLab(signal, senderRef, tableId);
433 return;
435 case DihContinueB::ZADD_TABLE_MASTER_PAGES:
437 TabRecordPtr tabPtr;
438 jam();
439 tabPtr.i = signal->theData[1];
440 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
441 tabPtr.p->tabUpdateState = TabRecord::US_ADD_TABLE_MASTER;
442 tableUpdateLab(signal, tabPtr);
443 return;
444 break;
446 case DihContinueB::ZDIH_ADD_TABLE_MASTER:
448 jam();
449 addTable_closeConf(signal, signal->theData[1]);
450 return;
452 case DihContinueB::ZADD_TABLE_SLAVE_PAGES:
454 TabRecordPtr tabPtr;
455 jam();
456 tabPtr.i = signal->theData[1];
457 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
458 tabPtr.p->tabUpdateState = TabRecord::US_ADD_TABLE_SLAVE;
459 tableUpdateLab(signal, tabPtr);
460 return;
462 case DihContinueB::ZDIH_ADD_TABLE_SLAVE:
464 ndbrequire(false);
465 return;
467 case DihContinueB::ZSTART_GCP:
468 jam();
469 #ifndef NO_GCP
470 startGcpLab(signal, signal->theData[1]);
471 #endif
472 return;
473 break;
474 case DihContinueB::ZCOPY_GCI:{
475 jam();
476 CopyGCIReq::CopyReason reason = (CopyGCIReq::CopyReason)signal->theData[1];
477 ndbrequire(c_copyGCIMaster.m_copyReason == reason);
478 sendLoopMacro(COPY_GCIREQ, sendCOPY_GCIREQ);
479 return;
481 break;
482 case DihContinueB::ZEMPTY_VERIFY_QUEUE:
483 jam();
484 emptyverificbuffer(signal, true);
485 return;
486 break;
487 case DihContinueB::ZCHECK_GCP_STOP:
488 jam();
489 #ifndef NO_GCP
490 checkGcpStopLab(signal);
491 #endif
492 return;
493 break;
494 case DihContinueB::ZREMOVE_NODE_FROM_TABLE:
496 jam();
497 Uint32 nodeId = signal->theData[1];
498 Uint32 tableId = signal->theData[2];
499 removeNodeFromTables(signal, nodeId, tableId);
500 return;
502 case DihContinueB::ZCOPY_NODE:
504 jam();
505 Uint32 tableId = signal->theData[1];
506 copyNodeLab(signal, tableId);
507 return;
509 case DihContinueB::ZSTART_TAKE_OVER:
511 jam();
512 Uint32 takeOverPtrI = signal->theData[1];
513 Uint32 startNode = signal->theData[2];
514 Uint32 toNode = signal->theData[3];
515 startTakeOver(signal, takeOverPtrI, startNode, toNode);
516 return;
517 break;
519 case DihContinueB::ZCHECK_START_TAKE_OVER:
520 jam();
521 checkStartTakeOver(signal);
522 break;
523 case DihContinueB::ZTO_START_COPY_FRAG:
525 jam();
526 Uint32 takeOverPtrI = signal->theData[1];
527 startNextCopyFragment(signal, takeOverPtrI);
528 return;
530 case DihContinueB::ZINVALIDATE_NODE_LCP:
532 jam();
533 const Uint32 nodeId = signal->theData[1];
534 const Uint32 tableId = signal->theData[2];
535 invalidateNodeLCP(signal, nodeId, tableId);
536 return;
538 case DihContinueB::ZINITIALISE_RECORDS:
539 jam();
540 initialiseRecordsLab(signal,
541 signal->theData[1],
542 signal->theData[2],
543 signal->theData[3]);
544 return;
545 break;
546 case DihContinueB::ZSTART_PERMREQ_AGAIN:
547 jam();
548 nodeRestartPh2Lab2(signal);
549 return;
550 break;
551 case DihContinueB::SwitchReplica:
553 jam();
554 const Uint32 nodeId = signal->theData[1];
555 const Uint32 tableId = signal->theData[2];
556 const Uint32 fragNo = signal->theData[3];
557 switchReplica(signal, nodeId, tableId, fragNo);
558 return;
560 case DihContinueB::ZSEND_START_TO:
562 jam();
563 Uint32 takeOverPtrI = signal->theData[1];
564 sendStartTo(signal, takeOverPtrI);
565 return;
567 case DihContinueB::ZSEND_ADD_FRAG:
569 jam();
570 Uint32 takeOverPtrI = signal->theData[1];
571 toCopyFragLab(signal, takeOverPtrI);
572 return;
574 case DihContinueB::ZSEND_UPDATE_TO:
576 jam();
577 Uint32 takeOverPtrI = signal->theData[1];
578 Uint32 updateState = signal->theData[4];
579 sendUpdateTo(signal, takeOverPtrI, updateState);
580 return;
582 case DihContinueB::ZSEND_END_TO:
584 jam();
585 Uint32 takeOverPtrI = signal->theData[1];
586 sendEndTo(signal, takeOverPtrI);
587 return;
589 case DihContinueB::ZSEND_CREATE_FRAG:
591 jam();
592 Uint32 takeOverPtrI = signal->theData[1];
593 Uint32 storedType = signal->theData[2];
594 Uint32 startGci = signal->theData[3];
595 sendCreateFragReq(signal, startGci, storedType, takeOverPtrI);
596 return;
598 case DihContinueB::WAIT_DROP_TAB_WRITING_TO_FILE:{
599 jam();
600 TabRecordPtr tabPtr;
601 tabPtr.i = signal->theData[1];
602 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
603 waitDropTabWritingToFile(signal, tabPtr);
604 return;
606 case DihContinueB::CHECK_WAIT_DROP_TAB_FAILED_LQH:{
607 jam();
608 Uint32 nodeId = signal->theData[1];
609 Uint32 tableId = signal->theData[2];
610 checkWaitDropTabFailedLqh(signal, nodeId, tableId);
611 return;
613 case DihContinueB::ZTO_START_FRAGMENTS:
615 TakeOverRecordPtr takeOverPtr;
616 takeOverPtr.i = signal->theData[1];
617 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
618 nr_start_fragments(signal, takeOverPtr);
619 return;
621 }//switch
623 ndbrequire(false);
624 return;
625 }//Dbdih::execCONTINUEB()
627 void Dbdih::execCOPY_GCIREQ(Signal* signal)
629 CopyGCIReq * const copyGCI = (CopyGCIReq *)&signal->theData[0];
630 jamEntry();
631 CopyGCIReq::CopyReason reason = (CopyGCIReq::CopyReason)copyGCI->copyReason;
632 const Uint32 tstart = copyGCI->startWord;
634 ndbrequire(cmasterdihref == signal->senderBlockRef()) ;
635 ndbrequire(c_copyGCISlave.m_copyReason == CopyGCIReq::IDLE);
636 ndbrequire(c_copyGCISlave.m_expectedNextWord == tstart);
637 ndbrequire(reason != CopyGCIReq::IDLE);
638 bool isdone = (tstart + CopyGCIReq::DATA_SIZE) >= Sysfile::SYSFILE_SIZE32;
640 if (ERROR_INSERTED(7177))
642 jam();
644 if (signal->getLength() == 3)
646 jam();
647 goto done;
651 arrGuard(tstart + CopyGCIReq::DATA_SIZE, sizeof(sysfileData)/4);
652 for(Uint32 i = 0; i<CopyGCIReq::DATA_SIZE; i++)
653 cdata[tstart+i] = copyGCI->data[i];
655 if (ERROR_INSERTED(7177) && isMaster() && isdone)
657 sendSignalWithDelay(reference(), GSN_COPY_GCIREQ, signal, 1000, 3);
658 return;
661 done:
662 if (isdone)
664 jam();
665 c_copyGCISlave.m_expectedNextWord = 0;
667 else
669 jam();
670 c_copyGCISlave.m_expectedNextWord += CopyGCIReq::DATA_SIZE;
671 return;
674 if (cmasterdihref != reference())
676 jam();
677 Uint32 tmp= SYSFILE->m_restart_seq;
678 memcpy(sysfileData, cdata, sizeof(sysfileData));
679 SYSFILE->m_restart_seq = tmp;
681 if (c_set_initial_start_flag)
683 jam();
684 Sysfile::setInitialStartOngoing(SYSFILE->systemRestartBits);
688 c_copyGCISlave.m_copyReason = reason;
689 c_copyGCISlave.m_senderRef = signal->senderBlockRef();
690 c_copyGCISlave.m_senderData = copyGCI->anyData;
692 CRASH_INSERTION2(7020, reason==CopyGCIReq::LOCAL_CHECKPOINT);
693 CRASH_INSERTION2(7008, reason==CopyGCIReq::GLOBAL_CHECKPOINT);
695 /* -------------------------------------------------------------------------*/
696 /* WE SET THE REQUESTER OF THE COPY GCI TO THE CURRENT MASTER. IF THE */
697 /* CURRENT MASTER WE DO NOT WANT THE NEW MASTER TO RECEIVE CONFIRM OF */
698 /* SOMETHING HE HAS NOT SENT. THE TAKE OVER MUST BE CAREFUL. */
699 /* -------------------------------------------------------------------------*/
700 bool ok = false;
701 switch(reason){
702 case CopyGCIReq::IDLE:
703 ok = true;
704 jam();
705 ndbrequire(false);
706 break;
707 case CopyGCIReq::LOCAL_CHECKPOINT: {
708 ok = true;
709 jam();
710 c_lcpState.setLcpStatus(LCP_COPY_GCI, __LINE__);
711 c_lcpState.m_masterLcpDihRef = cmasterdihref;
712 setNodeInfo(signal);
713 break;
715 case CopyGCIReq::RESTART: {
716 ok = true;
717 jam();
718 coldgcp = SYSFILE->newestRestorableGCI;
719 crestartGci = SYSFILE->newestRestorableGCI;
720 c_newest_restorable_gci = SYSFILE->newestRestorableGCI;
721 Sysfile::setRestartOngoing(SYSFILE->systemRestartBits);
722 currentgcp = coldgcp + 1;
723 cnewgcp = coldgcp + 1;
724 setNodeInfo(signal);
725 if ((Sysfile::getLCPOngoing(SYSFILE->systemRestartBits))) {
726 jam();
727 /* -------------------------------------------------------------------- */
728 // IF THERE WAS A LOCAL CHECKPOINT ONGOING AT THE CRASH MOMENT WE WILL
729 // INVALIDATE THAT LOCAL CHECKPOINT.
730 /* -------------------------------------------------------------------- */
731 invalidateLcpInfoAfterSr();
732 }//if
733 break;
735 case CopyGCIReq::GLOBAL_CHECKPOINT: {
736 ok = true;
737 jam();
738 cgcpParticipantState = GCP_PARTICIPANT_COPY_GCI_RECEIVED;
739 c_newest_restorable_gci = SYSFILE->newestRestorableGCI;
740 setNodeInfo(signal);
741 break;
742 }//if
743 case CopyGCIReq::INITIAL_START_COMPLETED:
744 ok = true;
745 jam();
746 break;
748 ndbrequire(ok);
750 CRASH_INSERTION(7183);
752 if (ERROR_INSERTED(7185) && reason==CopyGCIReq::GLOBAL_CHECKPOINT)
754 jam();
755 return;
758 /* ----------------------------------------------------------------------- */
759 /* WE START BY TRYING TO OPEN THE FIRST RESTORABLE GCI FILE. */
760 /* ----------------------------------------------------------------------- */
761 FileRecordPtr filePtr;
762 filePtr.i = crestartInfoFile[0];
763 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
764 if (filePtr.p->fileStatus == FileRecord::OPEN) {
765 jam();
766 openingCopyGciSkipInitLab(signal, filePtr);
767 return;
768 }//if
769 openFileRw(signal, filePtr);
770 filePtr.p->reqStatus = FileRecord::OPENING_COPY_GCI;
771 return;
772 }//Dbdih::execCOPY_GCIREQ()
774 void Dbdih::execDICTSTARTCONF(Signal* signal)
776 jamEntry();
777 Uint32 nodeId = refToNode(signal->getSendersBlockRef());
778 if (nodeId != getOwnNodeId()) {
779 jam();
780 nodeDictStartConfLab(signal);
781 } else {
782 jam();
783 dictStartConfLab(signal);
784 }//if
785 }//Dbdih::execDICTSTARTCONF()
787 void Dbdih::execFSCLOSECONF(Signal* signal)
789 FileRecordPtr filePtr;
790 jamEntry();
791 filePtr.i = signal->theData[0];
792 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
793 filePtr.p->fileStatus = FileRecord::CLOSED;
794 FileRecord::ReqStatus status = filePtr.p->reqStatus;
795 filePtr.p->reqStatus = FileRecord::IDLE;
796 switch (status) {
797 case FileRecord::CLOSING_GCP:
798 jam();
799 closingGcpLab(signal, filePtr);
800 break;
801 case FileRecord::CLOSING_GCP_CRASH:
802 jam();
803 closingGcpCrashLab(signal, filePtr);
804 break;
805 case FileRecord::CLOSING_TABLE_CRASH:
806 jam();
807 closingTableCrashLab(signal, filePtr);
808 break;
809 case FileRecord::CLOSING_TABLE_SR:
810 jam();
811 closingTableSrLab(signal, filePtr);
812 break;
813 case FileRecord::TABLE_CLOSE:
814 jam();
815 tableCloseLab(signal, filePtr);
816 break;
817 case FileRecord::TABLE_CLOSE_DELETE:
818 jam();
819 tableDeleteLab(signal, filePtr);
820 break;
821 default:
822 ndbrequire(false);
823 break;
824 }//switch
825 return;
826 }//Dbdih::execFSCLOSECONF()
828 void Dbdih::execFSCLOSEREF(Signal* signal)
830 FileRecordPtr filePtr;
831 jamEntry();
832 filePtr.i = signal->theData[0];
833 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
834 FileRecord::ReqStatus status = filePtr.p->reqStatus;
835 filePtr.p->reqStatus = FileRecord::IDLE;
836 switch (status) {
837 case FileRecord::CLOSING_GCP:
838 jam();
839 break;
840 case FileRecord::CLOSING_GCP_CRASH:
841 jam();
842 closingGcpCrashLab(signal, filePtr);
843 return;
844 case FileRecord::CLOSING_TABLE_CRASH:
845 jam();
846 closingTableCrashLab(signal, filePtr);
847 return;
848 case FileRecord::CLOSING_TABLE_SR:
849 jam();
850 break;
851 case FileRecord::TABLE_CLOSE:
852 jam();
853 break;
854 case FileRecord::TABLE_CLOSE_DELETE:
855 jam();
856 break;
857 default:
858 jam();
859 break;
861 }//switch
863 char msg[100];
864 sprintf(msg, "File system close failed during FileRecord status %d", (Uint32)status);
865 fsRefError(signal,__LINE__,msg);
868 return;
869 }//Dbdih::execFSCLOSEREF()
871 void Dbdih::execFSOPENCONF(Signal* signal)
873 FileRecordPtr filePtr;
874 jamEntry();
875 filePtr.i = signal->theData[0];
876 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
877 filePtr.p->fileRef = signal->theData[1];
878 filePtr.p->fileStatus = FileRecord::OPEN;
879 FileRecord::ReqStatus status = filePtr.p->reqStatus;
880 filePtr.p->reqStatus = FileRecord::IDLE;
881 switch (status) {
882 case FileRecord::CREATING_GCP:
883 jam();
884 creatingGcpLab(signal, filePtr);
885 break;
886 case FileRecord::OPENING_COPY_GCI:
887 jam();
888 openingCopyGciSkipInitLab(signal, filePtr);
889 break;
890 case FileRecord::CREATING_COPY_GCI:
891 jam();
892 openingCopyGciSkipInitLab(signal, filePtr);
893 break;
894 case FileRecord::OPENING_GCP:
895 jam();
896 openingGcpLab(signal, filePtr);
897 break;
898 case FileRecord::OPENING_TABLE:
899 jam();
900 openingTableLab(signal, filePtr);
901 break;
902 case FileRecord::TABLE_CREATE:
903 jam();
904 tableCreateLab(signal, filePtr);
905 break;
906 case FileRecord::TABLE_OPEN_FOR_DELETE:
907 jam();
908 tableOpenLab(signal, filePtr);
909 break;
910 default:
911 ndbrequire(false);
912 break;
913 }//switch
914 return;
915 }//Dbdih::execFSOPENCONF()
917 void Dbdih::execFSOPENREF(Signal* signal)
919 FileRecordPtr filePtr;
920 jamEntry();
921 filePtr.i = signal->theData[0];
922 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
923 FileRecord::ReqStatus status = filePtr.p->reqStatus;
924 filePtr.p->reqStatus = FileRecord::IDLE;
925 switch (status) {
926 case FileRecord::CREATING_GCP:
927 /* --------------------------------------------------------------------- */
928 /* WE DID NOT MANAGE TO CREATE A GLOBAL CHECKPOINT FILE. SERIOUS ERROR */
929 /* WHICH CAUSES A SYSTEM RESTART. */
930 /* --------------------------------------------------------------------- */
931 jam();
932 break;
933 case FileRecord::OPENING_COPY_GCI:
934 jam();
935 openingCopyGciErrorLab(signal, filePtr);
936 return;
937 case FileRecord::CREATING_COPY_GCI:
938 jam();
939 break;
940 case FileRecord::OPENING_GCP:
941 jam();
942 openingGcpErrorLab(signal, filePtr);
943 return;
944 case FileRecord::OPENING_TABLE:
945 jam();
946 openingTableErrorLab(signal, filePtr);
947 return;
948 case FileRecord::TABLE_CREATE:
949 jam();
950 break;
951 case FileRecord::TABLE_OPEN_FOR_DELETE:
952 jam();
953 tableDeleteLab(signal, filePtr);
954 return;
955 default:
956 jam();
957 break;
958 }//switch
960 char msg[100];
961 sprintf(msg, "File system open failed during FileRecord status %d", (Uint32)status);
962 fsRefError(signal,__LINE__,msg);
964 return;
965 }//Dbdih::execFSOPENREF()
967 void Dbdih::execFSREADCONF(Signal* signal)
969 FileRecordPtr filePtr;
970 jamEntry();
971 filePtr.i = signal->theData[0];
972 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
973 FileRecord::ReqStatus status = filePtr.p->reqStatus;
974 filePtr.p->reqStatus = FileRecord::IDLE;
975 switch (status) {
976 case FileRecord::READING_GCP:
977 jam();
978 readingGcpLab(signal, filePtr);
979 break;
980 case FileRecord::READING_TABLE:
981 jam();
982 readingTableLab(signal, filePtr);
983 break;
984 default:
985 ndbrequire(false);
986 break;
987 }//switch
988 return;
989 }//Dbdih::execFSREADCONF()
991 void Dbdih::execFSREADREF(Signal* signal)
993 FileRecordPtr filePtr;
994 jamEntry();
995 filePtr.i = signal->theData[0];
996 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
997 FileRecord::ReqStatus status = filePtr.p->reqStatus;
998 filePtr.p->reqStatus = FileRecord::IDLE;
999 switch (status) {
1000 case FileRecord::READING_GCP:
1001 jam();
1002 readingGcpErrorLab(signal, filePtr);
1003 return;
1004 case FileRecord::READING_TABLE:
1005 jam();
1006 readingTableErrorLab(signal, filePtr);
1007 return;
1008 default:
1009 break;
1010 }//switch
1012 char msg[100];
1013 sprintf(msg, "File system read failed during FileRecord status %d", (Uint32)status);
1014 fsRefError(signal,__LINE__,msg);
1016 }//Dbdih::execFSREADREF()
1018 void Dbdih::execFSWRITECONF(Signal* signal)
1020 FileRecordPtr filePtr;
1021 jamEntry();
1022 filePtr.i = signal->theData[0];
1023 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
1024 FileRecord::ReqStatus status = filePtr.p->reqStatus;
1025 filePtr.p->reqStatus = FileRecord::IDLE;
1026 switch (status) {
1027 case FileRecord::WRITING_COPY_GCI:
1028 jam();
1029 writingCopyGciLab(signal, filePtr);
1030 break;
1031 case FileRecord::WRITE_INIT_GCP:
1032 jam();
1033 writeInitGcpLab(signal, filePtr);
1034 break;
1035 case FileRecord::TABLE_WRITE:
1036 jam();
1037 tableWriteLab(signal, filePtr);
1038 break;
1039 default:
1040 ndbrequire(false);
1041 break;
1042 }//switch
1043 return;
1044 }//Dbdih::execFSWRITECONF()
1046 void Dbdih::execFSWRITEREF(Signal* signal)
1048 FileRecordPtr filePtr;
1049 jamEntry();
1050 filePtr.i = signal->theData[0];
1051 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
1052 FileRecord::ReqStatus status = filePtr.p->reqStatus;
1053 filePtr.p->reqStatus = FileRecord::IDLE;
1054 switch (status) {
1055 case FileRecord::WRITING_COPY_GCI:
1056 /* --------------------------------------------------------------------- */
1057 /* EVEN CREATING THE FILE DID NOT WORK. WE WILL THEN CRASH. */
1058 /* ERROR IN WRITING FILE. WE WILL NOT CONTINUE FROM HERE. */
1059 /* --------------------------------------------------------------------- */
1060 jam();
1061 break;
1062 case FileRecord::WRITE_INIT_GCP:
1063 /* --------------------------------------------------------------------- */
1064 /* AN ERROR OCCURRED IN WRITING A GCI FILE WHICH IS A SERIOUS ERROR */
1065 /* THAT CAUSE A SYSTEM RESTART. */
1066 /* --------------------------------------------------------------------- */
1067 jam();
1068 break;
1069 case FileRecord::TABLE_WRITE:
1070 jam();
1071 break;
1072 default:
1073 jam();
1074 break;
1075 }//switch
1077 char msg[100];
1078 sprintf(msg, "File system write failed during FileRecord status %d", (Uint32)status);
1079 fsRefError(signal,__LINE__,msg);
1081 return;
1082 }//Dbdih::execFSWRITEREF()
1084 void Dbdih::execGETGCIREQ(Signal* signal)
1087 jamEntry();
1088 Uint32 userPtr = signal->theData[0];
1089 BlockReference userRef = signal->theData[1];
1091 signal->theData[0] = userPtr;
1092 signal->theData[1] = SYSFILE->newestRestorableGCI;
1093 sendSignal(userRef, GSN_GETGCICONF, signal, 2, JBB);
1094 }//Dbdih::execGETGCIREQ()
1096 void Dbdih::execREAD_CONFIG_REQ(Signal* signal)
1098 const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr();
1099 Uint32 ref = req->senderRef;
1100 Uint32 senderData = req->senderData;
1101 ndbrequire(req->noOfParameters == 0);
1103 jamEntry();
1105 const ndb_mgm_configuration_iterator * p =
1106 m_ctx.m_config.getOwnConfigIterator();
1107 ndbrequireErr(p != 0, NDBD_EXIT_INVALID_CONFIG);
1109 initData();
1111 ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_API_CONNECT,
1112 &capiConnectFileSize),
1113 NDBD_EXIT_INVALID_CONFIG);
1114 ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_CONNECT,
1115 &cconnectFileSize),
1116 NDBD_EXIT_INVALID_CONFIG);
1117 ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_FRAG_CONNECT,
1118 &cfragstoreFileSize),
1119 NDBD_EXIT_INVALID_CONFIG);
1120 ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_REPLICAS,
1121 &creplicaFileSize),
1122 NDBD_EXIT_INVALID_CONFIG);
1123 ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_TABLE, &ctabFileSize),
1124 NDBD_EXIT_INVALID_CONFIG);
1125 cfileFileSize = (2 * ctabFileSize) + 2;
1126 initRecords();
1127 initialiseRecordsLab(signal, 0, ref, senderData);
1128 return;
1129 }//Dbdih::execSIZEALT_REP()
1131 void Dbdih::execSTART_COPYREF(Signal* signal)
1133 jamEntry();
1134 ndbrequire(false);
1135 }//Dbdih::execSTART_COPYREF()
1137 void Dbdih::execSTART_FRAGCONF(Signal* signal)
1139 (void)signal; // Don't want compiler warning
1140 /* ********************************************************************* */
1141 /* If anyone wants to add functionality in this method, be aware that */
1142 /* for temporary tables no START_FRAGREQ is sent and therefore no */
1143 /* START_FRAGCONF signal will be received for those tables!! */
1144 /* ********************************************************************* */
1145 jamEntry();
1146 return;
1147 }//Dbdih::execSTART_FRAGCONF()
1149 void Dbdih::execSTART_FRAGREF(Signal* signal)
1151 jamEntry();
1154 * Kill starting node
1156 Uint32 errCode = signal->theData[1];
1157 Uint32 nodeId = signal->theData[2];
1159 SystemError * const sysErr = (SystemError*)&signal->theData[0];
1160 sysErr->errorCode = SystemError::StartFragRefError;
1161 sysErr->errorRef = reference();
1162 sysErr->data1 = errCode;
1163 sysErr->data2 = 0;
1164 sendSignal(calcNdbCntrBlockRef(nodeId), GSN_SYSTEM_ERROR, signal,
1165 SystemError::SignalLength, JBB);
1166 return;
1167 }//Dbdih::execSTART_FRAGCONF()
1169 void Dbdih::execSTART_MEREF(Signal* signal)
1171 jamEntry();
1172 ndbrequire(false);
1173 }//Dbdih::execSTART_MEREF()
1175 void Dbdih::execTAB_COMMITREQ(Signal* signal)
1177 TabRecordPtr tabPtr;
1178 jamEntry();
1179 Uint32 tdictPtr = signal->theData[0];
1180 BlockReference tdictBlockref = signal->theData[1];
1181 tabPtr.i = signal->theData[2];
1182 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
1184 ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_CREATING);
1185 tabPtr.p->tabStatus = TabRecord::TS_ACTIVE;
1186 signal->theData[0] = tdictPtr;
1187 signal->theData[1] = cownNodeId;
1188 signal->theData[2] = tabPtr.i;
1189 sendSignal(tdictBlockref, GSN_TAB_COMMITCONF, signal, 3, JBB);
1190 return;
1191 }//Dbdih::execTAB_COMMITREQ()
1194 3.2 S T A N D A R D S U B P R O G R A M S I N P L E X
1195 *************************************************************
1198 3.2.1 S T A R T / R E S T A R T
1199 **********************************
1201 /*****************************************************************************/
1202 /* ********** START / RESTART MODULE *************/
1203 /*****************************************************************************/
1205 3.2.1.1 LOADING O W N B L O C K R E F E R E N C E (ABSOLUTE PHASE 1)
1206 *****************************************************************************
1208 void Dbdih::execDIH_RESTARTREQ(Signal* signal)
1210 jamEntry();
1211 if (signal->theData[0])
1213 jam();
1214 cntrlblockref = signal->theData[0];
1215 if(m_ctx.m_config.getInitialStart()){
1216 sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB);
1217 } else {
1218 readGciFileLab(signal);
1221 else
1224 * Precondition, (not checked)
1225 * atleast 1 node in each node group
1227 Uint32 i;
1228 NdbNodeBitmask mask;
1229 mask.assign(NdbNodeBitmask::Size, signal->theData + 1);
1230 Uint32 *node_gcis = signal->theData+1+NdbNodeBitmask::Size;
1231 Uint32 node_group_gcis[MAX_NDB_NODES+1];
1232 bzero(node_group_gcis, sizeof(node_group_gcis));
1233 for (i = 0; i<MAX_NDB_NODES; i++)
1235 if (mask.get(i))
1237 jam();
1238 Uint32 ng = Sysfile::getNodeGroup(i, SYSFILE->nodeGroups);
1239 ndbrequire(ng < MAX_NDB_NODES);
1240 Uint32 gci = node_gcis[i];
1241 if (gci < SYSFILE->lastCompletedGCI[i])
1243 jam();
1245 * Handle case, where *I* know that node complete GCI
1246 * but node does not...bug#29167
1247 * i.e node died before it wrote own sysfile
1249 gci = SYSFILE->lastCompletedGCI[i];
1252 if (gci > node_group_gcis[ng])
1254 jam();
1255 node_group_gcis[ng] = gci;
1259 for (i = 0; i<MAX_NDB_NODES && node_group_gcis[i] == 0; i++);
1261 Uint32 gci = node_group_gcis[i];
1262 for (i++ ; i<MAX_NDB_NODES; i++)
1264 jam();
1265 if (node_group_gcis[i] && node_group_gcis[i] != gci)
1267 jam();
1268 signal->theData[0] = i;
1269 return;
1272 signal->theData[0] = MAX_NDB_NODES;
1273 return;
1275 return;
1276 }//Dbdih::execDIH_RESTARTREQ()
1278 void Dbdih::execSTTOR(Signal* signal)
1280 jamEntry();
1282 signal->theData[0] = 0;
1283 signal->theData[1] = 0;
1284 signal->theData[2] = 0;
1285 signal->theData[3] = 1; // Next start phase
1286 signal->theData[4] = 255; // Next start phase
1287 sendSignal(NDBCNTR_REF, GSN_STTORRY, signal, 5, JBB);
1288 return;
1289 }//Dbdih::execSTTOR()
1291 void Dbdih::initialStartCompletedLab(Signal* signal)
1293 /*-------------------------------------------------------------------------*/
1294 /* NOW THAT (RE)START IS COMPLETED WE CAN START THE LCP.*/
1295 /*-------------------------------------------------------------------------*/
1296 return;
1297 }//Dbdih::initialStartCompletedLab()
1300 * ***************************************************************************
1301 * S E N D I N G R E P L Y T O S T A R T / R E S T A R T R E Q U E S T S
1302 * ****************************************************************************
1304 void Dbdih::ndbsttorry10Lab(Signal* signal, Uint32 _line)
1306 /*-------------------------------------------------------------------------*/
1307 // AN NDB START PHASE HAS BEEN COMPLETED. WHEN START PHASE 6 IS COMPLETED WE
1308 // RECORD THAT THE SYSTEM IS RUNNING.
1309 /*-------------------------------------------------------------------------*/
1310 signal->theData[0] = reference();
1311 sendSignal(cntrlblockref, GSN_NDB_STTORRY, signal, 1, JBB);
1312 return;
1313 }//Dbdih::ndbsttorry10Lab()
1316 ****************************************
1317 I N T E R N A L P H A S E S
1318 ****************************************
1320 /*---------------------------------------------------------------------------*/
1321 /*NDB_STTOR START SIGNAL AT START/RESTART */
1322 /*---------------------------------------------------------------------------*/
1323 void Dbdih::execNDB_STTOR(Signal* signal)
1325 jamEntry();
1326 BlockReference cntrRef = signal->theData[0]; /* SENDERS BLOCK REFERENCE */
1327 Uint32 ownNodeId = signal->theData[1]; /* OWN PROCESSOR ID*/
1328 Uint32 phase = signal->theData[2]; /* INTERNAL START PHASE*/
1329 Uint32 typestart = signal->theData[3];
1331 cstarttype = typestart;
1332 cstartPhase = phase;
1334 switch (phase){
1335 case ZNDB_SPH1:
1336 jam();
1337 /*----------------------------------------------------------------------*/
1338 /* Set the delay between local checkpoints in ndb startphase 1. */
1339 /*----------------------------------------------------------------------*/
1340 cownNodeId = ownNodeId;
1341 /*-----------------------------------------------------------------------*/
1342 // Compute all static block references in this node as part of
1343 // ndb start phase 1.
1344 /*-----------------------------------------------------------------------*/
1345 cntrlblockref = cntrRef;
1346 clocaltcblockref = calcTcBlockRef(ownNodeId);
1347 clocallqhblockref = calcLqhBlockRef(ownNodeId);
1348 cdictblockref = calcDictBlockRef(ownNodeId);
1349 ndbsttorry10Lab(signal, __LINE__);
1350 break;
1352 case ZNDB_SPH2:
1353 jam();
1354 /*-----------------------------------------------------------------------*/
1355 // Set the number of replicas, maximum is 4 replicas.
1356 // Read the ndb nodes from the configuration.
1357 /*-----------------------------------------------------------------------*/
1359 /*-----------------------------------------------------------------------*/
1360 // For node restarts we will also add a request for permission
1361 // to continue the system restart.
1362 // The permission is given by the master node in the alive set.
1363 /*-----------------------------------------------------------------------*/
1364 createMutexes(signal, 0);
1365 if (cstarttype == NodeState::ST_INITIAL_NODE_RESTART)
1367 jam();
1368 c_set_initial_start_flag = TRUE; // In sysfile...
1370 break;
1372 case ZNDB_SPH3:
1373 jam();
1374 /*-----------------------------------------------------------------------*/
1375 // Non-master nodes performing an initial start will execute
1376 // the start request here since the
1377 // initial start do not synchronise so much from the master.
1378 // In the master nodes the start
1379 // request will be sent directly to dih (in ndb_startreq) when all
1380 // nodes have completed phase 3 of the start.
1381 /*-----------------------------------------------------------------------*/
1382 cmasterState = MASTER_IDLE;
1383 if(cstarttype == NodeState::ST_INITIAL_START ||
1384 cstarttype == NodeState::ST_SYSTEM_RESTART){
1385 jam();
1386 cmasterState = isMaster() ? MASTER_ACTIVE : MASTER_IDLE;
1388 if (!isMaster() && cstarttype == NodeState::ST_INITIAL_START) {
1389 jam();
1390 ndbStartReqLab(signal, cntrRef);
1391 return;
1392 }//if
1393 ndbsttorry10Lab(signal, __LINE__);
1394 break;
1396 case ZNDB_SPH4:
1397 jam();
1398 c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
1399 cmasterTakeOverNode = ZNIL;
1400 switch(typestart){
1401 case NodeState::ST_INITIAL_START:
1402 jam();
1403 ndbsttorry10Lab(signal, __LINE__);
1404 return;
1405 case NodeState::ST_SYSTEM_RESTART:
1406 jam();
1407 if (isMaster()) {
1408 jam();
1409 systemRestartTakeOverLab(signal);
1410 if (anyActiveTakeOver())
1412 jam();
1413 return;
1416 ndbsttorry10Lab(signal, __LINE__);
1417 return;
1418 case NodeState::ST_INITIAL_NODE_RESTART:
1419 case NodeState::ST_NODE_RESTART:
1420 jam();
1422 /***********************************************************************
1423 * When starting nodes while system is operational we must be controlled
1424 * by the master since only one node restart is allowed at a time.
1425 * When this signal is confirmed the master has also copied the
1426 * dictionary and the distribution information.
1428 StartMeReq * req = (StartMeReq*)&signal->theData[0];
1429 req->startingRef = reference();
1430 req->startingVersion = 0; // Obsolete
1431 sendSignal(cmasterdihref, GSN_START_MEREQ, signal,
1432 StartMeReq::SignalLength, JBB);
1433 return;
1435 ndbrequire(false);
1436 break;
1437 case ZNDB_SPH5:
1438 jam();
1439 switch(typestart){
1440 case NodeState::ST_INITIAL_START:
1441 case NodeState::ST_SYSTEM_RESTART:
1442 jam();
1443 jam();
1444 /*---------------------------------------------------------------------*/
1445 // WE EXECUTE A LOCAL CHECKPOINT AS A PART OF A SYSTEM RESTART.
1446 // THE IDEA IS THAT WE NEED TO
1447 // ENSURE THAT WE CAN RECOVER FROM PROBLEMS CAUSED BY MANY NODE
1448 // CRASHES THAT CAUSES THE LOG
1449 // TO GROW AND THE NUMBER OF LOG ROUNDS TO EXECUTE TO GROW.
1450 // THIS CAN OTHERWISE GET US INTO
1451 // A SITUATION WHICH IS UNREPAIRABLE. THUS WE EXECUTE A CHECKPOINT
1452 // BEFORE ALLOWING ANY TRANSACTIONS TO START.
1453 /*---------------------------------------------------------------------*/
1454 if (!isMaster()) {
1455 jam();
1456 ndbsttorry10Lab(signal, __LINE__);
1457 return;
1458 }//if
1460 c_lcpState.immediateLcpStart = true;
1461 cwaitLcpSr = true;
1462 checkLcpStart(signal, __LINE__);
1463 return;
1464 case NodeState::ST_NODE_RESTART:
1465 case NodeState::ST_INITIAL_NODE_RESTART:
1466 jam();
1467 signal->theData[0] = cownNodeId;
1468 signal->theData[1] = reference();
1469 sendSignal(cmasterdihref, GSN_START_COPYREQ, signal, 2, JBB);
1470 return;
1472 ndbrequire(false);
1473 case ZNDB_SPH6:
1474 jam();
1475 switch(typestart){
1476 case NodeState::ST_INITIAL_START:
1477 case NodeState::ST_SYSTEM_RESTART:
1478 jam();
1479 if(isMaster()){
1480 jam();
1481 startGcp(signal);
1483 ndbsttorry10Lab(signal, __LINE__);
1484 return;
1485 case NodeState::ST_NODE_RESTART:
1486 case NodeState::ST_INITIAL_NODE_RESTART:
1487 ndbsttorry10Lab(signal, __LINE__);
1488 return;
1490 ndbrequire(false);
1491 break;
1492 default:
1493 jam();
1494 ndbsttorry10Lab(signal, __LINE__);
1495 break;
1496 }//switch
1497 }//Dbdih::execNDB_STTOR()
1499 void
1500 Dbdih::execNODE_START_REP(Signal* signal)
1503 * Send DICT_UNLOCK_ORD when this node is SL_STARTED.
1505 * Sending it before (sp 7) conflicts with code which assumes
1506 * SL_STARTING means we are in copy phase of NR.
1508 * NodeState::starting.restartType is not supposed to be used
1509 * when SL_STARTED. Also it seems NODE_START_REP can arrive twice.
1511 * For these reasons there are no consistency checks and
1512 * we rely on c_dictLockSlavePtrI_nodeRestart alone.
1514 if (c_dictLockSlavePtrI_nodeRestart != RNIL) {
1515 sendDictUnlockOrd(signal, c_dictLockSlavePtrI_nodeRestart);
1516 c_dictLockSlavePtrI_nodeRestart = RNIL;
1520 void
1521 Dbdih::createMutexes(Signal * signal, Uint32 count){
1522 Callback c = { safe_cast(&Dbdih::createMutex_done), count };
1524 switch(count){
1525 case 0:{
1526 Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
1527 mutex.create(c);
1528 return;
1530 case 1:{
1531 Mutex mutex(signal, c_mutexMgr, c_switchPrimaryMutexHandle);
1532 mutex.create(c);
1533 return;
1537 signal->theData[0] = reference();
1538 sendSignal(cntrlblockref, GSN_READ_NODESREQ, signal, 1, JBB);
1541 void
1542 Dbdih::createMutex_done(Signal* signal, Uint32 senderData, Uint32 retVal){
1543 jamEntry();
1544 ndbrequire(retVal == 0);
1546 switch(senderData){
1547 case 0:{
1548 Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
1549 mutex.release();
1551 case 1:{
1552 Mutex mutex(signal, c_mutexMgr, c_switchPrimaryMutexHandle);
1553 mutex.release();
1557 createMutexes(signal, senderData + 1);
1560 /*****************************************************************************/
1561 /* ------------------------------------------------------------------------- */
1562 /* WE HAVE BEEN REQUESTED BY NDBCNTR TO PERFORM A RESTART OF THE */
1563 /* DATABASE TABLES. */
1564 /* THIS SIGNAL IS SENT AFTER COMPLETING PHASE 3 IN ALL BLOCKS IN A */
1565 /* SYSTEM RESTART. WE WILL ALSO JUMP TO THIS LABEL FROM PHASE 3 IN AN */
1566 /* INITIAL START. */
1567 /* ------------------------------------------------------------------------- */
1568 /*****************************************************************************/
1569 void Dbdih::execNDB_STARTREQ(Signal* signal)
1571 jamEntry();
1572 BlockReference ref = signal->theData[0];
1573 cstarttype = signal->theData[1];
1574 ndbStartReqLab(signal, ref);
1575 }//Dbdih::execNDB_STARTREQ()
1577 void Dbdih::ndbStartReqLab(Signal* signal, BlockReference ref)
1579 cndbStartReqBlockref = ref;
1580 if (cstarttype == NodeState::ST_INITIAL_START) {
1581 jam();
1582 initRestartInfo();
1583 initGciFilesLab(signal);
1584 return;
1587 NodeRecordPtr nodePtr;
1588 Uint32 gci = SYSFILE->lastCompletedGCI[getOwnNodeId()];
1589 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
1591 jam();
1592 ptrAss(nodePtr, nodeRecord);
1593 if (SYSFILE->lastCompletedGCI[nodePtr.i] > gci)
1595 jam();
1597 * Since we're starting(is master) and there
1598 * there are other nodes with higher GCI...
1599 * there gci's must be invalidated...
1600 * and they _must_ do an initial start
1601 * indicate this by setting lastCompletedGCI = 0
1603 SYSFILE->lastCompletedGCI[nodePtr.i] = 0;
1604 ndbrequire(nodePtr.p->nodeStatus != NodeRecord::ALIVE);
1605 warningEvent("Making filesystem for node %d unusable (need --initial)",
1606 nodePtr.i);
1608 else if (nodePtr.p->nodeStatus == NodeRecord::ALIVE &&
1609 SYSFILE->lastCompletedGCI[nodePtr.i] == 0)
1611 jam();
1612 CRASH_INSERTION(7170);
1613 char buf[255];
1614 BaseString::snprintf(buf, sizeof(buf),
1615 "Cluster requires this node to be started "
1616 " with --initial as partial start has been performed"
1617 " and this filesystem is unusable");
1618 progError(__LINE__,
1619 NDBD_EXIT_SR_RESTARTCONFLICT,
1620 buf);
1621 ndbrequire(false);
1626 * This set which GCI we will try to restart to
1628 SYSFILE->newestRestorableGCI = gci;
1630 ndbrequire(isMaster());
1631 copyGciLab(signal, CopyGCIReq::RESTART); // We have already read the file!
1632 }//Dbdih::ndbStartReqLab()
1634 void Dbdih::execREAD_NODESCONF(Signal* signal)
1636 unsigned i;
1637 ReadNodesConf * const readNodes = (ReadNodesConf *)&signal->theData[0];
1638 jamEntry();
1639 Uint32 nodeArray[MAX_NDB_NODES];
1641 csystemnodes = readNodes->noOfNodes;
1642 cmasterNodeId = readNodes->masterNodeId;
1643 int index = 0;
1644 NdbNodeBitmask tmp; tmp.assign(2, readNodes->allNodes);
1645 for (i = 1; i < MAX_NDB_NODES; i++){
1646 jam();
1647 if(tmp.get(i)){
1648 jam();
1649 nodeArray[index] = i;
1650 if(NodeBitmask::get(readNodes->inactiveNodes, i) == false){
1651 jam();
1652 con_lineNodes++;
1653 }//if
1654 index++;
1655 }//if
1656 }//for
1658 if(cstarttype == NodeState::ST_SYSTEM_RESTART ||
1659 cstarttype == NodeState::ST_NODE_RESTART){
1661 for(i = 1; i<MAX_NDB_NODES; i++){
1662 const Uint32 stat = Sysfile::getNodeStatus(i, SYSFILE->nodeStatus);
1663 if(stat == Sysfile::NS_NotDefined && !tmp.get(i)){
1664 jam();
1665 continue;
1668 if(tmp.get(i) && stat != Sysfile::NS_NotDefined){
1669 jam();
1670 continue;
1672 char buf[255];
1673 BaseString::snprintf(buf, sizeof(buf),
1674 "Illegal configuration change."
1675 " Initial start needs to be performed "
1676 " when changing no of storage nodes (node %d)", i);
1677 progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, buf);
1681 ndbrequire(csystemnodes >= 1 && csystemnodes < MAX_NDB_NODES);
1682 if (cstarttype == NodeState::ST_INITIAL_START) {
1683 jam();
1684 ndbrequire(cnoReplicas <= csystemnodes);
1685 calculateHotSpare();
1686 ndbrequire(cnoReplicas <= (csystemnodes - cnoHotSpare));
1687 }//if
1689 cmasterdihref = calcDihBlockRef(cmasterNodeId);
1690 /*-------------------------------------------------------------------------*/
1691 /* MAKE THE LIST OF PRN-RECORD WHICH IS ONE OF THE NODES-LIST IN THIS BLOCK*/
1692 /*-------------------------------------------------------------------------*/
1693 makePrnList(readNodes, nodeArray);
1694 if (cstarttype == NodeState::ST_INITIAL_START) {
1695 jam();
1696 /**----------------------------------------------------------------------
1697 * WHEN WE INITIALLY START A DATABASE WE WILL CREATE NODE GROUPS.
1698 * ALL NODES ARE PUT INTO NODE GROUPS ALTHOUGH HOT SPARE NODES ARE PUT
1699 * INTO A SPECIAL NODE GROUP. IN EACH NODE GROUP WE HAVE THE SAME AMOUNT
1700 * OF NODES AS THERE ARE NUMBER OF REPLICAS.
1701 * ONE POSSIBLE USAGE OF NODE GROUPS ARE TO MAKE A NODE GROUP A COMPLETE
1702 * FRAGMENT OF THE DATABASE. THIS MEANS THAT ALL REPLICAS WILL BE STORED
1703 * IN THE NODE GROUP.
1704 *-----------------------------------------------------------------------*/
1705 makeNodeGroups(nodeArray);
1706 }//if
1707 ndbrequire(checkNodeAlive(cmasterNodeId));
1708 if (cstarttype == NodeState::ST_INITIAL_START) {
1709 jam();
1710 /**-----------------------------------------------------------------------
1711 * INITIALISE THE SECOND NODE-LIST AND SET NODE BITS AND SOME NODE STATUS.
1712 * VERY CONNECTED WITH MAKE_NODE_GROUPS. CHANGING ONE WILL AFFECT THE
1713 * OTHER AS WELL.
1714 *-----------------------------------------------------------------------*/
1715 setInitialActiveStatus();
1716 } else if (cstarttype == NodeState::ST_SYSTEM_RESTART) {
1717 jam();
1718 /*empty*/;
1719 } else if ((cstarttype == NodeState::ST_NODE_RESTART) ||
1720 (cstarttype == NodeState::ST_INITIAL_NODE_RESTART)) {
1721 jam();
1722 nodeRestartPh2Lab(signal);
1723 return;
1724 } else {
1725 ndbrequire(false);
1726 }//if
1727 /**------------------------------------------------------------------------
1728 * ESTABLISH CONNECTIONS WITH THE OTHER DIH BLOCKS AND INITIALISE THIS
1729 * NODE-LIST THAT HANDLES CONNECTION WITH OTHER DIH BLOCKS.
1730 *-------------------------------------------------------------------------*/
1731 ndbsttorry10Lab(signal, __LINE__);
1732 }//Dbdih::execREAD_NODESCONF()
1734 /*---------------------------------------------------------------------------*/
1735 /* START NODE LOGIC FOR NODE RESTART */
1736 /*---------------------------------------------------------------------------*/
1737 void Dbdih::nodeRestartPh2Lab(Signal* signal)
1740 * Lock master DICT to avoid metadata operations during INR/NR.
1741 * Done just before START_PERMREQ.
1743 * It would be more elegant to do this just before START_MEREQ.
1744 * The problem is, on INR we end up in massive invalidateNodeLCP
1745 * which is not fully protected against metadata ops.
1747 ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL);
1749 // check that we are not yet taking part in schema ops
1750 CRASH_INSERTION(7174);
1752 Uint32 lockType = DictLockReq::NodeRestartLock;
1753 Callback c = { safe_cast(&Dbdih::recvDictLockConf_nodeRestart), 0 };
1754 sendDictLockReq(signal, lockType, c);
1757 void Dbdih::recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret)
1759 ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL);
1760 ndbrequire(data != RNIL);
1761 c_dictLockSlavePtrI_nodeRestart = data;
1763 nodeRestartPh2Lab2(signal);
1766 void Dbdih::nodeRestartPh2Lab2(Signal* signal)
1768 /*------------------------------------------------------------------------*/
1769 // REQUEST FOR PERMISSION FROM MASTER TO START A NODE IN AN ALREADY
1770 // RUNNING SYSTEM.
1771 /*------------------------------------------------------------------------*/
1772 StartPermReq * const req = (StartPermReq *)&signal->theData[0];
1774 req->blockRef = reference();
1775 req->nodeId = cownNodeId;
1776 req->startType = cstarttype;
1777 sendSignal(cmasterdihref, GSN_START_PERMREQ, signal, 3, JBB);
1780 void Dbdih::execSTART_PERMCONF(Signal* signal)
1782 jamEntry();
1783 CRASH_INSERTION(7121);
1784 Uint32 nodeId = signal->theData[0];
1785 cfailurenr = signal->theData[1];
1786 ndbrequire(nodeId == cownNodeId);
1787 ndbsttorry10Lab(signal, __LINE__);
1788 }//Dbdih::execSTART_PERMCONF()
1790 void Dbdih::execSTART_PERMREF(Signal* signal)
1792 jamEntry();
1793 Uint32 errorCode = signal->theData[1];
1794 if (errorCode == StartPermRef::ZNODE_ALREADY_STARTING_ERROR ||
1795 errorCode == StartPermRef::ZNODE_START_DISALLOWED_ERROR) {
1796 jam();
1797 /*-----------------------------------------------------------------------*/
1798 // The master was busy adding another node. We will wait for a second and
1799 // try again.
1800 /*-----------------------------------------------------------------------*/
1801 signal->theData[0] = DihContinueB::ZSTART_PERMREQ_AGAIN;
1802 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
1803 return;
1804 }//if
1806 if (errorCode == StartPermRef::InitialStartRequired)
1808 CRASH_INSERTION(7170);
1809 char buf[255];
1810 BaseString::snprintf(buf, sizeof(buf),
1811 "Cluster requires this node to be started "
1812 " with --initial as partial start has been performed"
1813 " and this filesystem is unusable");
1814 progError(__LINE__,
1815 NDBD_EXIT_SR_RESTARTCONFLICT,
1816 buf);
1817 ndbrequire(false);
1819 /*------------------------------------------------------------------------*/
1820 // Some node process in another node involving our node was still active. We
1821 // will recover from this by crashing here.
1822 // This is controlled restart using the
1823 // already existing features of node crashes. It is not a bug getting here.
1824 /*-------------------------------------------------------------------------*/
1825 ndbrequire(false);
1826 return;
1827 }//Dbdih::execSTART_PERMREF()
1829 /*---------------------------------------------------------------------------*/
1830 /* THIS SIGNAL IS RECEIVED IN THE STARTING NODE WHEN THE START_MEREQ */
1831 /* HAS BEEN EXECUTED IN THE MASTER NODE. */
1832 /*---------------------------------------------------------------------------*/
1833 void Dbdih::execSTART_MECONF(Signal* signal)
1835 jamEntry();
1836 StartMeConf * const startMe = (StartMeConf *)&signal->theData[0];
1837 Uint32 nodeId = startMe->startingNodeId;
1838 const Uint32 startWord = startMe->startWord;
1839 Uint32 i;
1841 CRASH_INSERTION(7130);
1842 ndbrequire(nodeId == cownNodeId);
1843 arrGuard(startWord + StartMeConf::DATA_SIZE, sizeof(cdata)/4);
1844 for(i = 0; i < StartMeConf::DATA_SIZE; i++)
1845 cdata[startWord+i] = startMe->data[i];
1847 if(startWord + StartMeConf::DATA_SIZE < Sysfile::SYSFILE_SIZE32){
1848 jam();
1850 * We are still waiting for data
1852 return;
1854 jam();
1857 * Copy into sysfile
1859 * But dont copy lastCompletedGCI:s
1861 Uint32 key = SYSFILE->m_restart_seq;
1862 Uint32 tempGCP[MAX_NDB_NODES];
1863 for(i = 0; i < MAX_NDB_NODES; i++)
1864 tempGCP[i] = SYSFILE->lastCompletedGCI[i];
1866 for(i = 0; i < Sysfile::SYSFILE_SIZE32; i++)
1867 sysfileData[i] = cdata[i];
1869 SYSFILE->m_restart_seq = key;
1870 for(i = 0; i < MAX_NDB_NODES; i++)
1871 SYSFILE->lastCompletedGCI[i] = tempGCP[i];
1873 setNodeActiveStatus();
1874 setNodeGroups();
1875 ndbsttorry10Lab(signal, __LINE__);
1876 }//Dbdih::execSTART_MECONF()
1878 void Dbdih::execSTART_COPYCONF(Signal* signal)
1880 jamEntry();
1881 Uint32 nodeId = signal->theData[0];
1882 ndbrequire(nodeId == cownNodeId);
1883 CRASH_INSERTION(7132);
1884 ndbsttorry10Lab(signal, __LINE__);
1885 return;
1886 }//Dbdih::execSTART_COPYCONF()
1888 /*---------------------------------------------------------------------------*/
1889 /* MASTER LOGIC FOR NODE RESTART */
1890 /*---------------------------------------------------------------------------*/
1891 /* NODE RESTART PERMISSION REQUEST */
1892 /*---------------------------------------------------------------------------*/
1893 // A REQUEST FROM A STARTING NODE TO PERFORM A NODE RESTART. IF NO OTHER NODE
1894 // IS ACTIVE IN PERFORMING A NODE RESTART AND THERE ARE NO ACTIVE PROCESSES IN
1895 // THIS NODE INVOLVING THE STARTING NODE THIS REQUEST WILL BE GRANTED.
1896 /*---------------------------------------------------------------------------*/
1897 void Dbdih::execSTART_PERMREQ(Signal* signal)
1899 StartPermReq * const req = (StartPermReq*)&signal->theData[0];
1900 jamEntry();
1901 const BlockReference retRef = req->blockRef;
1902 const Uint32 nodeId = req->nodeId;
1903 const Uint32 typeStart = req->startType;
1904 CRASH_INSERTION(7122);
1905 ndbrequire(isMaster());
1906 ndbrequire(refToNode(retRef) == nodeId);
1907 if ((c_nodeStartMaster.activeState) ||
1908 (c_nodeStartMaster.wait != ZFALSE) ||
1909 ERROR_INSERTED_CLEAR(7175)) {
1910 jam();
1911 signal->theData[0] = nodeId;
1912 signal->theData[1] = StartPermRef::ZNODE_ALREADY_STARTING_ERROR;
1913 sendSignal(retRef, GSN_START_PERMREF, signal, 2, JBB);
1914 return;
1915 }//if
1916 if (getNodeStatus(nodeId) != NodeRecord::DEAD){
1917 g_eventLogger.error("nodeStatus in START_PERMREQ = %u",
1918 (Uint32) getNodeStatus(nodeId));
1919 ndbrequire(false);
1920 }//if
1922 if (SYSFILE->lastCompletedGCI[nodeId] == 0 &&
1923 typeStart != NodeState::ST_INITIAL_NODE_RESTART)
1925 jam();
1926 signal->theData[0] = nodeId;
1927 signal->theData[1] = StartPermRef::InitialStartRequired;
1928 sendSignal(retRef, GSN_START_PERMREF, signal, 2, JBB);
1929 return;
1932 /*----------------------------------------------------------------------
1933 * WE START THE INCLUSION PROCEDURE
1934 * ---------------------------------------------------------------------*/
1935 c_nodeStartMaster.failNr = cfailurenr;
1936 c_nodeStartMaster.wait = ZFALSE;
1937 c_nodeStartMaster.startInfoErrorCode = 0;
1938 c_nodeStartMaster.startNode = nodeId;
1939 c_nodeStartMaster.activeState = true;
1940 c_nodeStartMaster.m_outstandingGsn = GSN_START_INFOREQ;
1942 setNodeStatus(nodeId, NodeRecord::STARTING);
1944 * But if it's a NodeState::ST_INITIAL_NODE_RESTART
1946 * We first have to clear LCP's
1947 * For normal node restart we simply ensure that all nodes
1948 * are informed of the node restart
1950 StartInfoReq *const r =(StartInfoReq*)&signal->theData[0];
1951 r->startingNodeId = nodeId;
1952 r->typeStart = typeStart;
1953 r->systemFailureNo = cfailurenr;
1954 sendLoopMacro(START_INFOREQ, sendSTART_INFOREQ);
1955 }//Dbdih::execSTART_PERMREQ()
1957 void Dbdih::execSTART_INFOREF(Signal* signal)
1959 StartInfoRef * ref = (StartInfoRef*)&signal->theData[0];
1960 if (getNodeStatus(ref->startingNodeId) != NodeRecord::STARTING) {
1961 jam();
1962 return;
1963 }//if
1964 ndbrequire(c_nodeStartMaster.startNode == ref->startingNodeId);
1965 c_nodeStartMaster.startInfoErrorCode = ref->errorCode;
1966 startInfoReply(signal, ref->sendingNodeId);
1967 }//Dbdih::execSTART_INFOREF()
1969 void Dbdih::execSTART_INFOCONF(Signal* signal)
1971 jamEntry();
1972 StartInfoConf * conf = (StartInfoConf*)&signal->theData[0];
1973 if (getNodeStatus(conf->startingNodeId) != NodeRecord::STARTING) {
1974 jam();
1975 return;
1976 }//if
1977 ndbrequire(c_nodeStartMaster.startNode == conf->startingNodeId);
1978 startInfoReply(signal, conf->sendingNodeId);
1979 }//Dbdih::execSTART_INFOCONF()
1981 void Dbdih::startInfoReply(Signal* signal, Uint32 nodeId)
1983 receiveLoopMacro(START_INFOREQ, nodeId);
1985 * We're finished with the START_INFOREQ's
1987 if (c_nodeStartMaster.startInfoErrorCode == 0) {
1988 jam();
1990 * Everything has been a success so far
1992 StartPermConf * conf = (StartPermConf*)&signal->theData[0];
1993 conf->startingNodeId = c_nodeStartMaster.startNode;
1994 conf->systemFailureNo = cfailurenr;
1995 sendSignal(calcDihBlockRef(c_nodeStartMaster.startNode),
1996 GSN_START_PERMCONF, signal, StartPermConf::SignalLength, JBB);
1997 c_nodeStartMaster.m_outstandingGsn = GSN_START_PERMCONF;
1998 } else {
1999 jam();
2000 StartPermRef * ref = (StartPermRef*)&signal->theData[0];
2001 ref->startingNodeId = c_nodeStartMaster.startNode;
2002 ref->errorCode = c_nodeStartMaster.startInfoErrorCode;
2003 sendSignal(calcDihBlockRef(c_nodeStartMaster.startNode),
2004 GSN_START_PERMREF, signal, StartPermRef::SignalLength, JBB);
2005 nodeResetStart();
2006 }//if
2007 }//Dbdih::startInfoReply()
2009 /*---------------------------------------------------------------------------*/
2010 /* NODE RESTART CONTINUE REQUEST */
2011 /*---------------------------------------------------------------------------*/
2012 // THIS SIGNAL AND THE CODE BELOW IS EXECUTED BY THE MASTER WHEN IT HAS BEEN
2013 // REQUESTED TO START UP A NEW NODE. The master instructs the starting node
2014 // how to set up its log for continued execution.
2015 /*---------------------------------------------------------------------------*/
2016 void Dbdih::execSTART_MEREQ(Signal* signal)
2018 StartMeReq * req = (StartMeReq*)&signal->theData[0];
2019 jamEntry();
2020 const BlockReference Tblockref = req->startingRef;
2021 const Uint32 Tnodeid = refToNode(Tblockref);
2023 ndbrequire(isMaster());
2024 ndbrequire(c_nodeStartMaster.startNode == Tnodeid);
2025 ndbrequire(getNodeStatus(Tnodeid) == NodeRecord::STARTING);
2027 c_nodeStartMaster.blockLcp = true;
2028 if ((c_lcpState.lcpStatus != LCP_STATUS_IDLE) &&
2029 (c_lcpState.lcpStatus != LCP_TCGET)) {
2030 jam();
2031 /*-----------------------------------------------------------------------*/
2032 // WE WILL NOT ALLOW A NODE RESTART TO COME IN WHEN A LOCAL CHECKPOINT IS
2033 // ONGOING. IT WOULD COMPLICATE THE LCP PROTOCOL TOO MUCH. WE WILL ADD THIS
2034 // LATER.
2035 /*-----------------------------------------------------------------------*/
2036 return;
2037 }//if
2038 lcpBlockedLab(signal);
2039 }//Dbdih::nodeRestartStartRecConfLab()
2041 void Dbdih::lcpBlockedLab(Signal* signal)
2043 ndbrequire(getNodeStatus(c_nodeStartMaster.startNode)==NodeRecord::STARTING);
2044 /*------------------------------------------------------------------------*/
2045 // NOW WE HAVE COPIED ALL INFORMATION IN DICT WE ARE NOW READY TO COPY ALL
2046 // INFORMATION IN DIH TO THE NEW NODE.
2047 /*------------------------------------------------------------------------*/
2048 c_nodeStartMaster.wait = 10;
2049 signal->theData[0] = DihContinueB::ZCOPY_NODE;
2050 signal->theData[1] = 0;
2051 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
2052 c_nodeStartMaster.m_outstandingGsn = GSN_COPY_TABREQ;
2053 }//Dbdih::lcpBlockedLab()
2055 void Dbdih::nodeDictStartConfLab(Signal* signal)
2057 /*-------------------------------------------------------------------------*/
2058 // NOW WE HAVE COPIED BOTH DIH AND DICT INFORMATION. WE ARE NOW READY TO
2059 // INTEGRATE THE NODE INTO THE LCP AND GCP PROTOCOLS AND TO ALLOW UPDATES OF
2060 // THE DICTIONARY AGAIN.
2061 /*-------------------------------------------------------------------------*/
2062 c_nodeStartMaster.wait = ZFALSE;
2063 c_nodeStartMaster.blockGcp = true;
2064 if (cgcpStatus != GCP_READY) {
2065 /*-----------------------------------------------------------------------*/
2066 // The global checkpoint is executing. Wait until it is completed before we
2067 // continue processing the node recovery.
2068 /*-----------------------------------------------------------------------*/
2069 jam();
2070 return;
2071 }//if
2072 gcpBlockedLab(signal);
2074 /*-----------------------------------------------------------------*/
2075 // Report that node restart has completed copy of dictionary.
2076 /*-----------------------------------------------------------------*/
2077 signal->theData[0] = NDB_LE_NR_CopyDict;
2078 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
2079 }//Dbdih::nodeDictStartConfLab()
2081 void Dbdih::dihCopyCompletedLab(Signal* signal)
2083 BlockReference ref = calcDictBlockRef(c_nodeStartMaster.startNode);
2084 DictStartReq * req = (DictStartReq*)&signal->theData[0];
2085 req->restartGci = cnewgcp;
2086 req->senderRef = reference();
2087 sendSignal(ref, GSN_DICTSTARTREQ,
2088 signal, DictStartReq::SignalLength, JBB);
2089 c_nodeStartMaster.m_outstandingGsn = GSN_DICTSTARTREQ;
2090 c_nodeStartMaster.wait = 0;
2091 }//Dbdih::dihCopyCompletedLab()
2093 void Dbdih::gcpBlockedLab(Signal* signal)
2095 /*-----------------------------------------------------------------*/
2096 // Report that node restart has completed copy of distribution info.
2097 /*-----------------------------------------------------------------*/
2098 signal->theData[0] = NDB_LE_NR_CopyDistr;
2099 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
2102 * The node DIH will be part of LCP
2104 NodeRecordPtr nodePtr;
2105 nodePtr.i = c_nodeStartMaster.startNode;
2106 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
2107 nodePtr.p->m_inclDihLcp = true;
2109 /*-------------------------------------------------------------------------*/
2110 // NOW IT IS TIME TO INFORM ALL OTHER NODES IN THE CLUSTER OF THE STARTED
2111 // NODE SUCH THAT THEY ALSO INCLUDE THE NODE IN THE NODE LISTS AND SO FORTH.
2112 /*------------------------------------------------------------------------*/
2113 sendLoopMacro(INCL_NODEREQ, sendINCL_NODEREQ);
2114 /*-------------------------------------------------------------------------*/
2115 // We also need to send to the starting node to ensure he is aware of the
2116 // global checkpoint id and the correct state. We do not wait for any reply
2117 // since the starting node will not send any.
2118 /*-------------------------------------------------------------------------*/
2119 Uint32 startVersion = getNodeInfo(c_nodeStartMaster.startNode).m_version;
2121 if ((getMajor(startVersion) == 4 &&
2122 startVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
2123 (getMajor(startVersion) == 5 &&
2124 startVersion >= NDBD_INCL_NODECONF_VERSION_5) ||
2125 (getMajor(startVersion) > 5))
2127 c_INCL_NODEREQ_Counter.setWaitingFor(c_nodeStartMaster.startNode);
2130 sendINCL_NODEREQ(signal, c_nodeStartMaster.startNode);
2131 }//Dbdih::gcpBlockedLab()
2133 /*---------------------------------------------------------------------------*/
2134 // THIS SIGNAL IS EXECUTED IN BOTH SLAVES AND IN THE MASTER
2135 /*---------------------------------------------------------------------------*/
2136 void Dbdih::execINCL_NODECONF(Signal* signal)
2138 jamEntry();
2139 Uint32 TstartNode = signal->theData[0];
2140 Uint32 TsendNodeId_or_blockref = signal->theData[1];
2142 Uint32 blocklist[6];
2143 blocklist[0] = clocallqhblockref;
2144 blocklist[1] = clocaltcblockref;
2145 blocklist[2] = cdictblockref;
2146 blocklist[3] = numberToRef(BACKUP, getOwnNodeId());
2147 blocklist[4] = numberToRef(SUMA, getOwnNodeId());
2148 blocklist[5] = 0;
2150 for (Uint32 i = 0; blocklist[i] != 0; i++)
2152 if (TsendNodeId_or_blockref == blocklist[i])
2154 jam();
2156 if (TstartNode != c_nodeStartSlave.nodeId)
2158 jam();
2159 warningEvent("Recevied INCL_NODECONF for %u from %s"
2160 " while %u is starting",
2161 TstartNode,
2162 getBlockName(refToBlock(TsendNodeId_or_blockref)),
2163 c_nodeStartSlave.nodeId);
2164 return;
2167 if (getNodeStatus(c_nodeStartSlave.nodeId) == NodeRecord::ALIVE &&
2168 blocklist[i+1] != 0)
2171 * Send to next in block list
2173 jam();
2174 signal->theData[0] = reference();
2175 signal->theData[1] = c_nodeStartSlave.nodeId;
2176 sendSignal(blocklist[i+1], GSN_INCL_NODEREQ, signal, 2, JBB);
2177 return;
2179 else
2182 * All done, reply to master
2184 jam();
2185 signal->theData[0] = c_nodeStartSlave.nodeId;
2186 signal->theData[1] = cownNodeId;
2187 sendSignal(cmasterdihref, GSN_INCL_NODECONF, signal, 2, JBB);
2189 c_nodeStartSlave.nodeId = 0;
2190 return;
2195 if (c_nodeStartMaster.startNode != TstartNode)
2197 jam();
2198 warningEvent("Recevied INCL_NODECONF for %u from %u"
2199 " while %u is starting",
2200 TstartNode,
2201 TsendNodeId_or_blockref,
2202 c_nodeStartMaster.startNode);
2203 return;
2206 ndbrequire(cmasterdihref = reference());
2207 receiveLoopMacro(INCL_NODEREQ, TsendNodeId_or_blockref);
2209 CRASH_INSERTION(7128);
2210 /*-------------------------------------------------------------------------*/
2211 // Now that we have included the starting node in the node lists in the
2212 // various blocks we are ready to start the global checkpoint protocol
2213 /*------------------------------------------------------------------------*/
2214 c_nodeStartMaster.wait = 11;
2215 c_nodeStartMaster.blockGcp = false;
2217 signal->theData[0] = reference();
2218 sendSignal(reference(), GSN_UNBLO_DICTCONF, signal, 1, JBB);
2219 }//Dbdih::execINCL_NODECONF()
2221 void Dbdih::execUNBLO_DICTCONF(Signal* signal)
2223 jamEntry();
2224 c_nodeStartMaster.wait = ZFALSE;
2225 if (!c_nodeStartMaster.activeState) {
2226 jam();
2227 return;
2228 }//if
2230 CRASH_INSERTION(7129);
2231 /**-----------------------------------------------------------------------
2232 * WE HAVE NOW PREPARED IT FOR INCLUSION IN THE LCP PROTOCOL.
2233 * WE CAN NOW START THE LCP PROTOCOL AGAIN.
2234 * WE HAVE ALSO MADE THIS FOR THE GCP PROTOCOL.
2235 * WE ARE READY TO START THE PROTOCOLS AND RESPOND TO THE START REQUEST
2236 * FROM THE STARTING NODE.
2237 *------------------------------------------------------------------------*/
2239 StartMeConf * const startMe = (StartMeConf *)&signal->theData[0];
2241 const Uint32 wordPerSignal = StartMeConf::DATA_SIZE;
2242 const int noOfSignals = ((Sysfile::SYSFILE_SIZE32 + (wordPerSignal - 1)) /
2243 wordPerSignal);
2245 startMe->startingNodeId = c_nodeStartMaster.startNode;
2246 startMe->startWord = 0;
2248 const Uint32 ref = calcDihBlockRef(c_nodeStartMaster.startNode);
2249 for(int i = 0; i < noOfSignals; i++){
2250 jam();
2251 { // Do copy
2252 const int startWord = startMe->startWord;
2253 for(Uint32 j = 0; j < wordPerSignal; j++){
2254 startMe->data[j] = sysfileData[j+startWord];
2257 sendSignal(ref, GSN_START_MECONF, signal, StartMeConf::SignalLength, JBB);
2258 startMe->startWord += wordPerSignal;
2259 }//for
2260 c_nodeStartMaster.m_outstandingGsn = GSN_START_MECONF;
2261 }//Dbdih::execUNBLO_DICTCONF()
2263 /*---------------------------------------------------------------------------*/
2264 /* NODE RESTART COPY REQUEST */
2265 /*---------------------------------------------------------------------------*/
2266 // A NODE RESTART HAS REACHED ITS FINAL PHASE WHEN THE DATA IS TO BE COPIED
2267 // TO THE NODE. START_COPYREQ IS EXECUTED BY THE MASTER NODE.
2268 /*---------------------------------------------------------------------------*/
2269 void Dbdih::execSTART_COPYREQ(Signal* signal)
2271 jamEntry();
2272 Uint32 startNodeId = signal->theData[0];
2273 //BlockReference startingRef = signal->theData[1];
2274 ndbrequire(c_nodeStartMaster.startNode == startNodeId);
2275 /*-------------------------------------------------------------------------*/
2276 // REPORT Copy process of node restart is now about to start up.
2277 /*-------------------------------------------------------------------------*/
2278 signal->theData[0] = NDB_LE_NR_CopyFragsStarted;
2279 signal->theData[1] = startNodeId;
2280 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2282 CRASH_INSERTION(7131);
2283 nodeRestartTakeOver(signal, startNodeId);
2284 // BlockReference ref = calcQmgrBlockRef(startNodeId);
2285 // signal->theData[0] = cownNodeId;
2286 // Remove comments as soon as I open up the Qmgr block
2287 // TODO_RONM
2288 // sendSignal(ref, GSN_ALLOW_NODE_CRASHORD, signal, 1, JBB);
2289 }//Dbdih::execSTART_COPYREQ()
2291 /*---------------------------------------------------------------------------*/
2292 /* SLAVE LOGIC FOR NODE RESTART */
2293 /*---------------------------------------------------------------------------*/
2294 void Dbdih::execSTART_INFOREQ(Signal* signal)
2296 jamEntry();
2297 StartInfoReq *const req =(StartInfoReq*)&signal->theData[0];
2298 Uint32 startNode = req->startingNodeId;
2299 if (cfailurenr != req->systemFailureNo) {
2300 jam();
2301 //---------------------------------------------------------------
2302 // A failure occurred since master sent this request. We will ignore
2303 // this request since the node is already dead that is starting.
2304 //---------------------------------------------------------------
2305 return;
2306 }//if
2307 CRASH_INSERTION(7123);
2308 if (isMaster()) {
2309 jam();
2310 ndbrequire(getNodeStatus(startNode) == NodeRecord::STARTING);
2311 } else {
2312 jam();
2313 ndbrequire(getNodeStatus(startNode) == NodeRecord::DEAD);
2314 }//if
2315 if ((!getAllowNodeStart(startNode)) ||
2316 (c_nodeStartSlave.nodeId != 0) ||
2317 (ERROR_INSERTED(7124))) {
2318 jam();
2319 StartInfoRef *const ref =(StartInfoRef*)&signal->theData[0];
2320 ref->startingNodeId = startNode;
2321 ref->sendingNodeId = cownNodeId;
2322 ref->errorCode = StartPermRef::ZNODE_START_DISALLOWED_ERROR;
2323 sendSignal(cmasterdihref, GSN_START_INFOREF, signal,
2324 StartInfoRef::SignalLength, JBB);
2325 return;
2326 }//if
2327 setNodeStatus(startNode, NodeRecord::STARTING);
2328 if (req->typeStart == NodeState::ST_INITIAL_NODE_RESTART) {
2329 jam();
2330 setAllowNodeStart(startNode, false);
2331 invalidateNodeLCP(signal, startNode, 0);
2332 } else {
2333 jam();
2334 StartInfoConf * c = (StartInfoConf*)&signal->theData[0];
2335 c->sendingNodeId = cownNodeId;
2336 c->startingNodeId = startNode;
2337 sendSignal(cmasterdihref, GSN_START_INFOCONF, signal,
2338 StartInfoConf::SignalLength, JBB);
2339 return;
2340 }//if
2341 }//Dbdih::execSTART_INFOREQ()
2343 void Dbdih::execINCL_NODEREQ(Signal* signal)
2345 jamEntry();
2346 Uint32 retRef = signal->theData[0];
2347 Uint32 nodeId = signal->theData[1];
2348 if (nodeId == getOwnNodeId() && ERROR_INSERTED(7165))
2350 CLEAR_ERROR_INSERT_VALUE;
2351 sendSignalWithDelay(reference(), GSN_INCL_NODEREQ, signal, 5000, signal->getLength());
2352 return;
2355 Uint32 tnodeStartFailNr = signal->theData[2];
2356 currentgcp = signal->theData[4];
2357 CRASH_INSERTION(7127);
2358 cnewgcp = currentgcp;
2359 coldgcp = currentgcp - 1;
2360 if (!isMaster()) {
2361 jam();
2362 /*-----------------------------------------------------------------------*/
2363 // We don't want to change the state of the master since he can be in the
2364 // state LCP_TCGET at this time.
2365 /*-----------------------------------------------------------------------*/
2366 c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
2367 }//if
2369 /*-------------------------------------------------------------------------*/
2370 // When a node is restarted we must ensure that a lcp will be run
2371 // as soon as possible and the reset the delay according to the original
2372 // configuration.
2373 // Without an initial local checkpoint the new node will not be available.
2374 /*-------------------------------------------------------------------------*/
2375 if (getOwnNodeId() == nodeId) {
2376 jam();
2377 /*-----------------------------------------------------------------------*/
2378 // We are the starting node. We came here only to set the global checkpoint
2379 // id's and the lcp status.
2380 /*-----------------------------------------------------------------------*/
2381 CRASH_INSERTION(7171);
2382 Uint32 masterVersion = getNodeInfo(refToNode(cmasterdihref)).m_version;
2384 if ((NDB_VERSION_MAJOR == 4 &&
2385 masterVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
2386 (NDB_VERSION_MAJOR == 5 &&
2387 masterVersion >= NDBD_INCL_NODECONF_VERSION_5) ||
2388 (NDB_VERSION_MAJOR > 5))
2390 signal->theData[0] = getOwnNodeId();
2391 signal->theData[1] = getOwnNodeId();
2392 sendSignal(cmasterdihref, GSN_INCL_NODECONF, signal, 2, JBB);
2394 return;
2395 }//if
2396 if (getNodeStatus(nodeId) != NodeRecord::STARTING) {
2397 jam();
2398 return;
2399 }//if
2400 ndbrequire(cfailurenr == tnodeStartFailNr);
2401 ndbrequire (c_nodeStartSlave.nodeId == 0);
2402 c_nodeStartSlave.nodeId = nodeId;
2404 ndbrequire (retRef == cmasterdihref);
2406 NodeRecordPtr nodePtr;
2407 nodePtr.i = nodeId;
2408 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
2410 Sysfile::ActiveStatus TsaveState = nodePtr.p->activeStatus;
2411 Uint32 TnodeGroup = nodePtr.p->nodeGroup;
2413 new (nodePtr.p) NodeRecord();
2414 nodePtr.p->nodeGroup = TnodeGroup;
2415 nodePtr.p->activeStatus = TsaveState;
2416 nodePtr.p->nodeStatus = NodeRecord::ALIVE;
2417 nodePtr.p->useInTransactions = true;
2418 nodePtr.p->m_inclDihLcp = true;
2420 removeDeadNode(nodePtr);
2421 insertAlive(nodePtr);
2422 con_lineNodes++;
2424 /*-------------------------------------------------------------------------*/
2425 // WE WILL ALSO SEND THE INCLUDE NODE REQUEST TO THE LOCAL LQH BLOCK.
2426 /*-------------------------------------------------------------------------*/
2427 signal->theData[0] = reference();
2428 signal->theData[1] = nodeId;
2429 signal->theData[2] = currentgcp;
2430 sendSignal(clocallqhblockref, GSN_INCL_NODEREQ, signal, 3, JBB);
2431 }//Dbdih::execINCL_NODEREQ()
2433 /* ------------------------------------------------------------------------- */
2434 // execINCL_NODECONF() is found in the master logic part since it is used by
2435 // both the master and the slaves.
2436 /* ------------------------------------------------------------------------- */
2438 /*****************************************************************************/
2439 /*********** TAKE OVER DECISION MODULE *************/
2440 /*****************************************************************************/
2441 // This module contains the subroutines that take the decision whether to take
2442 // over a node now or not.
2443 /* ------------------------------------------------------------------------- */
2444 /* MASTER LOGIC FOR SYSTEM RESTART */
2445 /* ------------------------------------------------------------------------- */
2446 // WE ONLY COME HERE IF WE ARE THE MASTER AND WE ARE PERFORMING A SYSTEM
2447 // RESTART. WE ALSO COME HERE DURING THIS SYSTEM RESTART ONE TIME PER NODE
2448 // THAT NEEDS TAKE OVER.
2449 /*---------------------------------------------------------------------------*/
2450 // WE CHECK IF ANY NODE NEEDS TO BE TAKEN OVER AND THE TAKE OVER HAS NOT YET
2451 // BEEN STARTED OR COMPLETED.
2452 /*---------------------------------------------------------------------------*/
2453 void
2454 Dbdih::systemRestartTakeOverLab(Signal* signal)
2456 NodeRecordPtr nodePtr;
2457 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
2458 jam();
2459 ptrAss(nodePtr, nodeRecord);
2460 switch (nodePtr.p->activeStatus) {
2461 case Sysfile::NS_Active:
2462 case Sysfile::NS_ActiveMissed_1:
2463 jam();
2464 break;
2465 /*---------------------------------------------------------------------*/
2466 // WE HAVE NOT REACHED A STATE YET WHERE THIS NODE NEEDS TO BE TAKEN OVER
2467 /*---------------------------------------------------------------------*/
2468 case Sysfile::NS_ActiveMissed_2:
2469 case Sysfile::NS_NotActive_NotTakenOver:
2470 jam();
2471 /*---------------------------------------------------------------------*/
2472 // THIS NODE IS IN TROUBLE.
2473 // WE MUST SUCCEED WITH A LOCAL CHECKPOINT WITH THIS NODE TO REMOVE THE
2474 // DANGER. IF THE NODE IS NOT ALIVE THEN THIS WILL NOT BE
2475 // POSSIBLE AND WE CAN START THE TAKE OVER IMMEDIATELY IF WE HAVE ANY
2476 // NODES THAT CAN PERFORM A TAKE OVER.
2477 /*---------------------------------------------------------------------*/
2478 if (nodePtr.p->nodeStatus != NodeRecord::ALIVE) {
2479 jam();
2480 Uint32 ThotSpareNode = findHotSpare();
2481 if (ThotSpareNode != RNIL) {
2482 jam();
2483 startTakeOver(signal, RNIL, ThotSpareNode, nodePtr.i);
2484 }//if
2485 } else if(nodePtr.p->activeStatus == Sysfile::NS_NotActive_NotTakenOver){
2486 jam();
2487 /*-------------------------------------------------------------------*/
2488 // NOT ACTIVE NODES THAT HAVE NOT YET BEEN TAKEN OVER NEEDS TAKE OVER
2489 // IMMEDIATELY. IF WE ARE ALIVE WE TAKE OVER OUR OWN NODE.
2490 /*-------------------------------------------------------------------*/
2491 infoEvent("Take over of node %d started",
2492 nodePtr.i);
2493 startTakeOver(signal, RNIL, nodePtr.i, nodePtr.i);
2494 }//if
2495 break;
2496 case Sysfile::NS_TakeOver:
2497 /**-------------------------------------------------------------------
2498 * WE MUST HAVE FAILED IN THE MIDDLE OF THE TAKE OVER PROCESS.
2499 * WE WILL CONCLUDE THE TAKE OVER PROCESS NOW.
2500 *-------------------------------------------------------------------*/
2501 if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
2502 jam();
2503 Uint32 takeOverNode = Sysfile::getTakeOverNode(nodePtr.i,
2504 SYSFILE->takeOver);
2505 if(takeOverNode == 0){
2506 jam();
2507 warningEvent("Bug in take-over code restarting");
2508 takeOverNode = nodePtr.i;
2510 startTakeOver(signal, RNIL, nodePtr.i, takeOverNode);
2511 } else {
2512 jam();
2513 /**-------------------------------------------------------------------
2514 * We are not currently taking over, change our active status.
2515 *-------------------------------------------------------------------*/
2516 nodePtr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
2517 setNodeRestartInfoBits();
2518 }//if
2519 break;
2520 case Sysfile::NS_HotSpare:
2521 jam();
2522 break;
2523 /*---------------------------------------------------------------------*/
2524 // WE NEED NOT TAKE OVER NODES THAT ARE HOT SPARE.
2525 /*---------------------------------------------------------------------*/
2526 case Sysfile::NS_NotDefined:
2527 jam();
2528 break;
2529 /*---------------------------------------------------------------------*/
2530 // WE NEED NOT TAKE OVER NODES THAT DO NOT EVEN EXIST IN THE CLUSTER.
2531 /*---------------------------------------------------------------------*/
2532 default:
2533 ndbrequire(false);
2534 break;
2535 }//switch
2536 }//for
2537 /*-------------------------------------------------------------------------*/
2538 /* NO TAKE OVER HAS BEEN INITIATED. */
2539 /*-------------------------------------------------------------------------*/
2540 }//Dbdih::systemRestartTakeOverLab()
2542 /*---------------------------------------------------------------------------*/
2543 // This subroutine is called as part of node restart in the master node.
2544 /*---------------------------------------------------------------------------*/
2545 void Dbdih::nodeRestartTakeOver(Signal* signal, Uint32 startNodeId)
2547 switch (getNodeActiveStatus(startNodeId)) {
2548 case Sysfile::NS_Active:
2549 case Sysfile::NS_ActiveMissed_1:
2550 case Sysfile::NS_ActiveMissed_2:
2551 jam();
2552 /*-----------------------------------------------------------------------*/
2553 // AN ACTIVE NODE HAS BEEN STARTED. THE ACTIVE NODE MUST THEN GET ALL DATA
2554 // IT HAD BEFORE ITS CRASH. WE START THE TAKE OVER IMMEDIATELY.
2555 // SINCE WE ARE AN ACTIVE NODE WE WILL TAKE OVER OUR OWN NODE THAT
2556 // PREVIOUSLY CRASHED.
2557 /*-----------------------------------------------------------------------*/
2558 startTakeOver(signal, RNIL, startNodeId, startNodeId);
2559 break;
2560 case Sysfile::NS_HotSpare:{
2561 jam();
2562 /*-----------------------------------------------------------------------*/
2563 // WHEN STARTING UP A HOT SPARE WE WILL CHECK IF ANY NODE NEEDS TO TAKEN
2564 // OVER. IF SO THEN WE WILL START THE TAKE OVER.
2565 /*-----------------------------------------------------------------------*/
2566 bool takeOverStarted = false;
2567 NodeRecordPtr nodePtr;
2568 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
2569 jam();
2570 ptrAss(nodePtr, nodeRecord);
2571 if (nodePtr.p->activeStatus == Sysfile::NS_NotActive_NotTakenOver) {
2572 jam();
2573 takeOverStarted = true;
2574 startTakeOver(signal, RNIL, startNodeId, nodePtr.i);
2575 }//if
2576 }//for
2577 if (!takeOverStarted) {
2578 jam();
2579 /*-------------------------------------------------------------------*/
2580 // NO TAKE OVER WAS NEEDED AT THE MOMENT WE START-UP AND WAIT UNTIL A
2581 // TAKE OVER IS NEEDED.
2582 /*-------------------------------------------------------------------*/
2583 BlockReference ref = calcDihBlockRef(startNodeId);
2584 signal->theData[0] = startNodeId;
2585 sendSignal(ref, GSN_START_COPYCONF, signal, 1, JBB);
2586 }//if
2587 break;
2589 case Sysfile::NS_NotActive_NotTakenOver:
2590 jam();
2591 /*-----------------------------------------------------------------------*/
2592 // ALL DATA IN THE NODE IS LOST BUT WE HAVE NOT TAKEN OVER YET. WE WILL
2593 // TAKE OVER OUR OWN NODE
2594 /*-----------------------------------------------------------------------*/
2595 startTakeOver(signal, RNIL, startNodeId, startNodeId);
2596 break;
2597 case Sysfile::NS_TakeOver:{
2598 jam();
2599 /*--------------------------------------------------------------------
2600 * We were in the process of taking over but it was not completed.
2601 * We will complete it now instead.
2602 *--------------------------------------------------------------------*/
2603 Uint32 takeOverNode = Sysfile::getTakeOverNode(startNodeId,
2604 SYSFILE->takeOver);
2605 if(takeOverNode == 0){
2606 jam();
2607 warningEvent("Bug in take-over code restarting");
2608 takeOverNode = startNodeId;
2611 startTakeOver(signal, RNIL, startNodeId, takeOverNode);
2612 break;
2614 default:
2615 ndbrequire(false);
2616 break;
2617 }//switch
2618 nodeResetStart();
2619 }//Dbdih::nodeRestartTakeOver()
2621 /*************************************************************************/
2622 // This routine is called when starting a local checkpoint.
2623 /*************************************************************************/
2624 void Dbdih::checkStartTakeOver(Signal* signal)
2626 NodeRecordPtr csoNodeptr;
2627 Uint32 tcsoHotSpareNode;
2628 Uint32 tcsoTakeOverNode;
2629 if (isMaster()) {
2630 /*-----------------------------------------------------------------*/
2631 /* WE WILL ONLY START TAKE OVER IF WE ARE MASTER. */
2632 /*-----------------------------------------------------------------*/
2633 /* WE WILL ONLY START THE TAKE OVER IF THERE WERE A NEED OF */
2634 /* A TAKE OVER. */
2635 /*-----------------------------------------------------------------*/
2636 /* WE CAN ONLY PERFORM THE TAKE OVER IF WE HAVE A HOT SPARE */
2637 /* AVAILABLE. */
2638 /*-----------------------------------------------------------------*/
2639 tcsoTakeOverNode = 0;
2640 tcsoHotSpareNode = 0;
2641 for (csoNodeptr.i = 1; csoNodeptr.i < MAX_NDB_NODES; csoNodeptr.i++) {
2642 ptrAss(csoNodeptr, nodeRecord);
2643 if (csoNodeptr.p->activeStatus == Sysfile::NS_NotActive_NotTakenOver) {
2644 jam();
2645 tcsoTakeOverNode = csoNodeptr.i;
2646 } else {
2647 jam();
2648 if (csoNodeptr.p->activeStatus == Sysfile::NS_HotSpare) {
2649 jam();
2650 tcsoHotSpareNode = csoNodeptr.i;
2651 }//if
2652 }//if
2653 }//for
2654 if ((tcsoTakeOverNode != 0) &&
2655 (tcsoHotSpareNode != 0)) {
2656 jam();
2657 startTakeOver(signal, RNIL, tcsoHotSpareNode, tcsoTakeOverNode);
2658 }//if
2659 }//if
2660 }//Dbdih::checkStartTakeOver()
2662 /*****************************************************************************/
2663 /*********** NODE ADDING MODULE *************/
2664 /*********** CODE TO HANDLE TAKE OVER *************/
2665 /*****************************************************************************/
2666 // A take over can be initiated by a number of things:
2667 // 1) A node restart, usually the node takes over itself but can also take
2668 // over somebody else if its own data was already taken over
2669 // 2) At system restart it is necessary to use the take over code to recover
2670 // nodes which had too old checkpoints to be restorable by the usual
2671 // restoration from disk.
2672 // 3) When a node has missed too many local checkpoints and is decided by the
2673 // master to be taken over by a hot spare node that sits around waiting
2674 // for this to happen.
2676 // To support multiple node failures efficiently the code is written such that
2677 // only one take over can handle transitions in state but during a copy
2678 // fragment other take over's can perform state transitions.
2679 /*****************************************************************************/
2680 void Dbdih::startTakeOver(Signal* signal,
2681 Uint32 takeOverPtrI,
2682 Uint32 startNode,
2683 Uint32 nodeTakenOver)
2685 NodeRecordPtr toNodePtr;
2686 NodeGroupRecordPtr NGPtr;
2687 toNodePtr.i = nodeTakenOver;
2688 ptrCheckGuard(toNodePtr, MAX_NDB_NODES, nodeRecord);
2689 NGPtr.i = toNodePtr.p->nodeGroup;
2690 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
2691 TakeOverRecordPtr takeOverPtr;
2692 if (takeOverPtrI == RNIL) {
2693 jam();
2694 setAllowNodeStart(startNode, false);
2695 seizeTakeOver(takeOverPtr);
2696 if (startNode == c_nodeStartMaster.startNode) {
2697 jam();
2698 takeOverPtr.p->toNodeRestart = true;
2699 }//if
2700 takeOverPtr.p->toStartingNode = startNode;
2701 takeOverPtr.p->toFailedNode = nodeTakenOver;
2702 } else {
2703 jam();
2704 RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
2705 ndbrequire(takeOverPtr.p->toStartingNode == startNode);
2706 ndbrequire(takeOverPtr.p->toFailedNode == nodeTakenOver);
2707 ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::TO_WAIT_START_TAKE_OVER);
2708 }//if
2709 if ((NGPtr.p->activeTakeOver) || (ERROR_INSERTED(7157))) {
2710 jam();
2711 /**------------------------------------------------------------------------
2712 * A take over is already active in this node group. We only allow one
2713 * take over per node group. Otherwise we will overload the node group and
2714 * also we will require much more checks when starting up copying of
2715 * fragments. The parallelism for take over is mainly to ensure that we
2716 * can handle take over efficiently in large systems with 4 nodes and above
2717 * A typical case is a 8 node system executing on two 8-cpu boxes.
2718 * A box crash in one of the boxes will mean 4 nodes crashes.
2719 * We want to be able to restart those four nodes to some
2720 * extent in parallel.
2722 * We will wait for a few seconds and then try again.
2724 takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_WAIT_START_TAKE_OVER;
2725 signal->theData[0] = DihContinueB::ZSTART_TAKE_OVER;
2726 signal->theData[1] = takeOverPtr.i;
2727 signal->theData[2] = startNode;
2728 signal->theData[3] = nodeTakenOver;
2729 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 5000, 4);
2730 return;
2731 }//if
2732 NGPtr.p->activeTakeOver = true;
2733 if (startNode == nodeTakenOver) {
2734 jam();
2735 switch (getNodeActiveStatus(nodeTakenOver)) {
2736 case Sysfile::NS_Active:
2737 case Sysfile::NS_ActiveMissed_1:
2738 case Sysfile::NS_ActiveMissed_2:
2739 jam();
2740 break;
2741 case Sysfile::NS_NotActive_NotTakenOver:
2742 case Sysfile::NS_TakeOver:
2743 jam();
2744 setNodeActiveStatus(nodeTakenOver, Sysfile::NS_TakeOver);
2745 break;
2746 default:
2747 ndbrequire(false);
2748 }//switch
2749 } else {
2750 jam();
2751 setNodeActiveStatus(nodeTakenOver, Sysfile::NS_HotSpare);
2752 setNodeActiveStatus(startNode, Sysfile::NS_TakeOver);
2753 changeNodeGroups(startNode, nodeTakenOver);
2754 }//if
2755 setNodeRestartInfoBits();
2756 /* ---------------------------------------------------------------------- */
2757 /* WE SET THE RESTART INFORMATION TO INDICATE THAT WE ARE ABOUT TO TAKE */
2758 /* OVER THE FAILED NODE. WE SET THIS INFORMATION AND WAIT UNTIL THE */
2759 /* GLOBAL CHECKPOINT HAS WRITTEN THE RESTART INFORMATION. */
2760 /* ---------------------------------------------------------------------- */
2761 Sysfile::setTakeOverNode(takeOverPtr.p->toFailedNode, SYSFILE->takeOver,
2762 startNode);
2763 takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_START_COPY;
2765 if (getNodeState().getSystemRestartInProgress())
2767 jam();
2768 checkToCopy();
2769 checkToCopyCompleted(signal);
2770 return;
2772 cstartGcpNow = true;
2773 }//Dbdih::startTakeOver()
2775 void Dbdih::changeNodeGroups(Uint32 startNode, Uint32 nodeTakenOver)
2777 NodeRecordPtr startNodePtr;
2778 NodeRecordPtr toNodePtr;
2779 startNodePtr.i = startNode;
2780 ptrCheckGuard(startNodePtr, MAX_NDB_NODES, nodeRecord);
2781 toNodePtr.i = nodeTakenOver;
2782 ptrCheckGuard(toNodePtr, MAX_NDB_NODES, nodeRecord);
2783 ndbrequire(startNodePtr.p->nodeGroup == ZNIL);
2784 NodeGroupRecordPtr NGPtr;
2786 NGPtr.i = toNodePtr.p->nodeGroup;
2787 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
2788 bool nodeFound = false;
2789 for (Uint32 i = 0; i < NGPtr.p->nodeCount; i++) {
2790 jam();
2791 if (NGPtr.p->nodesInGroup[i] == nodeTakenOver) {
2792 jam();
2793 NGPtr.p->nodesInGroup[i] = startNode;
2794 nodeFound = true;
2795 }//if
2796 }//for
2797 ndbrequire(nodeFound);
2798 Sysfile::setNodeGroup(startNodePtr.i, SYSFILE->nodeGroups, toNodePtr.p->nodeGroup);
2799 startNodePtr.p->nodeGroup = toNodePtr.p->nodeGroup;
2800 Sysfile::setNodeGroup(toNodePtr.i, SYSFILE->nodeGroups, NO_NODE_GROUP_ID);
2801 toNodePtr.p->nodeGroup = ZNIL;
2802 }//Dbdih::changeNodeGroups()
2804 void Dbdih::checkToCopy()
2806 TakeOverRecordPtr takeOverPtr;
2807 for (takeOverPtr.i = 0;takeOverPtr.i < MAX_NDB_NODES; takeOverPtr.i++) {
2808 ptrAss(takeOverPtr, takeOverRecord);
2809 /*----------------------------------------------------------------------*/
2810 // TAKE OVER HANDLING WRITES RESTART INFORMATION THROUGH
2811 // THE GLOBAL CHECKPOINT
2812 // PROTOCOL. WE CHECK HERE BEFORE STARTING A WRITE OF THE RESTART
2813 // INFORMATION.
2814 /*-----------------------------------------------------------------------*/
2815 if (takeOverPtr.p->toMasterStatus == TakeOverRecord::TO_START_COPY) {
2816 jam();
2817 takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_START_COPY_ONGOING;
2818 } else if (takeOverPtr.p->toMasterStatus == TakeOverRecord::TO_END_COPY) {
2819 jam();
2820 takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_END_COPY_ONGOING;
2821 }//if
2822 }//for
2823 }//Dbdih::checkToCopy()
2825 void Dbdih::checkToCopyCompleted(Signal* signal)
2827 /* ------------------------------------------------------------------------*/
2828 /* WE CHECK HERE IF THE WRITING OF TAKE OVER INFORMATION ALSO HAS BEEN */
2829 /* COMPLETED. */
2830 /* ------------------------------------------------------------------------*/
2831 TakeOverRecordPtr toPtr;
2832 for (toPtr.i = 0; toPtr.i < MAX_NDB_NODES; toPtr.i++) {
2833 ptrAss(toPtr, takeOverRecord);
2834 if (toPtr.p->toMasterStatus == TakeOverRecord::TO_START_COPY_ONGOING){
2835 jam();
2836 sendStartTo(signal, toPtr.i);
2837 } else if (toPtr.p->toMasterStatus == TakeOverRecord::TO_END_COPY_ONGOING){
2838 jam();
2839 sendEndTo(signal, toPtr.i);
2840 } else {
2841 jam();
2842 }//if
2843 }//for
2844 }//Dbdih::checkToCopyCompleted()
2846 bool Dbdih::checkToInterrupted(TakeOverRecordPtr& takeOverPtr)
2848 if (checkNodeAlive(takeOverPtr.p->toStartingNode)) {
2849 jam();
2850 return false;
2851 } else {
2852 jam();
2853 endTakeOver(takeOverPtr.i);
2854 return true;
2855 }//if
2856 }//Dbdih::checkToInterrupted()
2858 void Dbdih::sendStartTo(Signal* signal, Uint32 takeOverPtrI)
2860 TakeOverRecordPtr takeOverPtr;
2861 CRASH_INSERTION(7155);
2862 RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
2863 if ((c_startToLock != RNIL) || (ERROR_INSERTED(7158))) {
2864 jam();
2865 takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_WAIT_START;
2866 signal->theData[0] = DihContinueB::ZSEND_START_TO;
2867 signal->theData[1] = takeOverPtrI;
2868 signal->theData[2] = takeOverPtr.p->toStartingNode;
2869 signal->theData[3] = takeOverPtr.p->toFailedNode;
2870 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 30, 4);
2871 return;
2872 }//if
2873 c_startToLock = takeOverPtrI;
2875 takeOverPtr.p->toMasterStatus = TakeOverRecord::STARTING;
2876 StartToReq * const req = (StartToReq *)&signal->theData[0];
2877 req->userPtr = takeOverPtr.i;
2878 req->userRef = reference();
2879 req->startingNodeId = takeOverPtr.p->toStartingNode;
2880 req->nodeTakenOver = takeOverPtr.p->toFailedNode;
2881 req->nodeRestart = takeOverPtr.p->toNodeRestart;
2882 sendLoopMacro(START_TOREQ, sendSTART_TOREQ);
2883 }//Dbdih::sendStartTo()
2885 void Dbdih::execSTART_TOREQ(Signal* signal)
2887 TakeOverRecordPtr takeOverPtr;
2888 jamEntry();
2889 const StartToReq * const req = (StartToReq *)&signal->theData[0];
2890 takeOverPtr.i = req->userPtr;
2891 BlockReference ref = req->userRef;
2892 Uint32 startingNode = req->startingNodeId;
2894 CRASH_INSERTION(7133);
2895 RETURN_IF_NODE_NOT_ALIVE(req->startingNodeId);
2896 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
2897 allocateTakeOver(takeOverPtr);
2898 initStartTakeOver(req, takeOverPtr);
2900 StartToConf * const conf = (StartToConf *)&signal->theData[0];
2901 conf->userPtr = takeOverPtr.i;
2902 conf->sendingNodeId = cownNodeId;
2903 conf->startingNodeId = startingNode;
2904 sendSignal(ref, GSN_START_TOCONF, signal, StartToConf::SignalLength, JBB);
2905 }//Dbdih::execSTART_TOREQ()
2907 void Dbdih::execSTART_TOCONF(Signal* signal)
2909 TakeOverRecordPtr takeOverPtr;
2910 jamEntry();
2911 const StartToConf * const conf = (StartToConf *)&signal->theData[0];
2913 CRASH_INSERTION(7147);
2915 RETURN_IF_NODE_NOT_ALIVE(conf->startingNodeId);
2917 takeOverPtr.i = conf->userPtr;
2918 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
2919 ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::STARTING);
2920 ndbrequire(takeOverPtr.p->toStartingNode == conf->startingNodeId);
2921 receiveLoopMacro(START_TOREQ, conf->sendingNodeId);
2922 CRASH_INSERTION(7134);
2923 c_startToLock = RNIL;
2925 if (takeOverPtr.p->toNodeRestart)
2927 jam();
2928 takeOverPtr.p->toMasterStatus = TakeOverRecord::STARTING_LOCAL_FRAGMENTS;
2929 nr_start_fragments(signal, takeOverPtr);
2930 return;
2933 startNextCopyFragment(signal, takeOverPtr.i);
2934 }//Dbdih::execSTART_TOCONF()
2936 void
2937 Dbdih::nr_start_fragments(Signal* signal,
2938 TakeOverRecordPtr takeOverPtr)
2940 Uint32 loopCount = 0 ;
2941 TabRecordPtr tabPtr;
2942 while (loopCount++ < 100) {
2943 tabPtr.i = takeOverPtr.p->toCurrentTabref;
2944 if (tabPtr.i >= ctabFileSize) {
2945 jam();
2946 nr_run_redo(signal, takeOverPtr);
2947 return;
2948 }//if
2949 ptrAss(tabPtr, tabRecord);
2950 if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE ||
2951 tabPtr.p->tabStorage != TabRecord::ST_NORMAL)
2953 jam();
2954 takeOverPtr.p->toCurrentFragid = 0;
2955 takeOverPtr.p->toCurrentTabref++;
2956 continue;
2957 }//if
2958 Uint32 fragId = takeOverPtr.p->toCurrentFragid;
2959 if (fragId >= tabPtr.p->totalfragments) {
2960 jam();
2961 takeOverPtr.p->toCurrentFragid = 0;
2962 takeOverPtr.p->toCurrentTabref++;
2963 continue;
2964 }//if
2965 FragmentstorePtr fragPtr;
2966 getFragstore(tabPtr.p, fragId, fragPtr);
2967 ReplicaRecordPtr loopReplicaPtr;
2968 loopReplicaPtr.i = fragPtr.p->oldStoredReplicas;
2969 while (loopReplicaPtr.i != RNIL) {
2970 ptrCheckGuard(loopReplicaPtr, creplicaFileSize, replicaRecord);
2971 if (loopReplicaPtr.p->procNode == takeOverPtr.p->toStartingNode) {
2972 jam();
2973 nr_start_fragment(signal, takeOverPtr, loopReplicaPtr);
2974 break;
2975 } else {
2976 jam();
2977 loopReplicaPtr.i = loopReplicaPtr.p->nextReplica;
2978 }//if
2979 }//while
2980 takeOverPtr.p->toCurrentFragid++;
2981 }//while
2982 signal->theData[0] = DihContinueB::ZTO_START_FRAGMENTS;
2983 signal->theData[1] = takeOverPtr.i;
2984 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
2987 void
2988 Dbdih::nr_start_fragment(Signal* signal,
2989 TakeOverRecordPtr takeOverPtr,
2990 ReplicaRecordPtr replicaPtr)
2992 Uint32 i, j = 0;
2993 Uint32 maxLcpId = 0;
2994 Uint32 maxLcpIndex = ~0;
2996 Uint32 restorableGCI = 0;
2998 ndbout_c("tab: %d frag: %d replicaP->nextLcp: %d",
2999 takeOverPtr.p->toCurrentTabref,
3000 takeOverPtr.p->toCurrentFragid,
3001 replicaPtr.p->nextLcp);
3003 Uint32 idx = replicaPtr.p->nextLcp;
3004 for(i = 0; i<MAX_LCP_STORED; i++, idx = nextLcpNo(idx))
3006 ndbout_c("scanning idx: %d lcpId: %d", idx, replicaPtr.p->lcpId[idx]);
3007 if (replicaPtr.p->lcpStatus[idx] == ZVALID)
3009 ndbrequire(replicaPtr.p->lcpId[idx] > maxLcpId);
3010 Uint32 stopGci = replicaPtr.p->maxGciStarted[idx];
3011 for (;j < replicaPtr.p->noCrashedReplicas; j++)
3013 ndbout_c("crashed replica: %d(%d) replicaLastGci: %d",
3015 replicaPtr.p->noCrashedReplicas,
3016 replicaPtr.p->replicaLastGci[j]);
3017 if (replicaPtr.p->replicaLastGci[j] > stopGci)
3019 maxLcpId = replicaPtr.p->lcpId[idx];
3020 maxLcpIndex = idx;
3021 restorableGCI = replicaPtr.p->replicaLastGci[j];
3022 break;
3028 if (maxLcpIndex == ~ (Uint32) 0)
3030 ndbout_c("Didnt find any LCP for node: %d tab: %d frag: %d",
3031 takeOverPtr.p->toStartingNode,
3032 takeOverPtr.p->toCurrentTabref,
3033 takeOverPtr.p->toCurrentFragid);
3034 replicaPtr.p->lcpIdStarted = 0;
3035 BlockReference ref = calcLqhBlockRef(takeOverPtr.p->toStartingNode);
3036 StartFragReq *req = (StartFragReq *)signal->getDataPtrSend();
3037 req->userPtr = 0;
3038 req->userRef = reference();
3039 req->lcpNo = ZNIL;
3040 req->lcpId = 0;
3041 req->tableId = takeOverPtr.p->toCurrentTabref;
3042 req->fragId = takeOverPtr.p->toCurrentFragid;
3043 req->noOfLogNodes = 0;
3044 sendSignal(ref, GSN_START_FRAGREQ, signal,
3045 StartFragReq::SignalLength, JBB);
3047 else
3049 ndbout_c("Found LCP: %d(%d) maxGciStarted: %d maxGciCompleted: %d restorable: %d(%d) newestRestorableGCI: %d",
3050 maxLcpId,
3051 maxLcpIndex,
3052 replicaPtr.p->maxGciStarted[maxLcpIndex],
3053 replicaPtr.p->maxGciCompleted[maxLcpIndex],
3054 restorableGCI,
3055 SYSFILE->lastCompletedGCI[takeOverPtr.p->toStartingNode],
3056 SYSFILE->newestRestorableGCI);
3058 replicaPtr.p->lcpIdStarted = restorableGCI;
3059 BlockReference ref = calcLqhBlockRef(takeOverPtr.p->toStartingNode);
3060 StartFragReq *req = (StartFragReq *)signal->getDataPtrSend();
3061 req->userPtr = 0;
3062 req->userRef = reference();
3063 req->lcpNo = maxLcpIndex;
3064 req->lcpId = maxLcpId;
3065 req->tableId = takeOverPtr.p->toCurrentTabref;
3066 req->fragId = takeOverPtr.p->toCurrentFragid;
3067 req->noOfLogNodes = 1;
3068 req->lqhLogNode[0] = takeOverPtr.p->toStartingNode;
3069 req->startGci[0] = replicaPtr.p->maxGciCompleted[maxLcpIndex];
3070 req->lastGci[0] = restorableGCI;
3071 sendSignal(ref, GSN_START_FRAGREQ, signal,
3072 StartFragReq::SignalLength, JBB);
3076 void
3077 Dbdih::nr_run_redo(Signal* signal, TakeOverRecordPtr takeOverPtr)
3079 takeOverPtr.p->toCurrentTabref = 0;
3080 takeOverPtr.p->toCurrentFragid = 0;
3081 sendSTART_RECREQ(signal, takeOverPtr.p->toStartingNode);
3084 void Dbdih::initStartTakeOver(const StartToReq * req,
3085 TakeOverRecordPtr takeOverPtr)
3087 takeOverPtr.p->toCurrentTabref = 0;
3088 takeOverPtr.p->toCurrentFragid = 0;
3089 takeOverPtr.p->toStartingNode = req->startingNodeId;
3090 takeOverPtr.p->toFailedNode = req->nodeTakenOver;
3091 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SLAVE_STARTED;
3092 takeOverPtr.p->toCopyNode = RNIL;
3093 takeOverPtr.p->toCurrentReplica = RNIL;
3094 takeOverPtr.p->toNodeRestart = req->nodeRestart;
3095 }//Dbdih::initStartTakeOver()
3097 void Dbdih::startNextCopyFragment(Signal* signal, Uint32 takeOverPtrI)
3099 TabRecordPtr tabPtr;
3100 TakeOverRecordPtr takeOverPtr;
3101 Uint32 loopCount;
3102 RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
3103 takeOverPtr.p->toMasterStatus = TakeOverRecord::SELECTING_NEXT;
3104 loopCount = 0;
3105 if (ERROR_INSERTED(7159)) {
3106 loopCount = 100;
3107 }//if
3108 while (loopCount++ < 100) {
3109 tabPtr.i = takeOverPtr.p->toCurrentTabref;
3110 if (tabPtr.i >= ctabFileSize) {
3111 jam();
3112 CRASH_INSERTION(7136);
3113 sendUpdateTo(signal, takeOverPtr.i, UpdateToReq::TO_COPY_COMPLETED);
3114 return;
3115 }//if
3116 ptrAss(tabPtr, tabRecord);
3117 if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE){
3118 jam();
3119 takeOverPtr.p->toCurrentFragid = 0;
3120 takeOverPtr.p->toCurrentTabref++;
3121 continue;
3122 }//if
3123 Uint32 fragId = takeOverPtr.p->toCurrentFragid;
3124 if (fragId >= tabPtr.p->totalfragments) {
3125 jam();
3126 takeOverPtr.p->toCurrentFragid = 0;
3127 takeOverPtr.p->toCurrentTabref++;
3128 if (ERROR_INSERTED(7135)) {
3129 if (takeOverPtr.p->toCurrentTabref == 1) {
3130 ndbrequire(false);
3131 }//if
3132 }//if
3133 continue;
3134 }//if
3135 FragmentstorePtr fragPtr;
3136 getFragstore(tabPtr.p, fragId, fragPtr);
3137 ReplicaRecordPtr loopReplicaPtr;
3138 loopReplicaPtr.i = fragPtr.p->oldStoredReplicas;
3139 while (loopReplicaPtr.i != RNIL) {
3140 ptrCheckGuard(loopReplicaPtr, creplicaFileSize, replicaRecord);
3141 if (loopReplicaPtr.p->procNode == takeOverPtr.p->toFailedNode) {
3142 jam();
3143 /* ----------------------------------------------------------------- */
3144 /* WE HAVE FOUND A REPLICA THAT BELONGED THE FAILED NODE THAT NEEDS */
3145 /* TAKE OVER. WE TAKE OVER THIS REPLICA TO THE NEW NODE. */
3146 /* ----------------------------------------------------------------- */
3147 takeOverPtr.p->toCurrentReplica = loopReplicaPtr.i;
3148 toCopyFragLab(signal, takeOverPtr.i);
3149 return;
3150 } else if (loopReplicaPtr.p->procNode == takeOverPtr.p->toStartingNode) {
3151 jam();
3152 /* ----------------------------------------------------------------- */
3153 /* WE HAVE OBVIOUSLY STARTED TAKING OVER THIS WITHOUT COMPLETING IT. */
3154 /* WE */
3155 /* NEED TO COMPLETE THE TAKE OVER OF THIS REPLICA. */
3156 /* ----------------------------------------------------------------- */
3157 takeOverPtr.p->toCurrentReplica = loopReplicaPtr.i;
3158 toCopyFragLab(signal, takeOverPtr.i);
3159 return;
3160 } else {
3161 jam();
3162 loopReplicaPtr.i = loopReplicaPtr.p->nextReplica;
3163 }//if
3164 }//while
3165 takeOverPtr.p->toCurrentFragid++;
3166 }//while
3167 signal->theData[0] = DihContinueB::ZTO_START_COPY_FRAG;
3168 signal->theData[1] = takeOverPtr.i;
3169 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
3170 }//Dbdih::startNextCopyFragment()
3172 void Dbdih::toCopyFragLab(Signal* signal,
3173 Uint32 takeOverPtrI)
3175 TakeOverRecordPtr takeOverPtr;
3176 RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
3179 * Inform starting node that TakeOver is about to start
3181 Uint32 nodeId = takeOverPtr.p->toStartingNode;
3183 Uint32 version = getNodeInfo(nodeId).m_version;
3184 if (ndb_check_prep_copy_frag_version(version))
3186 jam();
3187 TabRecordPtr tabPtr;
3188 tabPtr.i = takeOverPtr.p->toCurrentTabref;
3189 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
3191 FragmentstorePtr fragPtr;
3192 getFragstore(tabPtr.p, takeOverPtr.p->toCurrentFragid, fragPtr);
3193 Uint32 nodes[MAX_REPLICAS];
3194 extractNodeInfo(fragPtr.p, nodes);
3196 PrepareCopyFragReq* req= (PrepareCopyFragReq*)signal->getDataPtrSend();
3197 req->senderRef = reference();
3198 req->senderData = takeOverPtrI;
3199 req->tableId = takeOverPtr.p->toCurrentTabref;
3200 req->fragId = takeOverPtr.p->toCurrentFragid;
3201 req->copyNodeId = nodes[0]; // Src
3202 req->startingNodeId = takeOverPtr.p->toStartingNode; // Dst
3203 Uint32 ref = calcLqhBlockRef(takeOverPtr.p->toStartingNode);
3205 sendSignal(ref, GSN_PREPARE_COPY_FRAG_REQ, signal,
3206 PrepareCopyFragReq::SignalLength, JBB);
3208 takeOverPtr.p->toMasterStatus = TakeOverRecord::PREPARE_COPY;
3209 return;
3212 takeOverPtr.p->maxPage = RNIL;
3213 toStartCopyFrag(signal, takeOverPtr);
3216 void
3217 Dbdih::execPREPARE_COPY_FRAG_REF(Signal* signal)
3219 jamEntry();
3220 PrepareCopyFragRef ref = *(PrepareCopyFragRef*)signal->getDataPtr();
3222 TakeOverRecordPtr takeOverPtr;
3223 RETURN_IF_TAKE_OVER_INTERRUPTED(ref.senderData, takeOverPtr);
3225 ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::PREPARE_COPY);
3228 * Treat this as copy frag ref
3230 CopyFragRef * cfref = (CopyFragRef*)signal->getDataPtrSend();
3231 cfref->userPtr = ref.senderData;
3232 cfref->startingNodeId = ref.startingNodeId;
3233 cfref->errorCode = ref.errorCode;
3234 cfref->tableId = ref.tableId;
3235 cfref->fragId = ref.fragId;
3236 cfref->sendingNodeId = ref.copyNodeId;
3237 takeOverPtr.p->toMasterStatus = TakeOverRecord::COPY_FRAG;
3238 execCOPY_FRAGREF(signal);
3241 void
3242 Dbdih::execPREPARE_COPY_FRAG_CONF(Signal* signal)
3244 PrepareCopyFragConf conf = *(PrepareCopyFragConf*)signal->getDataPtr();
3246 TakeOverRecordPtr takeOverPtr;
3247 RETURN_IF_TAKE_OVER_INTERRUPTED(conf.senderData, takeOverPtr);
3249 Uint32 version = getNodeInfo(refToNode(conf.senderRef)).m_version;
3250 if (ndb_check_prep_copy_frag_version(version) >= 2)
3252 jam();
3253 takeOverPtr.p->maxPage = conf.maxPageNo;
3255 else
3257 jam();
3258 takeOverPtr.p->maxPage = RNIL;
3260 toStartCopyFrag(signal, takeOverPtr);
3263 void
3264 Dbdih::toStartCopyFrag(Signal* signal, TakeOverRecordPtr takeOverPtr)
3266 CreateReplicaRecordPtr createReplicaPtr;
3267 createReplicaPtr.i = 0;
3268 ptrAss(createReplicaPtr, createReplicaRecord);
3270 ReplicaRecordPtr replicaPtr;
3271 replicaPtr.i = takeOverPtr.p->toCurrentReplica;
3272 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
3274 TabRecordPtr tabPtr;
3275 tabPtr.i = takeOverPtr.p->toCurrentTabref;
3276 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
3277 /* ----------------------------------------------------------------------- */
3278 /* WE HAVE FOUND A REPLICA THAT NEEDS TAKE OVER. WE WILL START THIS TAKE */
3279 /* OVER BY ADDING THE FRAGMENT WHEREAFTER WE WILL ORDER THE PRIMARY */
3280 /* REPLICA TO COPY ITS CONTENT TO THE NEW STARTING REPLICA. */
3281 /* THIS OPERATION IS A SINGLE USER OPERATION UNTIL WE HAVE SENT */
3282 /* COPY_FRAGREQ. AFTER SENDING COPY_FRAGREQ WE ARE READY TO START A NEW */
3283 /* FRAGMENT REPLICA. WE WILL NOT IMPLEMENT THIS IN THE FIRST PHASE. */
3284 /* ----------------------------------------------------------------------- */
3285 cnoOfCreateReplicas = 1;
3286 createReplicaPtr.p->hotSpareUse = true;
3287 createReplicaPtr.p->dataNodeId = takeOverPtr.p->toStartingNode;
3289 prepareSendCreateFragReq(signal, takeOverPtr.i);
3290 }//Dbdih::toStartCopy()
3292 void Dbdih::prepareSendCreateFragReq(Signal* signal, Uint32 takeOverPtrI)
3294 TakeOverRecordPtr takeOverPtr;
3295 RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
3297 TabRecordPtr tabPtr;
3298 tabPtr.i = takeOverPtr.p->toCurrentTabref;
3299 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
3300 FragmentstorePtr fragPtr;
3302 getFragstore(tabPtr.p, takeOverPtr.p->toCurrentFragid, fragPtr);
3303 Uint32 nodes[MAX_REPLICAS];
3304 extractNodeInfo(fragPtr.p, nodes);
3305 takeOverPtr.p->toCopyNode = nodes[0];
3306 sendCreateFragReq(signal, 0, CreateFragReq::STORED, takeOverPtr.i);
3307 }//Dbdih::prepareSendCreateFragReq()
3309 void Dbdih::sendCreateFragReq(Signal* signal,
3310 Uint32 startGci,
3311 Uint32 replicaType,
3312 Uint32 takeOverPtrI)
3314 TakeOverRecordPtr takeOverPtr;
3315 RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
3316 if ((c_createFragmentLock != RNIL) ||
3317 ((ERROR_INSERTED(7161))&&(replicaType == CreateFragReq::STORED)) ||
3318 ((ERROR_INSERTED(7162))&&(replicaType == CreateFragReq::COMMIT_STORED))){
3319 if (replicaType == CreateFragReq::STORED) {
3320 jam();
3321 takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_WAIT_PREPARE_CREATE;
3322 } else {
3323 ndbrequire(replicaType == CreateFragReq::COMMIT_STORED);
3324 jam();
3325 takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_WAIT_COMMIT_CREATE;
3326 }//if
3327 signal->theData[0] = DihContinueB::ZSEND_CREATE_FRAG;
3328 signal->theData[1] = takeOverPtr.i;
3329 signal->theData[2] = replicaType;
3330 signal->theData[3] = startGci;
3331 signal->theData[4] = takeOverPtr.p->toStartingNode;
3332 signal->theData[5] = takeOverPtr.p->toFailedNode;
3333 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 50, 6);
3334 return;
3335 }//if
3336 c_createFragmentLock = takeOverPtr.i;
3337 sendLoopMacro(CREATE_FRAGREQ, nullRoutine);
3339 CreateFragReq * const req = (CreateFragReq *)&signal->theData[0];
3340 req->userPtr = takeOverPtr.i;
3341 req->userRef = reference();
3342 req->tableId = takeOverPtr.p->toCurrentTabref;
3343 req->fragId = takeOverPtr.p->toCurrentFragid;
3344 req->startingNodeId = takeOverPtr.p->toStartingNode;
3345 req->copyNodeId = takeOverPtr.p->toCopyNode;
3346 req->startGci = startGci;
3347 req->replicaType = replicaType;
3349 NodeRecordPtr nodePtr;
3350 nodePtr.i = cfirstAliveNode;
3351 do {
3352 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
3353 BlockReference ref = calcDihBlockRef(nodePtr.i);
3354 sendSignal(ref, GSN_CREATE_FRAGREQ, signal,
3355 CreateFragReq::SignalLength, JBB);
3356 nodePtr.i = nodePtr.p->nextNode;
3357 } while (nodePtr.i != RNIL);
3359 if (replicaType == CreateFragReq::STORED) {
3360 jam();
3361 takeOverPtr.p->toMasterStatus = TakeOverRecord::PREPARE_CREATE;
3362 } else {
3363 ndbrequire(replicaType == CreateFragReq::COMMIT_STORED);
3364 jam();
3365 takeOverPtr.p->toMasterStatus = TakeOverRecord::COMMIT_CREATE;
3367 }//Dbdih::sendCreateFragReq()
3369 /* --------------------------------------------------------------------------*/
3370 /* AN ORDER TO START OR COMMIT THE REPLICA CREATION ARRIVED FROM THE */
3371 /* MASTER. */
3372 /* --------------------------------------------------------------------------*/
3373 void Dbdih::execCREATE_FRAGREQ(Signal* signal)
3375 jamEntry();
3376 CreateFragReq * const req = (CreateFragReq *)&signal->theData[0];
3378 TakeOverRecordPtr takeOverPtr;
3379 takeOverPtr.i = req->userPtr;
3380 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
3382 BlockReference retRef = req->userRef;
3384 TabRecordPtr tabPtr;
3385 tabPtr.i = req->tableId;
3386 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
3388 Uint32 fragId = req->fragId;
3389 Uint32 tdestNodeid = req->startingNodeId;
3390 Uint32 tsourceNodeid = req->copyNodeId;
3391 Uint32 startGci = req->startGci;
3392 Uint32 replicaType = req->replicaType;
3394 FragmentstorePtr fragPtr;
3395 getFragstore(tabPtr.p, fragId, fragPtr);
3396 RETURN_IF_NODE_NOT_ALIVE(tdestNodeid);
3397 ReplicaRecordPtr frReplicaPtr;
3398 findToReplica(takeOverPtr.p, replicaType, fragPtr, frReplicaPtr);
3399 ndbrequire(frReplicaPtr.i != RNIL);
3401 switch (replicaType) {
3402 case CreateFragReq::STORED:
3403 jam();
3404 CRASH_INSERTION(7138);
3405 /* ----------------------------------------------------------------------*/
3406 /* HERE WE ARE INSERTING THE NEW BACKUP NODE IN THE EXECUTION OF ALL */
3407 /* OPERATIONS. FROM HERE ON ALL OPERATIONS ON THIS FRAGMENT WILL INCLUDE*/
3408 /* USE OF THE NEW REPLICA. */
3409 /* --------------------------------------------------------------------- */
3410 insertBackup(fragPtr, tdestNodeid);
3411 takeOverPtr.p->toCopyNode = tsourceNodeid;
3412 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SLAVE_CREATE_PREPARE;
3414 fragPtr.p->distributionKey++;
3415 fragPtr.p->distributionKey &= 255;
3416 break;
3417 case CreateFragReq::COMMIT_STORED:
3418 jam();
3419 CRASH_INSERTION(7139);
3420 /* ----------------------------------------------------------------------*/
3421 /* HERE WE ARE MOVING THE REPLICA TO THE STORED SECTION SINCE IT IS NOW */
3422 /* FULLY LOADED WITH ALL DATA NEEDED. */
3423 // We also update the order of the replicas here so that if the new
3424 // replica is the desired primary we insert it as primary.
3425 /* ----------------------------------------------------------------------*/
3426 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SLAVE_CREATE_COMMIT;
3427 removeOldStoredReplica(fragPtr, frReplicaPtr);
3428 linkStoredReplica(fragPtr, frReplicaPtr);
3429 updateNodeInfo(fragPtr);
3430 break;
3431 default:
3432 ndbrequire(false);
3433 break;
3434 }//switch
3436 /* ------------------------------------------------------------------------*/
3437 /* THE NEW NODE OF THIS REPLICA IS THE STARTING NODE. */
3438 /* ------------------------------------------------------------------------*/
3439 if (frReplicaPtr.p->procNode != takeOverPtr.p->toStartingNode) {
3440 jam();
3441 /* ---------------------------------------------------------------------*/
3442 /* IF WE ARE STARTING A TAKE OVER NODE WE MUST INVALIDATE ALL LCP'S. */
3443 /* OTHERWISE WE WILL TRY TO START LCP'S THAT DO NOT EXIST. */
3444 /* ---------------------------------------------------------------------*/
3445 frReplicaPtr.p->procNode = takeOverPtr.p->toStartingNode;
3446 frReplicaPtr.p->noCrashedReplicas = 0;
3447 frReplicaPtr.p->createGci[0] = startGci;
3448 ndbrequire(startGci != 0xF1F1F1F1);
3449 frReplicaPtr.p->replicaLastGci[0] = (Uint32)-1;
3450 for (Uint32 i = 0; i < MAX_LCP_STORED; i++) {
3451 frReplicaPtr.p->lcpStatus[i] = ZINVALID;
3452 }//for
3453 } else {
3454 jam();
3455 const Uint32 noCrashed = frReplicaPtr.p->noCrashedReplicas;
3456 arrGuard(noCrashed, 8);
3457 frReplicaPtr.p->createGci[noCrashed] = startGci;
3458 ndbrequire(startGci != 0xF1F1F1F1);
3459 frReplicaPtr.p->replicaLastGci[noCrashed] = (Uint32)-1;
3460 }//if
3461 takeOverPtr.p->toCurrentTabref = tabPtr.i;
3462 takeOverPtr.p->toCurrentFragid = fragId;
3463 CreateFragConf * const conf = (CreateFragConf *)&signal->theData[0];
3464 conf->userPtr = takeOverPtr.i;
3465 conf->tableId = tabPtr.i;
3466 conf->fragId = fragId;
3467 conf->sendingNodeId = cownNodeId;
3468 conf->startingNodeId = tdestNodeid;
3469 sendSignal(retRef, GSN_CREATE_FRAGCONF, signal,
3470 CreateFragConf::SignalLength, JBB);
3471 }//Dbdih::execCREATE_FRAGREQ()
3473 void Dbdih::execCREATE_FRAGCONF(Signal* signal)
3475 jamEntry();
3476 CRASH_INSERTION(7148);
3477 const CreateFragConf * const conf = (CreateFragConf *)&signal->theData[0];
3478 Uint32 fragId = conf->fragId;
3480 RETURN_IF_NODE_NOT_ALIVE(conf->startingNodeId);
3482 TabRecordPtr tabPtr;
3483 tabPtr.i = conf->tableId;
3484 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
3486 TakeOverRecordPtr takeOverPtr;
3487 takeOverPtr.i = conf->userPtr;
3488 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
3490 ndbrequire(tabPtr.i == takeOverPtr.p->toCurrentTabref);
3491 ndbrequire(fragId == takeOverPtr.p->toCurrentFragid);
3492 receiveLoopMacro(CREATE_FRAGREQ, conf->sendingNodeId);
3493 c_createFragmentLock = RNIL;
3495 if (takeOverPtr.p->toMasterStatus == TakeOverRecord::PREPARE_CREATE) {
3496 jam();
3497 CRASH_INSERTION(7140);
3498 /* --------------------------------------------------------------------- */
3499 /* ALL NODES HAVE PREPARED THE INTRODUCTION OF THIS NEW NODE AND IT IS */
3500 /* ALREADY IN USE. WE CAN NOW START COPYING THE FRAGMENT. */
3501 /*---------------------------------------------------------------------- */
3502 FragmentstorePtr fragPtr;
3503 getFragstore(tabPtr.p, fragId, fragPtr);
3504 Uint32 gci = 0;
3505 if (takeOverPtr.p->toNodeRestart)
3507 ReplicaRecordPtr replicaPtr;
3508 findReplica(replicaPtr, fragPtr.p, takeOverPtr.p->toStartingNode, true);
3509 gci = replicaPtr.p->lcpIdStarted;
3510 replicaPtr.p->lcpIdStarted = 0;
3512 takeOverPtr.p->toMasterStatus = TakeOverRecord::COPY_FRAG;
3513 BlockReference ref = calcLqhBlockRef(takeOverPtr.p->toCopyNode);
3514 CopyFragReq * const copyFragReq = (CopyFragReq *)&signal->theData[0];
3515 copyFragReq->userPtr = takeOverPtr.i;
3516 copyFragReq->userRef = reference();
3517 copyFragReq->tableId = tabPtr.i;
3518 copyFragReq->fragId = fragId;
3519 copyFragReq->nodeId = takeOverPtr.p->toStartingNode;
3520 copyFragReq->schemaVersion = tabPtr.p->schemaVersion;
3521 copyFragReq->distributionKey = fragPtr.p->distributionKey;
3522 copyFragReq->gci = gci;
3523 Uint32 len = copyFragReq->nodeCount =
3524 extractNodeInfo(fragPtr.p,
3525 copyFragReq->nodeList);
3526 copyFragReq->nodeList[len] = takeOverPtr.p->maxPage;
3527 sendSignal(ref, GSN_COPY_FRAGREQ, signal,
3528 CopyFragReq::SignalLength + len, JBB);
3529 } else {
3530 ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::COMMIT_CREATE);
3531 jam();
3532 CRASH_INSERTION(7141);
3533 /* --------------------------------------------------------------------- */
3534 // REPORT that copy of fragment has been completed.
3535 /* --------------------------------------------------------------------- */
3536 signal->theData[0] = NDB_LE_NR_CopyFragDone;
3537 signal->theData[1] = takeOverPtr.p->toStartingNode;
3538 signal->theData[2] = tabPtr.i;
3539 signal->theData[3] = takeOverPtr.p->toCurrentFragid;
3540 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
3541 /* --------------------------------------------------------------------- */
3542 /* WE HAVE NOW CREATED THIS NEW REPLICA AND WE ARE READY TO TAKE THE */
3543 /* THE NEXT REPLICA. */
3544 /* --------------------------------------------------------------------- */
3546 Mutex mutex(signal, c_mutexMgr, takeOverPtr.p->m_switchPrimaryMutexHandle);
3547 mutex.unlock(); // ignore result
3549 takeOverPtr.p->toCurrentFragid++;
3550 startNextCopyFragment(signal, takeOverPtr.i);
3551 }//if
3552 }//Dbdih::execCREATE_FRAGCONF()
3554 void Dbdih::execCOPY_FRAGREF(Signal* signal)
3556 const CopyFragRef * const ref = (CopyFragRef *)&signal->theData[0];
3557 jamEntry();
3558 Uint32 takeOverPtrI = ref->userPtr;
3559 Uint32 startingNodeId = ref->startingNodeId;
3560 Uint32 errorCode = ref->errorCode;
3562 TakeOverRecordPtr takeOverPtr;
3563 RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
3564 ndbrequire(errorCode != ZNODE_FAILURE_ERROR);
3565 ndbrequire(ref->tableId == takeOverPtr.p->toCurrentTabref);
3566 ndbrequire(ref->fragId == takeOverPtr.p->toCurrentFragid);
3567 ndbrequire(ref->startingNodeId == takeOverPtr.p->toStartingNode);
3568 ndbrequire(ref->sendingNodeId == takeOverPtr.p->toCopyNode);
3569 ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::COPY_FRAG);
3570 endTakeOver(takeOverPtrI);
3571 //--------------------------------------------------------------------------
3572 // For some reason we did not succeed in copying a fragment. We treat this
3573 // as a serious failure and crash the starting node.
3574 //--------------------------------------------------------------------------
3575 BlockReference cntrRef = calcNdbCntrBlockRef(startingNodeId);
3576 SystemError * const sysErr = (SystemError*)&signal->theData[0];
3577 sysErr->errorCode = SystemError::CopyFragRefError;
3578 sysErr->errorRef = reference();
3579 sysErr->data1 = errorCode;
3580 sysErr->data2 = 0;
3581 sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal,
3582 SystemError::SignalLength, JBB);
3583 return;
3584 }//Dbdih::execCOPY_FRAGREF()
3586 void Dbdih::execCOPY_FRAGCONF(Signal* signal)
3588 const CopyFragConf * const conf = (CopyFragConf *)&signal->theData[0];
3589 jamEntry();
3590 CRASH_INSERTION(7142);
3592 TakeOverRecordPtr takeOverPtr;
3593 Uint32 takeOverPtrI = conf->userPtr;
3594 RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
3596 ndbrequire(conf->tableId == takeOverPtr.p->toCurrentTabref);
3597 ndbrequire(conf->fragId == takeOverPtr.p->toCurrentFragid);
3598 ndbrequire(conf->startingNodeId == takeOverPtr.p->toStartingNode);
3599 ndbrequire(conf->sendingNodeId == takeOverPtr.p->toCopyNode);
3600 ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::COPY_FRAG);
3601 sendUpdateTo(signal, takeOverPtr.i,
3602 (Uint32)UpdateToReq::TO_COPY_FRAG_COMPLETED);
3603 }//Dbdih::execCOPY_FRAGCONF()
3605 void Dbdih::sendUpdateTo(Signal* signal,
3606 Uint32 takeOverPtrI, Uint32 updateState)
3608 TakeOverRecordPtr takeOverPtr;
3609 RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
3610 if ((c_updateToLock != RNIL) ||
3611 ((ERROR_INSERTED(7163)) &&
3612 (updateState == UpdateToReq::TO_COPY_FRAG_COMPLETED)) ||
3613 ((ERROR_INSERTED(7169)) &&
3614 (updateState == UpdateToReq::TO_COPY_COMPLETED))) {
3615 jam();
3616 takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_WAIT_UPDATE_TO;
3617 signal->theData[0] = DihContinueB::ZSEND_UPDATE_TO;
3618 signal->theData[1] = takeOverPtrI;
3619 signal->theData[2] = takeOverPtr.p->toStartingNode;
3620 signal->theData[3] = takeOverPtr.p->toFailedNode;
3621 signal->theData[4] = updateState;
3622 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 30, 5);
3623 return;
3624 }//if
3625 c_updateToLock = takeOverPtrI;
3626 if (updateState == UpdateToReq::TO_COPY_FRAG_COMPLETED) {
3627 jam();
3628 takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_UPDATE_TO;
3629 } else {
3630 jam();
3631 ndbrequire(updateState == UpdateToReq::TO_COPY_COMPLETED);
3632 takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_COPY_COMPLETED;
3633 }//if
3635 UpdateToReq * const req = (UpdateToReq *)&signal->theData[0];
3636 req->userPtr = takeOverPtr.i;
3637 req->userRef = reference();
3638 req->updateState = (UpdateToReq::UpdateState)updateState;
3639 req->startingNodeId = takeOverPtr.p->toStartingNode;
3640 req->tableId = takeOverPtr.p->toCurrentTabref;
3641 req->fragmentNo = takeOverPtr.p->toCurrentFragid;
3642 sendLoopMacro(UPDATE_TOREQ, sendUPDATE_TOREQ);
3643 }//Dbdih::sendUpdateTo()
3645 void Dbdih::execUPDATE_TOREQ(Signal* signal)
3647 jamEntry();
3648 const UpdateToReq * const req = (UpdateToReq *)&signal->theData[0];
3649 BlockReference ref = req->userRef;
3650 ndbrequire(cmasterdihref == ref);
3652 CRASH_INSERTION(7154);
3653 RETURN_IF_NODE_NOT_ALIVE(req->startingNodeId);
3655 TakeOverRecordPtr takeOverPtr;
3656 takeOverPtr.i = req->userPtr;
3657 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
3659 ndbrequire(req->startingNodeId == takeOverPtr.p->toStartingNode);
3660 if (req->updateState == UpdateToReq::TO_COPY_FRAG_COMPLETED) {
3661 jam();
3662 ndbrequire(takeOverPtr.p->toSlaveStatus == TakeOverRecord::TO_SLAVE_CREATE_PREPARE);
3663 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SLAVE_COPY_FRAG_COMPLETED;
3664 takeOverPtr.p->toCurrentTabref = req->tableId;
3665 takeOverPtr.p->toCurrentFragid = req->fragmentNo;
3666 } else {
3667 jam();
3668 ndbrequire(req->updateState == UpdateToReq::TO_COPY_COMPLETED);
3669 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SLAVE_COPY_COMPLETED;
3670 setNodeCopyCompleted(takeOverPtr.p->toStartingNode, true);
3671 }//if
3674 UpdateToConf * const conf = (UpdateToConf *)&signal->theData[0];
3675 conf->userPtr = takeOverPtr.i;
3676 conf->sendingNodeId = cownNodeId;
3677 conf->startingNodeId = takeOverPtr.p->toStartingNode;
3678 sendSignal(ref, GSN_UPDATE_TOCONF, signal, UpdateToConf::SignalLength, JBB);
3679 }//Dbdih::execUPDATE_TOREQ()
3681 void Dbdih::execUPDATE_TOCONF(Signal* signal)
3683 const UpdateToConf * const conf = (UpdateToConf *)&signal->theData[0];
3684 CRASH_INSERTION(7152);
3686 RETURN_IF_NODE_NOT_ALIVE(conf->startingNodeId);
3688 TakeOverRecordPtr takeOverPtr;
3689 takeOverPtr.i = conf->userPtr;
3690 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
3692 receiveLoopMacro(UPDATE_TOREQ, conf->sendingNodeId);
3693 CRASH_INSERTION(7153);
3694 c_updateToLock = RNIL;
3696 if (takeOverPtr.p->toMasterStatus == TakeOverRecord::TO_COPY_COMPLETED) {
3697 jam();
3698 toCopyCompletedLab(signal, takeOverPtr);
3699 return;
3700 } else {
3701 ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::TO_UPDATE_TO);
3702 }//if
3703 TabRecordPtr tabPtr;
3704 tabPtr.i = takeOverPtr.p->toCurrentTabref;
3705 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
3707 FragmentstorePtr fragPtr;
3708 getFragstore(tabPtr.p, takeOverPtr.p->toCurrentFragid, fragPtr);
3709 takeOverPtr.p->toMasterStatus = TakeOverRecord::COPY_ACTIVE;
3710 BlockReference lqhRef = calcLqhBlockRef(takeOverPtr.p->toStartingNode);
3711 CopyActiveReq * const req = (CopyActiveReq *)&signal->theData[0];
3712 req->userPtr = takeOverPtr.i;
3713 req->userRef = reference();
3714 req->tableId = takeOverPtr.p->toCurrentTabref;
3715 req->fragId = takeOverPtr.p->toCurrentFragid;
3716 req->distributionKey = fragPtr.p->distributionKey;
3718 sendSignal(lqhRef, GSN_COPY_ACTIVEREQ, signal,
3719 CopyActiveReq::SignalLength, JBB);
3720 }//Dbdih::execUPDATE_TOCONF()
3722 void Dbdih::execCOPY_ACTIVECONF(Signal* signal)
3724 const CopyActiveConf * const conf = (CopyActiveConf *)&signal->theData[0];
3725 jamEntry();
3726 CRASH_INSERTION(7143);
3728 TakeOverRecordPtr takeOverPtr;
3729 takeOverPtr.i = conf->userPtr;
3730 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
3732 ndbrequire(conf->tableId == takeOverPtr.p->toCurrentTabref);
3733 ndbrequire(conf->fragId == takeOverPtr.p->toCurrentFragid);
3734 ndbrequire(checkNodeAlive(conf->startingNodeId));
3735 ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::COPY_ACTIVE);
3737 takeOverPtr.p->startGci = conf->startGci;
3738 takeOverPtr.p->toMasterStatus = TakeOverRecord::LOCK_MUTEX;
3740 Mutex mutex(signal, c_mutexMgr, takeOverPtr.p->m_switchPrimaryMutexHandle);
3741 Callback c = { safe_cast(&Dbdih::switchPrimaryMutex_locked), takeOverPtr.i };
3742 ndbrequire(mutex.lock(c));
3743 }//Dbdih::execCOPY_ACTIVECONF()
3745 void
3746 Dbdih::switchPrimaryMutex_locked(Signal* signal, Uint32 toPtrI, Uint32 retVal){
3747 jamEntry();
3748 ndbrequire(retVal == 0);
3750 TakeOverRecordPtr takeOverPtr;
3751 takeOverPtr.i = toPtrI;
3752 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
3754 ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::LOCK_MUTEX);
3756 if (!checkNodeAlive((takeOverPtr.p->toStartingNode))) {
3757 // We have mutex
3758 Mutex mutex(signal, c_mutexMgr, takeOverPtr.p->m_switchPrimaryMutexHandle);
3759 mutex.unlock(); // Ignore result
3761 c_createFragmentLock = RNIL;
3762 c_CREATE_FRAGREQ_Counter.clearWaitingFor();
3763 endTakeOver(takeOverPtr.i);
3764 return;
3767 takeOverPtr.p->toMasterStatus = TakeOverRecord::COMMIT_CREATE;
3768 sendCreateFragReq(signal, takeOverPtr.p->startGci,
3769 CreateFragReq::COMMIT_STORED, takeOverPtr.i);
3772 void Dbdih::toCopyCompletedLab(Signal * signal, TakeOverRecordPtr takeOverPtr)
3774 signal->theData[0] = NDB_LE_NR_CopyFragsCompleted;
3775 signal->theData[1] = takeOverPtr.p->toStartingNode;
3776 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
3778 if (getNodeState().getSystemRestartInProgress())
3780 jam();
3781 infoEvent("Take over of node %d complete", takeOverPtr.p->toStartingNode);
3782 setNodeActiveStatus(takeOverPtr.p->toStartingNode, Sysfile::NS_Active);
3783 takeOverPtr.p->toMasterStatus = TakeOverRecord::WAIT_LCP;
3784 takeOverCompleted(takeOverPtr.p->toStartingNode);
3785 checkToCopy();
3786 checkToCopyCompleted(signal);
3787 return;
3790 c_lcpState.immediateLcpStart = true;
3791 takeOverPtr.p->toMasterStatus = TakeOverRecord::WAIT_LCP;
3793 /*-----------------------------------------------------------------------*/
3794 /* NOW WE CAN ALLOW THE NEW NODE TO PARTICIPATE IN LOCAL CHECKPOINTS. */
3795 /* WHEN THE FIRST LOCAL CHECKPOINT IS READY WE DECLARE THE TAKE OVER AS */
3796 /* COMPLETED. SINCE LOCAL CHECKPOINTS HAVE BEEN BLOCKED DURING THE COPY */
3797 /* PROCESS WE MUST ALSO START A NEW LOCAL CHECKPOINT PROCESS BY ENSURING */
3798 /* THAT IT LOOKS LIKE IT IS TIME FOR A NEW LOCAL CHECKPOINT AND BY */
3799 /* UNBLOCKING THE LOCAL CHECKPOINT AGAIN. */
3800 /* --------------------------------------------------------------------- */
3801 }//Dbdih::toCopyCompletedLab()
3803 void Dbdih::sendEndTo(Signal* signal, Uint32 takeOverPtrI)
3805 TakeOverRecordPtr takeOverPtr;
3806 CRASH_INSERTION(7156);
3807 RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
3808 if ((c_endToLock != RNIL) || (ERROR_INSERTED(7164))) {
3809 jam();
3810 takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_WAIT_ENDING;
3811 signal->theData[0] = DihContinueB::ZSEND_END_TO;
3812 signal->theData[1] = takeOverPtrI;
3813 signal->theData[2] = takeOverPtr.p->toStartingNode;
3814 signal->theData[3] = takeOverPtr.p->toFailedNode;
3815 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 30, 4);
3816 return;
3817 }//if
3818 c_endToLock = takeOverPtr.i;
3819 takeOverPtr.p->toMasterStatus = TakeOverRecord::ENDING;
3820 EndToReq * const req = (EndToReq *)&signal->theData[0];
3821 req->userPtr = takeOverPtr.i;
3822 req->userRef = reference();
3823 req->startingNodeId = takeOverPtr.p->toStartingNode;
3824 sendLoopMacro(END_TOREQ, sendEND_TOREQ);
3825 }//Dbdih::sendStartTo()
3827 void Dbdih::execEND_TOREQ(Signal* signal)
3829 jamEntry();
3830 const EndToReq * const req = (EndToReq *)&signal->theData[0];
3831 BlockReference ref = req->userRef;
3832 Uint32 startingNodeId = req->startingNodeId;
3834 CRASH_INSERTION(7144);
3835 RETURN_IF_NODE_NOT_ALIVE(startingNodeId);
3837 TakeOverRecordPtr takeOverPtr;
3838 takeOverPtr.i = req->userPtr;
3839 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
3841 ndbrequire(startingNodeId == takeOverPtr.p->toStartingNode);
3842 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SLAVE_IDLE;
3844 if (!isMaster()) {
3845 jam();
3846 endTakeOver(takeOverPtr.i);
3847 }//if
3849 EndToConf * const conf = (EndToConf *)&signal->theData[0];
3850 conf->userPtr = takeOverPtr.i;
3851 conf->sendingNodeId = cownNodeId;
3852 conf->startingNodeId = startingNodeId;
3853 sendSignal(ref, GSN_END_TOCONF, signal, EndToConf::SignalLength, JBB);
3854 }//Dbdih::execEND_TOREQ()
3856 void Dbdih::execEND_TOCONF(Signal* signal)
3858 const EndToConf * const conf = (EndToConf *)&signal->theData[0];
3859 jamEntry();
3861 const Uint32 nodeId = conf->startingNodeId;
3862 CRASH_INSERTION(7145);
3864 RETURN_IF_NODE_NOT_ALIVE(nodeId);
3866 TakeOverRecordPtr takeOverPtr;
3867 takeOverPtr.i = conf->userPtr;
3868 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
3870 ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::ENDING);
3871 ndbrequire(nodeId == takeOverPtr.p->toStartingNode);
3873 receiveLoopMacro(END_TOREQ, conf->sendingNodeId);
3874 CRASH_INSERTION(7146);
3875 c_endToLock = RNIL;
3877 /* -----------------------------------------------------------------------*/
3878 /* WE HAVE FINALLY COMPLETED THE TAKE OVER. WE RESET THE STATUS AND CHECK*/
3879 /* IF ANY MORE TAKE OVERS ARE NEEDED AT THE MOMENT. */
3880 /* FIRST WE CHECK IF A RESTART IS ONGOING. IN THAT CASE WE RESTART PHASE */
3881 /* 4 AND CHECK IF ANY MORE TAKE OVERS ARE NEEDED BEFORE WE START NDB */
3882 /* CLUSTER. THIS CAN ONLY HAPPEN IN A SYSTEM RESTART. */
3883 /* ---------------------------------------------------------------------- */
3884 if (takeOverPtr.p->toNodeRestart) {
3885 jam();
3886 /* ----------------------------------------------------------------------*/
3887 /* THE TAKE OVER NODE WAS A STARTING NODE. WE WILL SEND START_COPYCONF */
3888 /* TO THE STARTING NODE SUCH THAT THE NODE CAN COMPLETE THE START-UP. */
3889 /* --------------------------------------------------------------------- */
3890 BlockReference ref = calcDihBlockRef(takeOverPtr.p->toStartingNode);
3891 signal->theData[0] = takeOverPtr.p->toStartingNode;
3892 sendSignal(ref, GSN_START_COPYCONF, signal, 1,JBB);
3893 }//if
3894 endTakeOver(takeOverPtr.i);
3896 if (cstartPhase == ZNDB_SPH4) {
3897 jam();
3898 if (anyActiveTakeOver()) {
3899 jam();
3900 return;
3901 }//if
3902 ndbsttorry10Lab(signal, __LINE__);
3903 return;
3904 }//if
3905 checkStartTakeOver(signal);
3906 }//Dbdih::execEND_TOCONF()
3908 void Dbdih::allocateTakeOver(TakeOverRecordPtr& takeOverPtr)
3910 if (isMaster()) {
3911 jam();
3912 //--------------------------------------------
3913 // Master already seized the take over record.
3914 //--------------------------------------------
3915 return;
3916 }//if
3917 if (takeOverPtr.i == cfirstfreeTakeOver) {
3918 jam();
3919 seizeTakeOver(takeOverPtr);
3920 } else {
3921 TakeOverRecordPtr nextTakeOverptr;
3922 TakeOverRecordPtr prevTakeOverptr;
3923 nextTakeOverptr.i = takeOverPtr.p->nextTakeOver;
3924 prevTakeOverptr.i = takeOverPtr.p->prevTakeOver;
3925 if (prevTakeOverptr.i != RNIL) {
3926 jam();
3927 ptrCheckGuard(prevTakeOverptr, MAX_NDB_NODES, takeOverRecord);
3928 prevTakeOverptr.p->nextTakeOver = nextTakeOverptr.i;
3929 }//if
3930 if (nextTakeOverptr.i != RNIL) {
3931 jam();
3932 ptrCheckGuard(nextTakeOverptr, MAX_NDB_NODES, takeOverRecord);
3933 nextTakeOverptr.p->prevTakeOver = prevTakeOverptr.i;
3934 }//if
3935 }//if
3936 }//Dbdih::allocateTakeOver()
3938 void Dbdih::seizeTakeOver(TakeOverRecordPtr& takeOverPtr)
3940 TakeOverRecordPtr nextTakeOverptr;
3941 ndbrequire(cfirstfreeTakeOver != RNIL);
3942 takeOverPtr.i = cfirstfreeTakeOver;
3943 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
3944 cfirstfreeTakeOver = takeOverPtr.p->nextTakeOver;
3945 nextTakeOverptr.i = takeOverPtr.p->nextTakeOver;
3946 if (nextTakeOverptr.i != RNIL) {
3947 jam();
3948 ptrCheckGuard(nextTakeOverptr, MAX_NDB_NODES, takeOverRecord);
3949 nextTakeOverptr.p->prevTakeOver = RNIL;
3950 }//if
3951 takeOverPtr.p->nextTakeOver = RNIL;
3952 takeOverPtr.p->prevTakeOver = RNIL;
3953 }//Dbdih::seizeTakeOver()
3955 void Dbdih::endTakeOver(Uint32 takeOverPtrI)
3957 TakeOverRecordPtr takeOverPtr;
3958 takeOverPtr.i = takeOverPtrI;
3959 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
3961 if ((takeOverPtr.p->toMasterStatus != TakeOverRecord::IDLE) &&
3962 (takeOverPtr.p->toMasterStatus != TakeOverRecord::TO_WAIT_START_TAKE_OVER)) {
3963 jam();
3964 NodeGroupRecordPtr NGPtr;
3965 NodeRecordPtr nodePtr;
3966 nodePtr.i = takeOverPtr.p->toStartingNode;
3967 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
3968 NGPtr.i = nodePtr.p->nodeGroup;
3969 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
3970 NGPtr.p->activeTakeOver = false;
3971 }//if
3972 setAllowNodeStart(takeOverPtr.p->toStartingNode, true);
3973 initTakeOver(takeOverPtr);
3974 releaseTakeOver(takeOverPtrI);
3975 }//Dbdih::endTakeOver()
3977 void Dbdih::releaseTakeOver(Uint32 takeOverPtrI)
3979 TakeOverRecordPtr takeOverPtr;
3980 takeOverPtr.i = takeOverPtrI;
3981 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
3983 takeOverPtr.p->nextTakeOver = cfirstfreeTakeOver;
3984 cfirstfreeTakeOver = takeOverPtr.i;
3985 }//Dbdih::releaseTakeOver()
3987 void Dbdih::initTakeOver(TakeOverRecordPtr takeOverPtr)
3989 takeOverPtr.p->toCopyNode = RNIL;
3990 takeOverPtr.p->toCurrentFragid = RNIL;
3991 takeOverPtr.p->toCurrentReplica = RNIL;
3992 takeOverPtr.p->toCurrentTabref = RNIL;
3993 takeOverPtr.p->toFailedNode = RNIL;
3994 takeOverPtr.p->toStartingNode = RNIL;
3995 takeOverPtr.p->prevTakeOver = RNIL;
3996 takeOverPtr.p->nextTakeOver = RNIL;
3997 takeOverPtr.p->toNodeRestart = false;
3998 takeOverPtr.p->toMasterStatus = TakeOverRecord::IDLE;
3999 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SLAVE_IDLE;
4000 }//Dbdih::initTakeOver()
4002 bool Dbdih::anyActiveTakeOver()
4004 TakeOverRecordPtr takeOverPtr;
4005 for (takeOverPtr.i = 0; takeOverPtr.i < MAX_NDB_NODES; takeOverPtr.i++) {
4006 ptrAss(takeOverPtr, takeOverRecord);
4007 if (takeOverPtr.p->toMasterStatus != TakeOverRecord::IDLE) {
4008 jam();
4009 return true;
4010 }//if
4011 }//for
4012 return false;
4013 }//Dbdih::anyActiveTakeOver()
4015 /*****************************************************************************/
4016 /* ------------------------------------------------------------------------- */
4017 /* WE HAVE BEEN REQUESTED TO PERFORM A SYSTEM RESTART. WE START BY */
4018 /* READING THE GCI FILES. THIS REQUEST WILL ONLY BE SENT TO THE MASTER */
4019 /* DIH. THAT MEANS WE HAVE TO REPLICATE THE INFORMATION WE READ FROM */
4020 /* OUR FILES TO ENSURE THAT ALL NODES HAVE THE SAME DISTRIBUTION */
4021 /* INFORMATION. */
4022 /* ------------------------------------------------------------------------- */
4023 /*****************************************************************************/
4024 void Dbdih::readGciFileLab(Signal* signal)
4026 FileRecordPtr filePtr;
4027 filePtr.i = crestartInfoFile[0];
4028 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4029 filePtr.p->reqStatus = FileRecord::OPENING_GCP;
4031 openFileRo(signal, filePtr);
4032 }//Dbdih::readGciFileLab()
4034 void Dbdih::openingGcpLab(Signal* signal, FileRecordPtr filePtr)
4036 /* ----------------------------------------------------------------------- */
4037 /* WE HAVE SUCCESSFULLY OPENED A FILE CONTAINING INFORMATION ABOUT */
4038 /* THE GLOBAL CHECKPOINTS THAT ARE POSSIBLE TO RESTART. */
4039 /* ----------------------------------------------------------------------- */
4040 readRestorableGci(signal, filePtr);
4041 filePtr.p->reqStatus = FileRecord::READING_GCP;
4042 }//Dbdih::openingGcpLab()
4044 void Dbdih::readingGcpLab(Signal* signal, FileRecordPtr filePtr)
4046 /* ----------------------------------------------------------------------- */
4047 /* WE HAVE NOW SUCCESSFULLY MANAGED TO READ IN THE GLOBAL CHECKPOINT */
4048 /* INFORMATION FROM FILE. LATER WE WILL ADD SOME FUNCTIONALITY THAT */
4049 /* CHECKS THE RESTART TIMERS TO DEDUCE FROM WHERE TO RESTART. */
4050 /* NOW WE WILL SIMPLY RESTART FROM THE NEWEST GLOBAL CHECKPOINT */
4051 /* POSSIBLE TO RESTORE. */
4052 /* */
4053 /* BEFORE WE INVOKE DICT WE NEED TO COPY CRESTART_INFO TO ALL NODES. */
4054 /* WE ALSO COPY TO OUR OWN NODE. TO ENABLE US TO DO THIS PROPERLY WE */
4055 /* START BY CLOSING THIS FILE. */
4056 /* ----------------------------------------------------------------------- */
4057 globalData.m_restart_seq = ++SYSFILE->m_restart_seq;
4058 closeFile(signal, filePtr);
4059 filePtr.p->reqStatus = FileRecord::CLOSING_GCP;
4060 }//Dbdih::readingGcpLab()
4062 void Dbdih::closingGcpLab(Signal* signal, FileRecordPtr filePtr)
4064 if (Sysfile::getInitialStartOngoing(SYSFILE->systemRestartBits) == false){
4065 jam();
4066 selectMasterCandidateAndSend(signal);
4067 return;
4068 } else {
4069 jam();
4070 sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB);
4071 return;
4072 }//if
4073 }//Dbdih::closingGcpLab()
4075 /* ------------------------------------------------------------------------- */
4076 /* SELECT THE MASTER CANDIDATE TO BE USED IN SYSTEM RESTARTS. */
4077 /* ------------------------------------------------------------------------- */
4078 void Dbdih::selectMasterCandidateAndSend(Signal* signal)
4080 setNodeGroups();
4081 signal->theData[0] = getOwnNodeId();
4082 signal->theData[1] = SYSFILE->lastCompletedGCI[getOwnNodeId()];
4083 sendSignal(cntrlblockref, GSN_DIH_RESTARTCONF, signal, 2, JBB);
4085 NodeRecordPtr nodePtr;
4086 Uint32 node_groups[MAX_NDB_NODES];
4087 memset(node_groups, 0, sizeof(node_groups));
4088 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4089 jam();
4090 const Uint32 ng = Sysfile::getNodeGroup(nodePtr.i, SYSFILE->nodeGroups);
4091 if(ng != NO_NODE_GROUP_ID){
4092 ndbrequire(ng < MAX_NDB_NODES);
4093 node_groups[ng]++;
4097 for (nodePtr.i = 0; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4098 jam();
4099 Uint32 count = node_groups[nodePtr.i];
4100 if(count != 0 && count != cnoReplicas){
4101 char buf[255];
4102 BaseString::snprintf(buf, sizeof(buf),
4103 "Illegal configuration change."
4104 " Initial start needs to be performed "
4105 " when changing no of replicas (%d != %d)",
4106 node_groups[nodePtr.i], cnoReplicas);
4107 progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, buf);
4110 }//Dbdih::selectMasterCandidate()
4112 /* ------------------------------------------------------------------------- */
4113 /* ERROR HANDLING DURING READING RESTORABLE GCI FROM FILE. */
4114 /* ------------------------------------------------------------------------- */
4115 void Dbdih::openingGcpErrorLab(Signal* signal, FileRecordPtr filePtr)
4117 filePtr.p->fileStatus = FileRecord::CRASHED;
4118 filePtr.p->reqStatus = FileRecord::IDLE;
4119 if (crestartInfoFile[0] == filePtr.i) {
4120 jam();
4121 /* --------------------------------------------------------------------- */
4122 /* THE FIRST FILE WAS NOT ABLE TO BE OPENED. SET STATUS TO CRASHED AND */
4123 /* TRY OPEN THE NEXT FILE. */
4124 /* --------------------------------------------------------------------- */
4125 filePtr.i = crestartInfoFile[1];
4126 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4127 openFileRo(signal, filePtr);
4128 filePtr.p->reqStatus = FileRecord::OPENING_GCP;
4129 } else {
4130 jam();
4131 /* --------------------------------------------------------------------- */
4132 /* WE FAILED IN OPENING THE SECOND FILE. BOTH FILES WERE CORRUPTED. WE */
4133 /* CANNOT CONTINUE THE RESTART IN THIS CASE. TELL NDBCNTR OF OUR */
4134 /* FAILURE. */
4135 /*---------------------------------------------------------------------- */
4136 sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB);
4137 return;
4138 }//if
4139 }//Dbdih::openingGcpErrorLab()
4141 void Dbdih::readingGcpErrorLab(Signal* signal, FileRecordPtr filePtr)
4143 filePtr.p->fileStatus = FileRecord::CRASHED;
4144 /* ----------------------------------------------------------------------- */
4145 /* WE FAILED IN READING THE FILE AS WELL. WE WILL CLOSE THIS FILE. */
4146 /* ----------------------------------------------------------------------- */
4147 closeFile(signal, filePtr);
4148 filePtr.p->reqStatus = FileRecord::CLOSING_GCP_CRASH;
4149 }//Dbdih::readingGcpErrorLab()
4151 void Dbdih::closingGcpCrashLab(Signal* signal, FileRecordPtr filePtr)
4153 if (crestartInfoFile[0] == filePtr.i) {
4154 jam();
4155 /* --------------------------------------------------------------------- */
4156 /* ERROR IN FIRST FILE, TRY THE SECOND FILE. */
4157 /* --------------------------------------------------------------------- */
4158 filePtr.i = crestartInfoFile[1];
4159 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4160 openFileRw(signal, filePtr);
4161 filePtr.p->reqStatus = FileRecord::OPENING_GCP;
4162 return;
4163 }//if
4164 /* ----------------------------------------------------------------------- */
4165 /* WE DISCOVERED A FAILURE WITH THE SECOND FILE AS WELL. THIS IS A */
4166 /* SERIOUS PROBLEM. REPORT FAILURE TO NDBCNTR. */
4167 /* ----------------------------------------------------------------------- */
4168 sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB);
4169 }//Dbdih::closingGcpCrashLab()
4171 /*****************************************************************************/
4172 /* ------------------------------------------------------------------------- */
4173 /* THIS IS AN INITIAL RESTART. WE WILL CREATE THE TWO FILES DESCRIBING */
4174 /* THE GLOBAL CHECKPOINTS THAT ARE RESTORABLE. */
4175 /* ------------------------------------------------------------------------- */
4176 /*****************************************************************************/
4177 void Dbdih::initGciFilesLab(Signal* signal)
4179 FileRecordPtr filePtr;
4180 filePtr.i = crestartInfoFile[0];
4181 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4182 createFileRw(signal, filePtr);
4183 filePtr.p->reqStatus = FileRecord::CREATING_GCP;
4184 }//Dbdih::initGciFilesLab()
4186 /* ------------------------------------------------------------------------- */
4187 /* GLOBAL CHECKPOINT FILE HAVE BEEN SUCCESSFULLY CREATED. */
4188 /* ------------------------------------------------------------------------- */
4189 void Dbdih::creatingGcpLab(Signal* signal, FileRecordPtr filePtr)
4191 if (filePtr.i == crestartInfoFile[0]) {
4192 jam();
4193 /* --------------------------------------------------------------------- */
4194 /* IF CREATED FIRST THEN ALSO CREATE THE SECOND FILE. */
4195 /* --------------------------------------------------------------------- */
4196 filePtr.i = crestartInfoFile[1];
4197 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4198 createFileRw(signal, filePtr);
4199 filePtr.p->reqStatus = FileRecord::CREATING_GCP;
4200 } else {
4201 jam();
4202 /* --------------------------------------------------------------------- */
4203 /* BOTH FILES HAVE BEEN CREATED. NOW WRITE THE INITIAL DATA TO BOTH */
4204 /* OF THE FILES. */
4205 /* --------------------------------------------------------------------- */
4206 filePtr.i = crestartInfoFile[0];
4207 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4208 writeRestorableGci(signal, filePtr);
4209 filePtr.p->reqStatus = FileRecord::WRITE_INIT_GCP;
4210 }//if
4211 }//Dbdih::creatingGcpLab()
4213 /* ------------------------------------------------------------------------- */
4214 /* WE HAVE SUCCESSFULLY WRITTEN A GCI FILE. */
4215 /* ------------------------------------------------------------------------- */
4216 void Dbdih::writeInitGcpLab(Signal* signal, FileRecordPtr filePtr)
4218 filePtr.p->reqStatus = FileRecord::IDLE;
4219 if (filePtr.i == crestartInfoFile[0]) {
4220 jam();
4221 /* --------------------------------------------------------------------- */
4222 /* WE HAVE WRITTEN THE FIRST FILE NOW ALSO WRITE THE SECOND FILE. */
4223 /* --------------------------------------------------------------------- */
4224 filePtr.i = crestartInfoFile[1];
4225 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4226 writeRestorableGci(signal, filePtr);
4227 filePtr.p->reqStatus = FileRecord::WRITE_INIT_GCP;
4228 } else {
4229 /* --------------------------------------------------------------------- */
4230 /* WE HAVE WRITTEN BOTH FILES. LEAVE BOTH FILES OPEN AND CONFIRM OUR */
4231 /* PART OF THE INITIAL START. */
4232 /* --------------------------------------------------------------------- */
4233 if (isMaster()) {
4234 jam();
4235 /*---------------------------------------------------------------------*/
4236 // IN MASTER NODES THE START REQUEST IS RECEIVED FROM NDBCNTR AND WE MUST
4237 // RESPOND WHEN COMPLETED.
4238 /*---------------------------------------------------------------------*/
4239 signal->theData[0] = reference();
4240 sendSignal(cndbStartReqBlockref, GSN_NDB_STARTCONF, signal, 1, JBB);
4241 } else {
4242 jam();
4243 ndbsttorry10Lab(signal, __LINE__);
4244 return;
4245 }//if
4246 }//if
4247 }//Dbdih::writeInitGcpLab()
4249 /*****************************************************************************/
4250 /* ********** NODES DELETION MODULE *************/
4251 /*****************************************************************************/
4252 /*---------------------------------------------------------------------------*/
4253 /* LOGIC FOR NODE FAILURE */
4254 /*---------------------------------------------------------------------------*/
4255 void Dbdih::execNODE_FAILREP(Signal* signal)
4257 Uint32 i;
4258 Uint32 failedNodes[MAX_NDB_NODES];
4259 jamEntry();
4260 NodeFailRep * const nodeFail = (NodeFailRep *)&signal->theData[0];
4262 cfailurenr = nodeFail->failNo;
4263 Uint32 newMasterId = nodeFail->masterNodeId;
4264 const Uint32 noOfFailedNodes = nodeFail->noOfNodes;
4266 if (ERROR_INSERTED(7179))
4268 CLEAR_ERROR_INSERT_VALUE;
4271 if (ERROR_INSERTED(7184))
4273 SET_ERROR_INSERT_VALUE(7000);
4276 /*-------------------------------------------------------------------------*/
4277 // The first step is to convert from a bit mask to an array of failed nodes.
4278 /*-------------------------------------------------------------------------*/
4279 Uint32 index = 0;
4280 for (i = 1; i < MAX_NDB_NODES; i++) {
4281 jam();
4282 if(NodeBitmask::get(nodeFail->theNodes, i)){
4283 jam();
4284 failedNodes[index] = i;
4285 index++;
4286 }//if
4287 }//for
4288 ndbrequire(noOfFailedNodes == index);
4289 ndbrequire(noOfFailedNodes - 1 < MAX_NDB_NODES);
4291 /*-------------------------------------------------------------------------*/
4292 // The second step is to update the node status of the failed nodes, remove
4293 // them from the alive node list and put them into the dead node list. Also
4294 // update the number of nodes on-line.
4295 // We also set certain state variables ensuring that the node no longer is
4296 // used in transactions and also mark that we received this signal.
4297 /*-------------------------------------------------------------------------*/
4298 for (i = 0; i < noOfFailedNodes; i++) {
4299 jam();
4300 NodeRecordPtr TNodePtr;
4301 TNodePtr.i = failedNodes[i];
4302 ptrCheckGuard(TNodePtr, MAX_NDB_NODES, nodeRecord);
4303 TNodePtr.p->useInTransactions = false;
4304 TNodePtr.p->m_inclDihLcp = false;
4305 TNodePtr.p->recNODE_FAILREP = ZTRUE;
4306 if (TNodePtr.p->nodeStatus == NodeRecord::ALIVE) {
4307 jam();
4308 con_lineNodes--;
4309 TNodePtr.p->nodeStatus = NodeRecord::DIED_NOW;
4310 removeAlive(TNodePtr);
4311 insertDeadNode(TNodePtr);
4312 }//if
4313 }//for
4315 /*-------------------------------------------------------------------------*/
4316 // Verify that we can continue to operate the cluster. If we cannot we will
4317 // not return from checkEscalation.
4318 /*-------------------------------------------------------------------------*/
4319 checkEscalation();
4321 /*------------------------------------------------------------------------*/
4322 // Verify that a starting node has also crashed. Reset the node start record.
4323 /*-------------------------------------------------------------------------*/
4324 #if 0
4326 * Node will crash by itself...
4327 * nodeRestart is run then...
4329 if (false && c_nodeStartMaster.startNode != RNIL && getNodeStatus(c_nodeStartMaster.startNode) == NodeRecord::ALIVE)
4331 BlockReference cntrRef = calcNdbCntrBlockRef(c_nodeStartMaster.startNode);
4332 SystemError * const sysErr = (SystemError*)&signal->theData[0];
4333 sysErr->errorCode = SystemError::StartInProgressError;
4334 sysErr->errorRef = reference();
4335 sysErr->data1= 0;
4336 sysErr->data2= __LINE__;
4337 sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal, SystemError::SignalLength, JBA);
4338 nodeResetStart();
4339 }//if
4340 #endif
4342 /*--------------------------------------------------*/
4343 /* */
4344 /* WE CHANGE THE REFERENCE TO MASTER DIH */
4345 /* BLOCK AND POINTER AT THIS PLACE IN THE CODE*/
4346 /*--------------------------------------------------*/
4347 Uint32 oldMasterId = cmasterNodeId;
4348 BlockReference oldMasterRef = cmasterdihref;
4349 cmasterdihref = calcDihBlockRef(newMasterId);
4350 cmasterNodeId = newMasterId;
4352 const bool masterTakeOver = (oldMasterId != newMasterId);
4354 for(i = 0; i < noOfFailedNodes; i++) {
4355 NodeRecordPtr failedNodePtr;
4356 failedNodePtr.i = failedNodes[i];
4357 ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
4358 Uint32 activeTakeOverPtr = findTakeOver(failedNodes[i]);
4359 if (oldMasterRef == reference()) {
4360 /*-------------------------------------------------------*/
4361 // Functions that need to be called only for master nodes.
4362 /*-------------------------------------------------------*/
4363 checkCopyTab(failedNodePtr);
4364 checkStopPermMaster(signal, failedNodePtr);
4365 checkWaitGCPMaster(signal, failedNodes[i]);
4366 checkTakeOverInMasterAllNodeFailure(signal, failedNodePtr);
4367 checkTakeOverInMasterCopyNodeFailure(signal, failedNodePtr.i);
4368 checkTakeOverInMasterStartNodeFailure(signal, activeTakeOverPtr);
4369 checkGcpOutstanding(signal, failedNodePtr.i);
4370 } else {
4371 jam();
4372 /*-----------------------------------------------------------*/
4373 // Functions that need to be called only for nodes that were
4374 // not master before these failures.
4375 /*-----------------------------------------------------------*/
4376 checkStopPermProxy(signal, failedNodes[i]);
4377 checkWaitGCPProxy(signal, failedNodes[i]);
4378 if (isMaster()) {
4379 /*-----------------------------------------------------------*/
4380 // We take over as master since old master has failed
4381 /*-----------------------------------------------------------*/
4382 handleTakeOverNewMaster(signal, activeTakeOverPtr);
4383 } else {
4384 /*-----------------------------------------------------------*/
4385 // We are not master and will not become master.
4386 /*-----------------------------------------------------------*/
4387 checkTakeOverInNonMasterStartNodeFailure(signal, activeTakeOverPtr);
4388 }//if
4389 }//if
4390 /*--------------------------------------------------*/
4391 // Functions that need to be called for all nodes.
4392 /*--------------------------------------------------*/
4393 checkStopMe(signal, failedNodePtr);
4394 failedNodeLcpHandling(signal, failedNodePtr);
4395 checkWaitDropTabFailedLqh(signal, failedNodePtr.i, 0); // 0 = start w/ tab 0
4396 startRemoveFailedNode(signal, failedNodePtr);
4399 * This is the last function called
4400 * It modifies failedNodePtr.p->nodeStatus
4402 failedNodeSynchHandling(signal, failedNodePtr);
4403 }//for
4405 if(masterTakeOver){
4406 jam();
4407 startLcpMasterTakeOver(signal, oldMasterId);
4408 startGcpMasterTakeOver(signal, oldMasterId);
4410 if(getNodeState().getNodeRestartInProgress()){
4411 jam();
4412 progError(__LINE__, NDBD_EXIT_MASTER_FAILURE_DURING_NR);
4417 if (isMaster()) {
4418 jam();
4419 setNodeRestartInfoBits();
4420 }//if
4421 }//Dbdih::execNODE_FAILREP()
4423 void Dbdih::checkCopyTab(NodeRecordPtr failedNodePtr)
4425 jam();
4427 if(c_nodeStartMaster.startNode != failedNodePtr.i){
4428 jam();
4429 return;
4432 switch(c_nodeStartMaster.m_outstandingGsn){
4433 case GSN_COPY_TABREQ:
4434 jam();
4435 ndbrequire(c_COPY_TABREQ_Counter.isWaitingFor(failedNodePtr.i));
4436 releaseTabPages(failedNodePtr.p->activeTabptr);
4437 c_COPY_TABREQ_Counter.clearWaitingFor(failedNodePtr.i);
4438 c_nodeStartMaster.wait = ZFALSE;
4439 break;
4440 case GSN_START_INFOREQ:
4441 case GSN_START_PERMCONF:
4442 case GSN_DICTSTARTREQ:
4443 case GSN_START_MECONF:
4444 jam();
4445 break;
4446 default:
4447 g_eventLogger.error("outstanding gsn: %s(%d)",
4448 getSignalName(c_nodeStartMaster.m_outstandingGsn),
4449 c_nodeStartMaster.m_outstandingGsn);
4450 ndbrequire(false);
4453 nodeResetStart();
4454 }//Dbdih::checkCopyTab()
4456 void Dbdih::checkStopMe(Signal* signal, NodeRecordPtr failedNodePtr)
4458 jam();
4459 if (c_STOP_ME_REQ_Counter.isWaitingFor(failedNodePtr.i)){
4460 jam();
4461 ndbrequire(c_stopMe.clientRef != 0);
4462 StopMeConf * const stopMeConf = (StopMeConf *)&signal->theData[0];
4463 stopMeConf->senderRef = calcDihBlockRef(failedNodePtr.i);
4464 stopMeConf->senderData = c_stopMe.clientData;
4465 sendSignal(reference(), GSN_STOP_ME_CONF, signal,
4466 StopMeConf::SignalLength, JBB);
4467 }//if
4468 }//Dbdih::checkStopMe()
4470 void Dbdih::checkStopPermMaster(Signal* signal, NodeRecordPtr failedNodePtr)
4472 DihSwitchReplicaRef* const ref = (DihSwitchReplicaRef*)&signal->theData[0];
4473 jam();
4474 if (c_DIH_SWITCH_REPLICA_REQ_Counter.isWaitingFor(failedNodePtr.i)){
4475 jam();
4476 ndbrequire(c_stopPermMaster.clientRef != 0);
4477 ref->senderNode = failedNodePtr.i;
4478 ref->errorCode = StopPermRef::NF_CausedAbortOfStopProcedure;
4479 sendSignal(reference(), GSN_DIH_SWITCH_REPLICA_REF, signal,
4480 DihSwitchReplicaRef::SignalLength, JBB);
4481 return;
4482 }//if
4483 }//Dbdih::checkStopPermMaster()
4485 void Dbdih::checkStopPermProxy(Signal* signal, NodeId failedNodeId)
4487 jam();
4488 if(c_stopPermProxy.clientRef != 0 &&
4489 refToNode(c_stopPermProxy.masterRef) == failedNodeId){
4492 * The master has failed report to proxy-client
4494 jam();
4495 StopPermRef* const ref = (StopPermRef*)&signal->theData[0];
4497 ref->senderData = c_stopPermProxy.clientData;
4498 ref->errorCode = StopPermRef::NF_CausedAbortOfStopProcedure;
4499 sendSignal(c_stopPermProxy.clientRef, GSN_STOP_PERM_REF, signal, 2, JBB);
4500 c_stopPermProxy.clientRef = 0;
4501 }//if
4502 }//Dbdih::checkStopPermProxy()
4504 void
4505 Dbdih::checkTakeOverInMasterAllNodeFailure(Signal* signal,
4506 NodeRecordPtr failedNodePtr)
4508 //------------------------------------------------------------------------
4509 // This code is used to handle the failure of "all" nodes during the
4510 // take over when "all" nodes are informed about state changes in
4511 // the take over protocol.
4512 //--------------------------------------------------------------------------
4513 if (c_START_TOREQ_Counter.isWaitingFor(failedNodePtr.i)){
4514 jam();
4515 StartToConf * const conf = (StartToConf *)&signal->theData[0];
4516 conf->userPtr = c_startToLock;
4517 conf->sendingNodeId = failedNodePtr.i;
4518 conf->startingNodeId = getStartNode(c_startToLock);
4519 sendSignal(reference(), GSN_START_TOCONF, signal,
4520 StartToConf::SignalLength, JBB);
4521 }//if
4522 if (c_CREATE_FRAGREQ_Counter.isWaitingFor(failedNodePtr.i)){
4523 jam();
4524 CreateFragConf * const conf = (CreateFragConf *)&signal->theData[0];
4525 TakeOverRecordPtr takeOverPtr;
4526 takeOverPtr.i = c_createFragmentLock;
4527 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
4528 conf->userPtr = takeOverPtr.i;
4529 conf->tableId = takeOverPtr.p->toCurrentTabref;
4530 conf->fragId = takeOverPtr.p->toCurrentFragid;
4531 conf->sendingNodeId = failedNodePtr.i;
4532 conf->startingNodeId = takeOverPtr.p->toStartingNode;
4533 sendSignal(reference(), GSN_CREATE_FRAGCONF, signal,
4534 CreateFragConf::SignalLength, JBB);
4535 }//if
4536 if (c_UPDATE_TOREQ_Counter.isWaitingFor(failedNodePtr.i)){
4537 jam();
4538 UpdateToConf * const conf = (UpdateToConf *)&signal->theData[0];
4539 conf->userPtr = c_updateToLock;
4540 conf->sendingNodeId = failedNodePtr.i;
4541 conf->startingNodeId = getStartNode(c_updateToLock);
4542 sendSignal(reference(), GSN_UPDATE_TOCONF, signal,
4543 UpdateToConf::SignalLength, JBB);
4544 }//if
4546 if (c_END_TOREQ_Counter.isWaitingFor(failedNodePtr.i)){
4547 jam();
4548 EndToConf * const conf = (EndToConf *)&signal->theData[0];
4549 conf->userPtr = c_endToLock;
4550 conf->sendingNodeId = failedNodePtr.i;
4551 conf->startingNodeId = getStartNode(c_endToLock);
4552 sendSignal(reference(), GSN_END_TOCONF, signal,
4553 EndToConf::SignalLength, JBB);
4554 }//if
4555 }//Dbdih::checkTakeOverInMasterAllNodeFailure()
4557 void Dbdih::checkTakeOverInMasterCopyNodeFailure(Signal* signal,
4558 Uint32 failedNodeId)
4560 //---------------------------------------------------------------------------
4561 // This code is used to handle failure of the copying node during a take over
4562 //---------------------------------------------------------------------------
4563 TakeOverRecordPtr takeOverPtr;
4564 for (Uint32 i = 0; i < MAX_NDB_NODES; i++) {
4565 jam();
4566 takeOverPtr.i = i;
4567 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
4568 if ((takeOverPtr.p->toMasterStatus == TakeOverRecord::COPY_FRAG) &&
4569 (takeOverPtr.p->toCopyNode == failedNodeId)) {
4570 jam();
4572 * The copying node failed but the system is still operational.
4573 * We restart the copy process by selecting a new copy node.
4574 * We do not need to add a fragment however since it is already added.
4575 * We start again from the prepare create fragment phase.
4577 prepareSendCreateFragReq(signal, takeOverPtr.i);
4578 }//if
4579 }//for
4580 }//Dbdih::checkTakeOverInMasterCopyNodeFailure()
4582 void Dbdih::checkTakeOverInMasterStartNodeFailure(Signal* signal,
4583 Uint32 takeOverPtrI)
4585 jam();
4586 ndbout_c("checkTakeOverInMasterStartNodeFailure %x",
4587 takeOverPtrI);
4588 if (takeOverPtrI == RNIL) {
4589 jam();
4590 return;
4592 //-----------------------------------------------------------------------
4593 // We are the master and the starting node has failed during a take over.
4594 // We need to handle this failure in different ways depending on the state.
4595 //-----------------------------------------------------------------------
4597 TakeOverRecordPtr takeOverPtr;
4598 takeOverPtr.i = takeOverPtrI;
4599 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
4601 ndbout_c("takeOverPtr.p->toMasterStatus: %x",
4602 takeOverPtr.p->toMasterStatus);
4604 bool ok = false;
4605 switch (takeOverPtr.p->toMasterStatus) {
4606 case TakeOverRecord::IDLE:
4607 //-----------------------------------------------------------------------
4608 // The state cannot be idle when it has a starting node.
4609 //-----------------------------------------------------------------------
4610 ndbrequire(false);
4611 break;
4612 case TakeOverRecord::TO_WAIT_START_TAKE_OVER:
4613 jam();
4614 case TakeOverRecord::TO_START_COPY:
4615 jam();
4616 case TakeOverRecord::TO_START_COPY_ONGOING:
4617 jam();
4618 case TakeOverRecord::TO_WAIT_START:
4619 jam();
4620 case TakeOverRecord::TO_WAIT_PREPARE_CREATE:
4621 jam();
4622 case TakeOverRecord::TO_WAIT_UPDATE_TO:
4623 jam();
4624 case TakeOverRecord::TO_WAIT_COMMIT_CREATE:
4625 jam();
4626 case TakeOverRecord::TO_END_COPY:
4627 jam();
4628 case TakeOverRecord::TO_END_COPY_ONGOING:
4629 jam();
4630 case TakeOverRecord::TO_WAIT_ENDING:
4631 jam();
4632 //-----------------------------------------------------------------------
4633 // We will not do anything since an internal signal process is outstanding.
4634 // When the signal arrives the take over will be released.
4635 //-----------------------------------------------------------------------
4636 ok = true;
4637 break;
4638 case TakeOverRecord::STARTING:
4639 jam();
4640 ok = true;
4641 c_startToLock = RNIL;
4642 c_START_TOREQ_Counter.clearWaitingFor();
4643 endTakeOver(takeOverPtr.i);
4644 break;
4645 case TakeOverRecord::TO_UPDATE_TO:
4646 jam();
4647 ok = true;
4648 c_updateToLock = RNIL;
4649 c_UPDATE_TOREQ_Counter.clearWaitingFor();
4650 endTakeOver(takeOverPtr.i);
4651 break;
4652 case TakeOverRecord::ENDING:
4653 jam();
4654 ok = true;
4655 c_endToLock = RNIL;
4656 c_END_TOREQ_Counter.clearWaitingFor();
4657 endTakeOver(takeOverPtr.i);
4658 break;
4659 case TakeOverRecord::COMMIT_CREATE:
4660 ok = true;
4661 jam();
4662 {// We have mutex
4663 Mutex m(signal, c_mutexMgr, takeOverPtr.p->m_switchPrimaryMutexHandle);
4664 m.unlock(); // Ignore result
4666 // Fall through
4667 case TakeOverRecord::PREPARE_CREATE:
4668 ok = true;
4669 jam();
4670 c_createFragmentLock = RNIL;
4671 c_CREATE_FRAGREQ_Counter.clearWaitingFor();
4672 endTakeOver(takeOverPtr.i);
4673 break;
4674 case TakeOverRecord::LOCK_MUTEX:
4675 ok = true;
4676 jam();
4677 // Lock mutex will return and do endTakeOver
4678 break;
4680 //-----------------------------------------------------------------------
4681 // Signals are outstanding to external nodes. These signals carry the node
4682 // id of the starting node and will not use the take over record if the
4683 // starting node has failed.
4684 //-----------------------------------------------------------------------
4685 case TakeOverRecord::COPY_FRAG:
4686 ok = true;
4687 jam();
4688 //-----------------------------------------------------------------------
4689 // The copying node will discover the problem. We will receive either
4690 // COPY_FRAGREQ or COPY_FRAGCONF and then we can release the take over
4691 // record and end the process. If the copying node should also die then
4692 // we will try to send prepare create fragment and will then discover
4693 // that the starting node has failed.
4694 //-----------------------------------------------------------------------
4695 break;
4696 case TakeOverRecord::PREPARE_COPY:
4697 ok = true;
4698 jam();
4700 * We're waiting for the starting node...which just died...
4701 * endTakeOver
4703 endTakeOver(takeOverPtr.i);
4704 break;
4705 case TakeOverRecord::COPY_ACTIVE:
4706 ok = true;
4707 jam();
4708 //-----------------------------------------------------------------------
4709 // In this we are waiting for a signal from the starting node. Thus we
4710 // can release the take over record and end the process.
4711 //-----------------------------------------------------------------------
4712 endTakeOver(takeOverPtr.i);
4713 break;
4714 case TakeOverRecord::WAIT_LCP:
4715 ok = true;
4716 jam();
4717 //-----------------------------------------------------------------------
4718 //-----------------------------------------------------------------------
4719 endTakeOver(takeOverPtr.i);
4720 break;
4722 case TakeOverRecord::STARTING_LOCAL_FRAGMENTS:
4723 ok = true;
4724 jam();
4725 endTakeOver(takeOverPtr.i);
4726 break;
4729 * The following are states that it should not be possible to "be" in
4731 case TakeOverRecord::SELECTING_NEXT:
4732 jam();
4733 case TakeOverRecord::TO_COPY_COMPLETED:
4734 jam();
4735 ndbrequire(false);
4737 if(!ok){
4738 jamLine(takeOverPtr.p->toSlaveStatus);
4739 ndbrequire(ok);
4741 }//Dbdih::checkTakeOverInMasterStartNodeFailure()
4743 void Dbdih::checkTakeOverInNonMasterStartNodeFailure(Signal* signal,
4744 Uint32 takeOverPtrI)
4746 jam();
4747 if (takeOverPtrI == RNIL) {
4748 jam();
4749 return;
4751 //-----------------------------------------------------------------------
4752 // We are not master and not taking over as master. A take over was ongoing
4753 // but the starting node has now failed. Handle it according to the state
4754 // of the take over.
4755 //-----------------------------------------------------------------------
4756 TakeOverRecordPtr takeOverPtr;
4757 takeOverPtr.i = takeOverPtrI;
4758 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
4759 bool ok = false;
4760 switch (takeOverPtr.p->toSlaveStatus) {
4761 case TakeOverRecord::TO_SLAVE_IDLE:
4762 ndbrequire(false);
4763 break;
4764 case TakeOverRecord::TO_SLAVE_STARTED:
4765 jam();
4766 case TakeOverRecord::TO_SLAVE_CREATE_PREPARE:
4767 jam();
4768 case TakeOverRecord::TO_SLAVE_COPY_FRAG_COMPLETED:
4769 jam();
4770 case TakeOverRecord::TO_SLAVE_CREATE_COMMIT:
4771 jam();
4772 case TakeOverRecord::TO_SLAVE_COPY_COMPLETED:
4773 jam();
4774 ok = true;
4775 endTakeOver(takeOverPtr.i);
4776 break;
4777 }//switch
4778 if(!ok){
4779 jamLine(takeOverPtr.p->toSlaveStatus);
4780 ndbrequire(ok);
4782 }//Dbdih::checkTakeOverInNonMasterStartNodeFailure()
4784 void Dbdih::failedNodeSynchHandling(Signal* signal,
4785 NodeRecordPtr failedNodePtr)
4787 jam();
4788 /*----------------------------------------------------*/
4789 /* INITIALISE THE VARIABLES THAT KEEP TRACK OF */
4790 /* WHEN A NODE FAILURE IS COMPLETED. */
4791 /*----------------------------------------------------*/
4792 failedNodePtr.p->dbdictFailCompleted = ZFALSE;
4793 failedNodePtr.p->dbtcFailCompleted = ZFALSE;
4794 failedNodePtr.p->dbdihFailCompleted = ZFALSE;
4795 failedNodePtr.p->dblqhFailCompleted = ZFALSE;
4797 failedNodePtr.p->m_NF_COMPLETE_REP.clearWaitingFor();
4799 NodeRecordPtr nodePtr;
4800 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4801 ptrAss(nodePtr, nodeRecord);
4802 if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
4803 jam();
4805 * We'r waiting for nodePtr.i to complete
4806 * handling of failedNodePtr.i's death
4809 failedNodePtr.p->m_NF_COMPLETE_REP.setWaitingFor(nodePtr.i);
4810 } else {
4811 jam();
4812 if ((nodePtr.p->nodeStatus == NodeRecord::DYING) &&
4813 (nodePtr.p->m_NF_COMPLETE_REP.isWaitingFor(failedNodePtr.i))){
4814 jam();
4815 /*----------------------------------------------------*/
4816 /* THE NODE FAILED BEFORE REPORTING THE FAILURE */
4817 /* HANDLING COMPLETED ON THIS FAILED NODE. */
4818 /* REPORT THAT NODE FAILURE HANDLING WAS */
4819 /* COMPLETED ON THE NEW FAILED NODE FOR THIS */
4820 /* PARTICULAR OLD FAILED NODE. */
4821 /*----------------------------------------------------*/
4822 NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0];
4823 nf->blockNo = 0;
4824 nf->nodeId = failedNodePtr.i;
4825 nf->failedNodeId = nodePtr.i;
4826 nf->from = __LINE__;
4827 sendSignal(reference(), GSN_NF_COMPLETEREP, signal,
4828 NFCompleteRep::SignalLength, JBB);
4829 }//if
4830 }//if
4831 }//for
4832 if (failedNodePtr.p->nodeStatus == NodeRecord::DIED_NOW) {
4833 jam();
4834 failedNodePtr.p->nodeStatus = NodeRecord::DYING;
4835 } else {
4836 jam();
4837 /*----------------------------------------------------*/
4838 // No more processing needed when node not even started
4839 // yet. We give the node status to DEAD since we do not
4840 // care whether all nodes complete the node failure
4841 // handling. The node have not been included in the
4842 // node failure protocols.
4843 /*----------------------------------------------------*/
4844 failedNodePtr.p->nodeStatus = NodeRecord::DEAD;
4845 /**-----------------------------------------------------------------------
4846 * WE HAVE COMPLETED HANDLING THE NODE FAILURE IN DIH. WE CAN REPORT THIS
4847 * TO DIH THAT WAIT FOR THE OTHER BLOCKS TO BE CONCLUDED AS WELL.
4848 *-----------------------------------------------------------------------*/
4849 NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0];
4850 nf->blockNo = DBDIH;
4851 nf->nodeId = cownNodeId;
4852 nf->failedNodeId = failedNodePtr.i;
4853 nf->from = __LINE__;
4854 sendSignal(reference(), GSN_NF_COMPLETEREP, signal,
4855 NFCompleteRep::SignalLength, JBB);
4856 }//if
4857 }//Dbdih::failedNodeSynchHandling()
4859 Uint32 Dbdih::findTakeOver(Uint32 failedNodeId)
4861 for (Uint32 i = 0; i < MAX_NDB_NODES; i++) {
4862 jam();
4863 TakeOverRecordPtr takeOverPtr;
4864 takeOverPtr.i = i;
4865 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
4866 if (takeOverPtr.p->toStartingNode == failedNodeId) {
4867 jam();
4868 return i;
4869 }//if
4870 }//for
4871 return RNIL;
4872 }//Dbdih::findTakeOver()
4874 Uint32 Dbdih::getStartNode(Uint32 takeOverPtrI)
4876 TakeOverRecordPtr takeOverPtr;
4877 takeOverPtr.i = takeOverPtrI;
4878 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
4879 return takeOverPtr.p->toStartingNode;
4880 }//Dbdih::getStartNode()
// Local-checkpoint (LCP) related handling of a failed node.
//
// Steps visible in this function:
//  1. On the master only, and only if the failed node is in
//     c_lcpState.m_participatingLQH, its activeStatus is moved one step:
//     NS_Active -> NS_ActiveMissed_1 -> NS_ActiveMissed_2 ->
//     NS_NotActive_NotTakenOver, and NS_TakeOver -> NS_NotActive_NotTakenOver.
//     Any other status is logged and is fatal.
//  2. The failed node is removed from the participating DIH and LQH sets.
//  3. For each LCP related counter that is still waiting for the failed node,
//     the corresponding reply is built in signal->theData with the failed
//     node as sender and sent to our own block reference.
//
// NOTE(review): this listing has lost its brace-only lines, so whether the
// m_LAST_LCP_FRAG_ORD check is nested inside the preceding LQH check cannot
// be determined from here; confirm against the upstream file before
// restructuring.
//
// signal        - signal whose data area is reused for every faked reply
// failedNodePtr - node record of the node that has failed
4882 void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr)
4884 jam();
// nodeId and failedNodePtr.i are the same value and are used interchangeably.
4885 const Uint32 nodeId = failedNodePtr.i;
4887 if (isMaster() && c_lcpState.m_participatingLQH.get(failedNodePtr.i))
4889 /*----------------------------------------------------*/
4890 /* THE NODE WAS INVOLVED IN A LOCAL CHECKPOINT. WE */
4891 /* MUST UPDATE THE ACTIVE STATUS TO INDICATE THAT */
4892 /* THE NODE HAVE MISSED A LOCAL CHECKPOINT. */
4893 /*----------------------------------------------------*/
4896 * Bug#28717, Only master should do this, as this status is copied
4897 * to other nodes
4899 switch (failedNodePtr.p->activeStatus) {
4900 case Sysfile::NS_Active:
4901 jam();
4902 failedNodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
4903 break;
4904 case Sysfile::NS_ActiveMissed_1:
4905 jam();
4906 failedNodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_2;
4907 break;
4908 case Sysfile::NS_ActiveMissed_2:
4909 jam();
4910 failedNodePtr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
4911 break;
4912 case Sysfile::NS_TakeOver:
4913 jam();
4914 failedNodePtr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
4915 break;
4916 default:
// Unexpected status: log it with the node id, then fail hard.
4917 g_eventLogger.error("activeStatus = %u "
4918 "at failure after NODE_FAILREP of node = %u",
4919 (Uint32) failedNodePtr.p->activeStatus,
4920 failedNodePtr.i);
4921 ndbrequire(false);
4922 break;
4923 }//switch
4924 }//if
// The failed node no longer takes part in the LCP, neither as DIH nor as LQH.
4926 c_lcpState.m_participatingDIH.clear(failedNodePtr.i);
4927 c_lcpState.m_participatingLQH.clear(failedNodePtr.i);
// Still waiting for LCP_COMPLETE_REP from the failed node's DIH:
// send one to ourselves with blockNo = DBDIH on its behalf.
4929 if(c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.isWaitingFor(failedNodePtr.i)){
4930 jam();
4931 LcpCompleteRep * rep = (LcpCompleteRep*)signal->getDataPtrSend();
4932 rep->nodeId = failedNodePtr.i;
4933 rep->lcpId = SYSFILE->latestLCP_ID;
4934 rep->blockNo = DBDIH;
4935 sendSignal(reference(), GSN_LCP_COMPLETE_REP, signal,
4936 LcpCompleteRep::SignalLength, JBB);
4940 * Check if we'r waiting for the failed node's LQH to complete
4942 * Note that this is ran "before" LCP master take over
// Same as above, but for the failed node's LQH (blockNo = DBLQH).
4944 if(c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.isWaitingFor(nodeId)){
4945 jam();
4947 LcpCompleteRep * rep = (LcpCompleteRep*)signal->getDataPtrSend();
4948 rep->nodeId = nodeId;
4949 rep->lcpId = SYSFILE->latestLCP_ID;
4950 rep->blockNo = DBLQH;
4951 sendSignal(reference(), GSN_LCP_COMPLETE_REP, signal,
4952 LcpCompleteRep::SignalLength, JBB);
// Here the counter entry is cleared directly; no signal is sent.
4954 if(c_lcpState.m_LAST_LCP_FRAG_ORD.isWaitingFor(nodeId)){
4955 jam();
4957 * Make sure we're ready to accept it
4959 c_lcpState.m_LAST_LCP_FRAG_ORD.clearWaitingFor(nodeId);
// Faked TCGETOPSIZECONF: word 0 is the failed node id, word 1 is 0.
4963 if (c_TCGETOPSIZEREQ_Counter.isWaitingFor(failedNodePtr.i)) {
4964 jam();
4965 signal->theData[0] = failedNodePtr.i;
4966 signal->theData[1] = 0;
4967 sendSignal(reference(), GSN_TCGETOPSIZECONF, signal, 2, JBB);
4968 }//if
// Faked TC_CLOPSIZECONF carrying only the failed node id.
4970 if (c_TC_CLOPSIZEREQ_Counter.isWaitingFor(failedNodePtr.i)) {
4971 jam();
4972 signal->theData[0] = failedNodePtr.i;
4973 sendSignal(reference(), GSN_TC_CLOPSIZECONF, signal, 1, JBB);
4974 }//if
// Faked START_LCP_CONF; senderRef is the DBLQH block reference of the failed node.
4976 if (c_START_LCP_REQ_Counter.isWaitingFor(failedNodePtr.i)) {
4977 jam();
4978 StartLcpConf * conf = (StartLcpConf*)signal->getDataPtrSend();
4979 conf->senderRef = numberToRef(DBLQH, failedNodePtr.i);
4980 conf->lcpId = SYSFILE->latestLCP_ID;
4981 sendSignal(reference(), GSN_START_LCP_CONF, signal,
4982 StartLcpConf::SignalLength, JBB);
4983 }//if
// Faked EMPTY_LCP_CONF: idle = true, tableId and fragmentId set to ~0, lcpNo 0.
4985 if (c_EMPTY_LCP_REQ_Counter.isWaitingFor(failedNodePtr.i)) {
4986 jam();
4987 EmptyLcpConf * const rep = (EmptyLcpConf *)&signal->theData[0];
4988 rep->senderNodeId = failedNodePtr.i;
4989 rep->tableId = ~0;
4990 rep->fragmentId = ~0;
4991 rep->lcpNo = 0;
4992 rep->lcpId = SYSFILE->latestLCP_ID;
4993 rep->idle = true;
4994 sendSignal(reference(), GSN_EMPTY_LCP_CONF, signal,
4995 EmptyLcpConf::SignalLength, JBB);
4996 }//if
// Faked MASTER_LCPREF: sender is the failed node, failedNodeId is cmasterTakeOverNode.
4998 if (c_MASTER_LCPREQ_Counter.isWaitingFor(failedNodePtr.i)) {
4999 jam();
5000 MasterLCPRef * const ref = (MasterLCPRef *)&signal->theData[0];
5001 ref->senderNodeId = failedNodePtr.i;
5002 ref->failedNodeId = cmasterTakeOverNode;
5003 sendSignal(reference(), GSN_MASTER_LCPREF, signal,
5004 MasterLCPRef::SignalLength, JBB);
5005 }//if
5007 }//Dbdih::failedNodeLcpHandling()
5009 void Dbdih::checkGcpOutstanding(Signal* signal, Uint32 failedNodeId){
5010 if (c_GCP_PREPARE_Counter.isWaitingFor(failedNodeId)){
5011 jam();
5012 signal->theData[0] = failedNodeId;
5013 signal->theData[1] = cnewgcp;
5014 sendSignal(reference(), GSN_GCP_PREPARECONF, signal, 2, JBB);
5015 }//if
5017 if (c_GCP_COMMIT_Counter.isWaitingFor(failedNodeId)) {
5018 jam();
5019 signal->theData[0] = failedNodeId;
5020 signal->theData[1] = coldgcp;
5021 signal->theData[2] = cfailurenr;
5022 sendSignal(reference(), GSN_GCP_NODEFINISH, signal, 3, JBB);
5023 }//if
5025 if (c_GCP_SAVEREQ_Counter.isWaitingFor(failedNodeId)) {
5026 jam();
5027 GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0];
5028 saveRef->dihPtr = failedNodeId;
5029 saveRef->nodeId = failedNodeId;
5030 saveRef->gci = coldgcp;
5031 saveRef->errorCode = GCPSaveRef::FakedSignalDueToNodeFailure;
5032 sendSignal(reference(), GSN_GCP_SAVEREF, signal,
5033 GCPSaveRef::SignalLength, JBB);
5034 }//if
5036 if (c_COPY_GCIREQ_Counter.isWaitingFor(failedNodeId)) {
5037 jam();
5038 signal->theData[0] = failedNodeId;
5039 sendSignal(reference(), GSN_COPY_GCICONF, signal, 1, JBB);
5040 }//if
5042 if (c_MASTER_GCPREQ_Counter.isWaitingFor(failedNodeId)){
5043 jam();
5044 MasterGCPRef * const ref = (MasterGCPRef *)&signal->theData[0];
5045 ref->senderNodeId = failedNodeId;
5046 ref->failedNodeId = cmasterTakeOverNode;
5047 sendSignal(reference(), GSN_MASTER_GCPREF, signal,
5048 MasterGCPRef::SignalLength, JBB);
5049 }//if
5050 }//Dbdih::handleGcpStateInMaster()
5053 void
5054 Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
5055 jam();
5057 Uint32 oldNode = c_lcpMasterTakeOverState.failedNodeId;
5059 c_lcpMasterTakeOverState.minTableId = ~0;
5060 c_lcpMasterTakeOverState.minFragId = ~0;
5061 c_lcpMasterTakeOverState.failedNodeId = nodeId;
5063 c_lcpMasterTakeOverState.set(LMTOS_WAIT_EMPTY_LCP, __LINE__);
5065 if(c_EMPTY_LCP_REQ_Counter.done()){
5066 jam();
5067 c_lcpState.m_LAST_LCP_FRAG_ORD.clearWaitingFor();
5069 EmptyLcpReq* req = (EmptyLcpReq*)signal->getDataPtrSend();
5070 req->senderRef = reference();
5071 sendLoopMacro(EMPTY_LCP_REQ, sendEMPTY_LCP_REQ);
5072 ndbrequire(!c_EMPTY_LCP_REQ_Counter.done());
5073 } else {
5075 * Node failure during master take over...
5077 g_eventLogger.info("Nodefail during master take over (old: %d)", oldNode);
5080 NodeRecordPtr nodePtr;
5081 nodePtr.i = oldNode;
5082 if (oldNode > 0 && oldNode < MAX_NDB_NODES)
5084 jam();
5085 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
5086 if (nodePtr.p->m_nodefailSteps.get(NF_LCP_TAKE_OVER))
5088 jam();
5089 checkLocalNodefailComplete(signal, oldNode, NF_LCP_TAKE_OVER);
5093 setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER);
5096 void Dbdih::startGcpMasterTakeOver(Signal* signal, Uint32 oldMasterId){
5097 jam();
5098 /*--------------------------------------------------*/
5099 /* */
5100 /* THE MASTER HAVE FAILED AND WE WERE ELECTED */
5101 /* TO BE THE NEW MASTER NODE. WE NEED TO QUERY*/
5102 /* ALL THE OTHER NODES ABOUT THEIR STATUS IN */
5103 /* ORDER TO BE ABLE TO TAKE OVER CONTROL OF */
5104 /* THE GLOBAL CHECKPOINT PROTOCOL AND THE */
5105 /* LOCAL CHECKPOINT PROTOCOL. */
5106 /*--------------------------------------------------*/
5107 if(!isMaster()){
5108 jam();
5109 return;
5111 cmasterState = MASTER_TAKE_OVER_GCP;
5112 cmasterTakeOverNode = oldMasterId;
5113 MasterGCPReq * const req = (MasterGCPReq *)&signal->theData[0];
5114 req->masterRef = reference();
5115 req->failedNodeId = oldMasterId;
5116 sendLoopMacro(MASTER_GCPREQ, sendMASTER_GCPREQ);
5117 cgcpMasterTakeOverState = GMTOS_INITIAL;
5119 signal->theData[0] = NDB_LE_GCP_TakeoverStarted;
5120 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
5122 setLocalNodefailHandling(signal, oldMasterId, NF_GCP_TAKE_OVER);
5123 }//Dbdih::handleNewMaster()
5125 void Dbdih::handleTakeOverNewMaster(Signal* signal, Uint32 takeOverPtrI)
5127 jam();
5128 if (takeOverPtrI != RNIL) {
5129 jam();
5130 TakeOverRecordPtr takeOverPtr;
5131 takeOverPtr.i = takeOverPtrI;
5132 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
5133 bool ok = false;
5134 switch (takeOverPtr.p->toSlaveStatus) {
5135 case TakeOverRecord::TO_SLAVE_IDLE:
5136 ndbrequire(false);
5137 break;
5138 case TakeOverRecord::TO_SLAVE_STARTED:
5139 jam();
5140 case TakeOverRecord::TO_SLAVE_CREATE_PREPARE:
5141 jam();
5142 case TakeOverRecord::TO_SLAVE_COPY_FRAG_COMPLETED:
5143 jam();
5144 case TakeOverRecord::TO_SLAVE_CREATE_COMMIT:
5145 jam();
5146 ok = true;
5147 infoEvent("Unhandled MasterTO of TO slaveStatus=%d killing node %d",
5148 takeOverPtr.p->toSlaveStatus,
5149 takeOverPtr.p->toStartingNode);
5150 takeOverPtr.p->toMasterStatus = TakeOverRecord::COPY_ACTIVE;
5153 BlockReference cntrRef = calcNdbCntrBlockRef(takeOverPtr.p->toStartingNode);
5154 SystemError * const sysErr = (SystemError*)&signal->theData[0];
5155 sysErr->errorCode = SystemError::CopyFragRefError;
5156 sysErr->errorRef = reference();
5157 sysErr->data1= 0;
5158 sysErr->data2= __LINE__;
5159 sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal,
5160 SystemError::SignalLength, JBB);
5162 break;
5163 case TakeOverRecord::TO_SLAVE_COPY_COMPLETED:
5164 ok = true;
5165 jam();
5166 takeOverPtr.p->toMasterStatus = TakeOverRecord::WAIT_LCP;
5167 break;
5169 ndbrequire(ok);
5170 endTakeOver(takeOverPtr.i);
5171 }//if
5172 }//Dbdih::handleTakeOverNewMaster()
5174 void Dbdih::startRemoveFailedNode(Signal* signal, NodeRecordPtr failedNodePtr)
5176 Uint32 nodeId = failedNodePtr.i;
5177 if(failedNodePtr.p->nodeStatus != NodeRecord::DIED_NOW){
5178 jam();
5180 * Is node isn't alive. It can't be part of LCP
5182 ndbrequire(!c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.isWaitingFor(nodeId));
5185 * And there is no point in removing any replicas
5186 * It's dead...
5188 return;
5192 * If node has node complete LCP
5193 * we need to remove it as undo might not be complete
5194 * bug#31257
5196 failedNodePtr.p->m_remove_node_from_table_lcp_id = RNIL;
5197 if (c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.isWaitingFor(failedNodePtr.i))
5199 jam();
5200 failedNodePtr.p->m_remove_node_from_table_lcp_id = SYSFILE->latestLCP_ID;
5203 jam();
5205 if (!ERROR_INSERTED(7194))
5207 signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
5208 signal->theData[1] = failedNodePtr.i;
5209 signal->theData[2] = 0; // Tab id
5210 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
5212 else
5214 ndbout_c("7194 Not starting ZREMOVE_NODE_FROM_TABLE");
5217 setLocalNodefailHandling(signal, failedNodePtr.i, NF_REMOVE_NODE_FROM_TABLE);
5218 }//Dbdih::startRemoveFailedNode()
5220 /*--------------------------------------------------*/
5221 /* THE MASTER HAS FAILED AND THE NEW MASTER IS*/
5222 /* QUERYING THIS NODE ABOUT THE STATE OF THE */
5223 /* GLOBAL CHECKPOINT PROTOCOL */
5224 /*--------------------------------------------------*/
5225 void Dbdih::execMASTER_GCPREQ(Signal* signal)
5227 NodeRecordPtr failedNodePtr;
5228 MasterGCPReq * const masterGCPReq = (MasterGCPReq *)&signal->theData[0];
5229 jamEntry();
5230 const BlockReference newMasterBlockref = masterGCPReq->masterRef;
5231 const Uint32 failedNodeId = masterGCPReq->failedNodeId;
5232 if (c_copyGCISlave.m_copyReason != CopyGCIReq::IDLE) {
5233 jam();
5234 /*--------------------------------------------------*/
5235 /* WE ARE CURRENTLY WRITING THE RESTART INFO */
5236 /* IN THIS NODE. SINCE ONLY ONE PROCESS IS */
5237 /* ALLOWED TO DO THIS AT A TIME WE MUST ENSURE*/
5238 /* THAT THIS IS NOT ONGOING WHEN THE NEW */
5239 /* MASTER TAKES OVER CONTROL. IF NOT ALL NODES*/
5240 /* RECEIVE THE SAME RESTART INFO DUE TO THE */
5241 /* FAILURE OF THE MASTER IT IS TAKEN CARE OF */
5242 /* BY THE NEW MASTER. */
5243 /*--------------------------------------------------*/
5244 sendSignalWithDelay(reference(), GSN_MASTER_GCPREQ,
5245 signal, 10, MasterGCPReq::SignalLength);
5246 return;
5247 }//if
5248 failedNodePtr.i = failedNodeId;
5249 ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
5250 if (failedNodePtr.p->nodeStatus == NodeRecord::ALIVE) {
5251 jam();
5252 /*--------------------------------------------------*/
5253 /* ENSURE THAT WE HAVE PROCESSED THE SIGNAL */
5254 /* NODE_FAILURE BEFORE WE PROCESS THIS REQUEST*/
5255 /* FROM THE NEW MASTER. THIS ENSURES THAT WE */
5256 /* HAVE REMOVED THE FAILED NODE FROM THE LIST */
5257 /* OF ACTIVE NODES AND SO FORTH. */
5258 /*--------------------------------------------------*/
5259 sendSignalWithDelay(reference(), GSN_MASTER_GCPREQ,
5260 signal, 10, MasterGCPReq::SignalLength);
5261 return;
5262 } else {
5263 ndbrequire(failedNodePtr.p->nodeStatus == NodeRecord::DYING);
5264 }//if
5266 if (ERROR_INSERTED(7181))
5268 ndbout_c("execGCP_TCFINISHED in MASTER_GCPREQ");
5269 CLEAR_ERROR_INSERT_VALUE;
5270 signal->theData[0] = c_error_7181_ref;
5271 signal->theData[1] = coldgcp;
5272 execGCP_TCFINISHED(signal);
5275 MasterGCPConf::State gcpState;
5276 switch (cgcpParticipantState) {
5277 case GCP_PARTICIPANT_READY:
5278 jam();
5279 /*--------------------------------------------------*/
5280 /* THE GLOBAL CHECKPOINT IS NOT ACTIVE SINCE */
5281 /* THE PREVIOUS GLOBAL CHECKPOINT IS COMPLETED*/
5282 /* AND THE NEW HAVE NOT STARTED YET. */
5283 /*--------------------------------------------------*/
5284 gcpState = MasterGCPConf::GCP_READY;
5285 break;
5286 case GCP_PARTICIPANT_PREPARE_RECEIVED:
5287 jam();
5288 /*--------------------------------------------------*/
5289 /* GCP_PREPARE HAVE BEEN RECEIVED AND RESPONSE*/
5290 /* HAVE BEEN SENT. */
5291 /*--------------------------------------------------*/
5292 gcpState = MasterGCPConf::GCP_PREPARE_RECEIVED;
5293 break;
5294 case GCP_PARTICIPANT_COMMIT_RECEIVED:
5295 jam();
5296 /*------------------------------------------------*/
5297 /* GCP_COMMIT HAVE BEEN RECEIVED BUT NOT YET*/
5298 /* GCP_TCFINISHED FROM LOCAL TC. */
5299 /*------------------------------------------------*/
5300 gcpState = MasterGCPConf::GCP_COMMIT_RECEIVED;
5301 break;
5302 case GCP_PARTICIPANT_TC_FINISHED:
5303 jam();
5304 /*------------------------------------------------*/
5305 /* GCP_COMMIT HAS BEEN RECEIVED AND ALSO */
5306 /* GCP_TCFINISHED HAVE BEEN RECEIVED. */
5307 /*------------------------------------------------*/
5308 gcpState = MasterGCPConf::GCP_TC_FINISHED;
5309 break;
5310 case GCP_PARTICIPANT_COPY_GCI_RECEIVED:
5311 /*--------------------------------------------------*/
5312 /* COPY RESTART INFORMATION HAS BEEN RECEIVED */
5313 /* BUT NOT YET COMPLETED. */
5314 /*--------------------------------------------------*/
5315 ndbrequire(false);
5316 gcpState= MasterGCPConf::GCP_READY; // remove warning
5317 break;
5318 default:
5319 /*------------------------------------------------*/
5320 /* */
5321 /* THIS SHOULD NOT OCCUR SINCE THE ABOVE */
5322 /* STATES ARE THE ONLY POSSIBLE STATES AT A */
5323 /* NODE WHICH WAS NOT A MASTER NODE. */
5324 /*------------------------------------------------*/
5325 ndbrequire(false);
5326 gcpState= MasterGCPConf::GCP_READY; // remove warning
5327 break;
5328 }//switch
5329 MasterGCPConf * const masterGCPConf = (MasterGCPConf *)&signal->theData[0];
5330 masterGCPConf->gcpState = gcpState;
5331 masterGCPConf->senderNodeId = cownNodeId;
5332 masterGCPConf->failedNodeId = failedNodeId;
5333 masterGCPConf->newGCP = cnewgcp;
5334 masterGCPConf->latestLCP = SYSFILE->latestLCP_ID;
5335 masterGCPConf->oldestRestorableGCI = SYSFILE->oldestRestorableGCI;
5336 masterGCPConf->keepGCI = SYSFILE->keepGCI;
5337 for(Uint32 i = 0; i < NdbNodeBitmask::Size; i++)
5338 masterGCPConf->lcpActive[i] = SYSFILE->lcpActive[i];
5339 sendSignal(newMasterBlockref, GSN_MASTER_GCPCONF, signal,
5340 MasterGCPConf::SignalLength, JBB);
5342 if (ERROR_INSERTED(7182))
5344 ndbout_c("execGCP_TCFINISHED in MASTER_GCPREQ");
5345 CLEAR_ERROR_INSERT_VALUE;
5346 signal->theData[0] = c_error_7181_ref;
5347 signal->theData[1] = coldgcp;
5348 execGCP_TCFINISHED(signal);
5350 }//Dbdih::execMASTER_GCPREQ()
5352 void Dbdih::execMASTER_GCPCONF(Signal* signal)
5354 NodeRecordPtr senderNodePtr;
5355 MasterGCPConf * const masterGCPConf = (MasterGCPConf *)&signal->theData[0];
5356 jamEntry();
5357 senderNodePtr.i = masterGCPConf->senderNodeId;
5358 ptrCheckGuard(senderNodePtr, MAX_NDB_NODES, nodeRecord);
5360 MasterGCPConf::State gcpState = (MasterGCPConf::State)masterGCPConf->gcpState;
5361 const Uint32 failedNodeId = masterGCPConf->failedNodeId;
5362 const Uint32 newGcp = masterGCPConf->newGCP;
5363 const Uint32 latestLcpId = masterGCPConf->latestLCP;
5364 const Uint32 oldestRestorableGci = masterGCPConf->oldestRestorableGCI;
5365 const Uint32 oldestKeepGci = masterGCPConf->keepGCI;
5366 if (latestLcpId > SYSFILE->latestLCP_ID) {
5367 jam();
5368 #if 0
5369 g_eventLogger.info("Dbdih: Setting SYSFILE->latestLCP_ID to %d",
5370 latestLcpId);
5371 SYSFILE->latestLCP_ID = latestLcpId;
5372 #endif
5373 SYSFILE->keepGCI = oldestKeepGci;
5374 SYSFILE->oldestRestorableGCI = oldestRestorableGci;
5375 for(Uint32 i = 0; i < NdbNodeBitmask::Size; i++)
5376 SYSFILE->lcpActive[i] = masterGCPConf->lcpActive[i];
5377 }//if
5378 switch (gcpState) {
5379 case MasterGCPConf::GCP_READY:
5380 jam();
5381 senderNodePtr.p->gcpstate = NodeRecord::READY;
5382 break;
5383 case MasterGCPConf::GCP_PREPARE_RECEIVED:
5384 jam();
5385 senderNodePtr.p->gcpstate = NodeRecord::PREPARE_RECEIVED;
5386 cnewgcp = newGcp;
5387 break;
5388 case MasterGCPConf::GCP_COMMIT_RECEIVED:
5389 jam();
5390 senderNodePtr.p->gcpstate = NodeRecord::COMMIT_SENT;
5391 break;
5392 case MasterGCPConf::GCP_TC_FINISHED:
5393 jam();
5394 senderNodePtr.p->gcpstate = NodeRecord::NODE_FINISHED;
5395 break;
5396 default:
5397 ndbrequire(false);
5398 break;
5399 }//switch
5400 switch (cgcpMasterTakeOverState) {
5401 case GMTOS_INITIAL:
5402 switch (gcpState) {
5403 case MasterGCPConf::GCP_READY:
5404 jam();
5405 cgcpMasterTakeOverState = ALL_READY;
5406 break;
5407 case MasterGCPConf::GCP_PREPARE_RECEIVED:
5408 jam();
5409 cgcpMasterTakeOverState = ALL_PREPARED;
5410 break;
5411 case MasterGCPConf::GCP_COMMIT_RECEIVED:
5412 jam();
5413 cgcpMasterTakeOverState = COMMIT_STARTED_NOT_COMPLETED;
5414 break;
5415 case MasterGCPConf::GCP_TC_FINISHED:
5416 jam();
5417 cgcpMasterTakeOverState = COMMIT_COMPLETED;
5418 break;
5419 default:
5420 ndbrequire(false);
5421 break;
5422 }//switch
5423 break;
5424 case ALL_READY:
5425 switch (gcpState) {
5426 case MasterGCPConf::GCP_READY:
5427 jam();
5428 /*empty*/;
5429 break;
5430 case MasterGCPConf::GCP_PREPARE_RECEIVED:
5431 jam();
5432 cgcpMasterTakeOverState = PREPARE_STARTED_NOT_COMMITTED;
5433 break;
5434 case MasterGCPConf::GCP_COMMIT_RECEIVED:
5435 ndbrequire(false);
5436 break;
5437 case MasterGCPConf::GCP_TC_FINISHED:
5438 jam();
5439 cgcpMasterTakeOverState = SAVE_STARTED_NOT_COMPLETED;
5440 break;
5441 default:
5442 ndbrequire(false);
5443 break;
5444 }//switch
5445 break;
5446 case PREPARE_STARTED_NOT_COMMITTED:
5447 switch (gcpState) {
5448 case MasterGCPConf::GCP_READY:
5449 jam();
5450 break;
5451 case MasterGCPConf::GCP_PREPARE_RECEIVED:
5452 jam();
5453 break;
5454 case MasterGCPConf::GCP_COMMIT_RECEIVED:
5455 ndbrequire(false);
5456 break;
5457 case MasterGCPConf::GCP_TC_FINISHED:
5458 ndbrequire(false);
5459 break;
5460 default:
5461 ndbrequire(false);
5462 break;
5463 }//switch
5464 break;
5465 case ALL_PREPARED:
5466 switch (gcpState) {
5467 case MasterGCPConf::GCP_READY:
5468 jam();
5469 cgcpMasterTakeOverState = PREPARE_STARTED_NOT_COMMITTED;
5470 break;
5471 case MasterGCPConf::GCP_PREPARE_RECEIVED:
5472 jam();
5473 break;
5474 case MasterGCPConf::GCP_COMMIT_RECEIVED:
5475 jam();
5476 cgcpMasterTakeOverState = COMMIT_STARTED_NOT_COMPLETED;
5477 break;
5478 case MasterGCPConf::GCP_TC_FINISHED:
5479 jam();
5480 cgcpMasterTakeOverState = COMMIT_STARTED_NOT_COMPLETED;
5481 break;
5482 default:
5483 ndbrequire(false);
5484 break;
5485 }//switch
5486 break;
5487 case COMMIT_STARTED_NOT_COMPLETED:
5488 switch (gcpState) {
5489 case MasterGCPConf::GCP_READY:
5490 ndbrequire(false);
5491 break;
5492 case MasterGCPConf::GCP_PREPARE_RECEIVED:
5493 jam();
5494 break;
5495 case MasterGCPConf::GCP_COMMIT_RECEIVED:
5496 jam();
5497 break;
5498 case MasterGCPConf::GCP_TC_FINISHED:
5499 jam();
5500 break;
5501 default:
5502 ndbrequire(false);
5503 break;
5504 }//switch
5505 break;
5506 case COMMIT_COMPLETED:
5507 switch (gcpState) {
5508 case MasterGCPConf::GCP_READY:
5509 cgcpMasterTakeOverState = SAVE_STARTED_NOT_COMPLETED;
5510 break;
5511 case MasterGCPConf::GCP_PREPARE_RECEIVED:
5512 jam();
5513 cgcpMasterTakeOverState = COMMIT_STARTED_NOT_COMPLETED;
5514 break;
5515 case MasterGCPConf::GCP_COMMIT_RECEIVED:
5516 jam();
5517 cgcpMasterTakeOverState = COMMIT_STARTED_NOT_COMPLETED;
5518 break;
5519 case MasterGCPConf::GCP_TC_FINISHED:
5520 jam();
5521 break;
5522 default:
5523 ndbrequire(false);
5524 break;
5525 }//switch
5526 break;
5527 case SAVE_STARTED_NOT_COMPLETED:
5528 switch (gcpState) {
5529 case MasterGCPConf::GCP_READY:
5530 jam();
5531 break;
5532 case MasterGCPConf::GCP_PREPARE_RECEIVED:
5533 ndbrequire(false);
5534 break;
5535 case MasterGCPConf::GCP_COMMIT_RECEIVED:
5536 ndbrequire(false);
5537 break;
5538 case MasterGCPConf::GCP_TC_FINISHED:
5539 jam();
5540 break;
5541 default:
5542 ndbrequire(false);
5543 break;
5544 }//switch
5545 break;
5546 default:
5547 ndbrequire(false);
5548 break;
5549 }//switch
5550 receiveLoopMacro(MASTER_GCPREQ, senderNodePtr.i);
5551 /*-------------------------------------------------------------------------*/
5552 // We have now received all responses and are ready to take over the GCP
5553 // protocol as master.
5554 /*-------------------------------------------------------------------------*/
5555 MASTER_GCPhandling(signal, failedNodeId);
5556 return;
5557 }//Dbdih::execMASTER_GCPCONF()
5559 void Dbdih::execMASTER_GCPREF(Signal* signal)
5561 const MasterGCPRef * const ref = (MasterGCPRef *)&signal->theData[0];
5562 jamEntry();
5563 receiveLoopMacro(MASTER_GCPREQ, ref->senderNodeId);
5564 /*-------------------------------------------------------------------------*/
5565 // We have now received all responses and are ready to take over the GCP
5566 // protocol as master.
5567 /*-------------------------------------------------------------------------*/
5568 MASTER_GCPhandling(signal, ref->failedNodeId);
5569 }//Dbdih::execMASTER_GCPREF()
5571 void Dbdih::MASTER_GCPhandling(Signal* signal, Uint32 failedNodeId)
5573 NodeRecordPtr failedNodePtr;
5574 cmasterState = MASTER_ACTIVE;
5575 /*----------------------------------------------------------*/
5576 /* REMOVE ALL ACTIVE STATUS ON ALREADY FAILED NODES */
5577 /* THIS IS PERFORMED HERE SINCE WE GET THE LCP ACTIVE */
5578 /* STATUS AS PART OF THE COPY RESTART INFO AND THIS IS*/
5579 /* HANDLED BY THE MASTER GCP TAKE OVER PROTOCOL. */
5580 /*----------------------------------------------------------*/
5582 failedNodePtr.i = failedNodeId;
5583 ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
5584 switch (cgcpMasterTakeOverState) {
5585 case ALL_READY:
5586 jam();
5587 startGcp(signal);
5588 break;
5589 case PREPARE_STARTED_NOT_COMMITTED:
5591 NodeRecordPtr nodePtr;
5592 jam();
5593 c_GCP_PREPARE_Counter.clearWaitingFor();
5594 nodePtr.i = cfirstAliveNode;
5595 do {
5596 jam();
5597 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
5598 if (nodePtr.p->gcpstate == NodeRecord::READY) {
5599 jam();
5600 c_GCP_PREPARE_Counter.setWaitingFor(nodePtr.i);
5601 sendGCP_PREPARE(signal, nodePtr.i);
5602 }//if
5603 nodePtr.i = nodePtr.p->nextNode;
5604 } while(nodePtr.i != RNIL);
5605 if (c_GCP_PREPARE_Counter.done()) {
5606 jam();
5607 gcpcommitreqLab(signal);
5608 }//if
5609 break;
5611 case ALL_PREPARED:
5612 jam();
5613 gcpcommitreqLab(signal);
5614 break;
5615 case COMMIT_STARTED_NOT_COMPLETED:
5617 NodeRecordPtr nodePtr;
5618 jam();
5619 c_GCP_COMMIT_Counter.clearWaitingFor();
5620 nodePtr.i = cfirstAliveNode;
5621 do {
5622 jam();
5623 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
5624 if (nodePtr.p->gcpstate == NodeRecord::PREPARE_RECEIVED) {
5625 jam();
5626 sendGCP_COMMIT(signal, nodePtr.i);
5627 c_GCP_COMMIT_Counter.setWaitingFor(nodePtr.i);
5628 } else {
5629 ndbrequire((nodePtr.p->gcpstate == NodeRecord::NODE_FINISHED) ||
5630 (nodePtr.p->gcpstate == NodeRecord::COMMIT_SENT));
5631 }//if
5632 nodePtr.i = nodePtr.p->nextNode;
5633 } while(nodePtr.i != RNIL);
5634 if (c_GCP_COMMIT_Counter.done()){
5635 jam();
5636 gcpsavereqLab(signal);
5637 }//if
5638 break;
5640 case COMMIT_COMPLETED:
5641 jam();
5642 gcpsavereqLab(signal);
5643 break;
5644 case SAVE_STARTED_NOT_COMPLETED:
5646 NodeRecordPtr nodePtr;
5647 jam();
5648 SYSFILE->newestRestorableGCI = coldgcp;
5649 nodePtr.i = cfirstAliveNode;
5650 do {
5651 jam();
5652 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
5653 SYSFILE->lastCompletedGCI[nodePtr.i] = coldgcp;
5654 nodePtr.i = nodePtr.p->nextNode;
5655 } while (nodePtr.i != RNIL);
5656 /**-------------------------------------------------------------------
5657 * THE FAILED NODE DID ALSO PARTICIPATE IN THIS GLOBAL CHECKPOINT
5658 * WHICH IS RECORDED.
5659 *-------------------------------------------------------------------*/
5660 SYSFILE->lastCompletedGCI[failedNodeId] = coldgcp;
5661 copyGciLab(signal, CopyGCIReq::GLOBAL_CHECKPOINT);
5662 break;
5664 default:
5665 ndbrequire(false);
5666 break;
5667 }//switch
5669 signal->theData[0] = NDB_LE_GCP_TakeoverCompleted;
5670 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
5672 /*--------------------------------------------------*/
5673 /* WE SEPARATE HANDLING OF GLOBAL CHECKPOINTS */
5674 /* AND LOCAL CHECKPOINTS HERE. LCP'S HAVE TO */
5675 /* REMOVE ALL FAILED FRAGMENTS BEFORE WE CAN */
5676 /* HANDLE THE LCP PROTOCOL. */
5677 /*--------------------------------------------------*/
5678 checkLocalNodefailComplete(signal, failedNodeId, NF_GCP_TAKE_OVER);
5680 return;
5681 }//Dbdih::masterGcpConfFromFailedLab()
5683 void
5684 Dbdih::invalidateNodeLCP(Signal* signal, Uint32 nodeId, Uint32 tableId)
5686 jamEntry();
5687 TabRecordPtr tabPtr;
5688 tabPtr.i = tableId;
5689 const Uint32 RT_BREAK = 64;
5690 if (ERROR_INSERTED(7125)) {
5691 return;
5692 }//if
5693 for (Uint32 i = 0; i<RT_BREAK; i++) {
5694 jam();
5695 if (tabPtr.i >= ctabFileSize){
5696 jam();
5698 * Ready with entire loop
5699 * Return to master
5701 setAllowNodeStart(nodeId, true);
5702 if (getNodeStatus(nodeId) == NodeRecord::STARTING) {
5703 jam();
5704 StartInfoConf * conf = (StartInfoConf*)&signal->theData[0];
5705 conf->sendingNodeId = cownNodeId;
5706 conf->startingNodeId = nodeId;
5707 sendSignal(cmasterdihref, GSN_START_INFOCONF, signal,
5708 StartInfoConf::SignalLength, JBB);
5709 }//if
5710 return;
5711 }//if
5712 ptrAss(tabPtr, tabRecord);
5713 if (tabPtr.p->tabStatus == TabRecord::TS_ACTIVE) {
5714 jam();
5715 invalidateNodeLCP(signal, nodeId, tabPtr);
5716 return;
5717 }//if
5718 tabPtr.i++;
5719 }//for
5720 signal->theData[0] = DihContinueB::ZINVALIDATE_NODE_LCP;
5721 signal->theData[1] = nodeId;
5722 signal->theData[2] = tabPtr.i;
5723 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
5724 }//Dbdih::invalidateNodeLCP()
5726 void
5727 Dbdih::invalidateNodeLCP(Signal* signal, Uint32 nodeId, TabRecordPtr tabPtr)
5730 * Check so that no one else is using the tab descriptior
5732 if (tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE) {
5733 jam();
5734 signal->theData[0] = DihContinueB::ZINVALIDATE_NODE_LCP;
5735 signal->theData[1] = nodeId;
5736 signal->theData[2] = tabPtr.i;
5737 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 20, 3);
5738 return;
5739 }//if
5742 * For each fragment
5744 bool modified = false;
5745 FragmentstorePtr fragPtr;
5746 for(Uint32 fragNo = 0; fragNo < tabPtr.p->totalfragments; fragNo++){
5747 jam();
5748 getFragstore(tabPtr.p, fragNo, fragPtr);
5750 * For each of replica record
5752 ReplicaRecordPtr replicaPtr;
5753 for(replicaPtr.i = fragPtr.p->oldStoredReplicas; replicaPtr.i != RNIL;
5754 replicaPtr.i = replicaPtr.p->nextReplica) {
5755 jam();
5756 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
5757 if(replicaPtr.p->procNode == nodeId){
5758 jam();
5760 * Found one with correct node id
5763 * Invalidate all LCP's
5765 modified = true;
5766 for(int i = 0; i < MAX_LCP_STORED; i++) {
5767 replicaPtr.p->lcpStatus[i] = ZINVALID;
5768 }//if
5770 * And reset nextLcp
5772 replicaPtr.p->nextLcp = 0;
5773 replicaPtr.p->noCrashedReplicas = 0;
5774 }//if
5775 }//for
5776 }//for
5778 if (modified) {
5779 jam();
5781 * Save table description to disk
5783 tabPtr.p->tabCopyStatus = TabRecord::CS_INVALIDATE_NODE_LCP;
5784 tabPtr.p->tabUpdateState = TabRecord::US_INVALIDATE_NODE_LCP;
5785 tabPtr.p->tabRemoveNode = nodeId;
5786 signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
5787 signal->theData[1] = tabPtr.i;
5788 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
5789 return;
5792 jam();
5794 * Move to next table
5796 tabPtr.i++;
5797 signal->theData[0] = DihContinueB::ZINVALIDATE_NODE_LCP;
5798 signal->theData[1] = nodeId;
5799 signal->theData[2] = tabPtr.i;
5800 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
5801 return;
5802 }//Dbdih::invalidateNodeLCP()
5804 /*------------------------------------------------*/
5805 /* INPUT: TABPTR */
5806 /* TNODEID */
5807 /*------------------------------------------------*/
5808 void Dbdih::removeNodeFromTables(Signal* signal,
5809 Uint32 nodeId, Uint32 tableId)
5811 jamEntry();
5812 TabRecordPtr tabPtr;
5813 tabPtr.i = tableId;
5814 const Uint32 RT_BREAK = 64;
5815 for (Uint32 i = 0; i<RT_BREAK; i++) {
5816 jam();
5817 if (tabPtr.i >= ctabFileSize){
5818 jam();
5819 removeNodeFromTablesComplete(signal, nodeId);
5820 return;
5821 }//if
5823 ptrAss(tabPtr, tabRecord);
5824 if (tabPtr.p->tabStatus == TabRecord::TS_ACTIVE) {
5825 jam();
5826 removeNodeFromTable(signal, nodeId, tabPtr);
5827 return;
5828 }//if
5829 tabPtr.i++;
5830 }//for
5831 signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
5832 signal->theData[1] = nodeId;
5833 signal->theData[2] = tabPtr.i;
5834 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
5837 void Dbdih::removeNodeFromTable(Signal* signal,
5838 Uint32 nodeId, TabRecordPtr tabPtr){
5841 * Check so that no one else is using the tab descriptior
5843 if (tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE) {
5844 jam();
5845 signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
5846 signal->theData[1] = nodeId;
5847 signal->theData[2] = tabPtr.i;
5848 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 20, 3);
5849 return;
5850 }//if
5852 NodeRecordPtr nodePtr;
5853 nodePtr.i = nodeId;
5854 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
5855 const Uint32 lcpId = nodePtr.p->m_remove_node_from_table_lcp_id;
5858 * For each fragment
5860 Uint32 noOfRemovedReplicas = 0; // No of replicas removed
5861 Uint32 noOfRemovedLcpReplicas = 0; // No of replicas in LCP removed
5862 Uint32 noOfRemainingLcpReplicas = 0;// No of replicas in LCP remaining
5864 const bool lcpOngoingFlag = (tabPtr.p->tabLcpStatus== TabRecord::TLS_ACTIVE);
5865 const bool unlogged = (tabPtr.p->tabStorage != TabRecord::ST_NORMAL);
5867 FragmentstorePtr fragPtr;
5868 for(Uint32 fragNo = 0; fragNo < tabPtr.p->totalfragments; fragNo++){
5869 jam();
5870 getFragstore(tabPtr.p, fragNo, fragPtr);
5873 * For each of replica record
5875 bool found = false;
5876 ReplicaRecordPtr replicaPtr;
5877 for(replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL;
5878 replicaPtr.i = replicaPtr.p->nextReplica) {
5879 jam();
5881 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
5882 if(replicaPtr.p->procNode == nodeId){
5883 jam();
5884 found = true;
5885 noOfRemovedReplicas++;
5886 removeNodeFromStored(nodeId, fragPtr, replicaPtr, unlogged);
5887 if(replicaPtr.p->lcpOngoingFlag){
5888 jam();
5890 * This replica is currently LCP:ed
5892 ndbrequire(fragPtr.p->noLcpReplicas > 0);
5893 fragPtr.p->noLcpReplicas --;
5895 noOfRemovedLcpReplicas ++;
5896 replicaPtr.p->lcpOngoingFlag = false;
5899 if (lcpId != RNIL)
5901 jam();
5902 Uint32 lcpNo = prevLcpNo(replicaPtr.p->nextLcp);
5903 if (replicaPtr.p->lcpStatus[lcpNo] == ZVALID &&
5904 replicaPtr.p->lcpId[lcpNo] == SYSFILE->latestLCP_ID)
5906 jam();
5907 replicaPtr.p->lcpStatus[lcpNo] = ZINVALID;
5908 replicaPtr.p->lcpId[lcpNo] = 0;
5909 replicaPtr.p->nextLcp = lcpNo;
5910 ndbout_c("REMOVING lcp: %u from table: %u frag: %u node: %u",
5911 SYSFILE->latestLCP_ID,
5912 tabPtr.i, fragNo, nodeId);
5917 if (!found)
5919 jam();
5921 * Run updateNodeInfo to remove any dead nodes from list of activeNodes
5922 * see bug#15587
5924 updateNodeInfo(fragPtr);
5926 noOfRemainingLcpReplicas += fragPtr.p->noLcpReplicas;
5929 if(noOfRemovedReplicas == 0){
5930 jam();
5932 * The table had no replica on the failed node
5933 * continue with next table
5935 tabPtr.i++;
5936 signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
5937 signal->theData[1] = nodeId;
5938 signal->theData[2] = tabPtr.i;
5939 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
5940 return;
5944 * We did remove at least one replica
5946 bool ok = false;
5947 switch(tabPtr.p->tabLcpStatus){
5948 case TabRecord::TLS_COMPLETED:
5949 ok = true;
5950 jam();
5952 * WE WILL WRITE THE TABLE DESCRIPTION TO DISK AT THIS TIME
5953 * INDEPENDENT OF WHAT THE LOCAL CHECKPOINT NEEDED.
5954 * THIS IS TO ENSURE THAT THE FAILED NODES ARE ALSO UPDATED ON DISK
5955 * IN THE DIH DATA STRUCTURES BEFORE WE COMPLETE HANDLING OF THE
5956 * NODE FAILURE.
5958 ndbrequire(noOfRemovedLcpReplicas == 0);
5960 tabPtr.p->tabCopyStatus = TabRecord::CS_REMOVE_NODE;
5961 tabPtr.p->tabUpdateState = TabRecord::US_REMOVE_NODE;
5962 tabPtr.p->tabRemoveNode = nodeId;
5963 signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
5964 signal->theData[1] = tabPtr.i;
5965 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
5966 return;
5967 break;
5968 case TabRecord::TLS_ACTIVE:
5969 ok = true;
5970 jam();
5972 * The table is participating in an LCP currently
5974 // Fall through
5975 break;
5976 case TabRecord::TLS_WRITING_TO_FILE:
5977 ok = true;
5978 jam();
5980 * This should never happen since we in the beginning of this function
5981 * checks the tabCopyStatus
5983 ndbrequire(lcpOngoingFlag);
5984 ndbrequire(false);
5985 break;
5987 ndbrequire(ok);
5990 * The table is participating in an LCP currently
5991 * and we removed some replicas that should have been checkpointed
5993 ndbrequire(c_lcpState.lcpStatus != LCP_STATUS_IDLE);
5994 ndbrequire(tabPtr.p->tabLcpStatus == TabRecord::TLS_ACTIVE);
5997 * Save the table
5999 tabPtr.p->tabCopyStatus = TabRecord::CS_REMOVE_NODE;
6000 tabPtr.p->tabUpdateState = TabRecord::US_REMOVE_NODE;
6001 tabPtr.p->tabRemoveNode = nodeId;
6002 signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
6003 signal->theData[1] = tabPtr.i;
6004 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
6006 if(noOfRemainingLcpReplicas == 0){
6007 jam();
6009 * The removal on the failed node made the LCP complete
6011 tabPtr.p->tabLcpStatus = TabRecord::TLS_WRITING_TO_FILE;
6012 checkLcpAllTablesDoneInLqh();
6016 void
6017 Dbdih::removeNodeFromTablesComplete(Signal* signal, Uint32 nodeId){
6018 jam();
6021 * Check if we "accidently" completed a LCP
6023 checkLcpCompletedLab(signal);
6026 * Check if we (DIH) are finished with node fail handling
6028 checkLocalNodefailComplete(signal, nodeId, NF_REMOVE_NODE_FROM_TABLE);
6031 void
6032 Dbdih::checkLocalNodefailComplete(Signal* signal, Uint32 failedNodeId,
6033 NodefailHandlingStep step){
6034 jam();
6036 NodeRecordPtr nodePtr;
6037 nodePtr.i = failedNodeId;
6038 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
6040 ndbrequire(nodePtr.p->m_nodefailSteps.get(step));
6041 nodePtr.p->m_nodefailSteps.clear(step);
6043 if(nodePtr.p->m_nodefailSteps.count() > 0){
6044 jam();
6045 return;
6048 if (ERROR_INSERTED(7030))
6050 g_eventLogger.info("Reenable GCP_PREPARE");
6051 CLEAR_ERROR_INSERT_VALUE;
6054 NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0];
6055 nf->blockNo = DBDIH;
6056 nf->nodeId = cownNodeId;
6057 nf->failedNodeId = failedNodeId;
6058 nf->from = __LINE__;
6059 sendSignal(reference(), GSN_NF_COMPLETEREP, signal,
6060 NFCompleteRep::SignalLength, JBB);
6064 void
6065 Dbdih::setLocalNodefailHandling(Signal* signal, Uint32 failedNodeId,
6066 NodefailHandlingStep step){
6067 jam();
6069 NodeRecordPtr nodePtr;
6070 nodePtr.i = failedNodeId;
6071 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
6073 ndbrequire(!nodePtr.p->m_nodefailSteps.get(step));
6074 nodePtr.p->m_nodefailSteps.set(step);
6077 void Dbdih::startLcpTakeOverLab(Signal* signal, Uint32 failedNodeId)
6079 /*--------------------------------------------------------------------*/
6080 // Start LCP master take over process. Consists of the following steps.
6081 // 1) Ensure that all LQH's have reported all fragments they have been
6082 // told to checkpoint. Can be a fairly long step time-wise.
6083 // 2) Query all nodes about their LCP status.
6084 // During the query process we do not want our own state to change.
6085 // This can change due to delayed reception of LCP_REPORT, completed
6086 // save of table on disk or reception of DIH_LCPCOMPLETE from other
6087 // node.
6088 /*--------------------------------------------------------------------*/
6089 }//Dbdih::startLcpTakeOver()
6091 void Dbdih::execEMPTY_LCP_CONF(Signal* signal)
6093 jamEntry();
6095 ndbrequire(c_lcpMasterTakeOverState.state == LMTOS_WAIT_EMPTY_LCP);
6097 const EmptyLcpConf * const conf = (EmptyLcpConf *)&signal->theData[0];
6098 Uint32 nodeId = conf->senderNodeId;
6100 if(!conf->idle){
6101 jam();
6102 if (conf->tableId < c_lcpMasterTakeOverState.minTableId) {
6103 jam();
6104 c_lcpMasterTakeOverState.minTableId = conf->tableId;
6105 c_lcpMasterTakeOverState.minFragId = conf->fragmentId;
6106 } else if (conf->tableId == c_lcpMasterTakeOverState.minTableId &&
6107 conf->fragmentId < c_lcpMasterTakeOverState.minFragId) {
6108 jam();
6109 c_lcpMasterTakeOverState.minFragId = conf->fragmentId;
6110 }//if
6111 if(isMaster()){
6112 jam();
6113 c_lcpState.m_LAST_LCP_FRAG_ORD.setWaitingFor(nodeId);
6117 receiveLoopMacro(EMPTY_LCP_REQ, nodeId);
6118 /*--------------------------------------------------------------------*/
6119 // Received all EMPTY_LCPCONF. We can continue with next phase of the
6120 // take over LCP master process.
6121 /*--------------------------------------------------------------------*/
6122 c_lcpMasterTakeOverState.set(LMTOS_WAIT_LCP_FRAG_REP, __LINE__);
6123 checkEmptyLcpComplete(signal);
6124 return;
6125 }//Dbdih::execEMPTY_LCPCONF()
6127 void
6128 Dbdih::checkEmptyLcpComplete(Signal *signal){
6130 ndbrequire(c_lcpMasterTakeOverState.state == LMTOS_WAIT_LCP_FRAG_REP);
6132 if(c_lcpState.noOfLcpFragRepOutstanding > 0){
6133 jam();
6134 return;
6137 if(isMaster()){
6138 jam();
6140 signal->theData[0] = NDB_LE_LCP_TakeoverStarted;
6141 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
6143 signal->theData[0] = 7012;
6144 execDUMP_STATE_ORD(signal);
6146 if (ERROR_INSERTED(7194))
6148 ndbout_c("7194 starting ZREMOVE_NODE_FROM_TABLE");
6149 signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
6150 signal->theData[1] = c_lcpMasterTakeOverState.failedNodeId;
6151 signal->theData[2] = 0; // Tab id
6152 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
6155 c_lcpMasterTakeOverState.set(LMTOS_INITIAL, __LINE__);
6156 MasterLCPReq * const req = (MasterLCPReq *)&signal->theData[0];
6157 req->masterRef = reference();
6158 req->failedNodeId = c_lcpMasterTakeOverState.failedNodeId;
6159 sendLoopMacro(MASTER_LCPREQ, sendMASTER_LCPREQ);
6161 } else {
6162 sendMASTER_LCPCONF(signal);
6166 /*--------------------------------------------------*/
6167 /* THE MASTER HAS FAILED AND THE NEW MASTER IS*/
6168 /* QUERYING THIS NODE ABOUT THE STATE OF THE */
6169 /* LOCAL CHECKPOINT PROTOCOL. */
6170 /*--------------------------------------------------*/
6171 void Dbdih::execMASTER_LCPREQ(Signal* signal)
6173 const MasterLCPReq * const req = (MasterLCPReq *)&signal->theData[0];
6174 jamEntry();
6175 const BlockReference newMasterBlockref = req->masterRef;
6177 if (newMasterBlockref != cmasterdihref)
6179 jam();
6180 ndbout_c("resending GSN_MASTER_LCPREQ");
6181 sendSignalWithDelay(reference(), GSN_MASTER_LCPREQ, signal,
6182 50, signal->getLength());
6183 return;
6185 Uint32 failedNodeId = req->failedNodeId;
6188 * There can be no take over with the same master
6190 ndbrequire(c_lcpState.m_masterLcpDihRef != newMasterBlockref);
6191 c_lcpState.m_masterLcpDihRef = newMasterBlockref;
6192 c_lcpState.m_MASTER_LCPREQ_Received = true;
6193 c_lcpState.m_MASTER_LCPREQ_FailedNodeId = failedNodeId;
6195 if(newMasterBlockref != cmasterdihref){
6196 jam();
6197 ndbrequire(0);
6200 sendMASTER_LCPCONF(signal);
6201 }//Dbdih::execMASTER_LCPREQ()
6203 void
6204 Dbdih::sendMASTER_LCPCONF(Signal * signal){
6206 if(!c_EMPTY_LCP_REQ_Counter.done()){
6208 * Have not received all EMPTY_LCP_REP
6209 * dare not answer MASTER_LCP_CONF yet
6211 jam();
6212 return;
6215 if(!c_lcpState.m_MASTER_LCPREQ_Received){
6216 jam();
6218 * Has not received MASTER_LCPREQ yet
6220 return;
6223 if(c_lcpState.lcpStatus == LCP_INIT_TABLES){
6224 jam();
6226 * Still aborting old initLcpLab
6228 return;
6231 if(c_lcpState.lcpStatus == LCP_COPY_GCI){
6232 jam();
6234 * Restart it
6236 //Uint32 lcpId = SYSFILE->latestLCP_ID;
6237 SYSFILE->latestLCP_ID--;
6238 c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
6239 #if 0
6240 if(c_copyGCISlave.m_copyReason == CopyGCIReq::LOCAL_CHECKPOINT){
6241 g_eventLogger.info("Dbdih: Also resetting c_copyGCISlave");
6242 c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE;
6243 c_copyGCISlave.m_expectedNextWord = 0;
6245 #endif
6248 MasterLCPConf::State lcpState;
6249 switch (c_lcpState.lcpStatus) {
6250 case LCP_STATUS_IDLE:
6251 jam();
6252 /*------------------------------------------------*/
6253 /* LOCAL CHECKPOINT IS CURRENTLY NOT ACTIVE */
6254 /* SINCE NO COPY OF RESTART INFORMATION HAVE*/
6255 /* BEEN RECEIVED YET. ALSO THE PREVIOUS */
6256 /* CHECKPOINT HAVE BEEN FULLY COMPLETED. */
6257 /*------------------------------------------------*/
6258 lcpState = MasterLCPConf::LCP_STATUS_IDLE;
6259 break;
6260 case LCP_STATUS_ACTIVE:
6261 jam();
6262 /*--------------------------------------------------*/
6263 /* COPY OF RESTART INFORMATION HAS BEEN */
6264 /* PERFORMED AND ALSO RESPONSE HAVE BEEN SENT.*/
6265 /*--------------------------------------------------*/
6266 lcpState = MasterLCPConf::LCP_STATUS_ACTIVE;
6267 break;
6268 case LCP_TAB_COMPLETED:
6269 jam();
6270 /*--------------------------------------------------------*/
6271 /* ALL LCP_REPORT'S HAVE BEEN COMPLETED FOR */
6272 /* ALL TABLES. SAVE OF AT LEAST ONE TABLE IS */
6273 /* ONGOING YET. */
6274 /*--------------------------------------------------------*/
6275 lcpState = MasterLCPConf::LCP_TAB_COMPLETED;
6276 break;
6277 case LCP_TAB_SAVED:
6278 jam();
6279 /*--------------------------------------------------------*/
6280 /* ALL LCP_REPORT'S HAVE BEEN COMPLETED FOR */
6281 /* ALL TABLES. ALL TABLES HAVE ALSO BEEN SAVED */
6282 /* ALL OTHER NODES ARE NOT YET FINISHED WITH */
6283 /* THE LOCAL CHECKPOINT. */
6284 /*--------------------------------------------------------*/
6285 lcpState = MasterLCPConf::LCP_TAB_SAVED;
6286 break;
6287 case LCP_TCGET:
6288 case LCP_CALCULATE_KEEP_GCI:
6289 case LCP_TC_CLOPSIZE:
6290 case LCP_START_LCP_ROUND:
6292 * These should only exists on the master
6293 * but since this is master take over
6294 * it not allowed
6296 ndbrequire(false);
6297 lcpState= MasterLCPConf::LCP_STATUS_IDLE; // remove warning
6298 break;
6299 case LCP_COPY_GCI:
6300 case LCP_INIT_TABLES:
6302 * These two states are handled by if statements above
6304 ndbrequire(false);
6305 lcpState= MasterLCPConf::LCP_STATUS_IDLE; // remove warning
6306 break;
6307 default:
6308 ndbrequire(false);
6309 lcpState= MasterLCPConf::LCP_STATUS_IDLE; // remove warning
6310 }//switch
6312 Uint32 failedNodeId = c_lcpState.m_MASTER_LCPREQ_FailedNodeId;
6313 MasterLCPConf * const conf = (MasterLCPConf *)&signal->theData[0];
6314 conf->senderNodeId = cownNodeId;
6315 conf->lcpState = lcpState;
6316 conf->failedNodeId = failedNodeId;
6317 sendSignal(c_lcpState.m_masterLcpDihRef, GSN_MASTER_LCPCONF,
6318 signal, MasterLCPConf::SignalLength, JBB);
6320 // Answer to MASTER_LCPREQ sent, reset flag so
6321 // that it's not sent again before another request comes in
6322 c_lcpState.m_MASTER_LCPREQ_Received = false;
6324 if(c_lcpState.lcpStatus == LCP_TAB_SAVED){
6325 #ifdef VM_TRACE
6326 g_eventLogger.info("Sending extra GSN_LCP_COMPLETE_REP to new master");
6327 #endif
6328 sendLCP_COMPLETE_REP(signal);
6331 if(!isMaster()){
6332 c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
6333 checkLocalNodefailComplete(signal, failedNodeId, NF_LCP_TAKE_OVER);
6336 return;
6339 NdbOut&
6340 operator<<(NdbOut& out, const Dbdih::LcpMasterTakeOverState state){
6341 switch(state){
6342 case Dbdih::LMTOS_IDLE:
6343 out << "LMTOS_IDLE";
6344 break;
6345 case Dbdih::LMTOS_WAIT_EMPTY_LCP:
6346 out << "LMTOS_WAIT_EMPTY_LCP";
6347 break;
6348 case Dbdih::LMTOS_WAIT_LCP_FRAG_REP:
6349 out << "LMTOS_WAIT_EMPTY_LCP";
6350 break;
6351 case Dbdih::LMTOS_INITIAL:
6352 out << "LMTOS_INITIAL";
6353 break;
6354 case Dbdih::LMTOS_ALL_IDLE:
6355 out << "LMTOS_ALL_IDLE";
6356 break;
6357 case Dbdih::LMTOS_ALL_ACTIVE:
6358 out << "LMTOS_ALL_ACTIVE";
6359 break;
6360 case Dbdih::LMTOS_LCP_CONCLUDING:
6361 out << "LMTOS_LCP_CONCLUDING";
6362 break;
6363 case Dbdih::LMTOS_COPY_ONGOING:
6364 out << "LMTOS_COPY_ONGOING";
6365 break;
6367 return out;
6370 struct MASTERLCP_StateTransitions {
6371 Dbdih::LcpMasterTakeOverState CurrentState;
6372 MasterLCPConf::State ParticipantState;
6373 Dbdih::LcpMasterTakeOverState NewState;
6376 static const
6377 MASTERLCP_StateTransitions g_masterLCPTakeoverStateTransitions[] = {
6379 * Current = LMTOS_INITIAL
6381 { Dbdih::LMTOS_INITIAL,
6382 MasterLCPConf::LCP_STATUS_IDLE,
6383 Dbdih::LMTOS_ALL_IDLE },
6385 { Dbdih::LMTOS_INITIAL,
6386 MasterLCPConf::LCP_STATUS_ACTIVE,
6387 Dbdih::LMTOS_ALL_ACTIVE },
6389 { Dbdih::LMTOS_INITIAL,
6390 MasterLCPConf::LCP_TAB_COMPLETED,
6391 Dbdih::LMTOS_LCP_CONCLUDING },
6393 { Dbdih::LMTOS_INITIAL,
6394 MasterLCPConf::LCP_TAB_SAVED,
6395 Dbdih::LMTOS_LCP_CONCLUDING },
6398 * Current = LMTOS_ALL_IDLE
6400 { Dbdih::LMTOS_ALL_IDLE,
6401 MasterLCPConf::LCP_STATUS_IDLE,
6402 Dbdih::LMTOS_ALL_IDLE },
6404 { Dbdih::LMTOS_ALL_IDLE,
6405 MasterLCPConf::LCP_STATUS_ACTIVE,
6406 Dbdih::LMTOS_COPY_ONGOING },
6408 { Dbdih::LMTOS_ALL_IDLE,
6409 MasterLCPConf::LCP_TAB_COMPLETED,
6410 Dbdih::LMTOS_LCP_CONCLUDING },
6412 { Dbdih::LMTOS_ALL_IDLE,
6413 MasterLCPConf::LCP_TAB_SAVED,
6414 Dbdih::LMTOS_LCP_CONCLUDING },
6417 * Current = LMTOS_COPY_ONGOING
6419 { Dbdih::LMTOS_COPY_ONGOING,
6420 MasterLCPConf::LCP_STATUS_IDLE,
6421 Dbdih::LMTOS_COPY_ONGOING },
6423 { Dbdih::LMTOS_COPY_ONGOING,
6424 MasterLCPConf::LCP_STATUS_ACTIVE,
6425 Dbdih::LMTOS_COPY_ONGOING },
6428 * Current = LMTOS_ALL_ACTIVE
6430 { Dbdih::LMTOS_ALL_ACTIVE,
6431 MasterLCPConf::LCP_STATUS_IDLE,
6432 Dbdih::LMTOS_COPY_ONGOING },
6434 { Dbdih::LMTOS_ALL_ACTIVE,
6435 MasterLCPConf::LCP_STATUS_ACTIVE,
6436 Dbdih::LMTOS_ALL_ACTIVE },
6438 { Dbdih::LMTOS_ALL_ACTIVE,
6439 MasterLCPConf::LCP_TAB_COMPLETED,
6440 Dbdih::LMTOS_LCP_CONCLUDING },
6442 { Dbdih::LMTOS_ALL_ACTIVE,
6443 MasterLCPConf::LCP_TAB_SAVED,
6444 Dbdih::LMTOS_LCP_CONCLUDING },
6447 * Current = LMTOS_LCP_CONCLUDING
6449 { Dbdih::LMTOS_LCP_CONCLUDING,
6450 MasterLCPConf::LCP_STATUS_IDLE,
6451 Dbdih::LMTOS_LCP_CONCLUDING },
6453 { Dbdih::LMTOS_LCP_CONCLUDING,
6454 MasterLCPConf::LCP_STATUS_ACTIVE,
6455 Dbdih::LMTOS_LCP_CONCLUDING },
6457 { Dbdih::LMTOS_LCP_CONCLUDING,
6458 MasterLCPConf::LCP_TAB_COMPLETED,
6459 Dbdih::LMTOS_LCP_CONCLUDING },
6461 { Dbdih::LMTOS_LCP_CONCLUDING,
6462 MasterLCPConf::LCP_TAB_SAVED,
6463 Dbdih::LMTOS_LCP_CONCLUDING }
6466 const Uint32 g_masterLCPTakeoverStateTransitionsRows =
6467 sizeof(g_masterLCPTakeoverStateTransitions) / sizeof(struct MASTERLCP_StateTransitions);
6469 void Dbdih::execMASTER_LCPCONF(Signal* signal)
6471 const MasterLCPConf * const conf = (MasterLCPConf *)&signal->theData[0];
6472 jamEntry();
6474 if (ERROR_INSERTED(7194))
6476 ndbout_c("delaying MASTER_LCPCONF due to error 7194");
6477 sendSignalWithDelay(reference(), GSN_MASTER_LCPCONF, signal,
6478 300, signal->getLength());
6479 return;
6482 Uint32 senderNodeId = conf->senderNodeId;
6483 MasterLCPConf::State lcpState = (MasterLCPConf::State)conf->lcpState;
6484 const Uint32 failedNodeId = conf->failedNodeId;
6485 NodeRecordPtr nodePtr;
6486 nodePtr.i = senderNodeId;
6487 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
6488 nodePtr.p->lcpStateAtTakeOver = lcpState;
6490 CRASH_INSERTION(7180);
6492 #ifdef VM_TRACE
6493 g_eventLogger.info("MASTER_LCPCONF");
6494 printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0);
6495 #endif
6497 bool found = false;
6498 for(Uint32 i = 0; i<g_masterLCPTakeoverStateTransitionsRows; i++){
6499 const struct MASTERLCP_StateTransitions * valid =
6500 &g_masterLCPTakeoverStateTransitions[i];
6502 if(valid->CurrentState == c_lcpMasterTakeOverState.state &&
6503 valid->ParticipantState == lcpState){
6504 jam();
6505 found = true;
6506 c_lcpMasterTakeOverState.set(valid->NewState, __LINE__);
6507 break;
6510 ndbrequire(found);
6512 bool ok = false;
6513 switch(lcpState){
6514 case MasterLCPConf::LCP_STATUS_IDLE:
6515 ok = true;
6516 break;
6517 case MasterLCPConf::LCP_STATUS_ACTIVE:
6518 case MasterLCPConf::LCP_TAB_COMPLETED:
6519 case MasterLCPConf::LCP_TAB_SAVED:
6520 ok = true;
6521 c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.setWaitingFor(nodePtr.i);
6522 break;
6524 ndbrequire(ok);
6526 receiveLoopMacro(MASTER_LCPREQ, senderNodeId);
6527 /*-------------------------------------------------------------------------*/
6528 // We have now received all responses and are ready to take over the LCP
6529 // protocol as master.
6530 /*-------------------------------------------------------------------------*/
6531 MASTER_LCPhandling(signal, failedNodeId);
6532 }//Dbdih::execMASTER_LCPCONF()
6534 void Dbdih::execMASTER_LCPREF(Signal* signal)
6536 const MasterLCPRef * const ref = (MasterLCPRef *)&signal->theData[0];
6537 jamEntry();
6538 receiveLoopMacro(MASTER_LCPREQ, ref->senderNodeId);
6539 /*-------------------------------------------------------------------------*/
6540 // We have now received all responses and are ready to take over the LCP
6541 // protocol as master.
6542 /*-------------------------------------------------------------------------*/
6543 MASTER_LCPhandling(signal, ref->failedNodeId);
6544 }//Dbdih::execMASTER_LCPREF()
6546 void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
6548 /*-------------------------------------------------------------------------
6550 * WE ARE NOW READY TO CONCLUDE THE TAKE OVER AS MASTER.
6551 * WE HAVE ENOUGH INFO TO START UP ACTIVITIES IN THE PROPER PLACE.
6552 * ALSO SET THE PROPER STATE VARIABLES.
6553 *------------------------------------------------------------------------*/
6554 c_lcpState.currentFragment.tableId = c_lcpMasterTakeOverState.minTableId;
6555 c_lcpState.currentFragment.fragmentId = c_lcpMasterTakeOverState.minFragId;
6556 c_lcpState.m_LAST_LCP_FRAG_ORD = c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH;
6558 NodeRecordPtr failedNodePtr;
6559 failedNodePtr.i = failedNodeId;
6560 ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
6562 switch (c_lcpMasterTakeOverState.state) {
6563 case LMTOS_ALL_IDLE:
6564 jam();
6565 /* --------------------------------------------------------------------- */
6566 // All nodes were idle in the LCP protocol. Start checking for start of LCP
6567 // protocol.
6568 /* --------------------------------------------------------------------- */
6569 #ifdef VM_TRACE
6570 g_eventLogger.info("MASTER_LCPhandling:: LMTOS_ALL_IDLE -> checkLcpStart");
6571 #endif
6572 checkLcpStart(signal, __LINE__);
6573 break;
6574 case LMTOS_COPY_ONGOING:
6575 jam();
6576 /* --------------------------------------------------------------------- */
6577 // We were in the starting process of the LCP protocol. We will restart the
6578 // protocol by calculating the keep gci and storing the new lcp id.
6579 /* --------------------------------------------------------------------- */
6580 #ifdef VM_TRACE
6581 g_eventLogger.info("MASTER_LCPhandling:: LMTOS_COPY_ONGOING -> storeNewLcpId");
6582 #endif
6583 if (c_lcpState.lcpStatus == LCP_STATUS_ACTIVE) {
6584 jam();
6585 /*---------------------------------------------------------------------*/
6586 /* WE NEED TO DECREASE THE LATEST LCP ID SINCE WE HAVE ALREADY */
6587 /* STARTED THIS */
6588 /* LOCAL CHECKPOINT. */
6589 /*---------------------------------------------------------------------*/
6590 Uint32 lcpId = SYSFILE->latestLCP_ID;
6591 #ifdef VM_TRACE
6592 g_eventLogger.info("Decreasing latestLCP_ID from %d to %d", lcpId, lcpId - 1);
6593 #endif
6594 SYSFILE->latestLCP_ID--;
6595 }//if
6596 storeNewLcpIdLab(signal);
6597 break;
6598 case LMTOS_ALL_ACTIVE:
6600 jam();
6601 /* -------------------------------------------------------------------
6602 * Everybody was in the active phase. We will restart sending
6603 * LCP_FRAGORD to the nodes from the new master.
6604 * We also need to set dihLcpStatus to ZACTIVE
6605 * in the master node since the master will wait for all nodes to
6606 * complete before finalising the LCP process.
6607 * ------------------------------------------------------------------ */
6608 #ifdef VM_TRACE
6609 g_eventLogger.info("MASTER_LCPhandling:: LMTOS_ALL_ACTIVE -> "
6610 "startLcpRoundLoopLab(table=%u, fragment=%u)",
6611 c_lcpMasterTakeOverState.minTableId,
6612 c_lcpMasterTakeOverState.minFragId);
6613 #endif
6615 c_lcpState.keepGci = SYSFILE->keepGCI;
6616 startLcpRoundLoopLab(signal, 0, 0);
6617 break;
6619 case LMTOS_LCP_CONCLUDING:
6621 jam();
6622 /* ------------------------------------------------------------------- */
6623 // The LCP process is in the finalisation phase. We simply wait for it to
6624 // complete with signals arriving in. We need to check also if we should
6625 // change state due to table write completion during state
6626 // collection phase.
6627 /* ------------------------------------------------------------------- */
6628 ndbrequire(c_lcpState.lcpStatus != LCP_STATUS_IDLE);
6629 startLcpRoundLoopLab(signal, 0, 0);
6630 break;
6632 default:
6633 ndbrequire(false);
6634 break;
6635 }//switch
6636 signal->theData[0] = NDB_LE_LCP_TakeoverCompleted;
6637 signal->theData[1] = c_lcpMasterTakeOverState.state;
6638 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
6640 signal->theData[0] = 7012;
6641 execDUMP_STATE_ORD(signal);
6643 c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
6645 checkLocalNodefailComplete(signal, failedNodePtr.i, NF_LCP_TAKE_OVER);
6648 /* ------------------------------------------------------------------------- */
6649 /* A BLOCK OR A NODE HAS COMPLETED THE HANDLING OF THE NODE FAILURE. */
6650 /* ------------------------------------------------------------------------- */
6651 void Dbdih::execNF_COMPLETEREP(Signal* signal)
6653 NodeRecordPtr failedNodePtr;
6654 NFCompleteRep * const nfCompleteRep = (NFCompleteRep *)&signal->theData[0];
6655 jamEntry();
6656 const Uint32 blockNo = nfCompleteRep->blockNo;
6657 Uint32 nodeId = nfCompleteRep->nodeId;
6658 failedNodePtr.i = nfCompleteRep->failedNodeId;
6660 ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
6661 switch (blockNo) {
6662 case DBTC:
6663 jam();
6664 ndbrequire(failedNodePtr.p->dbtcFailCompleted == ZFALSE);
6665 /* -------------------------------------------------------------------- */
6666 // Report the event that DBTC completed node failure handling.
6667 /* -------------------------------------------------------------------- */
6668 signal->theData[0] = NDB_LE_NodeFailCompleted;
6669 signal->theData[1] = DBTC;
6670 signal->theData[2] = failedNodePtr.i;
6671 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
6673 failedNodePtr.p->dbtcFailCompleted = ZTRUE;
6674 break;
6675 case DBDICT:
6676 jam();
6677 ndbrequire(failedNodePtr.p->dbdictFailCompleted == ZFALSE);
6678 /* --------------------------------------------------------------------- */
6679 // Report the event that DBDICT completed node failure handling.
6680 /* --------------------------------------------------------------------- */
6681 signal->theData[0] = NDB_LE_NodeFailCompleted;
6682 signal->theData[1] = DBDICT;
6683 signal->theData[2] = failedNodePtr.i;
6684 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
6686 failedNodePtr.p->dbdictFailCompleted = ZTRUE;
6687 break;
6688 case DBDIH:
6689 jam();
6690 ndbrequire(failedNodePtr.p->dbdihFailCompleted == ZFALSE);
6691 /* --------------------------------------------------------------------- */
6692 // Report the event that DBDIH completed node failure handling.
6693 /* --------------------------------------------------------------------- */
6694 signal->theData[0] = NDB_LE_NodeFailCompleted;
6695 signal->theData[1] = DBDIH;
6696 signal->theData[2] = failedNodePtr.i;
6697 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
6699 failedNodePtr.p->dbdihFailCompleted = ZTRUE;
6700 break;
6701 case DBLQH:
6702 jam();
6703 ndbrequire(failedNodePtr.p->dblqhFailCompleted == ZFALSE);
6704 /* --------------------------------------------------------------------- */
6705 // Report the event that DBDIH completed node failure handling.
6706 /* --------------------------------------------------------------------- */
6707 signal->theData[0] = NDB_LE_NodeFailCompleted;
6708 signal->theData[1] = DBLQH;
6709 signal->theData[2] = failedNodePtr.i;
6710 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
6712 failedNodePtr.p->dblqhFailCompleted = ZTRUE;
6713 break;
6714 case 0: /* Node has finished */
6715 jam();
6716 ndbrequire(nodeId < MAX_NDB_NODES);
6718 if (failedNodePtr.p->recNODE_FAILREP == ZFALSE) {
6719 jam();
6720 /* ------------------------------------------------------------------- */
6721 // We received a report about completion of node failure before we
6722 // received the message about the NODE failure ourselves.
6723 // We will send the signal to ourselves with a small delay
6724 // (10 milliseconds).
6725 /* ------------------------------------------------------------------- */
6726 //nf->from = __LINE__;
6727 sendSignalWithDelay(reference(), GSN_NF_COMPLETEREP, signal, 10,
6728 signal->length());
6729 return;
6730 }//if
6732 if (!failedNodePtr.p->m_NF_COMPLETE_REP.isWaitingFor(nodeId)){
6733 jam();
6734 return;
6737 failedNodePtr.p->m_NF_COMPLETE_REP.clearWaitingFor(nodeId);;
6739 /* -------------------------------------------------------------------- */
6740 // Report the event that nodeId has completed node failure handling.
6741 /* -------------------------------------------------------------------- */
6742 signal->theData[0] = NDB_LE_NodeFailCompleted;
6743 signal->theData[1] = 0;
6744 signal->theData[2] = failedNodePtr.i;
6745 signal->theData[3] = nodeId;
6746 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
6748 nodeFailCompletedCheckLab(signal, failedNodePtr);
6749 return;
6750 break;
6751 default:
6752 ndbrequire(false);
6753 return;
6754 break;
6755 }//switch
6756 if (failedNodePtr.p->dbtcFailCompleted == ZFALSE) {
6757 jam();
6758 return;
6759 }//if
6760 if (failedNodePtr.p->dbdictFailCompleted == ZFALSE) {
6761 jam();
6762 return;
6763 }//if
6764 if (failedNodePtr.p->dbdihFailCompleted == ZFALSE) {
6765 jam();
6766 return;
6767 }//if
6768 if (failedNodePtr.p->dblqhFailCompleted == ZFALSE) {
6769 jam();
6770 return;
6771 }//if
6772 /* ----------------------------------------------------------------------- */
6773 /* ALL BLOCKS IN THIS NODE HAVE COMPLETED THEIR PART OF HANDLING THE */
6774 /* NODE FAILURE. WE CAN NOW REPORT THIS COMPLETION TO ALL OTHER NODES. */
6775 /* ----------------------------------------------------------------------- */
6776 NodeRecordPtr nodePtr;
6777 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
6778 jam();
6779 ptrAss(nodePtr, nodeRecord);
6780 if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
6781 jam();
6782 BlockReference ref = calcDihBlockRef(nodePtr.i);
6783 NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0];
6784 nf->blockNo = 0;
6785 nf->nodeId = cownNodeId;
6786 nf->failedNodeId = failedNodePtr.i;
6787 nf->from = __LINE__;
6788 sendSignal(ref, GSN_NF_COMPLETEREP, signal,
6789 NFCompleteRep::SignalLength, JBB);
6790 }//if
6791 }//for
6792 return;
6793 }//Dbdih::execNF_COMPLETEREP()
6795 void Dbdih::nodeFailCompletedCheckLab(Signal* signal,
6796 NodeRecordPtr failedNodePtr)
6798 jam();
6799 if (!failedNodePtr.p->m_NF_COMPLETE_REP.done()){
6800 jam();
6801 return;
6802 }//if
6803 /* ---------------------------------------------------------------------- */
6804 /* ALL BLOCKS IN ALL NODES HAVE NOW REPORTED COMPLETION OF THE NODE */
6805 /* FAILURE HANDLING. WE ARE NOW READY TO ACCEPT THAT THIS NODE STARTS */
6806 /* AGAIN. */
6807 /* ---------------------------------------------------------------------- */
6808 jam();
6809 failedNodePtr.p->nodeStatus = NodeRecord::DEAD;
6810 failedNodePtr.p->recNODE_FAILREP = ZFALSE;
6812 /* ---------------------------------------------------------------------- */
6813 // Report the event that all nodes completed node failure handling.
6814 /* ---------------------------------------------------------------------- */
6815 signal->theData[0] = NDB_LE_NodeFailCompleted;
6816 signal->theData[1] = 0;
6817 signal->theData[2] = failedNodePtr.i;
6818 signal->theData[3] = 0;
6819 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
6821 /* ---------------------------------------------------------------------- */
6822 // Report to QMGR that we have concluded recovery handling of this node.
6823 /* ---------------------------------------------------------------------- */
6824 signal->theData[0] = failedNodePtr.i;
6825 sendSignal(QMGR_REF, GSN_NDB_FAILCONF, signal, 1, JBB);
6827 if (isMaster()) {
6828 jam();
6829 /* --------------------------------------------------------------------- */
6830 /* IF WE ARE MASTER WE MUST CHECK IF COPY FRAGMENT WAS INTERRUPTED */
6831 /* BY THE FAILED NODES. */
6832 /* --------------------------------------------------------------------- */
6833 TakeOverRecordPtr takeOverPtr;
6834 takeOverPtr.i = 0;
6835 ptrAss(takeOverPtr, takeOverRecord);
6836 if ((takeOverPtr.p->toMasterStatus == TakeOverRecord::COPY_FRAG) &&
6837 (failedNodePtr.i == takeOverPtr.p->toCopyNode)) {
6838 jam();
6839 #ifdef VM_TRACE
6840 ndbrequire("Tell jonas" == 0);
6841 #endif
6842 /*------------------------------------------------------------------*/
6843 /* WE ARE CURRENTLY IN THE PROCESS OF COPYING A FRAGMENT. WE */
6844 /* WILL CHECK IF THE COPY NODE HAVE FAILED. */
6845 /*------------------------------------------------------------------*/
6846 takeOverPtr.p->toMasterStatus = TakeOverRecord::SELECTING_NEXT;
6847 startNextCopyFragment(signal, takeOverPtr.i);
6848 return;
6849 }//if
6850 checkStartTakeOver(signal);
6851 }//if
6852 return;
6853 }//Dbdih::nodeFailCompletedCheckLab()
6855 /*****************************************************************************/
6856 /* ********** SEIZING / RELEASING MODULE *************/
6857 /*****************************************************************************/
/*
  3.4   L O C A L   N O D E   S E I Z E
  ************************************
  */
/*
  3.4.1   L O C A L   N O D E   S E I Z E   R E Q U E S T
  ******************************************************
  */
6866 void Dbdih::execDISEIZEREQ(Signal* signal)
6868 ConnectRecordPtr connectPtr;
6869 jamEntry();
6870 Uint32 userPtr = signal->theData[0];
6871 BlockReference userRef = signal->theData[1];
6872 ndbrequire(cfirstconnect != RNIL);
6873 connectPtr.i = cfirstconnect;
6874 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
6875 cfirstconnect = connectPtr.p->nfConnect;
6876 connectPtr.p->nfConnect = RNIL;
6877 connectPtr.p->userpointer = userPtr;
6878 connectPtr.p->userblockref = userRef;
6879 connectPtr.p->connectState = ConnectRecord::INUSE;
6880 signal->theData[0] = connectPtr.p->userpointer;
6881 signal->theData[1] = connectPtr.i;
6882 sendSignal(userRef, GSN_DISEIZECONF, signal, 2, JBB);
6883 }//Dbdih::execDISEIZEREQ()
/*
  3.5   L O C A L   N O D E   R E L E A S E
  ****************************************
  */
/*
  3.5.1   L O C A L   N O D E   R E L E A S E   R E Q U E S T
  *******************************************************=
  */
6893 void Dbdih::execDIRELEASEREQ(Signal* signal)
6895 ConnectRecordPtr connectPtr;
6896 jamEntry();
6897 connectPtr.i = signal->theData[0];
6898 Uint32 userRef = signal->theData[2];
6899 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
6900 ndbrequire(connectPtr.p->connectState != ConnectRecord::FREE);
6901 ndbrequire(connectPtr.p->userblockref == userRef);
6902 signal->theData[0] = connectPtr.p->userpointer;
6903 sendSignal(connectPtr.p->userblockref, GSN_DIRELEASECONF, signal, 1, JBB);
6904 release_connect(connectPtr);
6905 }//Dbdih::execDIRELEASEREQ()
/*
  3.7   A D D   T A B L E
  **********************=
  */
/*****************************************************************************/
/* **********     TABLE ADDING MODULE                            *************/
/*****************************************************************************/
/*
  3.7.1   A D D   T A B L E   M A I N L Y
  ***************************************
  */
6919 static inline void inc_node_or_group(Uint32 &node, Uint32 max_node)
6921 Uint32 next = node + 1;
6922 node = (next == max_node ? 0 : next);
6926 Spread fragments in backwards compatible mode
6928 static void set_default_node_groups(Signal *signal, Uint32 noFrags)
6930 Uint16 *node_group_array = (Uint16*)&signal->theData[25];
6931 Uint32 i;
6932 node_group_array[0] = 0;
6933 for (i = 1; i < noFrags; i++)
6934 node_group_array[i] = UNDEF_NODEGROUP;
6936 void Dbdih::execCREATE_FRAGMENTATION_REQ(Signal * signal)
6938 Uint16 node_group_id[MAX_NDB_PARTITIONS];
6939 jamEntry();
6940 CreateFragmentationReq * const req =
6941 (CreateFragmentationReq*)signal->getDataPtr();
6943 const Uint32 senderRef = req->senderRef;
6944 const Uint32 senderData = req->senderData;
6945 Uint32 noOfFragments = req->noOfFragments;
6946 const Uint32 fragType = req->fragmentationType;
6947 const Uint32 primaryTableId = req->primaryTableId;
6949 Uint32 err = 0;
6951 do {
6952 NodeGroupRecordPtr NGPtr;
6953 TabRecordPtr primTabPtr;
6954 Uint32 count = 2;
6955 Uint16 noOfReplicas = cnoReplicas;
6956 Uint16 *fragments = (Uint16*)(signal->theData+25);
6957 if (primaryTableId == RNIL) {
6958 jam();
6959 switch ((DictTabInfo::FragmentType)fragType)
6962 Backward compatability and for all places in code not changed.
6964 case DictTabInfo::AllNodesSmallTable:
6965 jam();
6966 noOfFragments = csystemnodes;
6967 set_default_node_groups(signal, noOfFragments);
6968 break;
6969 case DictTabInfo::AllNodesMediumTable:
6970 jam();
6971 noOfFragments = 2 * csystemnodes;
6972 set_default_node_groups(signal, noOfFragments);
6973 break;
6974 case DictTabInfo::AllNodesLargeTable:
6975 jam();
6976 noOfFragments = 4 * csystemnodes;
6977 set_default_node_groups(signal, noOfFragments);
6978 break;
6979 case DictTabInfo::SingleFragment:
6980 jam();
6981 noOfFragments = 1;
6982 set_default_node_groups(signal, noOfFragments);
6983 break;
6984 case DictTabInfo::DistrKeyHash:
6985 jam();
6986 case DictTabInfo::DistrKeyLin:
6987 jam();
6988 if (noOfFragments == 0)
6990 jam();
6991 noOfFragments = csystemnodes;
6992 set_default_node_groups(signal, noOfFragments);
6994 break;
6995 default:
6996 jam();
6997 if (noOfFragments == 0)
6999 jam();
7000 err = CreateFragmentationRef::InvalidFragmentationType;
7002 break;
7004 if (err)
7005 break;
7007 When we come here the the exact partition is specified
7008 and there is an array of node groups sent along as well.
7010 memcpy(&node_group_id[0], &signal->theData[25], 2 * noOfFragments);
7011 Uint16 next_replica_node[MAX_NDB_NODES];
7012 memset(next_replica_node,0,sizeof(next_replica_node));
7013 Uint32 default_node_group= c_nextNodeGroup;
7014 for(Uint32 fragNo = 0; fragNo < noOfFragments; fragNo++)
7016 jam();
7017 NGPtr.i = node_group_id[fragNo];
7018 if (NGPtr.i == UNDEF_NODEGROUP)
7020 jam();
7021 NGPtr.i = default_node_group;
7023 if (NGPtr.i > cnoOfNodeGroups)
7025 jam();
7026 err = CreateFragmentationRef::InvalidNodeGroup;
7027 break;
7029 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
7030 const Uint32 max = NGPtr.p->nodeCount;
7032 fragments[count++] = c_nextLogPart++; // Store logpart first
7033 Uint32 tmp= next_replica_node[NGPtr.i];
7034 for(Uint32 replicaNo = 0; replicaNo < noOfReplicas; replicaNo++)
7036 jam();
7037 const Uint16 nodeId = NGPtr.p->nodesInGroup[tmp];
7038 fragments[count++]= nodeId;
7039 inc_node_or_group(tmp, max);
7041 inc_node_or_group(tmp, max);
7042 next_replica_node[NGPtr.i]= tmp;
7045 * Next node group for next fragment
7047 inc_node_or_group(default_node_group, cnoOfNodeGroups);
7049 if (err)
7051 jam();
7052 break;
7054 else
7056 jam();
7057 c_nextNodeGroup = default_node_group;
7059 } else {
7060 if (primaryTableId >= ctabFileSize) {
7061 jam();
7062 err = CreateFragmentationRef::InvalidPrimaryTable;
7063 break;
7065 primTabPtr.i = primaryTableId;
7066 ptrAss(primTabPtr, tabRecord);
7067 if (primTabPtr.p->tabStatus != TabRecord::TS_ACTIVE) {
7068 jam();
7069 err = CreateFragmentationRef::InvalidPrimaryTable;
7070 break;
7072 noOfFragments= primTabPtr.p->totalfragments;
7073 for (Uint32 fragNo = 0;
7074 fragNo < noOfFragments; fragNo++) {
7075 jam();
7076 FragmentstorePtr fragPtr;
7077 ReplicaRecordPtr replicaPtr;
7078 getFragstore(primTabPtr.p, fragNo, fragPtr);
7079 fragments[count++] = fragPtr.p->m_log_part_id;
7080 fragments[count++] = fragPtr.p->preferredPrimary;
7081 for (replicaPtr.i = fragPtr.p->storedReplicas;
7082 replicaPtr.i != RNIL;
7083 replicaPtr.i = replicaPtr.p->nextReplica) {
7084 jam();
7085 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
7086 if (replicaPtr.p->procNode != fragPtr.p->preferredPrimary) {
7087 jam();
7088 fragments[count++]= replicaPtr.p->procNode;
7091 for (replicaPtr.i = fragPtr.p->oldStoredReplicas;
7092 replicaPtr.i != RNIL;
7093 replicaPtr.i = replicaPtr.p->nextReplica) {
7094 jam();
7095 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
7096 if (replicaPtr.p->procNode != fragPtr.p->preferredPrimary) {
7097 jam();
7098 fragments[count++]= replicaPtr.p->procNode;
7103 if(count != (2U + (1 + noOfReplicas) * noOfFragments)){
7104 char buf[255];
7105 BaseString::snprintf(buf, sizeof(buf),
7106 "Illegal configuration change: NoOfReplicas."
7107 " Can't be applied online ");
7108 progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, buf);
7111 CreateFragmentationConf * const conf =
7112 (CreateFragmentationConf*)signal->getDataPtrSend();
7113 conf->senderRef = reference();
7114 conf->senderData = senderData;
7115 conf->noOfReplicas = (Uint32)noOfReplicas;
7116 conf->noOfFragments = (Uint32)noOfFragments;
7118 fragments[0]= noOfReplicas;
7119 fragments[1]= noOfFragments;
7121 if(senderRef != 0)
7123 jam();
7124 LinearSectionPtr ptr[3];
7125 ptr[0].p = (Uint32*)&fragments[0];
7126 ptr[0].sz = (count + 1) / 2;
7127 sendSignal(senderRef,
7128 GSN_CREATE_FRAGMENTATION_CONF,
7129 signal,
7130 CreateFragmentationConf::SignalLength,
7131 JBB,
7132 ptr,
7135 // Always ACK/NACK (here ACK)
7136 signal->theData[0] = 0;
7137 return;
7138 } while(false);
7139 // Always ACK/NACK (here NACK)
7140 signal->theData[0] = err;
7143 void Dbdih::execDIADDTABREQ(Signal* signal)
7145 Uint32 fragType;
7146 jamEntry();
7148 DiAddTabReq * const req = (DiAddTabReq*)signal->getDataPtr();
7150 // Seize connect record
7151 ndbrequire(cfirstconnect != RNIL);
7152 ConnectRecordPtr connectPtr;
7153 connectPtr.i = cfirstconnect;
7154 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
7155 cfirstconnect = connectPtr.p->nfConnect;
7157 const Uint32 userPtr = req->connectPtr;
7158 const BlockReference userRef = signal->getSendersBlockRef();
7159 connectPtr.p->nfConnect = RNIL;
7160 connectPtr.p->userpointer = userPtr;
7161 connectPtr.p->userblockref = userRef;
7162 connectPtr.p->connectState = ConnectRecord::INUSE;
7163 connectPtr.p->table = req->tableId;
7165 TabRecordPtr tabPtr;
7166 tabPtr.i = req->tableId;
7167 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
7168 tabPtr.p->connectrec = connectPtr.i;
7169 tabPtr.p->tableType = req->tableType;
7170 fragType= req->fragType;
7171 tabPtr.p->schemaVersion = req->schemaVersion;
7172 tabPtr.p->primaryTableId = req->primaryTableId;
7174 if(tabPtr.p->tabStatus == TabRecord::TS_ACTIVE){
7175 jam();
7176 tabPtr.p->tabStatus = TabRecord::TS_CREATING;
7177 sendAddFragreq(signal, connectPtr, tabPtr, 0);
7178 return;
7181 if(getNodeState().getSystemRestartInProgress() &&
7182 tabPtr.p->tabStatus == TabRecord::TS_IDLE){
7183 jam();
7185 ndbrequire(cmasterNodeId == getOwnNodeId());
7186 tabPtr.p->tabStatus = TabRecord::TS_CREATING;
7188 initTableFile(tabPtr);
7189 FileRecordPtr filePtr;
7190 filePtr.i = tabPtr.p->tabFile[0];
7191 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
7192 openFileRw(signal, filePtr);
7193 filePtr.p->reqStatus = FileRecord::OPENING_TABLE;
7194 return;
7197 /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
7198 /* AT THE TIME OF INITIATING THE FILE OF TABLE */
7199 /* DESCRIPTION IS CREATED FOR APPROPRIATE SIZE. EACH */
7200 /* EACH RECORD IN THIS FILE HAS THE INFORMATION ABOUT */
7201 /* ONE TABLE. THE POINTER TO THIS RECORD IS THE TABLE */
7202 /* REFERENCE. IN THE BEGINNING ALL RECORDS ARE CREATED */
7203 /* BUT THEY DO NOT HAVE ANY INFORMATION ABOUT ANY TABLE*/
7204 /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
7205 tabPtr.p->tabStatus = TabRecord::TS_CREATING;
7206 if(req->loggedTable)
7207 tabPtr.p->tabStorage= TabRecord::ST_NORMAL;
7208 else if(req->temporaryTable)
7209 tabPtr.p->tabStorage= TabRecord::ST_TEMPORARY;
7210 else
7211 tabPtr.p->tabStorage= TabRecord::ST_NOLOGGING;
7212 tabPtr.p->kvalue = req->kValue;
7214 switch ((DictTabInfo::FragmentType)fragType)
7216 case DictTabInfo::AllNodesSmallTable:
7217 case DictTabInfo::AllNodesMediumTable:
7218 case DictTabInfo::AllNodesLargeTable:
7219 case DictTabInfo::SingleFragment:
7220 jam();
7221 case DictTabInfo::DistrKeyLin:
7222 jam();
7223 tabPtr.p->method= TabRecord::LINEAR_HASH;
7224 break;
7225 case DictTabInfo::DistrKeyHash:
7226 case DictTabInfo::DistrKeyUniqueHashIndex:
7227 case DictTabInfo::DistrKeyOrderedIndex:
7228 jam();
7229 tabPtr.p->method= TabRecord::NORMAL_HASH;
7230 break;
7231 case DictTabInfo::UserDefined:
7232 jam();
7233 tabPtr.p->method= TabRecord::USER_DEFINED;
7234 break;
7235 default:
7236 ndbrequire(false);
7239 union {
7240 Uint16 fragments[2 + MAX_FRAG_PER_NODE*MAX_REPLICAS*MAX_NDB_NODES];
7241 Uint32 align;
7243 SegmentedSectionPtr fragDataPtr;
7244 LINT_INIT(fragDataPtr.i);
7245 LINT_INIT(fragDataPtr.sz);
7246 signal->getSection(fragDataPtr, DiAddTabReq::FRAGMENTATION);
7247 copy((Uint32*)fragments, fragDataPtr);
7248 releaseSections(signal);
7250 const Uint32 noReplicas = fragments[0];
7251 const Uint32 noFragments = fragments[1];
7253 tabPtr.p->noOfBackups = noReplicas - 1;
7254 tabPtr.p->totalfragments = noFragments;
7255 ndbrequire(noReplicas == cnoReplicas); // Only allowed
7257 if (ERROR_INSERTED(7173)) {
7258 CLEAR_ERROR_INSERT_VALUE;
7259 addtabrefuseLab(signal, connectPtr, ZREPLERROR1);
7260 return;
7262 if ((noReplicas * noFragments) > cnoFreeReplicaRec) {
7263 jam();
7264 addtabrefuseLab(signal, connectPtr, ZREPLERROR1);
7265 return;
7266 }//if
7267 if (noFragments > cremainingfrags) {
7268 jam();
7269 addtabrefuseLab(signal, connectPtr, ZREPLERROR1);
7270 return;
7271 }//if
7273 Uint32 logTotalFragments = 1;
7274 while (logTotalFragments <= tabPtr.p->totalfragments) {
7275 jam();
7276 logTotalFragments <<= 1;
7278 logTotalFragments >>= 1;
7279 tabPtr.p->mask = logTotalFragments - 1;
7280 tabPtr.p->hashpointer = tabPtr.p->totalfragments - logTotalFragments;
7281 allocFragments(tabPtr.p->totalfragments, tabPtr);
7283 Uint32 index = 2;
7284 for (Uint32 fragId = 0; fragId < noFragments; fragId++) {
7285 jam();
7286 FragmentstorePtr fragPtr;
7287 Uint32 activeIndex = 0;
7288 getFragstore(tabPtr.p, fragId, fragPtr);
7289 fragPtr.p->m_log_part_id = fragments[index++];
7290 fragPtr.p->preferredPrimary = fragments[index];
7292 for (Uint32 i = 0; i<noReplicas; i++) {
7293 const Uint32 nodeId = fragments[index++];
7294 ReplicaRecordPtr replicaPtr;
7295 allocStoredReplica(fragPtr, replicaPtr, nodeId);
7296 if (getNodeStatus(nodeId) == NodeRecord::ALIVE) {
7297 jam();
7298 ndbrequire(activeIndex < MAX_REPLICAS);
7299 fragPtr.p->activeNodes[activeIndex] = nodeId;
7300 activeIndex++;
7301 } else {
7302 jam();
7303 removeStoredReplica(fragPtr, replicaPtr);
7304 linkOldStoredReplica(fragPtr, replicaPtr);
7305 }//if
7306 }//for
7307 fragPtr.p->fragReplicas = activeIndex;
7308 ndbrequire(activeIndex > 0 && fragPtr.p->storedReplicas != RNIL);
7310 initTableFile(tabPtr);
7311 tabPtr.p->tabCopyStatus = TabRecord::CS_ADD_TABLE_MASTER;
7312 signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
7313 signal->theData[1] = tabPtr.i;
7314 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
7317 void
7318 Dbdih::addTable_closeConf(Signal * signal, Uint32 tabPtrI){
7319 TabRecordPtr tabPtr;
7320 tabPtr.i = tabPtrI;
7321 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
7323 ConnectRecordPtr connectPtr;
7324 connectPtr.i = tabPtr.p->connectrec;
7325 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
7327 sendAddFragreq(signal, connectPtr, tabPtr, 0);
7330 void
7331 Dbdih::sendAddFragreq(Signal* signal, ConnectRecordPtr connectPtr,
7332 TabRecordPtr tabPtr, Uint32 fragId){
7333 jam();
7334 const Uint32 fragCount = tabPtr.p->totalfragments;
7335 ReplicaRecordPtr replicaPtr;
7336 LINT_INIT(replicaPtr.p);
7337 replicaPtr.i = RNIL;
7338 FragmentstorePtr fragPtr;
7339 for(; fragId<fragCount; fragId++){
7340 jam();
7341 getFragstore(tabPtr.p, fragId, fragPtr);
7343 replicaPtr.i = fragPtr.p->storedReplicas;
7344 while(replicaPtr.i != RNIL){
7345 jam();
7346 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
7347 if(replicaPtr.p->procNode == getOwnNodeId()){
7348 break;
7350 replicaPtr.i = replicaPtr.p->nextReplica;
7353 if(replicaPtr.i != RNIL){
7354 jam();
7355 break;
7358 replicaPtr.i = fragPtr.p->oldStoredReplicas;
7359 while(replicaPtr.i != RNIL){
7360 jam();
7361 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
7362 if(replicaPtr.p->procNode == getOwnNodeId()){
7363 break;
7365 replicaPtr.i = replicaPtr.p->nextReplica;
7368 if(replicaPtr.i != RNIL){
7369 jam();
7370 break;
7374 if(replicaPtr.i != RNIL){
7375 jam();
7376 ndbrequire(fragId < fragCount);
7377 ndbrequire(replicaPtr.p->procNode == getOwnNodeId());
7379 Uint32 requestInfo = 0;
7380 if(tabPtr.p->tabStorage != TabRecord::ST_NORMAL){
7381 requestInfo |= LqhFragReq::TemporaryTable;
7384 if(getNodeState().getNodeRestartInProgress()){
7385 requestInfo |= LqhFragReq::CreateInRunning;
7388 AddFragReq* const req = (AddFragReq*)signal->getDataPtr();
7389 req->dihPtr = connectPtr.i;
7390 req->senderData = connectPtr.p->userpointer;
7391 req->fragmentId = fragId;
7392 req->requestInfo = requestInfo;
7393 req->tableId = tabPtr.i;
7394 req->nextLCP = 0;
7395 req->nodeId = getOwnNodeId();
7396 req->totalFragments = fragCount;
7397 req->startGci = SYSFILE->newestRestorableGCI;
7398 req->logPartId = fragPtr.p->m_log_part_id;
7399 sendSignal(DBDICT_REF, GSN_ADD_FRAGREQ, signal,
7400 AddFragReq::SignalLength, JBB);
7401 return;
7404 // Done
7405 DiAddTabConf * const conf = (DiAddTabConf*)signal->getDataPtr();
7406 conf->senderData = connectPtr.p->userpointer;
7407 sendSignal(connectPtr.p->userblockref, GSN_DIADDTABCONF, signal,
7408 DiAddTabConf::SignalLength, JBB);
7410 // Release
7411 release_connect(connectPtr);
7413 void
7414 Dbdih::release_connect(ConnectRecordPtr ptr)
7416 ptr.p->userblockref = ZNIL;
7417 ptr.p->userpointer = RNIL;
7418 ptr.p->connectState = ConnectRecord::FREE;
7419 ptr.p->nfConnect = cfirstconnect;
7420 cfirstconnect = ptr.i;
7423 void
7424 Dbdih::execADD_FRAGCONF(Signal* signal){
7425 jamEntry();
7426 AddFragConf * const conf = (AddFragConf*)signal->getDataPtr();
7428 ConnectRecordPtr connectPtr;
7429 connectPtr.i = conf->dihPtr;
7430 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
7432 TabRecordPtr tabPtr;
7433 tabPtr.i = connectPtr.p->table;
7434 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
7436 sendAddFragreq(signal, connectPtr, tabPtr, conf->fragId + 1);
7439 void
7440 Dbdih::execADD_FRAGREF(Signal* signal){
7441 jamEntry();
7442 AddFragRef * const ref = (AddFragRef*)signal->getDataPtr();
7444 ConnectRecordPtr connectPtr;
7445 connectPtr.i = ref->dihPtr;
7446 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
7449 DiAddTabRef * const ref = (DiAddTabRef*)signal->getDataPtr();
7450 ref->senderData = connectPtr.p->userpointer;
7451 ref->errorCode = ~0;
7452 sendSignal(connectPtr.p->userblockref, GSN_DIADDTABREF, signal,
7453 DiAddTabRef::SignalLength, JBB);
7456 // Release
7457 release_connect(connectPtr);
/*
  3.7.1.3   R E F U S E
  *********************
  */
7464 void Dbdih::addtabrefuseLab(Signal* signal, ConnectRecordPtr connectPtr, Uint32 errorCode)
7466 signal->theData[0] = connectPtr.p->userpointer;
7467 signal->theData[1] = errorCode;
7468 sendSignal(connectPtr.p->userblockref, GSN_DIADDTABREF, signal, 2, JBB);
7469 release_connect(connectPtr);
7470 return;
7471 }//Dbdih::addtabrefuseLab()
/*
  3.7.2   A D D   T A B L E   D U P L I C A T I O N
  *************************************************
  */
/*
  3.7.2.1   A D D   T A B L E   D U P L I C A T I O N   R E Q U E S T
  *******************************************************************=
  */
/*
  D E L E T E   T A B L E
  **********************=
  */
7486 /*****************************************************************************/
7487 /*********** DELETE TABLE MODULE *************/
7488 /*****************************************************************************/
7489 void
7490 Dbdih::execDROP_TAB_REQ(Signal* signal){
7491 jamEntry();
7492 DropTabReq* req = (DropTabReq*)signal->getDataPtr();
7494 TabRecordPtr tabPtr;
7495 tabPtr.i = req->tableId;
7496 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
7498 tabPtr.p->m_dropTab.tabUserRef = req->senderRef;
7499 tabPtr.p->m_dropTab.tabUserPtr = req->senderData;
7501 DropTabReq::RequestType rt = (DropTabReq::RequestType)req->requestType;
7503 switch(rt){
7504 case DropTabReq::OnlineDropTab:
7505 jam();
7506 ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_DROPPING);
7507 releaseTable(tabPtr);
7508 break;
7509 case DropTabReq::CreateTabDrop:
7510 jam();
7511 releaseTable(tabPtr);
7512 break;
7513 case DropTabReq::RestartDropTab:
7514 break;
7517 startDeleteFile(signal, tabPtr);
7520 void Dbdih::startDeleteFile(Signal* signal, TabRecordPtr tabPtr)
7522 if (tabPtr.p->tabFile[0] == RNIL) {
7523 jam();
7524 initTableFile(tabPtr);
7525 }//if
7526 openTableFileForDelete(signal, tabPtr.p->tabFile[0]);
7527 }//Dbdih::startDeleteFile()
7529 void Dbdih::openTableFileForDelete(Signal* signal, Uint32 fileIndex)
7531 FileRecordPtr filePtr;
7532 filePtr.i = fileIndex;
7533 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
7534 openFileRw(signal, filePtr);
7535 filePtr.p->reqStatus = FileRecord::TABLE_OPEN_FOR_DELETE;
7536 }//Dbdih::openTableFileForDelete()
7538 void Dbdih::tableOpenLab(Signal* signal, FileRecordPtr filePtr)
7540 closeFileDelete(signal, filePtr);
7541 filePtr.p->reqStatus = FileRecord::TABLE_CLOSE_DELETE;
7542 return;
7543 }//Dbdih::tableOpenLab()
7545 void Dbdih::tableDeleteLab(Signal* signal, FileRecordPtr filePtr)
7547 TabRecordPtr tabPtr;
7548 tabPtr.i = filePtr.p->tabRef;
7549 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
7550 if (filePtr.i == tabPtr.p->tabFile[0]) {
7551 jam();
7552 openTableFileForDelete(signal, tabPtr.p->tabFile[1]);
7553 return;
7554 }//if
7555 ndbrequire(filePtr.i == tabPtr.p->tabFile[1]);
7557 releaseFile(tabPtr.p->tabFile[0]);
7558 releaseFile(tabPtr.p->tabFile[1]);
7559 tabPtr.p->tabFile[0] = tabPtr.p->tabFile[1] = RNIL;
7561 tabPtr.p->tabStatus = TabRecord::TS_IDLE;
7563 DropTabConf * const dropConf = (DropTabConf *)signal->getDataPtrSend();
7564 dropConf->senderRef = reference();
7565 dropConf->senderData = tabPtr.p->m_dropTab.tabUserPtr;
7566 dropConf->tableId = tabPtr.i;
7567 sendSignal(tabPtr.p->m_dropTab.tabUserRef, GSN_DROP_TAB_CONF,
7568 signal, DropTabConf::SignalLength, JBB);
7570 tabPtr.p->m_dropTab.tabUserPtr = RNIL;
7571 tabPtr.p->m_dropTab.tabUserRef = 0;
7572 }//Dbdih::tableDeleteLab()
7575 void Dbdih::releaseTable(TabRecordPtr tabPtr)
7577 FragmentstorePtr fragPtr;
7578 if (tabPtr.p->noOfFragChunks > 0) {
7579 for (Uint32 fragId = 0; fragId < tabPtr.p->totalfragments; fragId++) {
7580 jam();
7581 getFragstore(tabPtr.p, fragId, fragPtr);
7582 releaseReplicas(fragPtr.p->storedReplicas);
7583 releaseReplicas(fragPtr.p->oldStoredReplicas);
7584 }//for
7585 releaseFragments(tabPtr);
7587 if (tabPtr.p->tabFile[0] != RNIL) {
7588 jam();
7589 releaseFile(tabPtr.p->tabFile[0]);
7590 releaseFile(tabPtr.p->tabFile[1]);
7591 tabPtr.p->tabFile[0] = tabPtr.p->tabFile[1] = RNIL;
7592 }//if
7593 }//Dbdih::releaseTable()
7595 void Dbdih::releaseReplicas(Uint32 replicaPtrI)
7597 ReplicaRecordPtr replicaPtr;
7598 replicaPtr.i = replicaPtrI;
7599 jam();
7600 while (replicaPtr.i != RNIL) {
7601 jam();
7602 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
7603 Uint32 tmp = replicaPtr.p->nextReplica;
7604 replicaPtr.p->nextReplica = cfirstfreeReplica;
7605 cfirstfreeReplica = replicaPtr.i;
7606 replicaPtr.i = tmp;
7607 cnoFreeReplicaRec++;
7608 }//while
7609 }//Dbdih::releaseReplicas()
7611 void Dbdih::seizeReplicaRec(ReplicaRecordPtr& replicaPtr)
7613 replicaPtr.i = cfirstfreeReplica;
7614 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
7615 cfirstfreeReplica = replicaPtr.p->nextReplica;
7616 cnoFreeReplicaRec--;
7617 replicaPtr.p->nextReplica = RNIL;
7618 }//Dbdih::seizeReplicaRec()
7620 void Dbdih::releaseFile(Uint32 fileIndex)
7622 FileRecordPtr filePtr;
7623 filePtr.i = fileIndex;
7624 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
7625 filePtr.p->nextFile = cfirstfreeFile;
7626 cfirstfreeFile = filePtr.i;
7627 }//Dbdih::releaseFile()
7630 void Dbdih::execALTER_TAB_REQ(Signal * signal)
7632 AlterTabReq* const req = (AlterTabReq*)signal->getDataPtr();
7633 const Uint32 senderRef = req->senderRef;
7634 const Uint32 senderData = req->senderData;
7635 const Uint32 changeMask = req->changeMask;
7636 const Uint32 tableId = req->tableId;
7637 const Uint32 tableVersion = req->tableVersion;
7638 const Uint32 gci = req->gci;
7639 AlterTabReq::RequestType requestType =
7640 (AlterTabReq::RequestType) req->requestType;
7642 TabRecordPtr tabPtr;
7643 tabPtr.i = tableId;
7644 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
7645 tabPtr.p->schemaVersion = tableVersion;
7647 // Request handled successfully
7648 AlterTabConf * conf = (AlterTabConf*)signal->getDataPtrSend();
7649 conf->senderRef = reference();
7650 conf->senderData = senderData;
7651 conf->changeMask = changeMask;
7652 conf->tableId = tableId;
7653 conf->tableVersion = tableVersion;
7654 conf->gci = gci;
7655 conf->requestType = requestType;
7656 sendSignal(senderRef, GSN_ALTER_TAB_CONF, signal,
7657 AlterTabConf::SignalLength, JBB);
/*
  G E T   N O D E S
  **********************=
  */
7664 /*****************************************************************************/
7665 /* ********** TRANSACTION HANDLING MODULE *************/
7666 /*****************************************************************************/
/*
  3.8.1   G E T   N O D E S   R E Q U E S T
  ******************************************
  Asks what nodes should be part of a transaction.
  */
7672 void Dbdih::execDIGETNODESREQ(Signal* signal)
7674 const DiGetNodesReq * const req = (DiGetNodesReq *)&signal->theData[0];
7675 FragmentstorePtr fragPtr;
7676 TabRecordPtr tabPtr;
7677 tabPtr.i = req->tableId;
7678 Uint32 hashValue = req->hashValue;
7679 Uint32 ttabFileSize = ctabFileSize;
7680 Uint32 fragId;
7681 DiGetNodesConf * const conf = (DiGetNodesConf *)&signal->theData[0];
7682 TabRecord* regTabDesc = tabRecord;
7683 jamEntry();
7684 ptrCheckGuard(tabPtr, ttabFileSize, regTabDesc);
7685 if (tabPtr.p->method == TabRecord::LINEAR_HASH)
7687 jam();
7688 fragId = hashValue & tabPtr.p->mask;
7689 ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_ACTIVE);
7690 if (fragId < tabPtr.p->hashpointer) {
7691 jam();
7692 fragId = hashValue & ((tabPtr.p->mask << 1) + 1);
7693 }//if
7695 else if (tabPtr.p->method == TabRecord::NORMAL_HASH)
7697 jam();
7698 fragId= hashValue % tabPtr.p->totalfragments;
7700 else
7702 jam();
7703 ndbassert(tabPtr.p->method == TabRecord::USER_DEFINED);
7704 fragId= hashValue;
7705 if (fragId >= tabPtr.p->totalfragments)
7707 jam();
7708 conf->zero= 1; //Indicate error;
7709 signal->theData[1]= ZUNDEFINED_FRAGMENT_ERROR;
7710 return;
7713 getFragstore(tabPtr.p, fragId, fragPtr);
7714 Uint32 nodeCount = extractNodeInfo(fragPtr.p, conf->nodes);
7715 Uint32 sig2 = (nodeCount - 1) +
7716 (fragPtr.p->distributionKey << 16);
7717 conf->zero = 0;
7718 conf->reqinfo = sig2;
7719 conf->fragId = fragId;
7720 }//Dbdih::execDIGETNODESREQ()
7722 Uint32 Dbdih::extractNodeInfo(const Fragmentstore * fragPtr, Uint32 nodes[])
7724 Uint32 nodeCount = 0;
7725 for (Uint32 i = 0; i < fragPtr->fragReplicas; i++) {
7726 jam();
7727 NodeRecordPtr nodePtr;
7728 ndbrequire(i < MAX_REPLICAS);
7729 nodePtr.i = fragPtr->activeNodes[i];
7730 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
7731 if (nodePtr.p->useInTransactions) {
7732 jam();
7733 nodes[nodeCount] = nodePtr.i;
7734 nodeCount++;
7735 }//if
7736 }//for
7737 ndbrequire(nodeCount > 0);
7738 return nodeCount;
7739 }//Dbdih::extractNodeInfo()
7741 void
7742 Dbdih::getFragstore(TabRecord * tab, //In parameter
7743 Uint32 fragNo, //In parameter
7744 FragmentstorePtr & fragptr) //Out parameter
7746 FragmentstorePtr fragPtr;
7747 Uint32 chunkNo = fragNo >> LOG_NO_OF_FRAGS_PER_CHUNK;
7748 Uint32 chunkIndex = fragNo & (NO_OF_FRAGS_PER_CHUNK - 1);
7749 Uint32 TfragstoreFileSize = cfragstoreFileSize;
7750 Fragmentstore* TfragStore = fragmentstore;
7751 if (chunkNo < MAX_NDB_NODES) {
7752 fragPtr.i = tab->startFid[chunkNo] + chunkIndex;
7753 ptrCheckGuard(fragPtr, TfragstoreFileSize, TfragStore);
7754 fragptr = fragPtr;
7755 return;
7756 }//if
7757 ndbrequire(false);
7758 }//Dbdih::getFragstore()
7760 void Dbdih::allocFragments(Uint32 noOfFragments, TabRecordPtr tabPtr)
7762 FragmentstorePtr fragPtr;
7763 Uint32 noOfChunks = (noOfFragments + (NO_OF_FRAGS_PER_CHUNK - 1)) >> LOG_NO_OF_FRAGS_PER_CHUNK;
7764 ndbrequire(cremainingfrags >= noOfFragments);
7765 for (Uint32 i = 0; i < noOfChunks; i++) {
7766 jam();
7767 Uint32 baseFrag = cfirstfragstore;
7768 tabPtr.p->startFid[i] = baseFrag;
7769 fragPtr.i = baseFrag;
7770 ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
7771 cfirstfragstore = fragPtr.p->nextFragmentChunk;
7772 cremainingfrags -= NO_OF_FRAGS_PER_CHUNK;
7773 for (Uint32 j = 0; j < NO_OF_FRAGS_PER_CHUNK; j++) {
7774 jam();
7775 fragPtr.i = baseFrag + j;
7776 ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
7777 initFragstore(fragPtr);
7778 }//if
7779 }//for
7780 tabPtr.p->noOfFragChunks = noOfChunks;
7781 }//Dbdih::allocFragments()
7783 void Dbdih::releaseFragments(TabRecordPtr tabPtr)
7785 FragmentstorePtr fragPtr;
7786 for (Uint32 i = 0; i < tabPtr.p->noOfFragChunks; i++) {
7787 jam();
7788 Uint32 baseFrag = tabPtr.p->startFid[i];
7789 fragPtr.i = baseFrag;
7790 ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
7791 fragPtr.p->nextFragmentChunk = cfirstfragstore;
7792 cfirstfragstore = baseFrag;
7793 tabPtr.p->startFid[i] = RNIL;
7794 cremainingfrags += NO_OF_FRAGS_PER_CHUNK;
7795 }//for
7796 tabPtr.p->noOfFragChunks = 0;
7797 }//Dbdih::releaseFragments()
7799 void Dbdih::initialiseFragstore()
7801 Uint32 i;
7802 FragmentstorePtr fragPtr;
7803 for (i = 0; i < cfragstoreFileSize; i++) {
7804 fragPtr.i = i;
7805 ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
7806 initFragstore(fragPtr);
7807 }//for
7808 Uint32 noOfChunks = cfragstoreFileSize >> LOG_NO_OF_FRAGS_PER_CHUNK;
7809 fragPtr.i = 0;
7810 cfirstfragstore = RNIL;
7811 cremainingfrags = 0;
7812 for (i = 0; i < noOfChunks; i++) {
7813 refresh_watch_dog();
7814 ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
7815 fragPtr.p->nextFragmentChunk = cfirstfragstore;
7816 cfirstfragstore = fragPtr.i;
7817 fragPtr.i += NO_OF_FRAGS_PER_CHUNK;
7818 cremainingfrags += NO_OF_FRAGS_PER_CHUNK;
7819 }//for
7820 }//Dbdih::initialiseFragstore()
/*
  3.9   V E R I F I C A T I O N
  ****************************=
  */
7826 /****************************************************************************/
7827 /* ********** VERIFICATION SUB-MODULE *************/
7828 /****************************************************************************/
/*
  3.9.1   R E C E I V I N G   O F   V E R I F I C A T I O N   R E Q U E S T
  *************************************************************************
  */
7833 void Dbdih::execDIVERIFYREQ(Signal* signal)
7836 jamEntry();
7837 if ((getBlockCommit() == false) &&
7838 (cfirstVerifyQueue == RNIL)) {
7839 jam();
7840 /*-----------------------------------------------------------------------*/
7841 // We are not blocked and the verify queue was empty currently so we can
7842 // simply reply back to TC immediately. The method was called with
7843 // EXECUTE_DIRECT so we reply back by setting signal data and returning.
7844 // theData[0] already contains the correct information so
7845 // we need not touch it.
7846 /*-----------------------------------------------------------------------*/
7847 signal->theData[1] = currentgcp;
7848 signal->theData[2] = 0;
7849 return;
7850 }//if
7851 /*-------------------------------------------------------------------------*/
7852 // Since we are blocked we need to put this operation last in the verify
7853 // queue to ensure that operation starts up in the correct order.
7854 /*-------------------------------------------------------------------------*/
7855 ApiConnectRecordPtr tmpApiConnectptr;
7856 ApiConnectRecordPtr localApiConnectptr;
7858 cverifyQueueCounter++;
7859 localApiConnectptr.i = signal->theData[0];
7860 tmpApiConnectptr.i = clastVerifyQueue;
7861 ptrCheckGuard(localApiConnectptr, capiConnectFileSize, apiConnectRecord);
7862 localApiConnectptr.p->apiGci = cnewgcp;
7863 localApiConnectptr.p->nextApi = RNIL;
7864 clastVerifyQueue = localApiConnectptr.i;
7865 if (tmpApiConnectptr.i == RNIL) {
7866 jam();
7867 cfirstVerifyQueue = localApiConnectptr.i;
7868 } else {
7869 jam();
7870 ptrCheckGuard(tmpApiConnectptr, capiConnectFileSize, apiConnectRecord);
7871 tmpApiConnectptr.p->nextApi = localApiConnectptr.i;
7872 }//if
7873 emptyverificbuffer(signal, false);
7874 signal->theData[2] = 1; // Indicate no immediate return
7875 return;
7876 }//Dbdih::execDIVERIFYREQ()
7878 void Dbdih::execDI_FCOUNTREQ(Signal* signal)
7880 DihFragCountReq * const req = (DihFragCountReq*)signal->getDataPtr();
7881 ConnectRecordPtr connectPtr;
7882 TabRecordPtr tabPtr;
7883 const BlockReference senderRef = signal->senderBlockRef();
7884 const Uint32 senderData = req->m_senderData;
7885 jamEntry();
7886 connectPtr.i = req->m_connectionData;
7887 tabPtr.i = req->m_tableRef;
7888 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
7890 if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
7892 DihFragCountRef* ref = (DihFragCountRef*)signal->getDataPtrSend();
7893 //connectPtr.i == RNIL -> question without connect record
7894 if(connectPtr.i == RNIL)
7895 ref->m_connectionData = RNIL;
7896 else
7898 jam();
7899 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
7900 ref->m_connectionData = connectPtr.p->userpointer;
7902 ref->m_tableRef = tabPtr.i;
7903 ref->m_senderData = senderData;
7904 ref->m_error = DihFragCountRef::ErroneousTableState;
7905 ref->m_tableStatus = tabPtr.p->tabStatus;
7906 sendSignal(senderRef, GSN_DI_FCOUNTREF, signal,
7907 DihFragCountRef::SignalLength, JBB);
7908 return;
7911 if(connectPtr.i != RNIL){
7912 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
7913 if (connectPtr.p->connectState == ConnectRecord::INUSE) {
7914 jam();
7915 DihFragCountConf* conf = (DihFragCountConf*)signal->getDataPtrSend();
7916 conf->m_connectionData = connectPtr.p->userpointer;
7917 conf->m_tableRef = tabPtr.i;
7918 conf->m_senderData = senderData;
7919 conf->m_fragmentCount = tabPtr.p->totalfragments;
7920 conf->m_noOfBackups = tabPtr.p->noOfBackups;
7921 sendSignal(connectPtr.p->userblockref, GSN_DI_FCOUNTCONF, signal,
7922 DihFragCountConf::SignalLength, JBB);
7923 return;
7924 }//if
7925 DihFragCountRef* ref = (DihFragCountRef*)signal->getDataPtrSend();
7926 ref->m_connectionData = connectPtr.p->userpointer;
7927 ref->m_tableRef = tabPtr.i;
7928 ref->m_senderData = senderData;
7929 ref->m_error = DihFragCountRef::ErroneousTableState;
7930 ref->m_tableStatus = tabPtr.p->tabStatus;
7931 sendSignal(connectPtr.p->userblockref, GSN_DI_FCOUNTREF, signal,
7932 DihFragCountRef::SignalLength, JBB);
7933 return;
7934 }//if
7935 DihFragCountConf* conf = (DihFragCountConf*)signal->getDataPtrSend();
7936 //connectPtr.i == RNIL -> question without connect record
7937 conf->m_connectionData = RNIL;
7938 conf->m_tableRef = tabPtr.i;
7939 conf->m_senderData = senderData;
7940 conf->m_fragmentCount = tabPtr.p->totalfragments;
7941 conf->m_noOfBackups = tabPtr.p->noOfBackups;
7942 sendSignal(senderRef, GSN_DI_FCOUNTCONF, signal,
7943 DihFragCountConf::SignalLength, JBB);
7944 }//Dbdih::execDI_FCOUNTREQ()
7946 void Dbdih::execDIGETPRIMREQ(Signal* signal)
7948 FragmentstorePtr fragPtr;
7949 ConnectRecordPtr connectPtr;
7950 TabRecordPtr tabPtr;
7951 jamEntry();
7952 Uint32 passThrough = signal->theData[1];
7953 tabPtr.i = signal->theData[2];
7954 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
7955 if (DictTabInfo::isOrderedIndex(tabPtr.p->tableType)) {
7956 jam();
7957 tabPtr.i = tabPtr.p->primaryTableId;
7958 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
7960 Uint32 fragId = signal->theData[3];
7962 ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_ACTIVE);
7963 connectPtr.i = signal->theData[0];
7964 if(connectPtr.i != RNIL)
7966 jam();
7967 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
7968 signal->theData[0] = connectPtr.p->userpointer;
7970 else
7972 jam();
7973 signal->theData[0] = RNIL;
7976 Uint32 nodes[MAX_REPLICAS];
7977 getFragstore(tabPtr.p, fragId, fragPtr);
7978 Uint32 count = extractNodeInfo(fragPtr.p, nodes);
7980 signal->theData[1] = passThrough;
7981 signal->theData[2] = nodes[0];
7982 signal->theData[3] = nodes[1];
7983 signal->theData[4] = nodes[2];
7984 signal->theData[5] = nodes[3];
7985 signal->theData[6] = count;
7986 signal->theData[7] = tabPtr.i;
7987 signal->theData[8] = fragId;
7989 const BlockReference senderRef = signal->senderBlockRef();
7990 sendSignal(senderRef, GSN_DIGETPRIMCONF, signal, 9, JBB);
7991 }//Dbdih::execDIGETPRIMREQ()
7993 /****************************************************************************/
7994 /* ********** GLOBAL-CHECK-POINT HANDLING MODULE *************/
7995 /****************************************************************************/
/*
  3.10   G L O B A L   C H E C K P O I N T   ( IN   M A S T E R   R O L E)
  *******************************************************************
  */
8000 void Dbdih::checkGcpStopLab(Signal* signal)
8002 Uint32 tgcpStatus;
8004 tgcpStatus = cgcpStatus;
8005 if (tgcpStatus == coldGcpStatus) {
8006 jam();
8007 if (coldGcpId == cnewgcp) {
8008 jam();
8009 if (cgcpStatus != GCP_READY) {
8010 jam();
8011 cgcpSameCounter++;
8012 if (cgcpSameCounter == 1200) {
8013 jam();
8014 #ifdef VM_TRACE
8015 g_eventLogger.error("System crash due to GCP Stop in state = %u",
8016 (Uint32) cgcpStatus);
8017 #endif
8018 crashSystemAtGcpStop(signal, false);
8019 return;
8020 }//if
8021 } else {
8022 jam();
8023 if (cgcpOrderBlocked == 0) {
8024 jam();
8025 cgcpSameCounter++;
8026 if (cgcpSameCounter == 1200) {
8027 jam();
8028 #ifdef VM_TRACE
8029 g_eventLogger.error("System crash due to GCP Stop in state = %u",
8030 (Uint32) cgcpStatus);
8031 #endif
8032 crashSystemAtGcpStop(signal, false);
8033 return;
8034 }//if
8035 } else {
8036 jam();
8037 cgcpSameCounter = 0;
8038 }//if
8039 }//if
8040 } else {
8041 jam();
8042 cgcpSameCounter = 0;
8043 }//if
8044 } else {
8045 jam();
8046 cgcpSameCounter = 0;
8047 }//if
8048 signal->theData[0] = DihContinueB::ZCHECK_GCP_STOP;
8049 signal->theData[1] = coldGcpStatus;
8050 signal->theData[2] = cgcpStatus;
8051 signal->theData[3] = coldGcpId;
8052 signal->theData[4] = cnewgcp;
8053 signal->theData[5] = cgcpSameCounter;
8054 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 6);
8055 coldGcpStatus = cgcpStatus;
8056 coldGcpId = cnewgcp;
8057 return;
8058 }//Dbdih::checkGcpStopLab()
8060 void Dbdih::startGcpLab(Signal* signal, Uint32 aWaitTime)
8062 if ((cgcpOrderBlocked == 1) ||
8063 (c_nodeStartMaster.blockGcp == true) ||
8064 (cfirstVerifyQueue != RNIL)) {
8065 /*************************************************************************/
8066 // 1: Global Checkpoint has been stopped by management command
8067 // 2: Global Checkpoint is blocked by node recovery activity
8068 // 3: Previous global checkpoint is not yet completed.
8069 // All this means that global checkpoint cannot start now.
8070 /*************************************************************************/
8071 jam();
8072 cgcpStartCounter++;
8073 signal->theData[0] = DihContinueB::ZSTART_GCP;
8074 signal->theData[1] = aWaitTime > 100 ? (aWaitTime - 100) : 0;
8075 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2);
8076 return;
8077 }//if
8078 if (cstartGcpNow == false && aWaitTime > 100){
8079 /*************************************************************************/
8080 // We still have more than 100 milliseconds before we start the next and
8081 // nobody has ordered immediate start of a global checkpoint.
8082 // During initial start we will use continuos global checkpoints to
8083 // speed it up since we need to complete a global checkpoint after
8084 // inserting a lot of records.
8085 /*************************************************************************/
8086 jam();
8087 cgcpStartCounter++;
8088 signal->theData[0] = DihContinueB::ZSTART_GCP;
8089 signal->theData[1] = (aWaitTime - 100);
8090 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2);
8091 return;
8092 }//if
8093 cgcpStartCounter = 0;
8094 cstartGcpNow = false;
8095 /***************************************************************************/
8096 // Report the event that a global checkpoint has started.
8097 /***************************************************************************/
8098 signal->theData[0] = NDB_LE_GlobalCheckpointStarted; //Event type
8099 signal->theData[1] = cnewgcp;
8100 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
8102 CRASH_INSERTION(7000);
8103 cnewgcp++;
8104 signal->setTrace(TestOrd::TraceGlobalCheckpoint);
8105 sendLoopMacro(GCP_PREPARE, sendGCP_PREPARE);
8106 cgcpStatus = GCP_PREPARE_SENT;
8107 }//Dbdih::startGcpLab()
8109 void Dbdih::execGCP_PREPARECONF(Signal* signal)
8111 jamEntry();
8112 Uint32 senderNodeId = signal->theData[0];
8113 Uint32 gci = signal->theData[1];
8114 ndbrequire(gci == cnewgcp);
8115 receiveLoopMacro(GCP_PREPARE, senderNodeId);
8116 //-------------------------------------------------------------
8117 // We have now received all replies. We are ready to continue
8118 // with committing the global checkpoint.
8119 //-------------------------------------------------------------
8120 gcpcommitreqLab(signal);
8121 }//Dbdih::execGCP_PREPARECONF()
8123 void Dbdih::gcpcommitreqLab(Signal* signal)
8125 CRASH_INSERTION(7001);
8126 sendLoopMacro(GCP_COMMIT, sendGCP_COMMIT);
8127 cgcpStatus = GCP_COMMIT_SENT;
8128 return;
8129 }//Dbdih::gcpcommitreqLab()
8131 void Dbdih::execGCP_NODEFINISH(Signal* signal)
8133 jamEntry();
8134 const Uint32 senderNodeId = signal->theData[0];
8135 const Uint32 gci = signal->theData[1];
8136 const Uint32 failureNr = signal->theData[2];
8137 if (!isMaster()) {
8138 jam();
8139 ndbrequire(failureNr > cfailurenr);
8140 //-------------------------------------------------------------
8141 // Another node thinks we are master. This could happen when he
8142 // has heard of a node failure which I have not heard of. Ignore
8143 // signal in this case since we will discover it by sending
8144 // MASTER_GCPREQ to the node.
8145 //-------------------------------------------------------------
8146 return;
8147 } else if (cmasterState == MASTER_TAKE_OVER_GCP) {
8148 jam();
8149 //-------------------------------------------------------------
8150 // We are currently taking over as master. Ignore
8151 // signal in this case since we will discover it in reception of
8152 // MASTER_GCPCONF.
8153 //-------------------------------------------------------------
8154 return;
8155 } else {
8156 ndbrequire(cmasterState == MASTER_ACTIVE);
8157 }//if
8158 ndbrequire(gci == coldgcp);
8159 receiveLoopMacro(GCP_COMMIT, senderNodeId);
8160 //-------------------------------------------------------------
8161 // We have now received all replies. We are ready to continue
8162 // with saving the global checkpoint to disk.
8163 //-------------------------------------------------------------
8164 CRASH_INSERTION(7002);
8165 gcpsavereqLab(signal);
8166 return;
8167 }//Dbdih::execGCP_NODEFINISH()
8169 void Dbdih::gcpsavereqLab(Signal* signal)
8171 sendLoopMacro(GCP_SAVEREQ, sendGCP_SAVEREQ);
8172 cgcpStatus = GCP_NODE_FINISHED;
8173 }//Dbdih::gcpsavereqLab()
8175 void Dbdih::execGCP_SAVECONF(Signal* signal)
8177 jamEntry();
8178 const GCPSaveConf * const saveConf = (GCPSaveConf*)&signal->theData[0];
8179 ndbrequire(saveConf->gci == coldgcp);
8180 ndbrequire(saveConf->nodeId == saveConf->dihPtr);
8181 SYSFILE->lastCompletedGCI[saveConf->nodeId] = saveConf->gci;
8182 GCP_SAVEhandling(signal, saveConf->nodeId);
8183 }//Dbdih::execGCP_SAVECONF()
8185 void Dbdih::execGCP_SAVEREF(Signal* signal)
8187 jamEntry();
8188 const GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0];
8189 ndbrequire(saveRef->gci == coldgcp);
8190 ndbrequire(saveRef->nodeId == saveRef->dihPtr);
8192 * Only allow reason not to save
8194 ndbrequire(saveRef->errorCode == GCPSaveRef::NodeShutdownInProgress ||
8195 saveRef->errorCode == GCPSaveRef::FakedSignalDueToNodeFailure ||
8196 saveRef->errorCode == GCPSaveRef::NodeRestartInProgress);
8197 GCP_SAVEhandling(signal, saveRef->nodeId);
8198 }//Dbdih::execGCP_SAVEREF()
8200 void Dbdih::GCP_SAVEhandling(Signal* signal, Uint32 nodeId)
8202 receiveLoopMacro(GCP_SAVEREQ, nodeId);
8203 /*-------------------------------------------------------------------------*/
8204 // All nodes have replied. We are ready to update the system file.
8205 /*-------------------------------------------------------------------------*/
8206 cgcpStatus = GCP_SAVE_LQH_FINISHED;
8207 CRASH_INSERTION(7003);
8208 checkToCopy();
8209 /**------------------------------------------------------------------------
8210 * SET NEW RECOVERABLE GCI. ALSO RESET RESTART COUNTER TO ZERO.
8211 * THIS INDICATES THAT THE SYSTEM HAS BEEN RECOVERED AND SURVIVED AT
8212 * LEAST ONE GLOBAL CHECKPOINT PERIOD. WE WILL USE THIS PARAMETER TO
8213 * SET BACK THE RESTART GCI IF WE ENCOUNTER MORE THAN ONE UNSUCCESSFUL
8214 * RESTART.
8215 *------------------------------------------------------------------------*/
8216 SYSFILE->newestRestorableGCI = coldgcp;
8217 if(Sysfile::getInitialStartOngoing(SYSFILE->systemRestartBits) &&
8218 getNodeState().startLevel == NodeState::SL_STARTED){
8219 jam();
8220 #if 0
8221 g_eventLogger.info("Dbdih: Clearing initial start ongoing");
8222 #endif
8223 Sysfile::clearInitialStartOngoing(SYSFILE->systemRestartBits);
8225 copyGciLab(signal, CopyGCIReq::GLOBAL_CHECKPOINT);
8226 }//Dbdih::GCP_SAVEhandling()
/*
  3.11   G L O B A L   C H E C K P O I N T   (N O T - M A S T E R)
  *************************************************************
  */
8232 void Dbdih::execGCP_PREPARE(Signal* signal)
8234 jamEntry();
8235 CRASH_INSERTION(7005);
8237 if (ERROR_INSERTED(7030))
8239 cgckptflag = true;
8240 g_eventLogger.info("Delayed GCP_PREPARE 5s");
8241 sendSignalWithDelay(reference(), GSN_GCP_PREPARE, signal, 5000,
8242 signal->getLength());
8243 return;
8246 Uint32 masterNodeId = signal->theData[0];
8247 Uint32 gci = signal->theData[1];
8248 BlockReference retRef = calcDihBlockRef(masterNodeId);
8250 ndbrequire (cmasterdihref == retRef);
8251 ndbrequire (cgcpParticipantState == GCP_PARTICIPANT_READY);
8252 ndbrequire (gci == (currentgcp + 1));
8254 cgckptflag = true;
8255 cgcpParticipantState = GCP_PARTICIPANT_PREPARE_RECEIVED;
8256 cnewgcp = gci;
8258 if (ERROR_INSERTED(7031))
8260 g_eventLogger.info("Crashing delayed in GCP_PREPARE 3s");
8261 signal->theData[0] = 9999;
8262 sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 3000, 1);
8263 return;
8266 signal->theData[0] = cownNodeId;
8267 signal->theData[1] = gci;
8268 sendSignal(retRef, GSN_GCP_PREPARECONF, signal, 2, JBA);
8269 return;
8270 }//Dbdih::execGCP_PREPARE()
8272 void Dbdih::execGCP_COMMIT(Signal* signal)
8274 jamEntry();
8275 CRASH_INSERTION(7006);
8276 Uint32 masterNodeId = signal->theData[0];
8277 Uint32 gci = signal->theData[1];
8279 ndbrequire(gci == (currentgcp + 1));
8280 ndbrequire(masterNodeId = cmasterNodeId);
8281 ndbrequire(cgcpParticipantState == GCP_PARTICIPANT_PREPARE_RECEIVED);
8283 coldgcp = currentgcp;
8284 currentgcp = cnewgcp;
8285 cgckptflag = false;
8286 emptyverificbuffer(signal, true);
8287 cgcpParticipantState = GCP_PARTICIPANT_COMMIT_RECEIVED;
8288 signal->theData[0] = calcDihBlockRef(masterNodeId);
8289 signal->theData[1] = coldgcp;
8290 sendSignal(clocaltcblockref, GSN_GCP_NOMORETRANS, signal, 2, JBB);
8291 return;
8292 }//Dbdih::execGCP_COMMIT()
8294 void Dbdih::execGCP_TCFINISHED(Signal* signal)
8296 jamEntry();
8297 CRASH_INSERTION(7007);
8298 Uint32 retRef = signal->theData[0];
8299 Uint32 gci = signal->theData[1];
8300 ndbrequire(gci == coldgcp);
8302 if (ERROR_INSERTED(7181) || ERROR_INSERTED(7182))
8304 c_error_7181_ref = retRef; // Save ref
8305 ndbout_c("killing %d", refToNode(cmasterdihref));
8306 signal->theData[0] = 9999;
8307 sendSignal(numberToRef(CMVMI, refToNode(cmasterdihref)),
8308 GSN_NDB_TAMPER, signal, 1, JBB);
8309 return;
8312 cgcpParticipantState = GCP_PARTICIPANT_TC_FINISHED;
8313 signal->theData[0] = cownNodeId;
8314 signal->theData[1] = coldgcp;
8315 signal->theData[2] = cfailurenr;
8316 sendSignal(retRef, GSN_GCP_NODEFINISH, signal, 3, JBB);
8317 }//Dbdih::execGCP_TCFINISHED()
8319 /*****************************************************************************/
8320 //****** RECEIVING TAMPER REQUEST FROM NDBAPI ******
8321 /*****************************************************************************/
8322 void Dbdih::execDIHNDBTAMPER(Signal* signal)
8324 jamEntry();
8325 Uint32 tcgcpblocked = signal->theData[0];
8326 /* ACTION TO BE TAKEN BY DIH */
8327 Uint32 tuserpointer = signal->theData[1];
8328 BlockReference tuserblockref = signal->theData[2];
8329 switch (tcgcpblocked) {
8330 case 1:
8331 jam();
8332 if (isMaster()) {
8333 jam();
8334 cgcpOrderBlocked = 1;
8335 } else {
8336 jam();
8337 /* TRANSFER THE REQUEST */
8338 /* TO MASTER*/
8339 signal->theData[0] = tcgcpblocked;
8340 signal->theData[1] = tuserpointer;
8341 signal->theData[2] = tuserblockref;
8342 sendSignal(cmasterdihref, GSN_DIHNDBTAMPER, signal, 3, JBB);
8343 }//if
8344 break;
8345 case 2:
8346 jam();
8347 if (isMaster()) {
8348 jam();
8349 cgcpOrderBlocked = 0;
8350 } else {
8351 jam();
8352 /* TRANSFER THE REQUEST */
8353 /* TO MASTER*/
8354 signal->theData[0] = tcgcpblocked;
8355 signal->theData[1] = tuserpointer;
8356 signal->theData[2] = tuserblockref;
8357 sendSignal(cmasterdihref, GSN_DIHNDBTAMPER, signal, 3, JBB);
8358 }//if
8359 break;
8360 case 3:
8361 ndbrequire(false);
8362 return;
8363 break;
8364 case 4:
8365 jam();
8366 signal->theData[0] = tuserpointer;
8367 signal->theData[1] = crestartGci;
8368 sendSignal(tuserblockref, GSN_DIHNDBTAMPER, signal, 2, JBB);
8369 break;
8370 #ifdef ERROR_INSERT
8371 case 5:
8372 jam();
8373 if(tuserpointer == 0)
8375 jam();
8376 signal->theData[0] = 0;
8377 sendSignal(QMGR_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8378 sendSignal(NDBCNTR_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8379 sendSignal(NDBFS_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8380 sendSignal(DBACC_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8381 sendSignal(DBTUP_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8382 sendSignal(DBLQH_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8383 sendSignal(DBDICT_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8384 sendSignal(DBDIH_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8385 sendSignal(DBTC_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8386 sendSignal(CMVMI_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8387 return;
8389 /*----------------------------------------------------------------------*/
8390 // Insert errors.
8391 /*----------------------------------------------------------------------*/
8392 if (tuserpointer < 1000) {
8393 /*--------------------------------------------------------------------*/
8394 // Insert errors into QMGR.
8395 /*--------------------------------------------------------------------*/
8396 jam();
8397 tuserblockref = QMGR_REF;
8398 } else if (tuserpointer < 2000) {
8399 /*--------------------------------------------------------------------*/
8400 // Insert errors into NDBCNTR.
8401 /*--------------------------------------------------------------------*/
8402 jam();
8403 tuserblockref = NDBCNTR_REF;
8404 } else if (tuserpointer < 3000) {
8405 /*--------------------------------------------------------------------*/
8406 // Insert errors into NDBFS.
8407 /*--------------------------------------------------------------------*/
8408 jam();
8409 tuserblockref = NDBFS_REF;
8410 } else if (tuserpointer < 4000) {
8411 /*--------------------------------------------------------------------*/
8412 // Insert errors into DBACC.
8413 /*--------------------------------------------------------------------*/
8414 jam();
8415 tuserblockref = DBACC_REF;
8416 } else if (tuserpointer < 5000) {
8417 /*--------------------------------------------------------------------*/
8418 // Insert errors into DBTUP.
8419 /*--------------------------------------------------------------------*/
8420 jam();
8421 tuserblockref = DBTUP_REF;
8422 } else if (tuserpointer < 6000) {
8423 /*---------------------------------------------------------------------*/
8424 // Insert errors into DBLQH.
8425 /*---------------------------------------------------------------------*/
8426 jam();
8427 tuserblockref = DBLQH_REF;
8428 } else if (tuserpointer < 7000) {
8429 /*---------------------------------------------------------------------*/
8430 // Insert errors into DBDICT.
8431 /*---------------------------------------------------------------------*/
8432 jam();
8433 tuserblockref = DBDICT_REF;
8434 } else if (tuserpointer < 8000) {
8435 /*---------------------------------------------------------------------*/
8436 // Insert errors into DBDIH.
8437 /*--------------------------------------------------------------------*/
8438 jam();
8439 tuserblockref = DBDIH_REF;
8440 } else if (tuserpointer < 9000) {
8441 /*--------------------------------------------------------------------*/
8442 // Insert errors into DBTC.
8443 /*--------------------------------------------------------------------*/
8444 jam();
8445 tuserblockref = DBTC_REF;
8446 } else if (tuserpointer < 10000) {
8447 /*--------------------------------------------------------------------*/
8448 // Insert errors into CMVMI.
8449 /*--------------------------------------------------------------------*/
8450 jam();
8451 tuserblockref = CMVMI_REF;
8452 } else if (tuserpointer < 11000) {
8453 jam();
8454 tuserblockref = BACKUP_REF;
8455 } else if (tuserpointer < 12000) {
8456 // DBUTIL_REF ?
8457 jam();
8458 } else if (tuserpointer < 13000) {
8459 jam();
8460 tuserblockref = DBTUX_REF;
8461 } else if (tuserpointer < 14000) {
8462 jam();
8463 tuserblockref = SUMA_REF;
8464 } else if (tuserpointer < 15000) {
8465 jam();
8466 tuserblockref = DBDICT_REF;
8467 } else if (tuserpointer < 16000) {
8468 jam();
8469 tuserblockref = LGMAN_REF;
8470 } else if (tuserpointer < 17000) {
8471 jam();
8472 tuserblockref = TSMAN_REF;
8473 } else if (tuserpointer < 30000) {
8474 /*--------------------------------------------------------------------*/
8475 // Ignore errors in the 20000-range.
8476 /*--------------------------------------------------------------------*/
8477 jam();
8478 return;
8479 } else if (tuserpointer < 40000) {
8480 jam();
8481 /*--------------------------------------------------------------------*/
8482 // Redirect errors to master DIH in the 30000-range.
8483 /*--------------------------------------------------------------------*/
8484 tuserblockref = cmasterdihref;
8485 tuserpointer -= 30000;
8486 signal->theData[0] = 5;
8487 signal->theData[1] = tuserpointer;
8488 signal->theData[2] = tuserblockref;
8489 sendSignal(tuserblockref, GSN_DIHNDBTAMPER, signal, 3, JBB);
8490 return;
8491 } else if (tuserpointer < 50000) {
8492 NodeRecordPtr localNodeptr;
8493 Uint32 Tfound = 0;
8494 jam();
8495 /*--------------------------------------------------------------------*/
8496 // Redirect errors to non-master DIH in the 40000-range.
8497 /*--------------------------------------------------------------------*/
8498 tuserpointer -= 40000;
8499 for (localNodeptr.i = 1;
8500 localNodeptr.i < MAX_NDB_NODES;
8501 localNodeptr.i++) {
8502 jam();
8503 ptrAss(localNodeptr, nodeRecord);
8504 if ((localNodeptr.p->nodeStatus == NodeRecord::ALIVE) &&
8505 (localNodeptr.i != cmasterNodeId)) {
8506 jam();
8507 tuserblockref = calcDihBlockRef(localNodeptr.i);
8508 Tfound = 1;
8509 break;
8510 }//if
8511 }//for
8512 if (Tfound == 0) {
8513 jam();
8514 /*-------------------------------------------------------------------*/
8515 // Ignore since no non-master node existed.
8516 /*-------------------------------------------------------------------*/
8517 return;
8518 }//if
8519 signal->theData[0] = 5;
8520 signal->theData[1] = tuserpointer;
8521 signal->theData[2] = tuserblockref;
8522 sendSignal(tuserblockref, GSN_DIHNDBTAMPER, signal, 3, JBB);
8523 return;
8524 } else {
8525 jam();
8526 return;
8527 }//if
8528 signal->theData[0] = tuserpointer;
8529 if (tuserpointer != 0) {
8530 sendSignal(tuserblockref, GSN_NDB_TAMPER, signal, 1, JBB);
8531 } else {
8532 sendSignal(QMGR_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8533 sendSignal(NDBCNTR_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8534 sendSignal(NDBFS_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8535 sendSignal(DBACC_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8536 sendSignal(DBTUP_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8537 sendSignal(DBLQH_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8538 sendSignal(DBDICT_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8539 sendSignal(DBDIH_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8540 sendSignal(DBTC_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8541 sendSignal(CMVMI_REF, GSN_NDB_TAMPER, signal, 1, JBB);
8542 }//if
8543 break;
8544 #endif
8545 default:
8546 ndbrequire(false);
8547 break;
8548 }//switch
8549 return;
8550 }//Dbdih::execDIHNDBTAMPER()
8552 /*****************************************************************************/
8553 /* ********** FILE HANDLING MODULE *************/
8554 /*****************************************************************************/
8555 void Dbdih::copyGciLab(Signal* signal, CopyGCIReq::CopyReason reason)
8557 if(c_copyGCIMaster.m_copyReason != CopyGCIReq::IDLE){
8559 * There can currently only be one waiting
8561 ndbrequire(c_copyGCIMaster.m_waiting == CopyGCIReq::IDLE);
8562 c_copyGCIMaster.m_waiting = reason;
8563 return;
8565 c_copyGCIMaster.m_copyReason = reason;
8566 sendLoopMacro(COPY_GCIREQ, sendCOPY_GCIREQ);
8568 }//Dbdih::copyGciLab()
8570 /* ------------------------------------------------------------------------- */
8571 /* COPY_GCICONF RESPONSE TO COPY_GCIREQ */
8572 /* ------------------------------------------------------------------------- */
// Handles COPY_GCICONF, the reply to the COPY_GCIREQ sent by copyGciLab().
// theData[0] carries the id of the confirming node, which is handed to
// receiveLoopMacro.  NOTE(review): the macro body is not visible here; it
// presumably returns from this function until every node has confirmed --
// confirm in Dbdih.hpp.
// After that the pending reasons are sampled and reset, the action that
// belongs to the finished copy reason is executed, and a reason that was
// queued in m_waiting (if any) is restarted through CONTINUEB/ZCOPY_GCI.
8573 void Dbdih::execCOPY_GCICONF(Signal* signal)
8575 jamEntry();
8576 NodeRecordPtr senderNodePtr;
8577 senderNodePtr.i = signal->theData[0];
8578 receiveLoopMacro(COPY_GCIREQ, senderNodePtr.i);
// Take local copies first: both fields are reset to IDLE below, before the
// reason-specific handling runs.
8580 CopyGCIReq::CopyReason waiting = c_copyGCIMaster.m_waiting;
8581 CopyGCIReq::CopyReason current = c_copyGCIMaster.m_copyReason;
8583 c_copyGCIMaster.m_copyReason = CopyGCIReq::IDLE;
8584 c_copyGCIMaster.m_waiting = CopyGCIReq::IDLE;
// 'ok' stays false only for the IDLE case, which makes the ndbrequire(ok)
// after the switch fail: a confirmation without an active reason is an error.
8586 bool ok = false;
8587 switch(current){
8588 case CopyGCIReq::RESTART:{
8589 ok = true;
8590 jam();
// Ask DICT to start, passing the newest restorable GCI from the sysfile.
8591 DictStartReq * req = (DictStartReq*)&signal->theData[0];
8592 req->restartGci = SYSFILE->newestRestorableGCI;
8593 req->senderRef = reference();
8594 sendSignal(cdictblockref, GSN_DICTSTARTREQ,
8595 signal, DictStartReq::SignalLength, JBB);
8596 break;
8598 case CopyGCIReq::LOCAL_CHECKPOINT:{
8599 ok = true;
8600 jam();
8601 startLcpRoundLab(signal);
8602 break;
8604 case CopyGCIReq::GLOBAL_CHECKPOINT:
8605 ok = true;
8606 jam();
8607 checkToCopyCompleted(signal);
8609 /************************************************************************/
8610 // Report the event that a global checkpoint has completed.
8611 /************************************************************************/
8612 signal->setTrace(0);
8613 signal->theData[0] = NDB_LE_GlobalCheckpointCompleted; //Event type
8614 signal->theData[1] = coldgcp;
8615 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
8617 c_newest_restorable_gci = coldgcp;
8619 CRASH_INSERTION(7004);
8620 emptyWaitGCPMasterQueue(signal);
8621 cgcpStatus = GCP_READY;
// Schedule ZSTART_GCP via a delayed CONTINUEB; in this call the 4th argument
// (100) is the delay and the 5th (2) the signal length.
8622 signal->theData[0] = DihContinueB::ZSTART_GCP;
8623 signal->theData[1] = cgcpDelay;
8624 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2);
8625 if (c_nodeStartMaster.blockGcp == true) {
8626 jam();
8627 /* ------------------------------------------------------------------ */
8628 /* A NEW NODE WANTS IN AND WE MUST ALLOW IT TO COME IN NOW SINCE THE */
8629 /* GCP IS COMPLETED. */
8630 /* ------------------------------------------------------------------ */
8631 gcpBlockedLab(signal);
8632 }//if
8633 break;
8634 case CopyGCIReq::INITIAL_START_COMPLETED:
8635 ok = true;
8636 jam();
8637 initialStartCompletedLab(signal);
8638 break;
8639 case CopyGCIReq::IDLE:
8640 ok = false;
8641 jam();
8643 ndbrequire(ok);
// A reason queued by copyGciLab() while this copy was running becomes the
// active reason and is started through a CONTINUEB to ourselves.
8646 * Pop queue
8648 if(waiting != CopyGCIReq::IDLE){
8649 c_copyGCIMaster.m_copyReason = waiting;
8650 signal->theData[0] = DihContinueB::ZCOPY_GCI;
8651 signal->theData[1] = waiting;
8652 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
8654 }//Dbdih::execCOPY_GCICONF()
8656 void Dbdih::invalidateLcpInfoAfterSr()
8658 NodeRecordPtr nodePtr;
8659 SYSFILE->latestLCP_ID--;
8660 Sysfile::clearLCPOngoing(SYSFILE->systemRestartBits);
8661 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
8662 jam();
8663 ptrAss(nodePtr, nodeRecord);
8664 if (!NdbNodeBitmask::get(SYSFILE->lcpActive, nodePtr.i)){
8665 jam();
8666 /* ------------------------------------------------------------------- */
8667 // The node was not active in the local checkpoint.
8668 // To avoid that we step the active status too fast to not
8669 // active we step back one step from Sysfile::NS_ActiveMissed_x.
8670 /* ------------------------------------------------------------------- */
8671 switch (nodePtr.p->activeStatus) {
8672 case Sysfile::NS_Active:
8673 /* ----------------------------------------------------------------- */
8674 // When not active in ongoing LCP and still active is a contradiction.
8675 /* ----------------------------------------------------------------- */
8676 ndbrequire(false);
8677 case Sysfile::NS_ActiveMissed_1:
8678 jam();
8679 nodePtr.p->activeStatus = Sysfile::NS_Active;
8680 break;
8681 case Sysfile::NS_ActiveMissed_2:
8682 jam();
8683 nodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
8684 break;
8685 default:
8686 jam();
8687 break;
8688 }//switch
8689 }//if
8690 }//for
8691 setNodeRestartInfoBits();
8692 }//Dbdih::invalidateLcpInfoAfterSr()
8694 /* ------------------------------------------------------------------------- */
8695 /* THE NEXT STEP IS TO WRITE THE FILE. */
8696 /* ------------------------------------------------------------------------- */
8697 void Dbdih::openingCopyGciSkipInitLab(Signal* signal, FileRecordPtr filePtr)
8699 writeRestorableGci(signal, filePtr);
8700 filePtr.p->reqStatus = FileRecord::WRITING_COPY_GCI;
8701 return;
8702 }//Dbdih::openingCopyGciSkipInitLab()
// Called when one of the restart-info files has been written.
//  - If the file just written is crestartInfoFile[0], continue with
//    crestartInfoFile[1]: write it directly when it is already open,
//    otherwise open it first (reqStatus OPENING_COPY_GCI).
//  - Otherwise the write sequence is complete: for a GLOBAL_CHECKPOINT
//    reason SUB_GCP_COMPLETE_REP is issued (to SUMA by signal, to LGMAN by
//    direct execution), the slave copy reason is reset to IDLE and
//    COPY_GCICONF is returned to the requester, but only when the
//    requester is still the current master DIH.
8704 void Dbdih::writingCopyGciLab(Signal* signal, FileRecordPtr filePtr)
8706 /* ----------------------------------------------------------------------- */
8707 /* WE HAVE NOW WRITTEN THIS FILE. WRITE ALSO NEXT FILE IF THIS IS NOT */
8708 /* ALREADY THE LAST. */
8709 /* ----------------------------------------------------------------------- */
8710 filePtr.p->reqStatus = FileRecord::IDLE;
8711 if (filePtr.i == crestartInfoFile[0]) {
8712 jam();
8713 filePtr.i = crestartInfoFile[1];
8714 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
8715 if (filePtr.p->fileStatus == FileRecord::OPEN) {
8716 jam();
8717 openingCopyGciSkipInitLab(signal, filePtr);
8718 return;
8719 }//if
8720 openFileRw(signal, filePtr);
8721 filePtr.p->reqStatus = FileRecord::OPENING_COPY_GCI;
8722 return;
8723 }//if
8724 /* ----------------------------------------------------------------------- */
8725 /* WE HAVE COMPLETED WRITING BOTH FILES SUCCESSFULLY. NOW REPORT OUR */
8726 /* SUCCESS TO THE MASTER DIH. BUT FIRST WE NEED TO RESET A NUMBER OF */
8727 /* VARIABLES USED BY THE LOCAL CHECKPOINT PROCESS (ONLY IF TRIGGERED */
8728 /* BY LOCAL CHECKPOINT PROCESS. */
8729 /* ----------------------------------------------------------------------- */
8730 CopyGCIReq::CopyReason reason = c_copyGCISlave.m_copyReason;
8732 if (reason == CopyGCIReq::GLOBAL_CHECKPOINT) {
8733 jam();
8734 cgcpParticipantState = GCP_PARTICIPANT_READY;
// The same signal buffer (rep->gci = coldgcp) is used for both the SUMA
// signal and the direct call into LGMAN.
// NOTE(review): the closing brace of this 'if' is not visible in this
// listing; confirm which of the following statements it encloses.
8736 SubGcpCompleteRep * const rep = (SubGcpCompleteRep*)signal->getDataPtr();
8737 rep->gci = coldgcp;
8738 sendSignal(SUMA_REF, GSN_SUB_GCP_COMPLETE_REP, signal,
8739 SubGcpCompleteRep::SignalLength, JBB);
8741 EXECUTE_DIRECT(LGMAN, GSN_SUB_GCP_COMPLETE_REP, signal,
8742 SubGcpCompleteRep::SignalLength);
8743 jamEntry();
8746 jam();
8747 c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE;
// No confirmation is sent when the request came from a DIH that is no
// longer the master.
8749 if(c_copyGCISlave.m_senderRef == cmasterdihref){
8750 jam();
8752 * Only if same master
8754 signal->theData[0] = c_copyGCISlave.m_senderData;
8755 sendSignal(c_copyGCISlave.m_senderRef, GSN_COPY_GCICONF, signal, 1, JBB);
8758 return;
8759 }//Dbdih::writingCopyGciLab()
8761 void Dbdih::execSTART_LCP_REQ(Signal* signal){
8762 StartLcpReq * req = (StartLcpReq*)signal->getDataPtr();
8764 CRASH_INSERTION2(7021, isMaster());
8765 CRASH_INSERTION2(7022, !isMaster());
8767 ndbrequire(c_lcpState.m_masterLcpDihRef = req->senderRef);
8768 c_lcpState.m_participatingDIH = req->participatingDIH;
8769 c_lcpState.m_participatingLQH = req->participatingLQH;
8771 c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH = req->participatingLQH;
8772 if(isMaster()){
8773 jam();
8774 ndbrequire(isActiveMaster());
8775 c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH = req->participatingDIH;
8777 } else {
8778 c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.clearWaitingFor();
8781 c_lcpState.m_LCP_COMPLETE_REP_From_Master_Received = false;
8783 c_lcpState.setLcpStatus(LCP_INIT_TABLES, __LINE__);
8785 signal->theData[0] = DihContinueB::ZINIT_LCP;
8786 signal->theData[1] = c_lcpState.m_masterLcpDihRef;
8787 signal->theData[2] = 0;
8788 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
// Prepares the table/fragment/replica records for a local checkpoint,
// scanning tables from 'tableId' upwards.
//   senderRef - the master LCP DIH reference this initialisation was started
//               for (passed on unchanged in every CONTINUEB/ZINIT_LCP).
//   tableId   - first table id to examine in this invocation.
// Tables that are not TS_ACTIVE or not ST_NORMAL are marked TLS_COMPLETED
// and skipped.  A table whose tabCopyStatus is not CS_IDLE causes a delayed
// retry of the same table.  Otherwise the table is marked TLS_ACTIVE, the
// stored replicas residing on nodes in m_participatingLQH are flagged with
// lcpOngoingFlag and counted per fragment, and a CONTINUEB for the next
// table id is sent.  When the scan runs off the end of the table array,
// START_LCP_CONF is sent to the master LCP DIH.
// NOTE(review): lines holding only braces are missing from this listing, so
// the exact nesting (e.g. that the CONTINUEB + return sits inside the table
// loop) is inferred from the "No more tables" comment -- confirm against
// the real file.
8791 void Dbdih::initLcpLab(Signal* signal, Uint32 senderRef, Uint32 tableId)
8793 TabRecordPtr tabPtr;
8794 tabPtr.i = tableId;
// The master reference recorded in c_lcpState no longer matches the one this
// scan was started with: abort, go idle and answer with MASTER_LCPCONF.
8796 if(c_lcpState.m_masterLcpDihRef != senderRef){
8797 jam();
8799 * This is LCP master takeover
8801 #ifdef VM_TRACE
8802 g_eventLogger.info("initLcpLab aborted due to LCP master takeover - 1");
8803 #endif
8804 c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
8805 sendMASTER_LCPCONF(signal);
8806 return;
// The recorded LCP master differs from the current master DIH: stop silently.
8809 if(c_lcpState.m_masterLcpDihRef != cmasterdihref){
8810 jam();
8812 * Master take over but has not yet received MASTER_LCPREQ
8814 #ifdef VM_TRACE
8815 g_eventLogger.info("initLcpLab aborted due to LCP master takeover - 2");
8816 #endif
8817 return;
8820 //const Uint32 lcpId = SYSFILE->latestLCP_ID;
8822 for(; tabPtr.i < ctabFileSize; tabPtr.i++){
8824 ptrAss(tabPtr, tabRecord);
8826 if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE) {
8827 jam();
8828 tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
8829 continue;
8832 if (tabPtr.p->tabStorage != TabRecord::ST_NORMAL) {
8834 * Table is not logged
8836 jam();
8837 tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
8838 continue;
8841 if (tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE) {
8842 /* ----------------------------------------------------------------- */
8843 // We protect the updates of table data structures by this variable.
8844 /* ----------------------------------------------------------------- */
8845 jam();
// Retry the same table later: delayed CONTINUEB (4th argument 20 is the
// delay, 5th argument 3 the signal length).
8846 signal->theData[0] = DihContinueB::ZINIT_LCP;
8847 signal->theData[1] = senderRef;
8848 signal->theData[2] = tabPtr.i;
8849 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 20, 3);
8850 return;
8851 }//if
8854 * Found a table
8856 tabPtr.p->tabLcpStatus = TabRecord::TLS_ACTIVE;
8859 * For each fragment
8861 for (Uint32 fragId = 0; fragId < tabPtr.p->totalfragments; fragId++) {
8862 jam();
8863 FragmentstorePtr fragPtr;
8864 getFragstore(tabPtr.p, fragId, fragPtr);
8867 * For each of replica record
// replicaCount = number of stored replicas of this fragment whose node takes
// part in the LCP; it ends up in fragPtr.p->noLcpReplicas.
8869 Uint32 replicaCount = 0;
8870 ReplicaRecordPtr replicaPtr;
8871 for(replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL;
8872 replicaPtr.i = replicaPtr.p->nextReplica) {
8873 jam();
8875 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
8876 Uint32 nodeId = replicaPtr.p->procNode;
8877 if(c_lcpState.m_participatingLQH.get(nodeId)){
8878 jam();
8879 replicaCount++;
8880 replicaPtr.p->lcpOngoingFlag = true;
8884 fragPtr.p->noLcpReplicas = replicaCount;
8885 }//for
// Continue with the next table id in a new CONTINUEB.
8887 signal->theData[0] = DihContinueB::ZINIT_LCP;
8888 signal->theData[1] = senderRef;
8889 signal->theData[2] = tabPtr.i + 1;
8890 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
8891 return;
8895 * No more tables
8897 jam();
// Only a DIH that is not itself the LCP master switches to LCP_STATUS_ACTIVE
// here; in the other branch only the isMaster() invariant is checked.
8899 if (c_lcpState.m_masterLcpDihRef != reference()){
8900 jam();
8901 ndbrequire(!isMaster());
8902 c_lcpState.setLcpStatus(LCP_STATUS_ACTIVE, __LINE__);
8903 } else {
8904 jam();
8905 ndbrequire(isMaster());
8908 CRASH_INSERTION2(7023, isMaster());
8909 CRASH_INSERTION2(7024, !isMaster());
8911 jam();
8912 StartLcpConf * conf = (StartLcpConf*)signal->getDataPtrSend();
8913 conf->senderRef = reference();
8914 sendSignal(c_lcpState.m_masterLcpDihRef, GSN_START_LCP_CONF, signal,
8915 StartLcpConf::SignalLength, JBB);
8916 return;
8917 }//Dbdih::initLcpLab()
8919 /* ------------------------------------------------------------------------- */
8920 /* ERROR HANDLING FOR COPY RESTORABLE GCI FILE. */
8921 /* ------------------------------------------------------------------------- */
8922 void Dbdih::openingCopyGciErrorLab(Signal* signal, FileRecordPtr filePtr)
8924 createFileRw(signal, filePtr);
8925 /* ------------------------------------------------------------------------- */
8926 /* ERROR IN OPENING FILE. WE WILL TRY BY CREATING FILE INSTEAD. */
8927 /* ------------------------------------------------------------------------- */
8928 filePtr.p->reqStatus = FileRecord::CREATING_COPY_GCI;
8929 return;
8930 }//Dbdih::openingCopyGciErrorLab()
8932 /* ------------------------------------------------------------------------- */
8933 /* ENTER DICTSTARTCONF WITH */
8934 /* TBLOCKREF */
8935 /* ------------------------------------------------------------------------- */
8936 void Dbdih::dictStartConfLab(Signal* signal)
8938 /* ----------------------------------------------------------------------- */
8939 /* WE HAVE NOW RECEIVED ALL THE TABLES TO RESTART. */
8940 /* ----------------------------------------------------------------------- */
8941 signal->theData[0] = DihContinueB::ZSTART_FRAGMENT;
8942 signal->theData[1] = 0; /* START WITH TABLE 0 */
8943 signal->theData[2] = 0; /* AND FRAGMENT 0 */
8944 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
8945 return;
8946 }//Dbdih::dictStartConfLab()
8949 void Dbdih::openingTableLab(Signal* signal, FileRecordPtr filePtr)
8951 /* ---------------------------------------------------------------------- */
8952 /* SUCCESSFULLY OPENED A FILE. READ THE FIRST PAGE OF THIS FILE. */
8953 /* ---------------------------------------------------------------------- */
8954 TabRecordPtr tabPtr;
8955 PageRecordPtr pagePtr;
8957 tabPtr.i = filePtr.p->tabRef;
8958 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8959 tabPtr.p->noPages = 1;
8960 allocpage(pagePtr);
8961 tabPtr.p->pageRef[0] = pagePtr.i;
8962 readTabfile(signal, tabPtr.p, filePtr);
8963 filePtr.p->reqStatus = FileRecord::READING_TABLE;
8964 return;
8965 }//Dbdih::openingTableLab()
8967 void Dbdih::openingTableErrorLab(Signal* signal, FileRecordPtr filePtr)
8969 TabRecordPtr tabPtr;
8970 tabPtr.i = filePtr.p->tabRef;
8971 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8972 /* ---------------------------------------------------------------------- */
8973 /* WE FAILED IN OPENING A FILE. IF THE FIRST FILE THEN TRY WITH THE */
8974 /* DUPLICATE FILE, OTHERWISE WE REPORT AN ERROR IN THE SYSTEM RESTART. */
8975 /* ---------------------------------------------------------------------- */
8976 if (filePtr.i == tabPtr.p->tabFile[0])
8978 filePtr.i = tabPtr.p->tabFile[1];
8979 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
8980 openFileRw(signal, filePtr);
8981 filePtr.p->reqStatus = FileRecord::OPENING_TABLE;
8983 else
8985 char buf[256];
8986 BaseString::snprintf(buf, sizeof(buf),
8987 "Error opening DIH schema files for table: %d",
8988 tabPtr.i);
8989 progError(__LINE__, NDBD_EXIT_AFS_NO_SUCH_FILE, buf);
8991 }//Dbdih::openingTableErrorLab()
8993 void Dbdih::readingTableLab(Signal* signal, FileRecordPtr filePtr)
8995 TabRecordPtr tabPtr;
8996 PageRecordPtr pagePtr;
8997 /* ---------------------------------------------------------------------- */
8998 /* WE HAVE SUCCESSFULLY READ A NUMBER OF PAGES IN THE TABLE FILE. IF */
8999 /* MORE PAGES EXIST IN THE FILE THEN READ ALL PAGES IN THE FILE. */
9000 /* ---------------------------------------------------------------------- */
9001 filePtr.p->reqStatus = FileRecord::IDLE;
9002 tabPtr.i = filePtr.p->tabRef;
9003 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9004 pagePtr.i = tabPtr.p->pageRef[0];
9005 ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
9006 Uint32 noOfStoredPages = pagePtr.p->word[33];
9007 if (tabPtr.p->noPages < noOfStoredPages) {
9008 jam();
9009 ndbrequire(noOfStoredPages <= 8);
9010 for (Uint32 i = tabPtr.p->noPages; i < noOfStoredPages; i++) {
9011 jam();
9012 allocpage(pagePtr);
9013 tabPtr.p->pageRef[i] = pagePtr.i;
9014 }//for
9015 tabPtr.p->noPages = noOfStoredPages;
9016 readTabfile(signal, tabPtr.p, filePtr);
9017 filePtr.p->reqStatus = FileRecord::READING_TABLE;
9018 } else {
9019 ndbrequire(tabPtr.p->noPages == pagePtr.p->word[33]);
9020 ndbrequire(tabPtr.p->tabCopyStatus == TabRecord::CS_IDLE);
9021 jam();
9022 /* --------------------------------------------------------------------- */
9023 /* WE HAVE READ ALL PAGES. NOW READ FROM PAGES INTO TABLE AND FRAGMENT */
9024 /* DATA STRUCTURES. */
9025 /* --------------------------------------------------------------------- */
9026 tabPtr.p->tabCopyStatus = TabRecord::CS_SR_PHASE1_READ_PAGES;
9027 signal->theData[0] = DihContinueB::ZREAD_PAGES_INTO_TABLE;
9028 signal->theData[1] = tabPtr.i;
9029 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
9030 return;
9031 }//if
9032 return;
9033 }//Dbdih::readingTableLab()
9035 void Dbdih::readTableFromPagesLab(Signal* signal, TabRecordPtr tabPtr)
9037 FileRecordPtr filePtr;
9038 filePtr.i = tabPtr.p->tabFile[0];
9039 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
9040 /* ---------------------------------------------------------------------- */
9041 /* WE HAVE NOW COPIED TO OUR NODE. WE HAVE NOW COMPLETED RESTORING */
9042 /* THIS TABLE. CONTINUE WITH THE NEXT TABLE. */
9043 /* WE ALSO NEED TO CLOSE THE TABLE FILE. */
9044 /* ---------------------------------------------------------------------- */
9045 if (filePtr.p->fileStatus != FileRecord::OPEN) {
9046 jam();
9047 filePtr.i = tabPtr.p->tabFile[1];
9048 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
9049 }//if
9050 closeFile(signal, filePtr);
9051 filePtr.p->reqStatus = FileRecord::CLOSING_TABLE_SR;
9052 return;
9053 }//Dbdih::readTableFromPagesLab()
9055 void Dbdih::closingTableSrLab(Signal* signal, FileRecordPtr filePtr)
9058 * Update table/fragment info
9060 TabRecordPtr tabPtr;
9061 tabPtr.i = filePtr.p->tabRef;
9062 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9063 resetReplicaSr(tabPtr);
9065 signal->theData[0] = DihContinueB::ZCOPY_TABLE;
9066 signal->theData[1] = filePtr.p->tabRef;
9067 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
9069 return;
9070 }//Dbdih::closingTableSrLab()
// Rebuilds the replica lists of every fragment of the given table.
// Per fragment: prepareReplicas() is called first (per the comment below it
// moves all replicas to oldStoredReplicas), then each old stored replica
// whose node is ALIVE and in one of the NS_Active* states is examined:
//   - its crash history is updated relative to SYSFILE->newestRestorableGCI,
//   - its LCP records are pruned by resetReplicaLcp(),
//   - it is moved back to the stored list when the table is not ST_NORMAL
//     or setup_create_replica() succeeds; otherwise the node's active
//     status is set to NS_NotActive_NotTakenOver and an info event is
//     logged.
// Finally updateNodeInfo() is called for the fragment.
// NOTE(review): lines holding only braces are missing from this listing;
// the nesting described here is inferred -- confirm against the real file.
9072 void
9073 Dbdih::resetReplicaSr(TabRecordPtr tabPtr){
9075 const Uint32 newestRestorableGCI = SYSFILE->newestRestorableGCI;
9077 for(Uint32 i = 0; i<tabPtr.p->totalfragments; i++){
9078 FragmentstorePtr fragPtr;
9079 getFragstore(tabPtr.p, i, fragPtr);
9082 * 1) Start by moving all replicas into oldStoredReplicas
9084 prepareReplicas(fragPtr);
9087 * 2) Move all "alive" replicas into storedReplicas
9088 * + update noCrashedReplicas...
9090 ReplicaRecordPtr replicaPtr;
9091 replicaPtr.i = fragPtr.p->oldStoredReplicas;
9092 while (replicaPtr.i != RNIL) {
9093 jam();
9094 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
// Remember the successor now; it is used to advance the loop after the
// replica may have been relinked below.
9095 const Uint32 nextReplicaPtrI = replicaPtr.p->nextReplica;
9097 NodeRecordPtr nodePtr;
9098 nodePtr.i = replicaPtr.p->procNode;
9099 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
9101 const Uint32 noCrashedReplicas = replicaPtr.p->noCrashedReplicas;
9102 if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
9103 jam();
9104 switch (nodePtr.p->activeStatus) {
9105 case Sysfile::NS_Active:
9106 case Sysfile::NS_ActiveMissed_1:
9107 case Sysfile::NS_ActiveMissed_2:{
9108 jam();
9109 /* --------------------------------------------------------------- */
9110 /* THE NODE IS ALIVE AND KICKING AND ACTIVE, LET'S USE IT. */
9111 /* --------------------------------------------------------------- */
// The index into the per-crash arrays is guarded against the bound 8.
9112 arrGuardErr(noCrashedReplicas, 8, NDBD_EXIT_MAX_CRASHED_REPLICAS);
9113 Uint32 lastGci = replicaPtr.p->replicaLastGci[noCrashedReplicas];
9114 if(lastGci >= newestRestorableGCI){
9115 jam();
9116 /** -------------------------------------------------------------
9117 * THE REPLICA WAS ALIVE AT THE SYSTEM FAILURE. WE WILL SET THE
9118 * LAST REPLICA GCI TO MINUS ONE SINCE IT HASN'T FAILED YET IN THE
9119 * NEW SYSTEM.
9120 *-------------------------------------------------------------- */
9121 replicaPtr.p->replicaLastGci[noCrashedReplicas] = (Uint32)-1;
9122 } else {
9123 jam();
9124 /*--------------------------------------------------------------
9125 * SINCE IT WAS NOT ALIVE AT THE TIME OF THE SYSTEM CRASH THIS IS
9126 * A COMPLETELY NEW REPLICA. WE WILL SET THE CREATE GCI TO BE THE
9127 * NEXT GCI TO BE EXECUTED.
9128 *--------_----------------------------------------------------- */
// Note: noCrashedReplicas is incremented before the new index is range
// checked by arrGuardErr.
9129 const Uint32 nextCrashed = noCrashedReplicas + 1;
9130 replicaPtr.p->noCrashedReplicas = nextCrashed;
9131 arrGuardErr(nextCrashed, 8, NDBD_EXIT_MAX_CRASHED_REPLICAS);
9132 replicaPtr.p->createGci[nextCrashed] = newestRestorableGCI + 1;
9133 ndbrequire(newestRestorableGCI + 1 != 0xF1F1F1F1);
9134 replicaPtr.p->replicaLastGci[nextCrashed] = (Uint32)-1;
9135 }//if
9137 resetReplicaLcp(replicaPtr.p, newestRestorableGCI);
9140 * Make sure we can also find REDO for restoring replica...
9143 CreateReplicaRecord createReplica;
9144 ConstPtr<ReplicaRecord> constReplicaPtr;
9145 constReplicaPtr.i = replicaPtr.i;
9146 constReplicaPtr.p = replicaPtr.p;
// setup_create_replica() is only consulted for ST_NORMAL tables
// (short-circuit evaluation of the ||).
9147 if (tabPtr.p->tabStorage != TabRecord::ST_NORMAL ||
9148 setup_create_replica(fragPtr,
9149 &createReplica, constReplicaPtr))
9151 jam();
9152 removeOldStoredReplica(fragPtr, replicaPtr);
9153 linkStoredReplica(fragPtr, replicaPtr);
9155 else
9157 jam();
9158 infoEvent("Forcing take-over of node %d due to unsufficient REDO"
9159 " for table %d fragment: %d",
9160 nodePtr.i, tabPtr.i, i);
9162 setNodeActiveStatus(nodePtr.i,
9163 Sysfile::NS_NotActive_NotTakenOver);
// NOTE(review): no 'break' is visible before 'default:' in this listing, so
// the case above appears to fall through into the (empty) default branch.
9167 default:
9168 jam();
9169 /*empty*/;
9170 break;
9173 replicaPtr.i = nextReplicaPtrI;
9174 }//while
9175 updateNodeInfo(fragPtr);
9179 void
9180 Dbdih::resetReplicaLcp(ReplicaRecord * replicaP, Uint32 stopGci){
9182 Uint32 lcpNo = replicaP->nextLcp;
9183 const Uint32 startLcpNo = lcpNo;
9184 do {
9185 lcpNo = prevLcpNo(lcpNo);
9186 ndbrequire(lcpNo < MAX_LCP_STORED);
9187 if (replicaP->lcpStatus[lcpNo] == ZVALID) {
9188 if (replicaP->maxGciStarted[lcpNo] < stopGci) {
9189 jam();
9190 /* ----------------------------------------------------------------- */
9191 /* WE HAVE FOUND A USEFUL LOCAL CHECKPOINT THAT CAN BE USED FOR */
9192 /* RESTARTING THIS FRAGMENT REPLICA. */
9193 /* ----------------------------------------------------------------- */
9194 return ;
9195 }//if
9196 }//if
9199 * WE COULD NOT USE THIS LOCAL CHECKPOINT. IT WAS TOO
9200 * RECENT OR SIMPLY NOT A VALID CHECKPOINT.
9201 * WE SHOULD THUS REMOVE THIS LOCAL CHECKPOINT SINCE IT WILL NEVER
9202 * AGAIN BE USED. SET LCP_STATUS TO INVALID.
9204 replicaP->nextLcp = lcpNo;
9205 replicaP->lcpId[lcpNo] = 0;
9206 replicaP->lcpStatus[lcpNo] = ZINVALID;
9207 } while (lcpNo != startLcpNo);
9209 replicaP->nextLcp = 0;
9212 void Dbdih::readingTableErrorLab(Signal* signal, FileRecordPtr filePtr)
9214 TabRecordPtr tabPtr;
9215 tabPtr.i = filePtr.p->tabRef;
9216 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9217 /* ---------------------------------------------------------------------- */
9218 /* READING THIS FILE FAILED. CLOSE IT AFTER RELEASING ALL PAGES. */
9219 /* ---------------------------------------------------------------------- */
9220 ndbrequire(tabPtr.p->noPages <= 8);
9221 for (Uint32 i = 0; i < tabPtr.p->noPages; i++) {
9222 jam();
9223 releasePage(tabPtr.p->pageRef[i]);
9224 }//for
9225 closeFile(signal, filePtr);
9226 filePtr.p->reqStatus = FileRecord::CLOSING_TABLE_CRASH;
9227 return;
9228 }//Dbdih::readingTableErrorLab()
9230 void Dbdih::closingTableCrashLab(Signal* signal, FileRecordPtr filePtr)
9232 TabRecordPtr tabPtr;
9233 /* ---------------------------------------------------------------------- */
9234 /* WE HAVE NOW CLOSED A FILE WHICH WE HAD A READ ERROR WITH. PROCEED */
9235 /* WITH NEXT FILE IF NOT THE LAST OTHERWISE REPORT ERROR. */
9236 /* ---------------------------------------------------------------------- */
9237 tabPtr.i = filePtr.p->tabRef;
9238 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9239 ndbrequire(filePtr.i == tabPtr.p->tabFile[0]);
9240 filePtr.i = tabPtr.p->tabFile[1];
9241 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
9242 openFileRw(signal, filePtr);
9243 filePtr.p->reqStatus = FileRecord::OPENING_TABLE;
9244 }//Dbdih::closingTableCrashLab()
9246 /*****************************************************************************/
9247 /* ********** COPY TABLE MODULE *************/
9248 /*****************************************************************************/
9249 void Dbdih::execCOPY_TABREQ(Signal* signal)
9251 CRASH_INSERTION(7172);
9253 TabRecordPtr tabPtr;
9254 PageRecordPtr pagePtr;
9255 jamEntry();
9256 BlockReference ref = signal->theData[0];
9257 Uint32 reqinfo = signal->theData[1];
9258 tabPtr.i = signal->theData[2];
9259 Uint32 schemaVersion = signal->theData[3];
9260 Uint32 noOfWords = signal->theData[4];
9261 ndbrequire(ref == cmasterdihref);
9262 ndbrequire(!isMaster());
9263 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9264 if (reqinfo == 1) {
9265 jam();
9266 tabPtr.p->schemaVersion = schemaVersion;
9267 initTableFile(tabPtr);
9268 }//if
9269 ndbrequire(tabPtr.p->noPages < 8);
9270 if (tabPtr.p->noOfWords == 0) {
9271 jam();
9272 allocpage(pagePtr);
9273 tabPtr.p->pageRef[tabPtr.p->noPages] = pagePtr.i;
9274 tabPtr.p->noPages++;
9275 } else {
9276 jam();
9277 pagePtr.i = tabPtr.p->pageRef[tabPtr.p->noPages - 1];
9278 ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
9279 }//if
9280 ndbrequire(tabPtr.p->noOfWords + 15 < 2048);
9281 ndbrequire(tabPtr.p->noOfWords < 2048);
9282 MEMCOPY_NO_WORDS(&pagePtr.p->word[tabPtr.p->noOfWords], &signal->theData[5], 16);
9283 tabPtr.p->noOfWords += 16;
9284 if (tabPtr.p->noOfWords == 2048) {
9285 jam();
9286 tabPtr.p->noOfWords = 0;
9287 }//if
9288 if (noOfWords > 16) {
9289 jam();
9290 return;
9291 }//if
9292 tabPtr.p->noOfWords = 0;
9293 ndbrequire(tabPtr.p->tabCopyStatus == TabRecord::CS_IDLE);
9294 tabPtr.p->tabCopyStatus = TabRecord::CS_COPY_TAB_REQ;
9295 signal->theData[0] = DihContinueB::ZREAD_PAGES_INTO_TABLE;
9296 signal->theData[1] = tabPtr.i;
9297 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
9298 }//Dbdih::execCOPY_TABREQ()
9300 void
9301 Dbdih::copyTabReq_complete(Signal* signal, TabRecordPtr tabPtr){
9302 if (!isMaster()) {
9303 jam();
9304 //----------------------------------------------------------------------------
9305 // In this particular case we do not release table pages if we are master. The
9306 // reason is that the master could still be sending the table info to another
9307 // node.
9308 //----------------------------------------------------------------------------
9309 releaseTabPages(tabPtr.i);
9310 tabPtr.p->tabStatus = TabRecord::TS_ACTIVE;
9311 for (Uint32 fragId = 0; fragId < tabPtr.p->totalfragments; fragId++) {
9312 jam();
9313 FragmentstorePtr fragPtr;
9314 getFragstore(tabPtr.p, fragId, fragPtr);
9315 updateNodeInfo(fragPtr);
9316 }//for
9317 }//if
9318 signal->theData[0] = cownNodeId;
9319 signal->theData[1] = tabPtr.i;
9320 sendSignal(cmasterdihref, GSN_COPY_TABCONF, signal, 2, JBB);
9323 /*****************************************************************************/
9324 /* ****** READ FROM A NUMBER OF PAGES INTO THE TABLE DATA STRUCTURES ********/
9325 /*****************************************************************************/
9326 void Dbdih::readPagesIntoTableLab(Signal* signal, Uint32 tableId)
9328 RWFragment rf;
9329 rf.wordIndex = 35;
9330 rf.pageIndex = 0;
9331 rf.rwfTabPtr.i = tableId;
9332 ptrCheckGuard(rf.rwfTabPtr, ctabFileSize, tabRecord);
9333 rf.rwfPageptr.i = rf.rwfTabPtr.p->pageRef[0];
9334 ptrCheckGuard(rf.rwfPageptr, cpageFileSize, pageRecord);
9335 rf.rwfTabPtr.p->totalfragments = readPageWord(&rf);
9336 rf.rwfTabPtr.p->noOfBackups = readPageWord(&rf);
9337 rf.rwfTabPtr.p->hashpointer = readPageWord(&rf);
9338 rf.rwfTabPtr.p->kvalue = readPageWord(&rf);
9339 rf.rwfTabPtr.p->mask = readPageWord(&rf);
9340 rf.rwfTabPtr.p->method = (TabRecord::Method)readPageWord(&rf);
9341 /* ------------- */
9342 /* Type of table */
9343 /* ------------- */
9344 rf.rwfTabPtr.p->tabStorage = (TabRecord::Storage)(readPageWord(&rf));
9346 Uint32 noOfFrags = rf.rwfTabPtr.p->totalfragments;
9347 ndbrequire(noOfFrags > 0);
9348 ndbrequire((noOfFrags * (rf.rwfTabPtr.p->noOfBackups + 1)) <= cnoFreeReplicaRec);
9349 allocFragments(noOfFrags, rf.rwfTabPtr);
9351 signal->theData[0] = DihContinueB::ZREAD_PAGES_INTO_FRAG;
9352 signal->theData[1] = rf.rwfTabPtr.i;
9353 signal->theData[2] = 0;
9354 signal->theData[3] = rf.pageIndex;
9355 signal->theData[4] = rf.wordIndex;
9356 sendSignal(reference(), GSN_CONTINUEB, signal, 5, JBB);
9357 return;
9358 }//Dbdih::readPagesIntoTableLab()
9360 void Dbdih::readPagesIntoFragLab(Signal* signal, RWFragment* rf)
9362 ndbrequire(rf->pageIndex < 8);
9363 rf->rwfPageptr.i = rf->rwfTabPtr.p->pageRef[rf->pageIndex];
9364 ptrCheckGuard(rf->rwfPageptr, cpageFileSize, pageRecord);
9365 FragmentstorePtr fragPtr;
9366 getFragstore(rf->rwfTabPtr.p, rf->fragId, fragPtr);
9367 readFragment(rf, fragPtr);
9368 readReplicas(rf, fragPtr);
9369 rf->fragId++;
9370 if (rf->fragId == rf->rwfTabPtr.p->totalfragments) {
9371 jam();
9372 switch (rf->rwfTabPtr.p->tabCopyStatus) {
9373 case TabRecord::CS_SR_PHASE1_READ_PAGES:
9374 jam();
9375 releaseTabPages(rf->rwfTabPtr.i);
9376 rf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
9377 signal->theData[0] = DihContinueB::ZREAD_TABLE_FROM_PAGES;
9378 signal->theData[1] = rf->rwfTabPtr.i;
9379 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
9380 return;
9381 break;
9382 case TabRecord::CS_COPY_TAB_REQ:
9383 jam();
9384 rf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
9385 if(getNodeState().getSystemRestartInProgress()){
9386 jam();
9387 copyTabReq_complete(signal, rf->rwfTabPtr);
9388 return;
9390 rf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
9391 rf->rwfTabPtr.p->tabUpdateState = TabRecord::US_COPY_TAB_REQ;
9392 signal->theData[0] = DihContinueB::ZTABLE_UPDATE;
9393 signal->theData[1] = rf->rwfTabPtr.i;
9394 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
9395 return;
9396 break;
9397 default:
9398 ndbrequire(false);
9399 return;
9400 break;
9401 }//switch
9402 } else {
9403 jam();
9404 signal->theData[0] = DihContinueB::ZREAD_PAGES_INTO_FRAG;
9405 signal->theData[1] = rf->rwfTabPtr.i;
9406 signal->theData[2] = rf->fragId;
9407 signal->theData[3] = rf->pageIndex;
9408 signal->theData[4] = rf->wordIndex;
9409 sendSignal(reference(), GSN_CONTINUEB, signal, 5, JBB);
9410 }//if
9411 return;
9412 }//Dbdih::readPagesIntoFragLab()
9414 /*****************************************************************************/
9415 /***** WRITING FROM TABLE DATA STRUCTURES INTO A SET OF PAGES ******/
9416 // execCONTINUEB(ZPACK_TABLE_INTO_PAGES)
9417 /*****************************************************************************/
9418 void Dbdih::packTableIntoPagesLab(Signal* signal, Uint32 tableId)
9420 RWFragment wf;
9421 TabRecordPtr tabPtr;
9422 allocpage(wf.rwfPageptr);
9423 tabPtr.i = tableId;
9424 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9425 tabPtr.p->pageRef[0] = wf.rwfPageptr.i;
9426 tabPtr.p->noPages = 1;
9427 wf.wordIndex = 35;
9428 wf.pageIndex = 0;
9429 writePageWord(&wf, tabPtr.p->totalfragments);
9430 writePageWord(&wf, tabPtr.p->noOfBackups);
9431 writePageWord(&wf, tabPtr.p->hashpointer);
9432 writePageWord(&wf, tabPtr.p->kvalue);
9433 writePageWord(&wf, tabPtr.p->mask);
9434 writePageWord(&wf, tabPtr.p->method);
9435 writePageWord(&wf, tabPtr.p->tabStorage);
9437 signal->theData[0] = DihContinueB::ZPACK_FRAG_INTO_PAGES;
9438 signal->theData[1] = tabPtr.i;
9439 signal->theData[2] = 0;
9440 signal->theData[3] = wf.pageIndex;
9441 signal->theData[4] = wf.wordIndex;
9442 sendSignal(reference(), GSN_CONTINUEB, signal, 5, JBB);
9443 }//Dbdih::packTableIntoPagesLab()
9445 /*****************************************************************************/
9446 // execCONTINUEB(ZPACK_FRAG_INTO_PAGES)
9447 /*****************************************************************************/
9448 void Dbdih::packFragIntoPagesLab(Signal* signal, RWFragment* wf)
9450 ndbrequire(wf->pageIndex < 8);
9451 wf->rwfPageptr.i = wf->rwfTabPtr.p->pageRef[wf->pageIndex];
9452 ptrCheckGuard(wf->rwfPageptr, cpageFileSize, pageRecord);
9453 FragmentstorePtr fragPtr;
9454 getFragstore(wf->rwfTabPtr.p, wf->fragId, fragPtr);
9455 writeFragment(wf, fragPtr);
9456 writeReplicas(wf, fragPtr.p->storedReplicas);
9457 writeReplicas(wf, fragPtr.p->oldStoredReplicas);
9458 wf->fragId++;
9459 if (wf->fragId == wf->rwfTabPtr.p->totalfragments) {
9460 jam();
9461 PageRecordPtr pagePtr;
9462 pagePtr.i = wf->rwfTabPtr.p->pageRef[0];
9463 ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
9464 pagePtr.p->word[33] = wf->rwfTabPtr.p->noPages;
9465 pagePtr.p->word[34] = ((wf->rwfTabPtr.p->noPages - 1) * 2048) + wf->wordIndex;
9466 switch (wf->rwfTabPtr.p->tabCopyStatus) {
9467 case TabRecord::CS_SR_PHASE2_READ_TABLE:
9468 /* -------------------------------------------------------------------*/
9469 // We are performing a system restart and we are now ready to copy the
9470 // table from this node (the master) to all other nodes.
9471 /* -------------------------------------------------------------------*/
9472 jam();
9473 wf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
9474 signal->theData[0] = DihContinueB::ZSR_PHASE2_READ_TABLE;
9475 signal->theData[1] = wf->rwfTabPtr.i;
9476 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
9477 return;
9478 break;
9479 case TabRecord::CS_COPY_NODE_STATE:
9480 jam();
9481 tableCopyNodeLab(signal, wf->rwfTabPtr);
9482 return;
9483 break;
9484 case TabRecord::CS_LCP_READ_TABLE:
9485 jam();
9486 signal->theData[0] = DihContinueB::ZTABLE_UPDATE;
9487 signal->theData[1] = wf->rwfTabPtr.i;
9488 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
9489 return;
9490 break;
9491 case TabRecord::CS_REMOVE_NODE:
9492 case TabRecord::CS_INVALIDATE_NODE_LCP:
9493 jam();
9494 signal->theData[0] = DihContinueB::ZTABLE_UPDATE;
9495 signal->theData[1] = wf->rwfTabPtr.i;
9496 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
9497 return;
9498 break;
9499 case TabRecord::CS_ADD_TABLE_MASTER:
9500 jam();
9501 wf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
9502 signal->theData[0] = DihContinueB::ZADD_TABLE_MASTER_PAGES;
9503 signal->theData[1] = wf->rwfTabPtr.i;
9504 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
9505 return;
9506 break;
9507 case TabRecord::CS_ADD_TABLE_SLAVE:
9508 jam();
9509 wf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
9510 signal->theData[0] = DihContinueB::ZADD_TABLE_SLAVE_PAGES;
9511 signal->theData[1] = wf->rwfTabPtr.i;
9512 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
9513 return;
9514 break;
9515 default:
9516 ndbrequire(false);
9517 return;
9518 break;
9519 }//switch
9520 } else {
9521 jam();
9522 signal->theData[0] = DihContinueB::ZPACK_FRAG_INTO_PAGES;
9523 signal->theData[1] = wf->rwfTabPtr.i;
9524 signal->theData[2] = wf->fragId;
9525 signal->theData[3] = wf->pageIndex;
9526 signal->theData[4] = wf->wordIndex;
9527 sendSignal(reference(), GSN_CONTINUEB, signal, 5, JBB);
9528 }//if
9529 return;
9530 }//Dbdih::packFragIntoPagesLab()
9532 /*****************************************************************************/
9533 /* ********** START FRAGMENT MODULE *************/
9534 /*****************************************************************************/
9535 void
9536 Dbdih::dump_replica_info()
9538 TabRecordPtr tabPtr;
9539 FragmentstorePtr fragPtr;
9541 for(tabPtr.i = 0; tabPtr.i < ctabFileSize; tabPtr.i++)
9543 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9544 if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
9545 continue;
9547 for(Uint32 fid = 0; fid<tabPtr.p->totalfragments; fid++)
9549 getFragstore(tabPtr.p, fid, fragPtr);
9550 ndbout_c("tab: %d frag: %d gci: %d\n -- storedReplicas:",
9551 tabPtr.i, fid, SYSFILE->newestRestorableGCI);
9553 Uint32 i;
9554 ReplicaRecordPtr replicaPtr;
9555 replicaPtr.i = fragPtr.p->storedReplicas;
9556 for(; replicaPtr.i != RNIL; replicaPtr.i = replicaPtr.p->nextReplica)
9558 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
9559 ndbout_c(" node: %d initialGci: %d nextLcp: %d noCrashedReplicas: %d",
9560 replicaPtr.p->procNode,
9561 replicaPtr.p->initialGci,
9562 replicaPtr.p->nextLcp,
9563 replicaPtr.p->noCrashedReplicas);
9564 for(i = 0; i<MAX_LCP_STORED; i++)
9566 ndbout_c(" i: %d %s : lcpId: %d maxGci Completed: %d Started: %d",
9568 (replicaPtr.p->lcpStatus[i] == ZVALID ?"VALID":"INVALID"),
9569 replicaPtr.p->lcpId[i],
9570 replicaPtr.p->maxGciCompleted[i],
9571 replicaPtr.p->maxGciStarted[i]);
9574 for (i = 0; i < 8; i++)
9576 ndbout_c(" crashed replica: %d replicaLastGci: %d createGci: %d",
9578 replicaPtr.p->replicaLastGci[i],
9579 replicaPtr.p->createGci[i]);
9582 ndbout_c(" -- oldStoredReplicas");
9583 replicaPtr.i = fragPtr.p->oldStoredReplicas;
9584 for(; replicaPtr.i != RNIL; replicaPtr.i = replicaPtr.p->nextReplica)
9586 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
9587 for(i = 0; i<MAX_LCP_STORED; i++)
9589 ndbout_c(" i: %d %s : lcpId: %d maxGci Completed: %d Started: %d",
9591 (replicaPtr.p->lcpStatus[i] == ZVALID ?"VALID":"INVALID"),
9592 replicaPtr.p->lcpId[i],
9593 replicaPtr.p->maxGciCompleted[i],
9594 replicaPtr.p->maxGciStarted[i]);
9597 for (i = 0; i < 8; i++)
9599 ndbout_c(" crashed replica: %d replicaLastGci: %d createGci: %d",
9601 replicaPtr.p->replicaLastGci[i],
9602 replicaPtr.p->createGci[i]);
9609 void Dbdih::startFragment(Signal* signal, Uint32 tableId, Uint32 fragId)
9611 Uint32 TloopCount = 0;
9612 TabRecordPtr tabPtr;
9613 while (true) {
9614 if (TloopCount > 100) {
9615 jam();
9616 signal->theData[0] = DihContinueB::ZSTART_FRAGMENT;
9617 signal->theData[1] = tableId;
9618 signal->theData[2] = 0;
9619 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
9620 return;
9623 if (tableId >= ctabFileSize) {
9624 jam();
9625 signal->theData[0] = DihContinueB::ZCOMPLETE_RESTART;
9626 sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
9627 return;
9628 }//if
9630 tabPtr.i = tableId;
9631 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9632 if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE){
9633 jam();
9634 TloopCount++;
9635 tableId++;
9636 fragId = 0;
9637 continue;
9640 if(tabPtr.p->tabStorage != TabRecord::ST_NORMAL){
9641 jam();
9642 TloopCount++;
9643 tableId++;
9644 fragId = 0;
9645 continue;
9648 jam();
9649 break;
9650 }//while
9652 FragmentstorePtr fragPtr;
9653 getFragstore(tabPtr.p, fragId, fragPtr);
9654 /* ----------------------------------------------------------------------- */
9655 /* WE NEED TO RESET THE REPLICA DATA STRUCTURES. THIS MEANS THAT WE */
9656 /* MUST REMOVE REPLICAS THAT WAS NOT STARTED AT THE GCI TO RESTORE. WE */
9657 /* NEED TO PUT ALL STORED REPLICAS ON THE LIST OF OLD STORED REPLICAS */
9658 /* RESET THE NUMBER OF REPLICAS TO CREATE. */
9659 /* ----------------------------------------------------------------------- */
9660 cnoOfCreateReplicas = 0;
9661 /* ----------------------------------------------------------------------- */
9662 /* WE WILL NEVER START MORE THAN FOUR FRAGMENT REPLICAS WHATEVER THE */
9663 /* DESIRED REPLICATION IS. */
9664 /* ----------------------------------------------------------------------- */
9665 ndbrequire(tabPtr.p->noOfBackups < 4);
9666 /* ----------------------------------------------------------------------- */
9667 /* SEARCH FOR STORED REPLICAS THAT CAN BE USED TO RESTART THE SYSTEM. */
9668 /* ----------------------------------------------------------------------- */
9669 searchStoredReplicas(fragPtr);
9671 if (cnoOfCreateReplicas == 0) {
9672 /* --------------------------------------------------------------------- */
9673 /* THERE WERE NO STORED REPLICAS AVAILABLE THAT CAN SERVE AS REPLICA TO*/
9674 /* RESTART THE SYSTEM FROM. IN A LATER RELEASE WE WILL ADD */
9675 /* FUNCTIONALITY TO CHECK IF THERE ARE ANY STANDBY NODES THAT COULD DO */
9676 /* THIS TASK INSTEAD IN THIS IMPLEMENTATION WE SIMPLY CRASH THE SYSTEM.*/
9677 /* THIS WILL DECREASE THE GCI TO RESTORE WHICH HOPEFULLY WILL MAKE IT */
9678 /* POSSIBLE TO RESTORE THE SYSTEM. */
9679 /* --------------------------------------------------------------------- */
9680 char buf[64];
9681 BaseString::snprintf(buf, sizeof(buf), "table: %d fragment: %d gci: %d",
9682 tableId, fragId, SYSFILE->newestRestorableGCI);
9684 ndbout_c(buf);
9685 dump_replica_info();
9687 progError(__LINE__, NDBD_EXIT_NO_RESTORABLE_REPLICA, buf);
9688 ndbrequire(false);
9689 return;
9690 }//if
9692 /* ----------------------------------------------------------------------- */
9693 /* WE HAVE CHANGED THE NODE TO BE PRIMARY REPLICA AND THE NODES TO BE */
9694 /* BACKUP NODES. WE MUST UPDATE THIS NODES DATA STRUCTURE SINCE WE */
9695 /* WILL NOT COPY THE TABLE DATA TO OURSELF. */
9696 /* ----------------------------------------------------------------------- */
9697 updateNodeInfo(fragPtr);
9698 /* ----------------------------------------------------------------------- */
9699 /* NOW WE HAVE COLLECTED ALL THE REPLICAS WE COULD GET. WE WILL NOW */
9700 /* RESTART THE FRAGMENT REPLICAS WE HAVE FOUND IRRESPECTIVE OF IF THERE*/
9701 /* ARE ENOUGH ACCORDING TO THE DESIRED REPLICATION. */
9702 /* ----------------------------------------------------------------------- */
9703 /* WE START BY SENDING ADD_FRAGREQ FOR THOSE REPLICAS THAT NEED IT. */
9704 /* ----------------------------------------------------------------------- */
9705 CreateReplicaRecordPtr createReplicaPtr;
9706 for (createReplicaPtr.i = 0;
9707 createReplicaPtr.i < cnoOfCreateReplicas;
9708 createReplicaPtr.i++) {
9709 jam();
9710 ptrCheckGuard(createReplicaPtr, 4, createReplicaRecord);
9711 createReplicaPtr.p->hotSpareUse = false;
9712 }//for
9714 sendStartFragreq(signal, tabPtr, fragId);
9717 * Don't wait for START_FRAGCONF
9719 fragId++;
9720 if (fragId >= tabPtr.p->totalfragments) {
9721 jam();
9722 tabPtr.i++;
9723 fragId = 0;
9724 }//if
9725 signal->theData[0] = DihContinueB::ZSTART_FRAGMENT;
9726 signal->theData[1] = tabPtr.i;
9727 signal->theData[2] = fragId;
9728 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
9730 return;
9731 }//Dbdih::startFragmentLab()
9734 /*****************************************************************************/
9735 /* ********** COMPLETE RESTART MODULE *************/
9736 /*****************************************************************************/
9737 void Dbdih::completeRestartLab(Signal* signal)
9739 sendLoopMacro(START_RECREQ, sendSTART_RECREQ);
9740 }//completeRestartLab()
9742 /* ------------------------------------------------------------------------- */
9743 // SYSTEM RESTART:
9744 /* A NODE HAS COMPLETED RESTORING ALL DATABASE FRAGMENTS. */
9745 // NODE RESTART:
9746 // THE STARTING NODE HAS PREPARED ITS LOG FILES TO ENABLE EXECUTION
9747 // OF TRANSACTIONS.
9748 // Precondition:
9749 // This signal must be received by the master node.
9750 /* ------------------------------------------------------------------------- */
9751 void Dbdih::execSTART_RECCONF(Signal* signal)
9753 jamEntry();
9754 Uint32 senderNodeId = signal->theData[0];
9755 ndbrequire(isMaster());
9756 if (getNodeState().startLevel >= NodeState::SL_STARTED){
9757 /* --------------------------------------------------------------------- */
9758 // Since our node is already up and running this must be a node restart.
9759 // This means that we should be the master node,
9760 // otherwise we have a problem.
9761 /* --------------------------------------------------------------------- */
9762 jam();
9763 ndbout_c("startNextCopyFragment");
9764 startNextCopyFragment(signal, findTakeOver(senderNodeId));
9765 return;
9766 } else {
9767 /* --------------------------------------------------------------------- */
9768 // This was the system restart case. We set the state indicating that the
9769 // node has completed restoration of all fragments.
9770 /* --------------------------------------------------------------------- */
9771 receiveLoopMacro(START_RECREQ, senderNodeId);
9773 signal->theData[0] = reference();
9774 sendSignal(cntrlblockref, GSN_NDB_STARTCONF, signal, 1, JBB);
9775 return;
9776 }//if
9777 }//Dbdih::execSTART_RECCONF()
9779 void Dbdih::copyNodeLab(Signal* signal, Uint32 tableId)
9781 /* ----------------------------------------------------------------------- */
9782 // This code is executed by the master to assist a node restart in receiving
9783 // the data in the master.
9784 /* ----------------------------------------------------------------------- */
9785 Uint32 TloopCount = 0;
9787 if (!c_nodeStartMaster.activeState) {
9788 jam();
9789 /* --------------------------------------------------------------------- */
9790 // Obviously the node crashed in the middle of its node restart. We will
9791 // stop this process simply by returning after resetting the wait indicator.
9792 /* ---------------------------------------------------------------------- */
9793 c_nodeStartMaster.wait = ZFALSE;
9794 return;
9795 }//if
9796 TabRecordPtr tabPtr;
9797 tabPtr.i = tableId;
9798 while (tabPtr.i < ctabFileSize) {
9799 ptrAss(tabPtr, tabRecord);
9800 if (tabPtr.p->tabStatus == TabRecord::TS_ACTIVE) {
9801 /* -------------------------------------------------------------------- */
9802 // The table is defined. We will start by packing the table into pages.
9803 // The tabCopyStatus indicates to the CONTINUEB(ZPACK_TABLE_INTO_PAGES)
9804 // who called it. After packing the table into page(s) it will be sent to
9805 // the starting node by COPY_TABREQ signals. After returning from the
9806 // starting node we will return to this subroutine and continue
9807 // with the next table.
9808 /* -------------------------------------------------------------------- */
9809 ndbrequire(tabPtr.p->tabCopyStatus == TabRecord::CS_IDLE);
9810 tabPtr.p->tabCopyStatus = TabRecord::CS_COPY_NODE_STATE;
9811 signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
9812 signal->theData[1] = tabPtr.i;
9813 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
9814 return;
9815 } else {
9816 jam();
9817 if (TloopCount > 100) {
9818 /* ------------------------------------------------------------------ */
9819 // Introduce real-time break after looping through 100 not copied tables
9820 /* ----------------------------------------------------------------- */
9821 jam();
9822 signal->theData[0] = DihContinueB::ZCOPY_NODE;
9823 signal->theData[1] = tabPtr.i + 1;
9824 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
9825 return;
9826 } else {
9827 jam();
9828 TloopCount++;
9829 tabPtr.i++;
9830 }//if
9831 }//if
9832 }//while
9833 dihCopyCompletedLab(signal);
9834 return;
9835 }//Dbdih::copyNodeLab()
9837 void Dbdih::tableCopyNodeLab(Signal* signal, TabRecordPtr tabPtr)
9839 /* ----------------------------------------------------------------------- */
9840 /* COPY PAGES READ TO STARTING NODE. */
9841 /* ----------------------------------------------------------------------- */
9842 if (!c_nodeStartMaster.activeState) {
9843 jam();
9844 releaseTabPages(tabPtr.i);
9845 c_nodeStartMaster.wait = ZFALSE;
9846 return;
9847 }//if
9848 NodeRecordPtr copyNodePtr;
9849 PageRecordPtr pagePtr;
9850 copyNodePtr.i = c_nodeStartMaster.startNode;
9851 ptrCheckGuard(copyNodePtr, MAX_NDB_NODES, nodeRecord);
9853 copyNodePtr.p->activeTabptr = tabPtr.i;
9854 pagePtr.i = tabPtr.p->pageRef[0];
9855 ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
9857 signal->theData[0] = DihContinueB::ZCOPY_TABLE_NODE;
9858 signal->theData[1] = tabPtr.i;
9859 signal->theData[2] = copyNodePtr.i;
9860 signal->theData[3] = 0;
9861 signal->theData[4] = 0;
9862 signal->theData[5] = pagePtr.p->word[34];
9863 sendSignal(reference(), GSN_CONTINUEB, signal, 6, JBB);
9864 }//Dbdih::tableCopyNodeLab()
9866 /* ------------------------------------------------------------------------- */
9867 // execCONTINUEB(ZCOPY_TABLE)
9868 // This routine is used to copy the table descriptions from the master to
9869 // other nodes. It is used in the system restart to copy from master to all
9870 // starting nodes.
9871 /* ------------------------------------------------------------------------- */
9872 void Dbdih::copyTableLab(Signal* signal, Uint32 tableId)
9874 TabRecordPtr tabPtr;
9875 tabPtr.i = tableId;
9876 ptrAss(tabPtr, tabRecord);
9878 ndbrequire(tabPtr.p->tabCopyStatus == TabRecord::CS_IDLE);
9879 tabPtr.p->tabCopyStatus = TabRecord::CS_SR_PHASE2_READ_TABLE;
9880 signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
9881 signal->theData[1] = tabPtr.i;
9882 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
9883 return;
9884 }//Dbdih::copyTableLab()
9886 /* ------------------------------------------------------------------------- */
9887 // execCONTINUEB(ZSR_PHASE2_READ_TABLE)
9888 /* ------------------------------------------------------------------------- */
9889 void Dbdih::srPhase2ReadTableLab(Signal* signal, TabRecordPtr tabPtr)
9891 /* ----------------------------------------------------------------------- */
9892 // We set the sendCOPY_TABREQState to ZACTIVE for all nodes since it is a long
9893 // process to send off all table descriptions. Thus we ensure that we do
9894 // not encounter race conditions where one node is completed before the
9895 // sending process is completed. This could lead to that we start off the
9896 // system before we actually finished all copying of table descriptions
9897 // and could lead to strange errors.
9898 /* ----------------------------------------------------------------------- */
9900 //sendLoopMacro(COPY_TABREQ, nullRoutine);
9902 breakCopyTableLab(signal, tabPtr, cfirstAliveNode);
9903 return;
9904 }//Dbdih::srPhase2ReadTableLab()
9906 /* ------------------------------------------------------------------------- */
9907 /* COPY PAGES READ TO ALL NODES. */
9908 /* ------------------------------------------------------------------------- */
9909 void Dbdih::breakCopyTableLab(Signal* signal, TabRecordPtr tabPtr, Uint32 nodeId)
9911 NodeRecordPtr nodePtr;
9912 nodePtr.i = nodeId;
9913 while (nodePtr.i != RNIL) {
9914 jam();
9915 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
9916 if (nodePtr.i == getOwnNodeId()){
9917 jam();
9918 /* ------------------------------------------------------------------- */
9919 /* NOT NECESSARY TO COPY TO MY OWN NODE. I ALREADY HAVE THE PAGES. */
9920 /* I DO HOWEVER NEED TO STORE THE TABLE DESCRIPTION ONTO DISK. */
9921 /* ------------------------------------------------------------------- */
9922 /* IF WE ARE MASTER WE ONLY NEED TO SAVE THE TABLE ON DISK. WE ALREADY */
9923 /* HAVE THE TABLE DESCRIPTION IN THE DATA STRUCTURES. */
9924 // AFTER COMPLETING THE WRITE TO DISK THE MASTER WILL ALSO SEND
9925 // COPY_TABCONF AS ALL THE OTHER NODES.
9926 /* ------------------------------------------------------------------- */
9927 c_COPY_TABREQ_Counter.setWaitingFor(nodePtr.i);
9928 tabPtr.p->tabUpdateState = TabRecord::US_COPY_TAB_REQ;
9929 signal->theData[0] = DihContinueB::ZTABLE_UPDATE;
9930 signal->theData[1] = tabPtr.i;
9931 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
9932 nodePtr.i = nodePtr.p->nextNode;
9933 } else {
9934 PageRecordPtr pagePtr;
9935 /* -------------------------------------------------------------------- */
9936 // RATHER THAN SENDING ALL COPY_TABREQ IN PARALLEL WE WILL SERIALISE THIS
9937 // ACTIVITY AND WILL THUS CALL breakCopyTableLab AGAIN WHEN COMPLETED THE
9938 // SENDING OF COPY_TABREQ'S.
9939 /* -------------------------------------------------------------------- */
9940 jam();
9941 tabPtr.p->tabCopyStatus = TabRecord::CS_SR_PHASE3_COPY_TABLE;
9942 pagePtr.i = tabPtr.p->pageRef[0];
9943 ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
9944 signal->theData[0] = DihContinueB::ZCOPY_TABLE_NODE;
9945 signal->theData[1] = tabPtr.i;
9946 signal->theData[2] = nodePtr.i;
9947 signal->theData[3] = 0;
9948 signal->theData[4] = 0;
9949 signal->theData[5] = pagePtr.p->word[34];
9950 sendSignal(reference(), GSN_CONTINUEB, signal, 6, JBB);
9951 return;
9952 }//if
9953 }//while
9954 /* ----------------------------------------------------------------------- */
9955 /* WE HAVE NOW SENT THE TABLE PAGES TO ALL NODES. EXIT AND WAIT FOR ALL */
9956 /* REPLIES. */
9957 /* ----------------------------------------------------------------------- */
9958 return;
9959 }//Dbdih::breakCopyTableLab()
9961 /* ------------------------------------------------------------------------- */
9962 // execCONTINUEB(ZCOPY_TABLE_NODE)
9963 /* ------------------------------------------------------------------------- */
9964 void Dbdih::copyTableNode(Signal* signal,
9965 CopyTableNode* ctn, NodeRecordPtr nodePtr)
9967 if (getNodeState().startLevel >= NodeState::SL_STARTED){
9968 /* --------------------------------------------------------------------- */
9969 // We are in the process of performing a node restart and are copying a
9970 // table description to a starting node. We will check that no nodes have
9971 // crashed in this process.
9972 /* --------------------------------------------------------------------- */
9973 if (!c_nodeStartMaster.activeState) {
9974 jam();
9975 /** ------------------------------------------------------------------
9976 * The starting node crashed. We will release table pages and stop this
9977 * copy process and allow new node restarts to start.
9978 * ------------------------------------------------------------------ */
9979 releaseTabPages(ctn->ctnTabPtr.i);
9980 c_nodeStartMaster.wait = ZFALSE;
9981 return;
9982 }//if
9983 }//if
9984 ndbrequire(ctn->pageIndex < 8);
9985 ctn->ctnPageptr.i = ctn->ctnTabPtr.p->pageRef[ctn->pageIndex];
9986 ptrCheckGuard(ctn->ctnPageptr, cpageFileSize, pageRecord);
9988 * If first page & firstWord reqinfo = 1 (first signal)
9990 Uint32 reqinfo = (ctn->pageIndex == 0) && (ctn->wordIndex == 0);
9991 if(reqinfo == 1){
9992 c_COPY_TABREQ_Counter.setWaitingFor(nodePtr.i);
9995 for (Uint32 i = 0; i < 16; i++) {
9996 jam();
9997 sendCopyTable(signal, ctn, calcDihBlockRef(nodePtr.i), reqinfo);
9998 reqinfo = 0;
9999 if (ctn->noOfWords <= 16) {
10000 jam();
10001 switch (ctn->ctnTabPtr.p->tabCopyStatus) {
10002 case TabRecord::CS_SR_PHASE3_COPY_TABLE:
10003 /* ------------------------------------------------------------------ */
10004 // We have copied the table description to this node.
10005 // We will now proceed
10006 // with sending the table description to the next node in the node list.
10007 /* ------------------------------------------------------------------ */
10008 jam();
10009 ctn->ctnTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
10010 breakCopyTableLab(signal, ctn->ctnTabPtr, nodePtr.p->nextNode);
10011 return;
10012 break;
10013 case TabRecord::CS_COPY_NODE_STATE:
10014 jam();
10015 ctn->ctnTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
10016 return;
10017 break;
10018 default:
10019 ndbrequire(false);
10020 break;
10021 }//switch
10022 } else {
10023 jam();
10024 ctn->wordIndex += 16;
10025 if (ctn->wordIndex == 2048) {
10026 jam();
10027 ctn->wordIndex = 0;
10028 ctn->pageIndex++;
10029 ndbrequire(ctn->pageIndex < 8);
10030 ctn->ctnPageptr.i = ctn->ctnTabPtr.p->pageRef[ctn->pageIndex];
10031 ptrCheckGuard(ctn->ctnPageptr, cpageFileSize, pageRecord);
10032 }//if
10033 ctn->noOfWords -= 16;
10034 }//if
10035 }//for
10036 signal->theData[0] = DihContinueB::ZCOPY_TABLE_NODE;
10037 signal->theData[1] = ctn->ctnTabPtr.i;
10038 signal->theData[2] = nodePtr.i;
10039 signal->theData[3] = ctn->pageIndex;
10040 signal->theData[4] = ctn->wordIndex;
10041 signal->theData[5] = ctn->noOfWords;
10042 sendSignal(reference(), GSN_CONTINUEB, signal, 6, JBB);
10043 }//Dbdih::copyTableNodeLab()
10045 void Dbdih::sendCopyTable(Signal* signal, CopyTableNode* ctn,
10046 BlockReference ref, Uint32 reqinfo)
10048 signal->theData[0] = reference();
10049 signal->theData[1] = reqinfo;
10050 signal->theData[2] = ctn->ctnTabPtr.i;
10051 signal->theData[3] = ctn->ctnTabPtr.p->schemaVersion;
10052 signal->theData[4] = ctn->noOfWords;
10053 ndbrequire(ctn->wordIndex + 15 < 2048);
10054 MEMCOPY_NO_WORDS(&signal->theData[5], &ctn->ctnPageptr.p->word[ctn->wordIndex], 16);
10055 sendSignal(ref, GSN_COPY_TABREQ, signal, 21, JBB);
10056 }//Dbdih::sendCopyTable()
10058 void Dbdih::execCOPY_TABCONF(Signal* signal)
10060 NodeRecordPtr nodePtr;
10061 jamEntry();
10062 nodePtr.i = signal->theData[0];
10063 Uint32 tableId = signal->theData[1];
10064 if (getNodeState().startLevel >= NodeState::SL_STARTED){
10065 /* --------------------------------------------------------------------- */
10066 // We are in the process of performing a node restart. Continue by copying
10067 // the next table to the starting node.
10068 /* --------------------------------------------------------------------- */
10069 jam();
10070 NodeRecordPtr nodePtr;
10071 nodePtr.i = signal->theData[0];
10072 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
10073 c_COPY_TABREQ_Counter.clearWaitingFor(nodePtr.i);
10075 releaseTabPages(tableId);
10076 signal->theData[0] = DihContinueB::ZCOPY_NODE;
10077 signal->theData[1] = tableId + 1;
10078 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
10079 return;
10080 } else {
10081 /* --------------------------------------------------------------------- */
10082 // We are in the process of performing a system restart. Check if all nodes
10083 // have saved the new table description to file and then continue with the
10084 // next table.
10085 /* --------------------------------------------------------------------- */
10086 receiveLoopMacro(COPY_TABREQ, nodePtr.i);
10087 /* --------------------------------------------------------------------- */
10088 /* WE HAVE NOW COPIED TO ALL NODES. WE HAVE NOW COMPLETED RESTORING */
10089 /* THIS TABLE. CONTINUE WITH THE NEXT TABLE. */
10090 /* WE NEED TO RELEASE THE PAGES IN THE TABLE IN THIS NODE HERE. */
10091 /* WE ALSO NEED TO CLOSE THE TABLE FILE. */
10092 /* --------------------------------------------------------------------- */
10093 releaseTabPages(tableId);
10095 TabRecordPtr tabPtr;
10096 tabPtr.i = tableId;
10097 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
10099 ConnectRecordPtr connectPtr;
10100 connectPtr.i = tabPtr.p->connectrec;
10101 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
10103 sendAddFragreq(signal, connectPtr, tabPtr, 0);
10104 return;
10105 }//if
10106 }//Dbdih::execCOPY_TABCONF()
/*
  3.13   L O C A L  C H E C K P O I N T  (M A S T E R)
  ****************************************************
*/
10112 /*****************************************************************************/
10113 /* ********** LOCAL-CHECK-POINT-HANDLING MODULE *************/
10114 /*****************************************************************************/
10115 /* ------------------------------------------------------------------------- */
10116 /* IT IS TIME TO CHECK IF IT IS TIME TO START A LOCAL CHECKPOINT. */
10117 /* WE WILL EITHER START AFTER 1 MILLION WORDS HAVE ARRIVED OR WE WILL */
10118 /* EXECUTE AFTER ABOUT 16 MINUTES HAVE PASSED BY. */
10119 /* ------------------------------------------------------------------------- */
10120 void Dbdih::checkTcCounterLab(Signal* signal)
10122 CRASH_INSERTION(7009);
10123 if (c_lcpState.lcpStatus != LCP_STATUS_IDLE) {
10124 g_eventLogger.error("lcpStatus = %u"
10125 "lcpStatusUpdatedPlace = %d",
10126 (Uint32) c_lcpState.lcpStatus,
10127 c_lcpState.lcpStatusUpdatedPlace);
10128 ndbrequire(false);
10129 return;
10130 }//if
10131 c_lcpState.ctimer += 32;
10132 if ((c_nodeStartMaster.blockLcp == true) ||
10133 (c_lcpState.lcpStopGcp >= c_newest_restorable_gci)) {
10134 jam();
10135 /* --------------------------------------------------------------------- */
10136 // No reason to start juggling the states and checking for start of LCP if
10137 // we are blocked to start an LCP anyway.
10138 // We also block LCP start if we have not completed one global checkpoints
10139 // before starting another local checkpoint.
10140 /* --------------------------------------------------------------------- */
10141 signal->theData[0] = DihContinueB::ZCHECK_TC_COUNTER;
10142 signal->theData[1] = __LINE__;
10143 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 1 * 100, 2);
10144 return;
10145 }//if
10146 c_lcpState.setLcpStatus(LCP_TCGET, __LINE__);
10148 c_lcpState.ctcCounter = c_lcpState.ctimer;
10149 sendLoopMacro(TCGETOPSIZEREQ, sendTCGETOPSIZEREQ);
10150 }//Dbdih::checkTcCounterLab()
10152 void Dbdih::checkLcpStart(Signal* signal, Uint32 lineNo)
10154 /* ----------------------------------------------------------------------- */
10155 // Verify that we are not attempting to start another instance of the LCP
10156 // when it is not alright to do so.
10157 /* ----------------------------------------------------------------------- */
10158 ndbrequire(c_lcpState.lcpStart == ZIDLE);
10159 c_lcpState.lcpStart = ZACTIVE;
10160 signal->theData[0] = DihContinueB::ZCHECK_TC_COUNTER;
10161 signal->theData[1] = lineNo;
10162 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 1000, 2);
10163 }//Dbdih::checkLcpStart()
/* ------------------------------------------------------------------------- */
/* TCGETOPSIZECONF: HOW MUCH OPERATION SIZE HAS BEEN EXECUTED BY TC          */
/* ------------------------------------------------------------------------- */
10168 void Dbdih::execTCGETOPSIZECONF(Signal* signal)
10170 jamEntry();
10171 Uint32 senderNodeId = signal->theData[0];
10172 c_lcpState.ctcCounter += signal->theData[1];
10174 receiveLoopMacro(TCGETOPSIZEREQ, senderNodeId);
10176 ndbrequire(c_lcpState.lcpStatus == LCP_TCGET);
10177 ndbrequire(c_lcpState.lcpStart == ZACTIVE);
10178 /* ----------------------------------------------------------------------- */
10179 // We are not actively starting another LCP, still we receive this signal.
10180 // This is not ok.
10181 /* ---------------------------------------------------------------------- */
10182 /* ALL TC'S HAVE RESPONDED NOW. NOW WE WILL CHECK IF ENOUGH OPERATIONS */
10183 /* HAVE EXECUTED TO ENABLE US TO START A NEW LOCAL CHECKPOINT. */
10184 /* WHILE COPYING DICTIONARY AND DISTRIBUTION INFO TO A STARTING NODE */
10185 /* WE WILL ALSO NOT ALLOW THE LOCAL CHECKPOINT TO PROCEED. */
10186 /*----------------------------------------------------------------------- */
10187 if (c_lcpState.immediateLcpStart == false) {
10188 if ((c_lcpState.ctcCounter <
10189 ((Uint32)1 << c_lcpState.clcpDelay)) ||
10190 (c_nodeStartMaster.blockLcp == true)) {
10191 jam();
10192 c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
10194 signal->theData[0] = DihContinueB::ZCHECK_TC_COUNTER;
10195 signal->theData[1] = __LINE__;
10196 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 1 * 100, 2);
10197 return;
10198 }//if
10199 }//if
10200 c_lcpState.lcpStart = ZIDLE;
10201 c_lcpState.immediateLcpStart = false;
10202 /* -----------------------------------------------------------------------
10203 * Now the initial lcp is started,
10204 * we can reset the delay to its orginal value
10205 * --------------------------------------------------------------------- */
10206 CRASH_INSERTION(7010);
10207 /* ----------------------------------------------------------------------- */
10208 /* IF MORE THAN 1 MILLION WORDS PASSED THROUGH THE TC'S THEN WE WILL */
10209 /* START A NEW LOCAL CHECKPOINT. CLEAR CTIMER. START CHECKPOINT */
10210 /* ACTIVITY BY CALCULATING THE KEEP GLOBAL CHECKPOINT. */
10211 // Also remember the current global checkpoint to ensure that we run at least
10212 // one global checkpoints between each local checkpoint that we start up.
10213 /* ----------------------------------------------------------------------- */
10214 c_lcpState.ctimer = 0;
10215 c_lcpState.keepGci = coldgcp;
10216 /* ----------------------------------------------------------------------- */
10217 /* UPDATE THE NEW LATEST LOCAL CHECKPOINT ID. */
10218 /* ----------------------------------------------------------------------- */
10219 cnoOfActiveTables = 0;
10220 c_lcpState.setLcpStatus(LCP_CALCULATE_KEEP_GCI, __LINE__);
10221 c_lcpState.oldestRestorableGci = SYSFILE->oldestRestorableGCI;
10222 ndbrequire(((int)c_lcpState.oldestRestorableGci) > 0);
10224 if (ERROR_INSERTED(7011)) {
10225 signal->theData[0] = NDB_LE_LCPStoppedInCalcKeepGci;
10226 signal->theData[1] = 0;
10227 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
10228 return;
10229 }//if
10230 signal->theData[0] = DihContinueB::ZCALCULATE_KEEP_GCI;
10231 signal->theData[1] = 0; /* TABLE ID = 0 */
10232 signal->theData[2] = 0; /* FRAGMENT ID = 0 */
10233 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
10234 return;
10235 }//Dbdih::execTCGETOPSIZECONF()
10237 /* ------------------------------------------------------------------------- */
10238 /* WE NEED TO CALCULATE THE OLDEST GLOBAL CHECKPOINT THAT WILL BE */
10239 /* COMPLETELY RESTORABLE AFTER EXECUTING THIS LOCAL CHECKPOINT. */
10240 /* ------------------------------------------------------------------------- */
10241 void Dbdih::calculateKeepGciLab(Signal* signal, Uint32 tableId, Uint32 fragId)
10243 TabRecordPtr tabPtr;
10244 Uint32 TloopCount = 1;
10245 tabPtr.i = tableId;
10246 do {
10247 if (tabPtr.i >= ctabFileSize) {
10248 if (cnoOfActiveTables > 0) {
10249 jam();
10250 signal->theData[0] = DihContinueB::ZSTORE_NEW_LCP_ID;
10251 sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
10252 return;
10253 } else {
10254 jam();
10255 /* ------------------------------------------------------------------ */
10256 /* THERE ARE NO TABLES TO CHECKPOINT. WE STOP THE CHECKPOINT ALREADY */
10257 /* HERE TO AVOID STRANGE PROBLEMS LATER. */
10258 /* ------------------------------------------------------------------ */
10259 c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
10260 checkLcpStart(signal, __LINE__);
10261 return;
10262 }//if
10263 }//if
10264 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
10265 if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE ||
10266 tabPtr.p->tabStorage != TabRecord::ST_NORMAL) {
10267 if (TloopCount > 100) {
10268 jam();
10269 signal->theData[0] = DihContinueB::ZCALCULATE_KEEP_GCI;
10270 signal->theData[1] = tabPtr.i + 1;
10271 signal->theData[2] = 0;
10272 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
10273 return;
10274 } else {
10275 jam();
10276 TloopCount++;
10277 tabPtr.i++;
10278 }//if
10279 } else {
10280 jam();
10281 TloopCount = 0;
10282 }//if
10283 } while (TloopCount != 0);
10284 cnoOfActiveTables++;
10285 FragmentstorePtr fragPtr;
10286 getFragstore(tabPtr.p, fragId, fragPtr);
10287 checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->storedReplicas);
10288 checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->oldStoredReplicas);
10289 fragId++;
10290 if (fragId >= tabPtr.p->totalfragments) {
10291 jam();
10292 tabPtr.i++;
10293 fragId = 0;
10294 }//if
10295 signal->theData[0] = DihContinueB::ZCALCULATE_KEEP_GCI;
10296 signal->theData[1] = tabPtr.i;
10297 signal->theData[2] = fragId;
10298 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
10299 return;
10300 }//Dbdih::calculateKeepGciLab()
10302 /* ------------------------------------------------------------------------- */
10303 /* WE NEED TO STORE ON DISK THE FACT THAT WE ARE STARTING THIS LOCAL */
10304 /* CHECKPOINT ROUND. THIS WILL INVALIDATE ALL THE LOCAL CHECKPOINTS */
10305 /* THAT WILL EVENTUALLY BE OVERWRITTEN AS PART OF THIS LOCAL CHECKPOINT*/
10306 /* ------------------------------------------------------------------------- */
10307 void Dbdih::storeNewLcpIdLab(Signal* signal)
10309 /***************************************************************************/
10310 // Report the event that a local checkpoint has started.
10311 /***************************************************************************/
10312 signal->theData[0] = NDB_LE_LocalCheckpointStarted; //Event type
10313 signal->theData[1] = SYSFILE->latestLCP_ID + 1;
10314 signal->theData[2] = c_lcpState.keepGci;
10315 signal->theData[3] = c_lcpState.oldestRestorableGci;
10316 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
10318 signal->setTrace(TestOrd::TraceLocalCheckpoint);
10320 CRASH_INSERTION(7013);
10321 SYSFILE->keepGCI = c_lcpState.keepGci;
10322 //Uint32 lcpId = SYSFILE->latestLCP_ID;
10323 SYSFILE->latestLCP_ID++;
10324 SYSFILE->oldestRestorableGCI = c_lcpState.oldestRestorableGci;
10326 const Uint32 oldestRestorableGCI = SYSFILE->oldestRestorableGCI;
10327 //const Uint32 newestRestorableGCI = SYSFILE->newestRestorableGCI;
10328 //ndbrequire(newestRestorableGCI >= oldestRestorableGCI);
10330 Int32 val = oldestRestorableGCI;
10331 ndbrequire(val > 0);
10333 /* ----------------------------------------------------------------------- */
10334 /* SET BIT INDICATING THAT LOCAL CHECKPOINT IS ONGOING. THIS IS CLEARED */
10335 /* AT THE END OF A LOCAL CHECKPOINT. */
10336 /* ----------------------------------------------------------------------- */
10337 SYSFILE->setLCPOngoing(SYSFILE->systemRestartBits);
10338 /* ---------------------------------------------------------------------- */
10339 /* CHECK IF ANY NODE MUST BE TAKEN OUT OF SERVICE AND REFILLED WITH */
10340 /* NEW FRESH DATA FROM AN ACTIVE NODE. */
10341 /* ---------------------------------------------------------------------- */
10342 setLcpActiveStatusStart(signal);
10343 c_lcpState.setLcpStatus(LCP_COPY_GCI, __LINE__);
10344 //#ifdef VM_TRACE
10345 // infoEvent("LocalCheckpoint %d started", SYSFILE->latestLCP_ID);
10346 // signal->theData[0] = 7012;
10347 // execDUMP_STATE_ORD(signal);
10348 //#endif
10350 copyGciLab(signal, CopyGCIReq::LOCAL_CHECKPOINT);
10351 }//Dbdih::storeNewLcpIdLab()
10353 void Dbdih::startLcpRoundLab(Signal* signal) {
10354 jam();
10356 Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
10357 Callback c = { safe_cast(&Dbdih::startLcpMutex_locked), 0 };
10358 ndbrequire(mutex.lock(c));
10361 void
10362 Dbdih::startLcpMutex_locked(Signal* signal, Uint32 senderData, Uint32 retVal){
10363 jamEntry();
10364 ndbrequire(retVal == 0);
10366 StartLcpReq* req = (StartLcpReq*)signal->getDataPtrSend();
10367 req->senderRef = reference();
10368 req->lcpId = SYSFILE->latestLCP_ID;
10369 req->participatingLQH = c_lcpState.m_participatingLQH;
10370 req->participatingDIH = c_lcpState.m_participatingDIH;
10371 sendLoopMacro(START_LCP_REQ, sendSTART_LCP_REQ);
10373 void
10374 Dbdih::sendSTART_LCP_REQ(Signal* signal, Uint32 nodeId){
10375 BlockReference ref = calcDihBlockRef(nodeId);
10376 sendSignal(ref, GSN_START_LCP_REQ, signal, StartLcpReq::SignalLength, JBB);
10379 void
10380 Dbdih::execSTART_LCP_CONF(Signal* signal){
10381 StartLcpConf * conf = (StartLcpConf*)signal->getDataPtr();
10383 Uint32 nodeId = refToNode(conf->senderRef);
10384 receiveLoopMacro(START_LCP_REQ, nodeId);
10386 Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
10387 Callback c = { safe_cast(&Dbdih::startLcpMutex_unlocked), 0 };
10388 mutex.unlock(c);
10391 void
10392 Dbdih::startLcpMutex_unlocked(Signal* signal, Uint32 data, Uint32 retVal){
10393 jamEntry();
10394 ndbrequire(retVal == 0);
10396 Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
10397 mutex.release();
10399 CRASH_INSERTION(7014);
10400 c_lcpState.setLcpStatus(LCP_TC_CLOPSIZE, __LINE__);
10401 sendLoopMacro(TC_CLOPSIZEREQ, sendTC_CLOPSIZEREQ);
10404 void Dbdih::execTC_CLOPSIZECONF(Signal* signal) {
10405 jamEntry();
10406 Uint32 senderNodeId = signal->theData[0];
10407 receiveLoopMacro(TC_CLOPSIZEREQ, senderNodeId);
10409 ndbrequire(c_lcpState.lcpStatus == LCP_TC_CLOPSIZE);
10410 /* ----------------------------------------------------------------------- */
10411 /* ALL TC'S HAVE CLEARED THEIR OPERATION SIZE COUNTERS. NOW PROCEED BY */
10412 /* STARTING THE LOCAL CHECKPOINT IN EACH LQH. */
10413 /* ----------------------------------------------------------------------- */
10414 c_lcpState.m_LAST_LCP_FRAG_ORD = c_lcpState.m_participatingLQH;
10416 CRASH_INSERTION(7015);
10417 c_lcpState.setLcpStatus(LCP_START_LCP_ROUND, __LINE__);
10418 startLcpRoundLoopLab(signal, 0, 0);
10419 }//Dbdih::execTC_CLOPSIZECONF()
10421 void Dbdih::startLcpRoundLoopLab(Signal* signal,
10422 Uint32 startTableId, Uint32 startFragId)
10424 NodeRecordPtr nodePtr;
10425 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
10426 ptrAss(nodePtr, nodeRecord);
10427 if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
10428 ndbrequire(nodePtr.p->noOfStartedChkpt == 0);
10429 ndbrequire(nodePtr.p->noOfQueuedChkpt == 0);
10430 }//if
10431 }//if
10432 c_lcpState.currentFragment.tableId = startTableId;
10433 c_lcpState.currentFragment.fragmentId = startFragId;
10434 startNextChkpt(signal);
10435 }//Dbdih::startLcpRoundLoopLab()
10437 void Dbdih::startNextChkpt(Signal* signal)
10439 Uint32 lcpId = SYSFILE->latestLCP_ID;
10441 NdbNodeBitmask busyNodes;
10442 busyNodes.clear();
10443 const Uint32 lcpNodes = c_lcpState.m_participatingLQH.count();
10445 bool save = true;
10446 LcpState::CurrentFragment curr = c_lcpState.currentFragment;
10448 while (curr.tableId < ctabFileSize) {
10449 TabRecordPtr tabPtr;
10450 tabPtr.i = curr.tableId;
10451 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
10452 if ((tabPtr.p->tabStatus != TabRecord::TS_ACTIVE) ||
10453 (tabPtr.p->tabLcpStatus != TabRecord::TLS_ACTIVE)) {
10454 curr.tableId++;
10455 curr.fragmentId = 0;
10456 continue;
10457 }//if
10459 FragmentstorePtr fragPtr;
10460 getFragstore(tabPtr.p, curr.fragmentId, fragPtr);
10462 ReplicaRecordPtr replicaPtr;
10463 for(replicaPtr.i = fragPtr.p->storedReplicas;
10464 replicaPtr.i != RNIL ;
10465 replicaPtr.i = replicaPtr.p->nextReplica){
10467 jam();
10468 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
10470 NodeRecordPtr nodePtr;
10471 nodePtr.i = replicaPtr.p->procNode;
10472 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
10474 if (c_lcpState.m_participatingLQH.get(nodePtr.i))
10476 if (replicaPtr.p->lcpOngoingFlag &&
10477 replicaPtr.p->lcpIdStarted < lcpId)
10479 jam();
10480 //-------------------------------------------------------------------
10481 // We have found a replica on a node that performs local checkpoint
10482 // that is alive and that have not yet been started.
10483 //-------------------------------------------------------------------
10485 if (nodePtr.p->noOfStartedChkpt < 2)
10487 jam();
10489 * Send LCP_FRAG_ORD to LQH
10493 * Mark the replica so with lcpIdStarted == true
10495 replicaPtr.p->lcpIdStarted = lcpId;
10497 Uint32 i = nodePtr.p->noOfStartedChkpt;
10498 nodePtr.p->startedChkpt[i].tableId = tabPtr.i;
10499 nodePtr.p->startedChkpt[i].fragId = curr.fragmentId;
10500 nodePtr.p->startedChkpt[i].replicaPtr = replicaPtr.i;
10501 nodePtr.p->noOfStartedChkpt = i + 1;
10503 sendLCP_FRAG_ORD(signal, nodePtr.p->startedChkpt[i]);
10505 else if (nodePtr.p->noOfQueuedChkpt < 2)
10507 jam();
10509 * Put LCP_FRAG_ORD "in queue"
10513 * Mark the replica so with lcpIdStarted == true
10515 replicaPtr.p->lcpIdStarted = lcpId;
10517 Uint32 i = nodePtr.p->noOfQueuedChkpt;
10518 nodePtr.p->queuedChkpt[i].tableId = tabPtr.i;
10519 nodePtr.p->queuedChkpt[i].fragId = curr.fragmentId;
10520 nodePtr.p->queuedChkpt[i].replicaPtr = replicaPtr.i;
10521 nodePtr.p->noOfQueuedChkpt = i + 1;
10523 else
10525 jam();
10527 if(save)
10530 * Stop increasing value on first that was "full"
10532 c_lcpState.currentFragment = curr;
10533 save = false;
10536 busyNodes.set(nodePtr.i);
10537 if(busyNodes.count() == lcpNodes)
10540 * There were no possibility to start the local checkpoint
10541 * and it was not possible to queue it up. In this case we
10542 * stop the start of local checkpoints until the nodes with a
10543 * backlog have performed more checkpoints. We will return and
10544 * will not continue the process of starting any more checkpoints.
10546 return;
10547 }//if
10548 }//if
10550 }//while
10552 curr.fragmentId++;
10553 if (curr.fragmentId >= tabPtr.p->totalfragments) {
10554 jam();
10555 curr.fragmentId = 0;
10556 curr.tableId++;
10557 }//if
10558 }//while
10560 sendLastLCP_FRAG_ORD(signal);
10561 }//Dbdih::startNextChkpt()
10563 void Dbdih::sendLastLCP_FRAG_ORD(Signal* signal)
10565 LcpFragOrd * const lcpFragOrd = (LcpFragOrd *)&signal->theData[0];
10566 lcpFragOrd->tableId = RNIL;
10567 lcpFragOrd->fragmentId = 0;
10568 lcpFragOrd->lcpId = SYSFILE->latestLCP_ID;
10569 lcpFragOrd->lcpNo = 0;
10570 lcpFragOrd->keepGci = c_lcpState.keepGci;
10571 lcpFragOrd->lastFragmentFlag = true;
10573 NodeRecordPtr nodePtr;
10574 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
10575 jam();
10576 ptrAss(nodePtr, nodeRecord);
10578 if(nodePtr.p->noOfQueuedChkpt == 0 &&
10579 nodePtr.p->noOfStartedChkpt == 0 &&
10580 c_lcpState.m_LAST_LCP_FRAG_ORD.isWaitingFor(nodePtr.i)){
10581 jam();
10583 CRASH_INSERTION(7028);
10586 * Nothing queued or started <=> Complete on that node
10589 c_lcpState.m_LAST_LCP_FRAG_ORD.clearWaitingFor(nodePtr.i);
10590 if(ERROR_INSERTED(7075)){
10591 continue;
10594 CRASH_INSERTION(7193);
10595 BlockReference ref = calcLqhBlockRef(nodePtr.i);
10596 sendSignal(ref, GSN_LCP_FRAG_ORD, signal,LcpFragOrd::SignalLength, JBB);
10599 if(ERROR_INSERTED(7075)){
10600 if(c_lcpState.m_LAST_LCP_FRAG_ORD.done())
10601 CRASH_INSERTION(7075);
10603 }//Dbdih::sendLastLCP_FRAGORD()
10605 /* ------------------------------------------------------------------------- */
10606 /* A FRAGMENT REPLICA HAS COMPLETED EXECUTING ITS LOCAL CHECKPOINT. */
10607 /* CHECK IF ALL REPLICAS IN THE TABLE HAVE COMPLETED. IF SO STORE THE */
10608 /* THE TABLE DISTRIBUTION ON DISK. ALSO SEND LCP_REPORT TO ALL OTHER */
10609 /* NODES SO THAT THEY CAN STORE THE TABLE ONTO DISK AS WELL. */
10610 /* ------------------------------------------------------------------------- */
10611 void Dbdih::execLCP_FRAG_REP(Signal* signal)
10613 jamEntry();
10614 ndbrequire(c_lcpState.lcpStatus != LCP_STATUS_IDLE);
10616 #if 0
10617 printLCP_FRAG_REP(stdout,
10618 signal->getDataPtr(),
10619 signal->length(), number());
10620 #endif
10622 LcpFragRep * const lcpReport = (LcpFragRep *)&signal->theData[0];
10623 Uint32 nodeId = lcpReport->nodeId;
10624 Uint32 tableId = lcpReport->tableId;
10625 Uint32 fragId = lcpReport->fragId;
10627 jamEntry();
10629 if (ERROR_INSERTED(7178) && nodeId != getOwnNodeId())
10631 jam();
10632 Uint32 owng =Sysfile::getNodeGroup(getOwnNodeId(), SYSFILE->nodeGroups);
10633 Uint32 nodeg = Sysfile::getNodeGroup(nodeId, SYSFILE->nodeGroups);
10634 if (owng == nodeg)
10636 jam();
10637 ndbout_c("throwing away LCP_FRAG_REP from (and killing) %d", nodeId);
10638 SET_ERROR_INSERT_VALUE(7179);
10639 signal->theData[0] = 9999;
10640 sendSignal(numberToRef(CMVMI, nodeId),
10641 GSN_NDB_TAMPER, signal, 1, JBA);
10642 return;
10646 if (ERROR_INSERTED(7179) && nodeId != getOwnNodeId())
10648 jam();
10649 Uint32 owng =Sysfile::getNodeGroup(getOwnNodeId(), SYSFILE->nodeGroups);
10650 Uint32 nodeg = Sysfile::getNodeGroup(nodeId, SYSFILE->nodeGroups);
10651 if (owng == nodeg)
10653 jam();
10654 ndbout_c("throwing away LCP_FRAG_REP from %d", nodeId);
10655 return;
10659 CRASH_INSERTION2(7025, isMaster());
10660 CRASH_INSERTION2(7016, !isMaster());
10662 bool fromTimeQueue = (signal->senderBlockRef() == reference());
10664 TabRecordPtr tabPtr;
10665 tabPtr.i = tableId;
10666 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
10667 if(tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE) {
10668 jam();
10669 /*-----------------------------------------------------------------------*/
10670 // If the table is currently copied to disk we also
10671 // stop already here to avoid strange half-way updates
10672 // of the table data structures.
10673 /*-----------------------------------------------------------------------*/
10675 We need to send this signal without a delay since we have discovered
10676 that we have run out of space in the short time queue. This problem
10677 is very erunlikely to happen but it has and it results in a node crash.
10678 This should be considered a "quick fix" and not a permanent solution.
10679 A cleaner/better way would be to check the time queue if it is full or
10680 not before sending this signal.
10682 sendSignal(reference(), GSN_LCP_FRAG_REP, signal, signal->length(), JBB);
10683 /* Kept here for reference
10684 sendSignalWithDelay(reference(), GSN_LCP_FRAG_REP,
10685 signal, 20, signal->length());
10688 if(!fromTimeQueue){
10689 c_lcpState.noOfLcpFragRepOutstanding++;
10692 return;
10693 }//if
10695 if(fromTimeQueue){
10696 jam();
10698 ndbrequire(c_lcpState.noOfLcpFragRepOutstanding > 0);
10699 c_lcpState.noOfLcpFragRepOutstanding--;
10702 bool tableDone = reportLcpCompletion(lcpReport);
10704 Uint32 started = lcpReport->maxGciStarted;
10705 Uint32 completed = lcpReport->maxGciCompleted;
10707 if (started > c_lcpState.lcpStopGcp)
10709 jam();
10710 c_lcpState.lcpStopGcp = started;
10713 if(tableDone){
10714 jam();
10716 if(tabPtr.p->tabStatus == TabRecord::TS_DROPPING){
10717 jam();
10718 g_eventLogger.info("TS_DROPPING - Neglecting to save Table: %d Frag: %d - ",
10719 tableId, fragId);
10720 } else {
10721 jam();
10723 * Write table description to file
10725 tabPtr.p->tabLcpStatus = TabRecord::TLS_WRITING_TO_FILE;
10726 tabPtr.p->tabCopyStatus = TabRecord::CS_LCP_READ_TABLE;
10727 tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT;
10728 signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
10729 signal->theData[1] = tabPtr.i;
10730 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
10732 checkLcpAllTablesDoneInLqh();
10736 #ifdef VM_TRACE
10737 /* --------------------------------------------------------------------- */
10738 // REPORT that local checkpoint have completed this fragment.
10739 /* --------------------------------------------------------------------- */
10740 signal->theData[0] = NDB_LE_LCPFragmentCompleted;
10741 signal->theData[1] = nodeId;
10742 signal->theData[2] = tableId;
10743 signal->theData[3] = fragId;
10744 signal->theData[4] = started;
10745 signal->theData[5] = completed;
10746 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 6, JBB);
10747 #endif
10749 bool ok = false;
10750 switch(c_lcpMasterTakeOverState.state){
10751 case LMTOS_IDLE:
10752 ok = true;
10753 jam();
10755 * Fall through
10757 break;
10758 case LMTOS_WAIT_EMPTY_LCP: // LCP Take over waiting for EMPTY_LCPCONF
10759 jam();
10760 return;
10761 case LMTOS_WAIT_LCP_FRAG_REP:
10762 jam();
10763 checkEmptyLcpComplete(signal);
10764 return;
10765 case LMTOS_INITIAL:
10766 case LMTOS_ALL_IDLE:
10767 case LMTOS_ALL_ACTIVE:
10768 case LMTOS_LCP_CONCLUDING:
10769 case LMTOS_COPY_ONGOING:
10770 ndbrequire(false);
10772 ndbrequire(ok);
10774 /* ----------------------------------------------------------------------- */
10775 // Check if there are more LCP's to start up.
10776 /* ----------------------------------------------------------------------- */
10777 if(isMaster()){
10778 jam();
10781 * Remove from "running" array
10783 NodeRecordPtr nodePtr;
10784 nodePtr.i = nodeId;
10785 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
10787 const Uint32 outstanding = nodePtr.p->noOfStartedChkpt;
10788 ndbrequire(outstanding > 0);
10789 if(nodePtr.p->startedChkpt[0].tableId != tableId ||
10790 nodePtr.p->startedChkpt[0].fragId != fragId){
10791 jam();
10792 ndbrequire(outstanding > 1);
10793 ndbrequire(nodePtr.p->startedChkpt[1].tableId == tableId);
10794 ndbrequire(nodePtr.p->startedChkpt[1].fragId == fragId);
10795 } else {
10796 jam();
10797 nodePtr.p->startedChkpt[0] = nodePtr.p->startedChkpt[1];
10799 nodePtr.p->noOfStartedChkpt--;
10800 checkStartMoreLcp(signal, nodeId);
10804 bool
10805 Dbdih::checkLcpAllTablesDoneInLqh(){
10806 TabRecordPtr tabPtr;
10809 * Check if finished with all tables
10811 for (tabPtr.i = 0; tabPtr.i < ctabFileSize; tabPtr.i++) {
10812 jam();
10813 ptrAss(tabPtr, tabRecord);
10814 if ((tabPtr.p->tabStatus == TabRecord::TS_ACTIVE) &&
10815 (tabPtr.p->tabLcpStatus == TabRecord::TLS_ACTIVE)) {
10816 jam();
10818 * Nope, not finished with all tables
10820 return false;
10821 }//if
10822 }//for
10824 CRASH_INSERTION2(7026, isMaster());
10825 CRASH_INSERTION2(7017, !isMaster());
10827 c_lcpState.setLcpStatus(LCP_TAB_COMPLETED, __LINE__);
10829 if (ERROR_INSERTED(7194))
10831 ndbout_c("CLEARING 7194");
10832 CLEAR_ERROR_INSERT_VALUE;
10835 return true;
10838 void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr,
10839 Fragmentstore* fragPtrP,
10840 Uint32 nodeId,
10841 bool old)
10843 replicaPtr.i = old ? fragPtrP->oldStoredReplicas : fragPtrP->storedReplicas;
10844 while(replicaPtr.i != RNIL){
10845 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
10846 if (replicaPtr.p->procNode == nodeId) {
10847 jam();
10848 return;
10849 } else {
10850 jam();
10851 replicaPtr.i = replicaPtr.p->nextReplica;
10852 }//if
10855 #ifdef VM_TRACE
10856 g_eventLogger.info("Fragment Replica(node=%d) not found", nodeId);
10857 replicaPtr.i = fragPtrP->oldStoredReplicas;
10858 while(replicaPtr.i != RNIL){
10859 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
10860 if (replicaPtr.p->procNode == nodeId) {
10861 jam();
10862 break;
10863 } else {
10864 jam();
10865 replicaPtr.i = replicaPtr.p->nextReplica;
10866 }//if
10868 if(replicaPtr.i != RNIL){
10869 g_eventLogger.info("...But was found in oldStoredReplicas");
10870 } else {
10871 g_eventLogger.info("...And wasn't found in oldStoredReplicas");
10873 #endif
10874 ndbrequire(false);
10875 }//Dbdih::findReplica()
10879 Dbdih::handle_invalid_lcp_no(const LcpFragRep* rep,
10880 ReplicaRecordPtr replicaPtr)
10882 ndbrequire(!isMaster());
10883 Uint32 lcpNo = rep->lcpNo;
10884 Uint32 lcpId = rep->lcpId;
10886 warningEvent("Detected previous node failure of %d during lcp",
10887 rep->nodeId);
10888 replicaPtr.p->nextLcp = lcpNo;
10889 replicaPtr.p->lcpId[lcpNo] = 0;
10890 replicaPtr.p->lcpStatus[lcpNo] = ZINVALID;
10892 for (Uint32 i = lcpNo; i != lcpNo; i = nextLcpNo(i))
10894 jam();
10895 if (replicaPtr.p->lcpStatus[i] == ZVALID &&
10896 replicaPtr.p->lcpId[i] >= lcpId)
10898 ndbout_c("i: %d lcpId: %d", i, replicaPtr.p->lcpId[i]);
10899 ndbrequire(false);
10903 return 0;
10907 * Return true if table is all fragment replicas have been checkpointed
10908 * to disk (in all LQHs)
10909 * false otherwise
10911 bool
10912 Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport)
10914 Uint32 lcpNo = lcpReport->lcpNo;
10915 Uint32 lcpId = lcpReport->lcpId;
10916 Uint32 maxGciStarted = lcpReport->maxGciStarted;
10917 Uint32 maxGciCompleted = lcpReport->maxGciCompleted;
10918 Uint32 tableId = lcpReport->tableId;
10919 Uint32 fragId = lcpReport->fragId;
10920 Uint32 nodeId = lcpReport->nodeId;
10922 TabRecordPtr tabPtr;
10923 tabPtr.i = tableId;
10924 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
10926 FragmentstorePtr fragPtr;
10927 getFragstore(tabPtr.p, fragId, fragPtr);
10929 ReplicaRecordPtr replicaPtr;
10930 findReplica(replicaPtr, fragPtr.p, nodeId);
10932 ndbrequire(replicaPtr.p->lcpOngoingFlag == true);
10933 if(lcpNo != replicaPtr.p->nextLcp){
10934 if (handle_invalid_lcp_no(lcpReport, replicaPtr))
10936 g_eventLogger.error("lcpNo = %d replicaPtr.p->nextLcp = %d",
10937 lcpNo, replicaPtr.p->nextLcp);
10938 ndbrequire(false);
10941 ndbrequire(lcpNo == replicaPtr.p->nextLcp);
10942 ndbrequire(lcpNo < MAX_LCP_STORED);
10943 ndbrequire(replicaPtr.p->lcpId[lcpNo] != lcpId);
10945 replicaPtr.p->lcpIdStarted = lcpId;
10946 replicaPtr.p->lcpOngoingFlag = false;
10948 removeOldCrashedReplicas(replicaPtr);
10949 replicaPtr.p->lcpId[lcpNo] = lcpId;
10950 replicaPtr.p->lcpStatus[lcpNo] = ZVALID;
10951 replicaPtr.p->maxGciStarted[lcpNo] = maxGciStarted;
10952 gth(maxGciStarted + 1, 0);
10953 replicaPtr.p->maxGciCompleted[lcpNo] = maxGciCompleted;
10954 replicaPtr.p->nextLcp = nextLcpNo(replicaPtr.p->nextLcp);
10956 ndbrequire(fragPtr.p->noLcpReplicas > 0);
10957 fragPtr.p->noLcpReplicas --;
10959 if(fragPtr.p->noLcpReplicas > 0){
10960 jam();
10961 return false;
10964 for (Uint32 fid = 0; fid < tabPtr.p->totalfragments; fid++) {
10965 jam();
10966 getFragstore(tabPtr.p, fid, fragPtr);
10967 if (fragPtr.p->noLcpReplicas > 0){
10968 jam();
10969 /* ----------------------------------------------------------------- */
10970 // Not all fragments in table have been checkpointed.
10971 /* ----------------------------------------------------------------- */
10972 if(0)
10973 g_eventLogger.info("reportLcpCompletion: fragment %d not ready", fid);
10974 return false;
10975 }//if
10976 }//for
10977 return true;
10978 }//Dbdih::reportLcpCompletion()
10980 void Dbdih::checkStartMoreLcp(Signal* signal, Uint32 nodeId)
10982 ndbrequire(isMaster());
10984 NodeRecordPtr nodePtr;
10985 nodePtr.i = nodeId;
10986 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
10988 ndbrequire(nodePtr.p->noOfStartedChkpt < 2);
10990 if (nodePtr.p->noOfQueuedChkpt > 0) {
10991 jam();
10992 nodePtr.p->noOfQueuedChkpt--;
10993 Uint32 i = nodePtr.p->noOfStartedChkpt;
10994 nodePtr.p->startedChkpt[i] = nodePtr.p->queuedChkpt[0];
10995 nodePtr.p->queuedChkpt[0] = nodePtr.p->queuedChkpt[1];
10996 //-------------------------------------------------------------------
10997 // We can send a LCP_FRAGORD to the node ordering it to perform a
10998 // local checkpoint on this fragment replica.
10999 //-------------------------------------------------------------------
11000 nodePtr.p->noOfStartedChkpt = i + 1;
11002 sendLCP_FRAG_ORD(signal, nodePtr.p->startedChkpt[i]);
11005 /* ----------------------------------------------------------------------- */
11006 // When there are no more outstanding LCP reports and there are no one queued
11007 // in at least one node, then we are ready to make sure all nodes have at
11008 // least two outstanding LCP requests per node and at least two queued for
11009 // sending.
11010 /* ----------------------------------------------------------------------- */
11011 startNextChkpt(signal);
11012 }//Dbdih::checkStartMoreLcp()
11014 void
11015 Dbdih::sendLCP_FRAG_ORD(Signal* signal,
11016 NodeRecord::FragmentCheckpointInfo info){
11018 ReplicaRecordPtr replicaPtr;
11019 replicaPtr.i = info.replicaPtr;
11020 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
11022 BlockReference ref = calcLqhBlockRef(replicaPtr.p->procNode);
11024 if (ERROR_INSERTED(7193) && replicaPtr.p->procNode == getOwnNodeId())
11026 return;
11029 LcpFragOrd * const lcpFragOrd = (LcpFragOrd *)&signal->theData[0];
11030 lcpFragOrd->tableId = info.tableId;
11031 lcpFragOrd->fragmentId = info.fragId;
11032 lcpFragOrd->lcpId = SYSFILE->latestLCP_ID;
11033 lcpFragOrd->lcpNo = replicaPtr.p->nextLcp;
11034 lcpFragOrd->keepGci = c_lcpState.keepGci;
11035 lcpFragOrd->lastFragmentFlag = false;
11036 sendSignal(ref, GSN_LCP_FRAG_ORD, signal, LcpFragOrd::SignalLength, JBB);
11039 void Dbdih::checkLcpCompletedLab(Signal* signal)
11041 if(c_lcpState.lcpStatus < LCP_TAB_COMPLETED){
11042 jam();
11043 return;
11046 TabRecordPtr tabPtr;
11047 for (tabPtr.i = 0; tabPtr.i < ctabFileSize; tabPtr.i++) {
11048 jam();
11049 ptrAss(tabPtr, tabRecord);
11050 if (tabPtr.p->tabStatus == TabRecord::TS_ACTIVE) {
11051 if (tabPtr.p->tabLcpStatus != TabRecord::TLS_COMPLETED) {
11052 jam();
11053 return;
11054 }//if
11055 }//if
11056 }//for
11058 CRASH_INSERTION2(7027, isMaster());
11059 CRASH_INSERTION2(7018, !isMaster());
11061 if(c_lcpState.lcpStatus == LCP_TAB_COMPLETED){
11063 * We'r done
11065 c_lcpState.setLcpStatus(LCP_TAB_SAVED, __LINE__);
11066 sendLCP_COMPLETE_REP(signal);
11067 return;
11070 ndbrequire(c_lcpState.lcpStatus == LCP_TAB_SAVED);
11071 allNodesLcpCompletedLab(signal);
11072 return;
11073 }//Dbdih::checkLcpCompletedLab()
11075 void
11076 Dbdih::sendLCP_COMPLETE_REP(Signal* signal){
11077 jam();
11078 LcpCompleteRep * rep = (LcpCompleteRep*)signal->getDataPtrSend();
11079 rep->nodeId = getOwnNodeId();
11080 rep->lcpId = SYSFILE->latestLCP_ID;
11081 rep->blockNo = DBDIH;
11083 sendSignal(c_lcpState.m_masterLcpDihRef, GSN_LCP_COMPLETE_REP, signal,
11084 LcpCompleteRep::SignalLength, JBB);
11087 * Say that an initial node restart does not need to be redone
11088 * once node has been part of first LCP
11090 if (c_set_initial_start_flag &&
11091 c_lcpState.m_participatingLQH.get(getOwnNodeId()))
11093 jam();
11094 c_set_initial_start_flag = FALSE;
11098 /*-------------------------------------------------------------------------- */
11099 /* COMP_LCP_ROUND A LQH HAS COMPLETED A LOCAL CHECKPOINT */
11100 /*------------------------------------------------------------------------- */
11101 void Dbdih::execLCP_COMPLETE_REP(Signal* signal)
11103 jamEntry();
11105 CRASH_INSERTION(7191);
11107 #if 0
11108 g_eventLogger.info("LCP_COMPLETE_REP");
11109 printLCP_COMPLETE_REP(stdout,
11110 signal->getDataPtr(),
11111 signal->length(), number());
11112 #endif
11114 LcpCompleteRep * rep = (LcpCompleteRep*)signal->getDataPtr();
11115 Uint32 lcpId = rep->lcpId;
11116 Uint32 nodeId = rep->nodeId;
11117 Uint32 blockNo = rep->blockNo;
11119 if(c_lcpMasterTakeOverState.state > LMTOS_WAIT_LCP_FRAG_REP){
11120 jam();
11122 * Don't allow LCP_COMPLETE_REP to arrive during
11123 * LCP master take over
11125 ndbrequire(isMaster());
11126 ndbrequire(blockNo == DBDIH);
11127 sendSignalWithDelay(reference(), GSN_LCP_COMPLETE_REP, signal, 100,
11128 signal->length());
11129 return;
11132 ndbrequire(c_lcpState.lcpStatus != LCP_STATUS_IDLE);
11134 switch(blockNo){
11135 case DBLQH:
11136 jam();
11137 c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.clearWaitingFor(nodeId);
11138 ndbrequire(!c_lcpState.m_LAST_LCP_FRAG_ORD.isWaitingFor(nodeId));
11139 break;
11140 case DBDIH:
11141 jam();
11142 ndbrequire(isMaster());
11143 c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.clearWaitingFor(nodeId);
11144 break;
11145 case 0:
11146 jam();
11147 ndbrequire(!isMaster());
11148 ndbrequire(c_lcpState.m_LCP_COMPLETE_REP_From_Master_Received == false);
11149 c_lcpState.m_LCP_COMPLETE_REP_From_Master_Received = true;
11150 break;
11151 default:
11152 ndbrequire(false);
11154 ndbrequire(lcpId == SYSFILE->latestLCP_ID);
11156 allNodesLcpCompletedLab(signal);
11157 return;
11160 void Dbdih::allNodesLcpCompletedLab(Signal* signal)
11162 jam();
11164 if (c_lcpState.lcpStatus != LCP_TAB_SAVED) {
11165 jam();
11167 * We have not sent LCP_COMPLETE_REP to master DIH yet
11169 return;
11170 }//if
11172 if (!c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.done()){
11173 jam();
11174 return;
11177 if (!c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.done()){
11178 jam();
11179 return;
11182 if (!isMaster() &&
11183 c_lcpState.m_LCP_COMPLETE_REP_From_Master_Received == false){
11184 jam();
11186 * Wait until master DIH has signaled lcp is complete
11188 return;
11191 if(c_lcpMasterTakeOverState.state != LMTOS_IDLE){
11192 jam();
11193 #ifdef VM_TRACE
11194 g_eventLogger.info("Exiting from allNodesLcpCompletedLab");
11195 #endif
11196 return;
11200 /*------------------------------------------------------------------------ */
11201 /* WE HAVE NOW COMPLETED A LOCAL CHECKPOINT. WE ARE NOW READY TO WAIT */
11202 /* FOR THE NEXT LOCAL CHECKPOINT. SEND WITHOUT TIME-OUT SINCE IT MIGHT */
11203 /* BE TIME TO START THE NEXT LOCAL CHECKPOINT IMMEDIATELY. */
11204 /* CLEAR BIT 3 OF SYSTEM RESTART BITS TO INDICATE THAT THERE IS NO */
11205 /* LOCAL CHECKPOINT ONGOING. THIS WILL BE WRITTEN AT SOME LATER TIME */
11206 /* DURING A GLOBAL CHECKPOINT. IT IS NOT NECESSARY TO WRITE IT */
11207 /* IMMEDIATELY. WE WILL ALSO CLEAR BIT 2 OF SYSTEM RESTART BITS IF ALL */
11208 /* CURRENTLY ACTIVE NODES COMPLETED THE LOCAL CHECKPOINT. */
11209 /*------------------------------------------------------------------------ */
11210 CRASH_INSERTION(7019);
11211 signal->setTrace(0);
11213 c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
11214 setLcpActiveStatusEnd();
11215 Sysfile::clearLCPOngoing(SYSFILE->systemRestartBits);
11217 if(!isMaster()){
11218 jam();
11220 * We're not master, be content
11222 return;
11225 // Send LCP_COMPLETE_REP to all other nodes
11226 // allowing them to set their lcpStatus to LCP_STATUS_IDLE
11227 LcpCompleteRep * rep = (LcpCompleteRep*)signal->getDataPtrSend();
11228 rep->nodeId = getOwnNodeId();
11229 rep->lcpId = SYSFILE->latestLCP_ID;
11230 rep->blockNo = 0; // 0 = Sent from master
11232 NodeRecordPtr nodePtr;
11233 nodePtr.i = cfirstAliveNode;
11234 do {
11235 jam();
11236 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
11237 if (nodePtr.i != cownNodeId){
11238 BlockReference ref = calcDihBlockRef(nodePtr.i);
11239 sendSignal(ref, GSN_LCP_COMPLETE_REP, signal,
11240 LcpCompleteRep::SignalLength, JBB);
11242 nodePtr.i = nodePtr.p->nextNode;
11243 } while (nodePtr.i != RNIL);
11246 jam();
11247 /***************************************************************************/
11248 // Report the event that a local checkpoint has completed.
11249 /***************************************************************************/
11250 signal->theData[0] = NDB_LE_LocalCheckpointCompleted; //Event type
11251 signal->theData[1] = SYSFILE->latestLCP_ID;
11252 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
11254 if (c_newest_restorable_gci > c_lcpState.lcpStopGcp)
11256 jam();
11257 c_lcpState.lcpStopGcp = c_newest_restorable_gci;
11261 * Start checking for next LCP
11263 checkLcpStart(signal, __LINE__);
11265 if (cwaitLcpSr == true) {
11266 jam();
11267 cwaitLcpSr = false;
11268 ndbsttorry10Lab(signal, __LINE__);
11269 return;
11270 }//if
11272 if (c_nodeStartMaster.blockLcp == true) {
11273 jam();
11274 lcpBlockedLab(signal);
11275 return;
11276 }//if
11277 return;
11278 }//Dbdih::allNodesLcpCompletedLab()
11280 /******************************************************************************/
11281 /* ********** TABLE UPDATE MODULE *************/
11282 /* ****************************************************************************/
11283 /* ------------------------------------------------------------------------- */
11284 /* THIS MODULE IS USED TO UPDATE THE TABLE DESCRIPTION. IT STARTS BY */
11285 /* CREATING THE FIRST TABLE FILE, THEN UPDATES THIS FILE AND CLOSES IT.*/
11286 /* AFTER THAT THE SAME HAPPENS WITH THE SECOND FILE. AFTER THAT THE */
11287 /* TABLE DISTRIBUTION HAS BEEN UPDATED. */
11288 /* */
11289 /* THE REASON FOR CREATING THE FILE AND NOT OPENING IT IS TO ENSURE */
11290 /* THAT WE DO NOT GET A MIX OF OLD AND NEW INFORMATION IN THE FILE IN */
11291 /* ERROR SITUATIONS. */
11292 /* ------------------------------------------------------------------------- */
11293 void Dbdih::tableUpdateLab(Signal* signal, TabRecordPtr tabPtr) {
11294 FileRecordPtr filePtr;
11295 if(tabPtr.p->tabStorage == TabRecord::ST_TEMPORARY) {
11296 // For temporary tables we do not write to disk. Mark both copies 0 and 1
11297 // as done, and go straight to the after-close code.
11298 filePtr.i = tabPtr.p->tabFile[1];
11299 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
11300 tableCloseLab(signal, filePtr);
11301 return;
11303 filePtr.i = tabPtr.p->tabFile[0];
11304 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
11305 createFileRw(signal, filePtr);
11306 filePtr.p->reqStatus = FileRecord::TABLE_CREATE;
11307 return;
11308 }//Dbdih::tableUpdateLab()
11310 void Dbdih::tableCreateLab(Signal* signal, FileRecordPtr filePtr)
11312 TabRecordPtr tabPtr;
11313 tabPtr.i = filePtr.p->tabRef;
11314 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11315 writeTabfile(signal, tabPtr.p, filePtr);
11316 filePtr.p->reqStatus = FileRecord::TABLE_WRITE;
11317 return;
11318 }//Dbdih::tableCreateLab()
11320 void Dbdih::tableWriteLab(Signal* signal, FileRecordPtr filePtr)
11322 closeFile(signal, filePtr);
11323 filePtr.p->reqStatus = FileRecord::TABLE_CLOSE;
11324 return;
11325 }//Dbdih::tableWriteLab()
11327 void Dbdih::tableCloseLab(Signal* signal, FileRecordPtr filePtr)
11329 TabRecordPtr tabPtr;
11330 tabPtr.i = filePtr.p->tabRef;
11331 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11332 if (filePtr.i == tabPtr.p->tabFile[0]) {
11333 jam();
11334 filePtr.i = tabPtr.p->tabFile[1];
11335 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
11336 createFileRw(signal, filePtr);
11337 filePtr.p->reqStatus = FileRecord::TABLE_CREATE;
11338 return;
11339 }//if
11340 switch (tabPtr.p->tabUpdateState) {
11341 case TabRecord::US_LOCAL_CHECKPOINT:
11342 jam();
11343 releaseTabPages(tabPtr.i);
11344 signal->theData[0] = DihContinueB::ZCHECK_LCP_COMPLETED;
11345 sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
11347 tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
11348 tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
11349 tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
11350 return;
11351 break;
11352 case TabRecord::US_REMOVE_NODE:
11353 jam();
11354 releaseTabPages(tabPtr.i);
11355 for (Uint32 fragId = 0; fragId < tabPtr.p->totalfragments; fragId++) {
11356 jam();
11357 FragmentstorePtr fragPtr;
11358 getFragstore(tabPtr.p, fragId, fragPtr);
11359 updateNodeInfo(fragPtr);
11360 }//for
11361 tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
11362 tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
11363 if (tabPtr.p->tabLcpStatus == TabRecord::TLS_WRITING_TO_FILE) {
11364 jam();
11365 tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
11366 signal->theData[0] = DihContinueB::ZCHECK_LCP_COMPLETED;
11367 sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
11368 }//if
11369 signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
11370 signal->theData[1] = tabPtr.p->tabRemoveNode;
11371 signal->theData[2] = tabPtr.i + 1;
11372 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
11373 return;
11374 break;
11375 case TabRecord::US_INVALIDATE_NODE_LCP:
11376 jam();
11377 releaseTabPages(tabPtr.i);
11378 tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
11379 tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
11381 signal->theData[0] = DihContinueB::ZINVALIDATE_NODE_LCP;
11382 signal->theData[1] = tabPtr.p->tabRemoveNode;
11383 signal->theData[2] = tabPtr.i + 1;
11384 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
11385 return;
11386 case TabRecord::US_COPY_TAB_REQ:
11387 jam();
11388 tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
11389 copyTabReq_complete(signal, tabPtr);
11390 return;
11391 break;
11392 case TabRecord::US_ADD_TABLE_MASTER:
11393 jam();
11394 releaseTabPages(tabPtr.i);
11395 tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
11396 signal->theData[0] = DihContinueB::ZDIH_ADD_TABLE_MASTER;
11397 signal->theData[1] = tabPtr.i;
11398 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11399 return;
11400 break;
11401 case TabRecord::US_ADD_TABLE_SLAVE:
11402 jam();
11403 releaseTabPages(tabPtr.i);
11404 tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
11405 signal->theData[0] = DihContinueB::ZDIH_ADD_TABLE_SLAVE;
11406 signal->theData[1] = tabPtr.i;
11407 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11408 return;
11409 break;
11410 default:
11411 ndbrequire(false);
11412 return;
11413 break;
11414 }//switch
11415 }//Dbdih::tableCloseLab()
11418 * GCP stop detected,
11419 * send SYSTEM_ERROR to all other alive nodes
11421 void Dbdih::crashSystemAtGcpStop(Signal* signal, bool local)
11423 if (local)
11424 goto dolocal;
11426 switch(cgcpStatus){
11427 case GCP_PREPARE_SENT:
11429 jam();
11431 * We're waiting for a GCP PREPARE CONF
11433 infoEvent("Detected GCP stop(%d)...sending kill to %s",
11434 cgcpStatus, c_GCP_PREPARE_Counter.getText());
11435 ndbout_c("Detected GCP stop(%d)...sending kill to %s",
11436 cgcpStatus, c_GCP_PREPARE_Counter.getText());
11439 NodeReceiverGroup rg(DBDIH, c_GCP_PREPARE_Counter);
11440 signal->theData[0] = 7022;
11441 sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBA);
11445 NodeReceiverGroup rg(NDBCNTR, c_GCP_PREPARE_Counter);
11446 SystemError * const sysErr = (SystemError*)&signal->theData[0];
11447 sysErr->errorCode = SystemError::GCPStopDetected;
11448 sysErr->errorRef = reference();
11449 sysErr->data1 = cgcpStatus;
11450 sysErr->data2 = cgcpOrderBlocked;
11451 sendSignal(rg, GSN_SYSTEM_ERROR, signal,
11452 SystemError::SignalLength, JBA);
11454 ndbrequire(!c_GCP_PREPARE_Counter.done());
11455 return;
11457 case GCP_COMMIT_SENT:
11459 jam();
11461 * We're waiting for a GCP_NODEFINISH
11463 infoEvent("Detected GCP stop(%d)...sending kill to %s",
11464 cgcpStatus, c_GCP_COMMIT_Counter.getText());
11465 ndbout_c("Detected GCP stop(%d)...sending kill to %s",
11466 cgcpStatus, c_GCP_COMMIT_Counter.getText());
11469 NodeReceiverGroup rg(DBDIH, c_GCP_COMMIT_Counter);
11470 signal->theData[0] = 7022;
11471 sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBA);
11475 NodeReceiverGroup rg(NDBCNTR, c_GCP_COMMIT_Counter);
11476 SystemError * const sysErr = (SystemError*)&signal->theData[0];
11477 sysErr->errorCode = SystemError::GCPStopDetected;
11478 sysErr->errorRef = reference();
11479 sysErr->data1 = cgcpStatus;
11480 sysErr->data2 = cgcpOrderBlocked;
11481 sendSignal(rg, GSN_SYSTEM_ERROR, signal,
11482 SystemError::SignalLength, JBA);
11484 ndbrequire(!c_GCP_COMMIT_Counter.done());
11485 return;
11487 case GCP_NODE_FINISHED:
11489 jam();
11491 * We're waiting for a GCP save conf
11493 NodeReceiverGroup rg(DBLQH, c_GCP_SAVEREQ_Counter);
11494 signal->theData[0] = 2305;
11495 sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBB);
11497 infoEvent("Detected GCP stop(%d)...sending kill to %s",
11498 cgcpStatus, c_GCP_SAVEREQ_Counter.getText());
11499 ndbout_c("Detected GCP stop(%d)...sending kill to %s",
11500 cgcpStatus, c_GCP_SAVEREQ_Counter.getText());
11501 ndbrequire(!c_GCP_SAVEREQ_Counter.done());
11502 return;
11504 case GCP_SAVE_LQH_FINISHED:
11506 jam();
11508 * We're waiting for a COPY_GCICONF
11510 infoEvent("Detected GCP stop(%d)...sending kill to %s",
11511 cgcpStatus, c_COPY_GCIREQ_Counter.getText());
11512 ndbout_c("Detected GCP stop(%d)...sending kill to %s",
11513 cgcpStatus, c_COPY_GCIREQ_Counter.getText());
11516 NodeReceiverGroup rg(DBDIH, c_COPY_GCIREQ_Counter);
11517 signal->theData[0] = 7022;
11518 sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBA);
11522 NodeReceiverGroup rg(NDBCNTR, c_COPY_GCIREQ_Counter);
11523 SystemError * const sysErr = (SystemError*)&signal->theData[0];
11524 sysErr->errorCode = SystemError::GCPStopDetected;
11525 sysErr->errorRef = reference();
11526 sysErr->data1 = cgcpStatus;
11527 sysErr->data2 = cgcpOrderBlocked;
11528 sendSignal(rg, GSN_SYSTEM_ERROR, signal,
11529 SystemError::SignalLength, JBA);
11531 ndbrequire(!c_COPY_GCIREQ_Counter.done());
11532 return;
11534 case GCP_READY: (void)1;
11537 dolocal:
11538 ndbout_c("m_copyReason: %d m_waiting: %d",
11539 c_copyGCIMaster.m_copyReason,
11540 c_copyGCIMaster.m_waiting);
11542 ndbout_c("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d",
11543 c_copyGCISlave.m_senderData,
11544 c_copyGCISlave.m_senderRef,
11545 c_copyGCISlave.m_copyReason,
11546 c_copyGCISlave.m_expectedNextWord);
11548 FileRecordPtr file0Ptr;
11549 file0Ptr.i = crestartInfoFile[0];
11550 ptrCheckGuard(file0Ptr, cfileFileSize, fileRecord);
11551 FileRecordPtr file1Ptr;
11552 file1Ptr.i = crestartInfoFile[1];
11553 ptrCheckGuard(file1Ptr, cfileFileSize, fileRecord);
11555 ndbout_c("file[0] status: %d type: %d reqStatus: %d file1: %d %d %d",
11556 file0Ptr.p->fileStatus, file0Ptr.p->fileType, file0Ptr.p->reqStatus,
11557 file1Ptr.p->fileStatus, file1Ptr.p->fileType, file1Ptr.p->reqStatus
11560 signal->theData[0] = 404;
11561 signal->theData[1] = file0Ptr.p->fileRef;
11562 EXECUTE_DIRECT(NDBFS, GSN_DUMP_STATE_ORD, signal, 2);
11564 signal->theData[0] = 404;
11565 signal->theData[1] = file1Ptr.p->fileRef;
11566 EXECUTE_DIRECT(NDBFS, GSN_DUMP_STATE_ORD, signal, 2);
11568 ndbout_c("c_COPY_GCIREQ_Counter = %s",
11569 c_COPY_GCIREQ_Counter.getText());
11570 ndbout_c("c_COPY_TABREQ_Counter = %s",
11571 c_COPY_TABREQ_Counter.getText());
11572 ndbout_c("c_CREATE_FRAGREQ_Counter = %s",
11573 c_CREATE_FRAGREQ_Counter.getText());
11574 ndbout_c("c_DIH_SWITCH_REPLICA_REQ_Counter = %s",
11575 c_DIH_SWITCH_REPLICA_REQ_Counter.getText());
11576 ndbout_c("c_EMPTY_LCP_REQ_Counter = %s",c_EMPTY_LCP_REQ_Counter.getText());
11577 ndbout_c("c_END_TOREQ_Counter = %s", c_END_TOREQ_Counter.getText());
11578 ndbout_c("c_GCP_COMMIT_Counter = %s", c_GCP_COMMIT_Counter.getText());
11579 ndbout_c("c_GCP_PREPARE_Counter = %s", c_GCP_PREPARE_Counter.getText());
11580 ndbout_c("c_GCP_SAVEREQ_Counter = %s", c_GCP_SAVEREQ_Counter.getText());
11581 ndbout_c("c_INCL_NODEREQ_Counter = %s", c_INCL_NODEREQ_Counter.getText());
11582 ndbout_c("c_MASTER_GCPREQ_Counter = %s",
11583 c_MASTER_GCPREQ_Counter.getText());
11584 ndbout_c("c_MASTER_LCPREQ_Counter = %s",
11585 c_MASTER_LCPREQ_Counter.getText());
11586 ndbout_c("c_START_INFOREQ_Counter = %s",
11587 c_START_INFOREQ_Counter.getText());
11588 ndbout_c("c_START_RECREQ_Counter = %s", c_START_RECREQ_Counter.getText());
11589 ndbout_c("c_START_TOREQ_Counter = %s", c_START_TOREQ_Counter.getText());
11590 ndbout_c("c_STOP_ME_REQ_Counter = %s", c_STOP_ME_REQ_Counter.getText());
11591 ndbout_c("c_TC_CLOPSIZEREQ_Counter = %s",
11592 c_TC_CLOPSIZEREQ_Counter.getText());
11593 ndbout_c("c_TCGETOPSIZEREQ_Counter = %s",
11594 c_TCGETOPSIZEREQ_Counter.getText());
11595 ndbout_c("c_UPDATE_TOREQ_Counter = %s", c_UPDATE_TOREQ_Counter.getText());
11597 if (local == false)
11599 jam();
11600 NodeRecordPtr nodePtr;
11601 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
11602 jam();
11603 ptrAss(nodePtr, nodeRecord);
11604 if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
11605 jam();
11606 const BlockReference ref =
11607 numberToRef(refToBlock(cntrlblockref), nodePtr.i);
11608 SystemError * const sysErr = (SystemError*)&signal->theData[0];
11609 sysErr->errorCode = SystemError::GCPStopDetected;
11610 sysErr->errorRef = reference();
11611 sysErr->data1 = cgcpStatus;
11612 sysErr->data2 = cgcpOrderBlocked;
11613 sendSignal(ref, GSN_SYSTEM_ERROR, signal,
11614 SystemError::SignalLength, JBA);
11615 }//if
11616 }//for
11618 else
11620 jam();
11621 SystemError * const sysErr = (SystemError*)&signal->theData[0];
11622 sysErr->errorCode = SystemError::GCPStopDetected;
11623 sysErr->errorRef = reference();
11624 sysErr->data1 = cgcpStatus;
11625 sysErr->data2 = cgcpOrderBlocked;
11626 EXECUTE_DIRECT(NDBCNTR, GSN_SYSTEM_ERROR,
11627 signal, SystemError::SignalLength);
11628 ndbrequire(false);
11630 return;
11631 }//Dbdih::crashSystemAtGcpStop()
11633 /*************************************************************************/
11634 /* */
11635 /* MODULE: ALLOCPAGE */
11636 /* DESCRIPTION: THE SUBROUTINE IS CALLED WITH POINTER TO PAGE */
11637 /* RECORD. A PAGE RECORD IS TAKEN FROM */
11638 /* THE FREE PAGE LIST */
11639 /*************************************************************************/
11640 void Dbdih::allocpage(PageRecordPtr& pagePtr)
11642 ndbrequire(cfirstfreepage != RNIL);
11643 pagePtr.i = cfirstfreepage;
11644 ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
11645 cfirstfreepage = pagePtr.p->nextfreepage;
11646 pagePtr.p->nextfreepage = RNIL;
11647 }//Dbdih::allocpage()
11649 /*************************************************************************/
11650 /* */
11651 /* MODULE: ALLOC_STORED_REPLICA */
11652 /* DESCRIPTION: THE SUBROUTINE IS CALLED TO GET A REPLICA RECORD, */
11653 /* TO INITIALISE IT AND TO LINK IT INTO THE FRAGMENT */
11654 /* STORE RECORD. USED FOR STORED REPLICAS. */
11655 /*************************************************************************/
11656 void Dbdih::allocStoredReplica(FragmentstorePtr fragPtr,
11657 ReplicaRecordPtr& newReplicaPtr,
11658 Uint32 nodeId)
11660 Uint32 i;
11661 ReplicaRecordPtr arrReplicaPtr;
11662 ReplicaRecordPtr arrPrevReplicaPtr;
11664 seizeReplicaRec(newReplicaPtr);
11665 for (i = 0; i < MAX_LCP_STORED; i++) {
11666 newReplicaPtr.p->maxGciCompleted[i] = 0;
11667 newReplicaPtr.p->maxGciStarted[i] = 0;
11668 newReplicaPtr.p->lcpId[i] = 0;
11669 newReplicaPtr.p->lcpStatus[i] = ZINVALID;
11670 }//for
11671 newReplicaPtr.p->noCrashedReplicas = 0;
11672 newReplicaPtr.p->initialGci = currentgcp;
11673 for (i = 0; i < 8; i++) {
11674 newReplicaPtr.p->replicaLastGci[i] = (Uint32)-1;
11675 newReplicaPtr.p->createGci[i] = 0;
11676 }//for
11677 newReplicaPtr.p->createGci[0] = currentgcp;
11678 ndbrequire(currentgcp != 0xF1F1F1F1);
11679 newReplicaPtr.p->nextLcp = 0;
11680 newReplicaPtr.p->procNode = nodeId;
11681 newReplicaPtr.p->lcpOngoingFlag = false;
11682 newReplicaPtr.p->lcpIdStarted = 0;
11684 arrPrevReplicaPtr.i = RNIL;
11685 arrReplicaPtr.i = fragPtr.p->storedReplicas;
11686 while (arrReplicaPtr.i != RNIL) {
11687 jam();
11688 ptrCheckGuard(arrReplicaPtr, creplicaFileSize, replicaRecord);
11689 arrPrevReplicaPtr = arrReplicaPtr;
11690 arrReplicaPtr.i = arrReplicaPtr.p->nextReplica;
11691 }//while
11692 if (arrPrevReplicaPtr.i == RNIL) {
11693 jam();
11694 fragPtr.p->storedReplicas = newReplicaPtr.i;
11695 } else {
11696 jam();
11697 arrPrevReplicaPtr.p->nextReplica = newReplicaPtr.i;
11698 }//if
11699 fragPtr.p->noStoredReplicas++;
11700 }//Dbdih::allocStoredReplica()
11702 /*************************************************************************/
11703 /* CALCULATE HOW MANY HOT SPARES THAT ARE TO BE ASSIGNED IN THIS SYSTEM */
11704 /*************************************************************************/
11705 void Dbdih::calculateHotSpare()
11707 Uint32 tchsTmp;
11708 Uint32 tchsNoNodes;
11710 switch (cnoReplicas) {
11711 case 1:
11712 jam();
11713 cnoHotSpare = 0;
11714 break;
11715 case 2:
11716 case 3:
11717 case 4:
11718 jam();
11719 if (csystemnodes > cnoReplicas) {
11720 jam();
11721 /* --------------------------------------------------------------------- */
11722 /* WITH MORE NODES THAN REPLICAS WE WILL ALWAYS USE AT LEAST ONE HOT */
11723 /* SPARE IF THAT HAVE BEEN REQUESTED BY THE CONFIGURATION FILE. THE */
11724 /* NUMBER OF NODES TO BE USED FOR NORMAL OPERATION IS ALWAYS */
11725 /* A MULTIPLE OF THE NUMBER OF REPLICAS SINCE WE WILL ORGANISE NODES */
11726 /* INTO NODE GROUPS. THE REMAINING NODES WILL BE HOT SPARE NODES. */
11727 /* --------------------------------------------------------------------- */
11728 if ((csystemnodes - cnoReplicas) >= cminHotSpareNodes) {
11729 jam();
11730 /* --------------------------------------------------------------------- */
11731 // We set the minimum number of hot spares according to users request
11732 // through the configuration file.
11733 /* --------------------------------------------------------------------- */
11734 tchsNoNodes = csystemnodes - cminHotSpareNodes;
11735 cnoHotSpare = cminHotSpareNodes;
11736 } else if (cminHotSpareNodes > 0) {
11737 jam();
11738 /* --------------------------------------------------------------------- */
11739 // The user requested at least one hot spare node and we will support him
11740 // in that.
11741 /* --------------------------------------------------------------------- */
11742 tchsNoNodes = csystemnodes - 1;
11743 cnoHotSpare = 1;
11744 } else {
11745 jam();
11746 /* --------------------------------------------------------------------- */
11747 // The user did not request any hot spare nodes so in this case we will
11748 // only use hot spare nodes if the number of nodes is such that we cannot
11749 // use all nodes as normal nodes.
11750 /* --------------------------------------------------------------------- */
11751 tchsNoNodes = csystemnodes;
11752 cnoHotSpare = 0;
11753 }//if
11754 } else {
11755 jam();
11756 /* --------------------------------------------------------------------- */
11757 // We only have enough to support the replicas. We will not have any hot
11758 // spares.
11759 /* --------------------------------------------------------------------- */
11760 tchsNoNodes = csystemnodes;
11761 cnoHotSpare = 0;
11762 }//if
11763 tchsTmp = tchsNoNodes - (cnoReplicas * (tchsNoNodes / cnoReplicas));
11764 cnoHotSpare = cnoHotSpare + tchsTmp;
11765 break;
11766 default:
11767 jam();
11768 ndbrequire(false);
11769 break;
11770 }//switch
11771 }//Dbdih::calculateHotSpare()
11773 /*************************************************************************/
11774 /* CHECK IF THE NODE CRASH IS TO ESCALATE INTO A SYSTEM CRASH. WE COULD */
11775 /* DO THIS BECAUSE ALL REPLICAS OF SOME FRAGMENT ARE LOST. WE COULD ALSO */
11776 /* DO IT AFTER MANY NODE FAILURES THAT MAKE IT VERY DIFFICULT TO RESTORE */
11777 /* DATABASE AFTER A SYSTEM CRASH. IT MIGHT EVEN BE IMPOSSIBLE AND THIS */
11778 /* MUST BE AVOIDED EVEN MORE THAN AVOIDING SYSTEM CRASHES. */
11779 /*************************************************************************/
11780 void Dbdih::checkEscalation()
11782 Uint32 TnodeGroup[MAX_NDB_NODES];
11783 NodeRecordPtr nodePtr;
11784 Uint32 i;
11785 for (i = 0; i < MAX_NDB_NODES; i++) {
11786 TnodeGroup[i] = ZFALSE;
11787 }//for
11788 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
11789 jam();
11790 ptrAss(nodePtr, nodeRecord);
11791 if (nodePtr.p->nodeStatus == NodeRecord::ALIVE &&
11792 nodePtr.p->activeStatus == Sysfile::NS_Active){
11793 ndbrequire(nodePtr.p->nodeGroup < MAX_NDB_NODES);
11794 TnodeGroup[nodePtr.p->nodeGroup] = ZTRUE;
11797 for (i = 0; i < cnoOfNodeGroups; i++) {
11798 jam();
11799 if (TnodeGroup[i] == ZFALSE) {
11800 jam();
11801 progError(__LINE__, NDBD_EXIT_LOST_NODE_GROUP, "Lost node group");
11802 }//if
11803 }//for
11804 }//Dbdih::checkEscalation()
11806 /*************************************************************************/
11807 /* */
11808 /* MODULE: CHECK_KEEP_GCI */
11809 /* DESCRIPTION: CHECK FOR MINIMUM GCI RESTORABLE WITH NEW LOCAL */
11810 /* CHECKPOINT. */
11811 /*************************************************************************/
11812 void Dbdih::checkKeepGci(TabRecordPtr tabPtr, Uint32 fragId, Fragmentstore*,
11813 Uint32 replicaStartIndex)
11815 ReplicaRecordPtr ckgReplicaPtr;
11816 ckgReplicaPtr.i = replicaStartIndex;
11817 while (ckgReplicaPtr.i != RNIL) {
11818 jam();
11819 ptrCheckGuard(ckgReplicaPtr, creplicaFileSize, replicaRecord);
11820 Uint32 keepGci;
11821 Uint32 oldestRestorableGci;
11822 findMinGci(ckgReplicaPtr, keepGci, oldestRestorableGci);
11823 if (keepGci < c_lcpState.keepGci) {
11824 jam();
11825 /* ------------------------------------------------------------------- */
11826 /* WE MUST KEEP LOG RECORDS SO THAT WE CAN USE ALL LOCAL CHECKPOINTS */
11827 /* THAT ARE AVAILABLE. THUS WE NEED TO CALCULATE THE MINIMUM OVER ALL */
11828 /* FRAGMENTS. */
11829 /* ------------------------------------------------------------------- */
11830 c_lcpState.keepGci = keepGci;
11831 }//if
11832 if (oldestRestorableGci > c_lcpState.oldestRestorableGci) {
11833 jam();
11834 c_lcpState.oldestRestorableGci = oldestRestorableGci;
11835 }//if
11836 ckgReplicaPtr.i = ckgReplicaPtr.p->nextReplica;
11837 }//while
11838 }//Dbdih::checkKeepGci()
11840 void Dbdih::closeFile(Signal* signal, FileRecordPtr filePtr)
11842 signal->theData[0] = filePtr.p->fileRef;
11843 signal->theData[1] = reference();
11844 signal->theData[2] = filePtr.i;
11845 signal->theData[3] = ZCLOSE_NO_DELETE;
11846 sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, 4, JBA);
11847 }//Dbdih::closeFile()
11849 void Dbdih::closeFileDelete(Signal* signal, FileRecordPtr filePtr)
11851 signal->theData[0] = filePtr.p->fileRef;
11852 signal->theData[1] = reference();
11853 signal->theData[2] = filePtr.i;
11854 signal->theData[3] = ZCLOSE_DELETE;
11855 sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, 4, JBA);
11856 }//Dbdih::closeFileDelete()
11858 void Dbdih::createFileRw(Signal* signal, FileRecordPtr filePtr)
11860 signal->theData[0] = reference();
11861 signal->theData[1] = filePtr.i;
11862 signal->theData[2] = filePtr.p->fileName[0];
11863 signal->theData[3] = filePtr.p->fileName[1];
11864 signal->theData[4] = filePtr.p->fileName[2];
11865 signal->theData[5] = filePtr.p->fileName[3];
11866 signal->theData[6] = ZCREATE_READ_WRITE;
11867 sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, 7, JBA);
11868 }//Dbdih::createFileRw()
11870 void Dbdih::emptyverificbuffer(Signal* signal, bool aContinueB)
11872 if(cfirstVerifyQueue == RNIL){
11873 jam();
11874 return;
11875 }//if
11876 ApiConnectRecordPtr localApiConnectptr;
11877 if(getBlockCommit() == false){
11878 jam();
11879 ndbrequire(cverifyQueueCounter > 0);
11880 cverifyQueueCounter--;
11881 localApiConnectptr.i = cfirstVerifyQueue;
11882 ptrCheckGuard(localApiConnectptr, capiConnectFileSize, apiConnectRecord);
11883 ndbrequire(localApiConnectptr.p->apiGci <= currentgcp);
11884 cfirstVerifyQueue = localApiConnectptr.p->nextApi;
11885 if (cfirstVerifyQueue == RNIL) {
11886 jam();
11887 ndbrequire(cverifyQueueCounter == 0);
11888 clastVerifyQueue = RNIL;
11889 }//if
11890 signal->theData[0] = localApiConnectptr.i;
11891 signal->theData[1] = currentgcp;
11892 sendSignal(clocaltcblockref, GSN_DIVERIFYCONF, signal, 2, JBB);
11893 if (aContinueB == true) {
11894 jam();
11895 //-----------------------------------------------------------------------
11896 // This emptying happened as part of a take-out process by continueb signals.
11897 // This ensures that we will empty the queue eventually. We will also empty
11898 // one item every time we insert one item to ensure that the list doesn't
11899 // grow when it is not blocked.
11900 //-----------------------------------------------------------------------
11901 signal->theData[0] = DihContinueB::ZEMPTY_VERIFY_QUEUE;
11902 sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
11903 }//if
11904 } else {
11905 jam();
11906 //-----------------------------------------------------------------------
11907 // We are blocked so it is no use in continuing the emptying of the
11908 // verify buffer. Whenever the block is removed the emptying will
11909 // restart.
11910 //-----------------------------------------------------------------------
11912 return;
11913 }//Dbdih::emptyverificbuffer()
11915 /*----------------------------------------------------------------*/
11916 /* FIND A FREE HOT SPARE IF AVAILABLE AND ALIVE. */
11917 /*----------------------------------------------------------------*/
11918 Uint32 Dbdih::findHotSpare()
11920 NodeRecordPtr nodePtr;
11921 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
11922 jam();
11923 ptrAss(nodePtr, nodeRecord);
11924 if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
11925 if (nodePtr.p->activeStatus == Sysfile::NS_HotSpare) {
11926 jam();
11927 return nodePtr.i;
11928 }//if
11929 }//if
11930 }//for
11931 return RNIL;
11932 }//Dbdih::findHotSpare()
11934 /*************************************************************************/
11935 /* FIND THE NODES FROM WHICH WE CAN EXECUTE THE LOG TO RESTORE THE */
11936 /* DATA NODE IN A SYSTEM RESTART. */
11937 /*************************************************************************/
11938 bool Dbdih::findLogNodes(CreateReplicaRecord* createReplica,
11939 FragmentstorePtr fragPtr,
11940 Uint32 startGci,
11941 Uint32 stopGci)
11943 ConstPtr<ReplicaRecord> flnReplicaPtr;
11944 flnReplicaPtr.i = createReplica->replicaRec;
11945 ptrCheckGuard(flnReplicaPtr, creplicaFileSize, replicaRecord);
11946 /* --------------------------------------------------------------------- */
11947 /* WE START BY CHECKING IF THE DATA NODE CAN HANDLE THE LOG ALL BY */
11948 /* ITSELF. THIS IS THE DESIRED BEHAVIOUR. IF THIS IS NOT POSSIBLE */
11949 /* THEN WE SEARCH FOR THE BEST POSSIBLE NODES AMONG THE NODES THAT */
11950 /* ARE PART OF THIS SYSTEM RESTART. */
11951 /* THIS CAN ONLY BE HANDLED BY THE LAST CRASHED REPLICA. */
11952 /* The condition is that the replica was created before or at the */
11953 /* time of the starting gci, in addition it must have been alive */
11954 /* at the time of the stopping gci. This is checked by two */
11955 /* conditions, the first checks replicaLastGci and the second */
11956 /* checks that it is also smaller than the last gci the node was */
11957 /* involved in. This is necessary to check since createGci is set */
11958 /* Last + 1 and sometimes startGci = stopGci + 1 and in that case */
11959 /* it could happen that replicaLastGci is set to -1 with CreateGci */
11960 /* set to LastGci + 1. */
11961 /* --------------------------------------------------------------------- */
11962 arrGuard(flnReplicaPtr.p->noCrashedReplicas, 8);
11963 const Uint32 noCrashed = flnReplicaPtr.p->noCrashedReplicas;
11965 if (!(ERROR_INSERTED(7073) || ERROR_INSERTED(7074))&&
11966 (startGci >= flnReplicaPtr.p->createGci[noCrashed]) &&
11967 (stopGci <= flnReplicaPtr.p->replicaLastGci[noCrashed]) &&
11968 (stopGci <= SYSFILE->lastCompletedGCI[flnReplicaPtr.p->procNode])) {
11969 jam();
11970 /* --------------------------------------------------------------------- */
11971 /* WE FOUND ALL THE LOG RECORDS NEEDED IN THE DATA NODE. WE WILL */
11972 /* USE THOSE. */
11973 /* --------------------------------------------------------------------- */
11974 createReplica->noLogNodes = 1;
11975 createReplica->logStartGci[0] = startGci;
11976 createReplica->logStopGci[0] = stopGci;
11977 createReplica->logNodeId[0] = flnReplicaPtr.p->procNode;
11978 return true;
11979 }//if
11980 Uint32 logNode = 0;
11981 do {
11982 Uint32 fblStopGci;
11983 jam();
11984 if(!findBestLogNode(createReplica,
11985 fragPtr,
11986 startGci,
11987 stopGci,
11988 logNode,
11989 fblStopGci)){
11990 jam();
11991 return false;
11994 logNode++;
11995 if (fblStopGci >= stopGci) {
11996 jam();
11997 createReplica->noLogNodes = logNode;
11998 return true;
11999 }//if
12000 startGci = fblStopGci + 1;
12001 if (logNode >= 4) { // Why??
12002 jam();
12003 break;
12004 }//if
12005 } while (1);
12006 /* --------------------------------------------------------------------- */
12007 /* IT WAS NOT POSSIBLE TO RESTORE THE REPLICA. THIS CAN EITHER BE */
12008 /* BECAUSE OF LACKING NODES OR BECAUSE OF A REALLY SERIOUS PROBLEM.*/
12009 /* --------------------------------------------------------------------- */
12010 return false;
12011 }//Dbdih::findLogNodes()
12013 /*************************************************************************/
12014 /* FIND THE BEST POSSIBLE LOG NODE TO EXECUTE THE LOG AS SPECIFIED */
12015 /* BY THE INPUT PARAMETERS. WE SCAN THROUGH ALL ALIVE REPLICAS. */
12016 /* THIS MEANS STORED, OLD_STORED */
12017 /*************************************************************************/
12018 bool
12019 Dbdih::findBestLogNode(CreateReplicaRecord* createReplica,
12020 FragmentstorePtr fragPtr,
12021 Uint32 startGci,
12022 Uint32 stopGci,
12023 Uint32 logNode,
12024 Uint32& fblStopGci)
12026 ConstPtr<ReplicaRecord> fblFoundReplicaPtr;
12027 ConstPtr<ReplicaRecord> fblReplicaPtr;
12028 LINT_INIT(fblFoundReplicaPtr.p);
12030 /* --------------------------------------------------------------------- */
12031 /* WE START WITH ZERO AS FOUND TO ENSURE THAT FIRST HIT WILL BE */
12032 /* BETTER. */
12033 /* --------------------------------------------------------------------- */
12034 fblStopGci = 0;
12035 fblReplicaPtr.i = fragPtr.p->storedReplicas;
12036 while (fblReplicaPtr.i != RNIL) {
12037 jam();
12038 ptrCheckGuard(fblReplicaPtr, creplicaFileSize, replicaRecord);
12039 if (checkNodeAlive(fblReplicaPtr.p->procNode)) {
12040 jam();
12041 Uint32 fliStopGci = findLogInterval(fblReplicaPtr, startGci);
12042 if (fliStopGci > fblStopGci) {
12043 jam();
12044 fblStopGci = fliStopGci;
12045 fblFoundReplicaPtr = fblReplicaPtr;
12046 }//if
12047 }//if
12048 fblReplicaPtr.i = fblReplicaPtr.p->nextReplica;
12049 }//while
12050 fblReplicaPtr.i = fragPtr.p->oldStoredReplicas;
12051 while (fblReplicaPtr.i != RNIL) {
12052 jam();
12053 ptrCheckGuard(fblReplicaPtr, creplicaFileSize, replicaRecord);
12054 if (checkNodeAlive(fblReplicaPtr.p->procNode)) {
12055 jam();
12056 Uint32 fliStopGci = findLogInterval(fblReplicaPtr, startGci);
12057 if (fliStopGci > fblStopGci) {
12058 jam();
12059 fblStopGci = fliStopGci;
12060 fblFoundReplicaPtr = fblReplicaPtr;
12061 }//if
12062 }//if
12063 fblReplicaPtr.i = fblReplicaPtr.p->nextReplica;
12064 }//while
12065 if (fblStopGci != 0) {
12066 jam();
12067 ndbrequire(logNode < MAX_LOG_EXEC);
12068 createReplica->logNodeId[logNode] = fblFoundReplicaPtr.p->procNode;
12069 createReplica->logStartGci[logNode] = startGci;
12070 if (fblStopGci >= stopGci) {
12071 jam();
12072 createReplica->logStopGci[logNode] = stopGci;
12073 } else {
12074 jam();
12075 createReplica->logStopGci[logNode] = fblStopGci;
12076 }//if
12077 }//if
12079 return fblStopGci != 0;
12080 }//Dbdih::findBestLogNode()
12082 Uint32 Dbdih::findLogInterval(ConstPtr<ReplicaRecord> replicaPtr,
12083 Uint32 startGci)
12085 ndbrequire(replicaPtr.p->noCrashedReplicas <= 8);
12086 Uint32 loopLimit = replicaPtr.p->noCrashedReplicas + 1;
12087 for (Uint32 i = 0; i < loopLimit; i++) {
12088 jam();
12089 if (replicaPtr.p->createGci[i] <= startGci) {
12090 if (replicaPtr.p->replicaLastGci[i] >= startGci) {
12091 jam();
12092 return replicaPtr.p->replicaLastGci[i];
12093 }//if
12094 }//if
12095 }//for
12096 return 0;
12097 }//Dbdih::findLogInterval()
12099 /*************************************************************************/
12100 /* */
12101 /* MODULE: FIND THE MINIMUM GCI THAT THIS NODE HAS LOG RECORDS FOR.*/
12102 /*************************************************************************/
12103 void Dbdih::findMinGci(ReplicaRecordPtr fmgReplicaPtr,
12104 Uint32& keepGci,
12105 Uint32& oldestRestorableGci)
12107 Uint32 nextLcpNo;
12108 Uint32 lcpNo;
12109 for (Uint32 i = 0; i < MAX_LCP_STORED; i++) {
12110 jam();
12111 if ((fmgReplicaPtr.p->lcpStatus[i] == ZVALID) &&
12112 ((fmgReplicaPtr.p->lcpId[i] + MAX_LCP_STORED) <= (SYSFILE->latestLCP_ID + 1))) {
12113 jam();
12114 /*--------------------------------------------------------------------*/
12115 // We invalidate the checkpoint we are preparing to overwrite.
12116 // The LCP id is still the old lcp id,
12117 // this is the reason of comparing with lcpId + 1.
12118 /*---------------------------------------------------------------------*/
12119 fmgReplicaPtr.p->lcpStatus[i] = ZINVALID;
12120 }//if
12121 }//for
12122 keepGci = (Uint32)-1;
12123 oldestRestorableGci = 0;
12124 nextLcpNo = fmgReplicaPtr.p->nextLcp;
12125 lcpNo = fmgReplicaPtr.p->nextLcp;
12126 do {
12127 ndbrequire(lcpNo < MAX_LCP_STORED);
12128 if (fmgReplicaPtr.p->lcpStatus[lcpNo] == ZVALID)
12130 jam();
12131 keepGci = fmgReplicaPtr.p->maxGciCompleted[lcpNo];
12132 oldestRestorableGci = fmgReplicaPtr.p->maxGciStarted[lcpNo];
12133 ndbassert(fmgReplicaPtr.p->maxGciStarted[lcpNo] <c_newest_restorable_gci);
12134 return;
12135 } else {
12136 jam();
12137 if (fmgReplicaPtr.p->createGci[0] == fmgReplicaPtr.p->initialGci) {
12138 jam();
12139 /*-------------------------------------------------------------------
12140 * WE CAN STILL RESTORE THIS REPLICA WITHOUT ANY LOCAL CHECKPOINTS BY
12141 * ONLY USING THE LOG. IF THIS IS NOT POSSIBLE THEN WE REPORT THE LAST
12142 * VALID LOCAL CHECKPOINT AS THE MINIMUM GCI RECOVERABLE.
12143 *-----------------------------------------------------------------*/
12144 keepGci = fmgReplicaPtr.p->createGci[0];
12145 }//if
12146 }//if
12147 lcpNo = prevLcpNo(lcpNo);
12148 } while (lcpNo != nextLcpNo);
12149 return;
12150 }//Dbdih::findMinGci()
12152 bool Dbdih::findStartGci(ConstPtr<ReplicaRecord> replicaPtr,
12153 Uint32 stopGci,
12154 Uint32& startGci,
12155 Uint32& lcpNo)
12157 lcpNo = replicaPtr.p->nextLcp;
12158 const Uint32 startLcpNo = lcpNo;
12159 do {
12160 lcpNo = prevLcpNo(lcpNo);
12161 ndbrequire(lcpNo < MAX_LCP_STORED);
12162 if (replicaPtr.p->lcpStatus[lcpNo] == ZVALID) {
12163 if (replicaPtr.p->maxGciStarted[lcpNo] < stopGci) {
12164 jam();
12165 /* ----------------------------------------------------------------- */
12166 /* WE HAVE FOUND A USEFUL LOCAL CHECKPOINT THAT CAN BE USED FOR */
12167 /* RESTARTING THIS FRAGMENT REPLICA. */
12168 /* ----------------------------------------------------------------- */
12169 startGci = replicaPtr.p->maxGciCompleted[lcpNo] + 1;
12170 return true;
12173 } while (lcpNo != startLcpNo);
12174 /* --------------------------------------------------------------------- */
12175 /* NO VALID LOCAL CHECKPOINT WAS AVAILABLE. WE WILL ADD THE */
12176 /* FRAGMENT. THUS THE NEXT LCP MUST BE SET TO ZERO. */
12177 /* WE MUST EXECUTE THE LOG FROM THE INITIAL GLOBAL CHECKPOINT WHEN */
12178 /* THE TABLE WAS CREATED. */
12179 /* --------------------------------------------------------------------- */
12180 startGci = replicaPtr.p->initialGci;
12181 ndbrequire(replicaPtr.p->nextLcp == 0);
12182 return false;
12183 }//Dbdih::findStartGci()
12185 /**************************************************************************/
12186 /* ---------------------------------------------------------------------- */
12187 /* FIND A TAKE OVER REPLICA WHICH IS TO BE STARTED OR COMMITTED WHEN*/
12188 /* TAKING OVER A FAILED NODE. */
12189 /* ---------------------------------------------------------------------- */
12190 /*************************************************************************/
12191 void Dbdih::findToReplica(TakeOverRecord* regTakeOver,
12192 Uint32 replicaType,
12193 FragmentstorePtr fragPtr,
12194 ReplicaRecordPtr& ftrReplicaPtr)
12196 switch (replicaType) {
12197 case CreateFragReq::STORED:
12198 case CreateFragReq::COMMIT_STORED:
12199 /* ----------------------------------------------------------------------*/
12200 /* HERE WE SEARCH FOR STORED REPLICAS. THE REPLICA MUST BE STORED IN THE */
12201 /* SECTION FOR OLD STORED REPLICAS SINCE WE HAVE NOT TAKEN OVER YET. */
12202 /* ----------------------------------------------------------------------*/
12203 ftrReplicaPtr.i = fragPtr.p->oldStoredReplicas;
12204 while (ftrReplicaPtr.i != RNIL) {
12205 ptrCheckGuard(ftrReplicaPtr, creplicaFileSize, replicaRecord);
12206 if (ftrReplicaPtr.p->procNode == regTakeOver->toStartingNode) {
12207 jam();
12208 return;
12209 } else {
12210 if (ftrReplicaPtr.p->procNode == regTakeOver->toFailedNode) {
12211 jam();
12212 return;
12213 } else {
12214 jam();
12215 ftrReplicaPtr.i = ftrReplicaPtr.p->nextReplica;
12216 }//if
12217 }//if
12218 }//while
12219 break;
12220 default:
12221 ndbrequire(false);
12222 break;
12223 }//switch
12224 }//Dbdih::findToReplica()
12226 void Dbdih::initCommonData()
12228 c_blockCommit = false;
12229 c_blockCommitNo = 0;
12230 c_createFragmentLock = RNIL;
12231 c_endToLock = RNIL;
12232 cfailurenr = 1;
12233 cfirstAliveNode = RNIL;
12234 cfirstDeadNode = RNIL;
12235 cfirstVerifyQueue = RNIL;
12236 cgckptflag = false;
12237 cgcpDelay = 0;
12238 cgcpMasterTakeOverState = GMTOS_IDLE;
12239 cgcpOrderBlocked = 0;
12240 cgcpParticipantState = GCP_PARTICIPANT_READY;
12241 cgcpSameCounter = 0;
12242 cgcpStartCounter = 0;
12243 cgcpStatus = GCP_READY;
12245 clastVerifyQueue = RNIL;
12246 c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
12248 c_lcpState.clcpDelay = 0;
12249 c_lcpState.lcpStart = ZIDLE;
12250 c_lcpState.lcpStopGcp = 0;
12251 c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
12252 c_lcpState.currentFragment.tableId = 0;
12253 c_lcpState.currentFragment.fragmentId = 0;
12254 c_lcpState.noOfLcpFragRepOutstanding = 0;
12255 c_lcpState.keepGci = 0;
12256 c_lcpState.oldestRestorableGci = 0;
12257 c_lcpState.ctcCounter = 0;
12258 c_lcpState.ctimer = 0;
12259 c_lcpState.immediateLcpStart = false;
12260 c_lcpState.m_MASTER_LCPREQ_Received = false;
12262 cmasterdihref = 0;
12263 cmasterNodeId = 0;
12264 cmasterState = MASTER_IDLE;
12265 cmasterTakeOverNode = 0;
12266 cnewgcp = 0;
12267 cnoHotSpare = 0;
12268 cnoOfActiveTables = 0;
12269 cnoOfNodeGroups = 0;
12270 c_nextNodeGroup = 0;
12271 cnoReplicas = 0;
12272 coldgcp = 0;
12273 coldGcpId = 0;
12274 coldGcpStatus = cgcpStatus;
12275 con_lineNodes = 0;
12276 creceivedfrag = 0;
12277 crestartGci = 0;
12278 crestartInfoFile[0] = RNIL;
12279 crestartInfoFile[1] = RNIL;
12280 cstartGcpNow = false;
12281 cstartPhase = 0;
12282 c_startToLock = RNIL;
12283 cstarttype = (Uint32)-1;
12284 csystemnodes = 0;
12285 c_updateToLock = RNIL;
12286 currentgcp = 0;
12287 c_newest_restorable_gci = 0;
12288 cverifyQueueCounter = 0;
12289 cwaitLcpSr = false;
12290 c_nextLogPart = 0;
12292 nodeResetStart();
12293 c_nodeStartMaster.wait = ZFALSE;
12295 memset(&sysfileData[0], 0, sizeof(sysfileData));
12297 const ndb_mgm_configuration_iterator * p =
12298 m_ctx.m_config.getOwnConfigIterator();
12299 ndbrequire(p != 0);
12301 c_lcpState.clcpDelay = 20;
12302 ndb_mgm_get_int_parameter(p, CFG_DB_LCP_INTERVAL, &c_lcpState.clcpDelay);
12303 c_lcpState.clcpDelay = c_lcpState.clcpDelay > 31 ? 31 : c_lcpState.clcpDelay;
12305 cminHotSpareNodes = 0;
12306 //ndb_mgm_get_int_parameter(p, CFG_DB_MIN_HOT_SPARES, &cminHotSpareNodes);
12307 cminHotSpareNodes = cminHotSpareNodes > 2 ? 2 : cminHotSpareNodes;
12309 cnoReplicas = 1;
12310 ndb_mgm_get_int_parameter(p, CFG_DB_NO_REPLICAS, &cnoReplicas);
12311 if (cnoReplicas > 4)
12313 progError(__LINE__, NDBD_EXIT_INVALID_CONFIG,
12314 "Only up to four replicas are supported. Check NoOfReplicas.");
12317 cgcpDelay = 2000;
12318 ndb_mgm_get_int_parameter(p, CFG_DB_GCP_INTERVAL, &cgcpDelay);
12319 cgcpDelay = cgcpDelay > 60000 ? 60000 : (cgcpDelay < 10 ? 10 : cgcpDelay);
12320 }//Dbdih::initCommonData()
12322 void Dbdih::initFragstore(FragmentstorePtr fragPtr)
12324 fragPtr.p->storedReplicas = RNIL;
12325 fragPtr.p->oldStoredReplicas = RNIL;
12327 fragPtr.p->noStoredReplicas = 0;
12328 fragPtr.p->noOldStoredReplicas = 0;
12329 fragPtr.p->fragReplicas = 0;
12330 fragPtr.p->preferredPrimary = 0;
12332 for (Uint32 i = 0; i < MAX_REPLICAS; i++)
12333 fragPtr.p->activeNodes[i] = 0;
12335 fragPtr.p->noLcpReplicas = 0;
12336 fragPtr.p->distributionKey = 0;
12337 }//Dbdih::initFragstore()
12339 /*************************************************************************/
12340 /* */
12341 /* MODULE: INIT_RESTART_INFO */
12342 /* DESCRIPTION: INITIATE RESTART INFO VARIABLE AND VARIABLES FOR */
12343 /* GLOBAL CHECKPOINTS. */
12344 /*************************************************************************/
12345 void Dbdih::initRestartInfo()
12347 Uint32 i;
12348 for (i = 0; i < MAX_NDB_NODES; i++) {
12349 SYSFILE->lastCompletedGCI[i] = 0;
12350 }//for
12351 NodeRecordPtr nodePtr;
12352 nodePtr.i = cfirstAliveNode;
12353 do {
12354 jam();
12355 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
12356 SYSFILE->lastCompletedGCI[nodePtr.i] = 1;
12357 /* FIRST GCP = 1 ALREADY SET BY LQH */
12358 nodePtr.i = nodePtr.p->nextNode;
12359 } while (nodePtr.i != RNIL);
12360 coldgcp = 1;
12361 currentgcp = 2;
12362 cnewgcp = 2;
12363 crestartGci = 1;
12364 c_newest_restorable_gci = 1;
12366 SYSFILE->keepGCI = 1;
12367 SYSFILE->oldestRestorableGCI = 1;
12368 SYSFILE->newestRestorableGCI = 1;
12369 SYSFILE->systemRestartBits = 0;
12370 for (i = 0; i < NodeBitmask::Size; i++) {
12371 SYSFILE->lcpActive[0] = 0;
12372 }//for
12373 for (i = 0; i < Sysfile::TAKE_OVER_SIZE; i++) {
12374 SYSFILE->takeOver[i] = 0;
12375 }//for
12376 Sysfile::setInitialStartOngoing(SYSFILE->systemRestartBits);
12377 srand(time(0));
12378 globalData.m_restart_seq = SYSFILE->m_restart_seq = 0;
12379 }//Dbdih::initRestartInfo()
12381 /*--------------------------------------------------------------------*/
12382 /* NODE GROUP BITS ARE INITIALISED BEFORE THIS. */
12383 /* NODE ACTIVE BITS ARE INITIALISED BEFORE THIS. */
12384 /*--------------------------------------------------------------------*/
12385 /*************************************************************************/
12386 /* */
12387 /* MODULE: INIT_RESTORABLE_GCI_FILES */
12388 /* DESCRIPTION: THE SUBROUTINE SETS UP THE FILE RECORDS THAT REFER */
12389 /* TO THE FILES THAT KEEP THE VARIABLE CRESTART_INFO */
12390 /*************************************************************************/
12391 void Dbdih::initRestorableGciFiles()
12393 Uint32 tirgTmp;
12394 FileRecordPtr filePtr;
12395 seizeFile(filePtr);
12396 filePtr.p->tabRef = RNIL;
12397 filePtr.p->fileType = FileRecord::GCP_FILE;
12398 filePtr.p->reqStatus = FileRecord::IDLE;
12399 filePtr.p->fileStatus = FileRecord::CLOSED;
12400 crestartInfoFile[0] = filePtr.i;
12401 filePtr.p->fileName[0] = (Uint32)-1; /* T DIRECTORY NOT USED */
12402 filePtr.p->fileName[1] = (Uint32)-1; /* F DIRECTORY NOT USED */
12403 filePtr.p->fileName[2] = (Uint32)-1; /* S PART IGNORED */
12404 tirgTmp = 1; /* FILE NAME VERSION 1 */
12405 tirgTmp = (tirgTmp << 8) + 6; /* .SYSFILE */
12406 tirgTmp = (tirgTmp << 8) + 1; /* D1 DIRECTORY */
12407 tirgTmp = (tirgTmp << 8) + 0; /* P0 FILE NAME */
12408 filePtr.p->fileName[3] = tirgTmp;
12409 /* --------------------------------------------------------------------- */
12410 /* THE NAME BECOMES /D1/DBDICT/S0.SYSFILE */
12411 /* --------------------------------------------------------------------- */
12412 seizeFile(filePtr);
12413 filePtr.p->tabRef = RNIL;
12414 filePtr.p->fileType = FileRecord::GCP_FILE;
12415 filePtr.p->reqStatus = FileRecord::IDLE;
12416 filePtr.p->fileStatus = FileRecord::CLOSED;
12417 crestartInfoFile[1] = filePtr.i;
12418 filePtr.p->fileName[0] = (Uint32)-1; /* T DIRECTORY NOT USED */
12419 filePtr.p->fileName[1] = (Uint32)-1; /* F DIRECTORY NOT USED */
12420 filePtr.p->fileName[2] = (Uint32)-1; /* S PART IGNORED */
12421 tirgTmp = 1; /* FILE NAME VERSION 1 */
12422 tirgTmp = (tirgTmp << 8) + 6; /* .SYSFILE */
12423 tirgTmp = (tirgTmp << 8) + 2; /* D1 DIRECTORY */
12424 tirgTmp = (tirgTmp << 8) + 0; /* P0 FILE NAME */
12425 filePtr.p->fileName[3] = tirgTmp;
12426 /* --------------------------------------------------------------------- */
12427 /* THE NAME BECOMES /D2/DBDICT/P0.SYSFILE */
12428 /* --------------------------------------------------------------------- */
12429 }//Dbdih::initRestorableGciFiles()
12431 void Dbdih::initTable(TabRecordPtr tabPtr)
12433 tabPtr.p->noOfFragChunks = 0;
12434 tabPtr.p->method = TabRecord::NOTDEFINED;
12435 tabPtr.p->tabStatus = TabRecord::TS_IDLE;
12436 tabPtr.p->noOfWords = 0;
12437 tabPtr.p->noPages = 0;
12438 tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
12439 tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
12440 tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
12441 tabPtr.p->noOfBackups = 0;
12442 tabPtr.p->kvalue = 0;
12443 tabPtr.p->hashpointer = (Uint32)-1;
12444 tabPtr.p->mask = 0;
12445 tabPtr.p->tabStorage = TabRecord::ST_NORMAL;
12446 tabPtr.p->tabErrorCode = 0;
12447 tabPtr.p->schemaVersion = (Uint32)-1;
12448 tabPtr.p->tabRemoveNode = RNIL;
12449 tabPtr.p->totalfragments = (Uint32)-1;
12450 tabPtr.p->connectrec = RNIL;
12451 tabPtr.p->tabFile[0] = RNIL;
12452 tabPtr.p->tabFile[1] = RNIL;
12453 tabPtr.p->m_dropTab.tabUserRef = 0;
12454 tabPtr.p->m_dropTab.tabUserPtr = RNIL;
12455 Uint32 i;
12456 for (i = 0; i < MAX_NDB_NODES; i++) {
12457 tabPtr.p->startFid[i] = RNIL;
12458 }//for
12459 for (i = 0; i < 8; i++) {
12460 tabPtr.p->pageRef[i] = RNIL;
12461 }//for
12462 tabPtr.p->tableType = DictTabInfo::UndefTableType;
12463 }//Dbdih::initTable()
12465 /*************************************************************************/
12466 /* */
12467 /* MODULE: INIT_TABLE_FILES */
12468 /* DESCRIPTION: THE SUBROUTINE SETS UP THE FILE RECORDS THAT REFER */
12469 /* TO THE FILES THAT KEEP THE TABLE FRAGMENTATION DESCRIPTION. */
12470 /*************************************************************************/
12471 void Dbdih::initTableFile(TabRecordPtr tabPtr)
12473 Uint32 titfTmp;
12474 FileRecordPtr filePtr;
12475 seizeFile(filePtr);
12476 filePtr.p->tabRef = tabPtr.i;
12477 filePtr.p->fileType = FileRecord::TABLE_FILE;
12478 filePtr.p->reqStatus = FileRecord::IDLE;
12479 filePtr.p->fileStatus = FileRecord::CLOSED;
12480 tabPtr.p->tabFile[0] = filePtr.i;
12481 filePtr.p->fileName[0] = (Uint32)-1; /* T DIRECTORY NOT USED */
12482 filePtr.p->fileName[1] = (Uint32)-1; /* F DIRECTORY NOT USED */
12483 filePtr.p->fileName[2] = tabPtr.i; /* Stid FILE NAME */
12484 titfTmp = 1; /* FILE NAME VERSION 1 */
12485 titfTmp = (titfTmp << 8) + 3; /* .FRAGLIST */
12486 titfTmp = (titfTmp << 8) + 1; /* D1 DIRECTORY */
12487 titfTmp = (titfTmp << 8) + 255; /* P PART IGNORED */
12488 filePtr.p->fileName[3] = titfTmp;
12489 /* --------------------------------------------------------------------- */
12490 /* THE NAME BECOMES /D1/DBDICT/Stid.FRAGLIST */
12491 /* --------------------------------------------------------------------- */
12492 seizeFile(filePtr);
12493 filePtr.p->tabRef = tabPtr.i;
12494 filePtr.p->fileType = FileRecord::TABLE_FILE;
12495 filePtr.p->reqStatus = FileRecord::IDLE;
12496 filePtr.p->fileStatus = FileRecord::CLOSED;
12497 tabPtr.p->tabFile[1] = filePtr.i;
12498 filePtr.p->fileName[0] = (Uint32)-1; /* T DIRECTORY NOT USED */
12499 filePtr.p->fileName[1] = (Uint32)-1; /* F DIRECTORY NOT USED */
12500 filePtr.p->fileName[2] = tabPtr.i; /* Stid FILE NAME */
12501 titfTmp = 1; /* FILE NAME VERSION 1 */
12502 titfTmp = (titfTmp << 8) + 3; /* .FRAGLIST */
12503 titfTmp = (titfTmp << 8) + 2; /* D2 DIRECTORY */
12504 titfTmp = (titfTmp << 8) + 255; /* P PART IGNORED */
12505 filePtr.p->fileName[3] = titfTmp;
12506 /* --------------------------------------------------------------------- */
12507 /* THE NAME BECOMES /D2/DBDICT/Stid.FRAGLIST */
12508 /* --------------------------------------------------------------------- */
12509 }//Dbdih::initTableFile()
12511 void Dbdih::initialiseRecordsLab(Signal* signal,
12512 Uint32 stepNo, Uint32 retRef, Uint32 retData)
12514 switch (stepNo) {
12515 case 0:
12516 jam();
12517 initCommonData();
12518 break;
12519 case 1:{
12520 ApiConnectRecordPtr apiConnectptr;
12521 jam();
12522 /******** INTIALIZING API CONNECT RECORDS ********/
12523 for (apiConnectptr.i = 0; apiConnectptr.i < capiConnectFileSize; apiConnectptr.i++) {
12524 refresh_watch_dog();
12525 ptrAss(apiConnectptr, apiConnectRecord);
12526 apiConnectptr.p->nextApi = RNIL;
12527 }//for
12528 jam();
12529 break;
12531 case 2:{
12532 ConnectRecordPtr connectPtr;
12533 jam();
12534 /****** CONNECT ******/
12535 for (connectPtr.i = 0; connectPtr.i < cconnectFileSize; connectPtr.i++) {
12536 refresh_watch_dog();
12537 ptrAss(connectPtr, connectRecord);
12538 connectPtr.p->userpointer = RNIL;
12539 connectPtr.p->userblockref = ZNIL;
12540 connectPtr.p->connectState = ConnectRecord::FREE;
12541 connectPtr.p->table = RNIL;
12542 connectPtr.p->nfConnect = connectPtr.i + 1;
12543 }//for
12544 connectPtr.i = cconnectFileSize - 1;
12545 ptrAss(connectPtr, connectRecord);
12546 connectPtr.p->nfConnect = RNIL;
12547 cfirstconnect = 0;
12548 break;
12550 case 3:
12552 FileRecordPtr filePtr;
12553 jam();
12554 /******** INTIALIZING FILE RECORDS ********/
12555 for (filePtr.i = 0; filePtr.i < cfileFileSize; filePtr.i++) {
12556 ptrAss(filePtr, fileRecord);
12557 filePtr.p->nextFile = filePtr.i + 1;
12558 filePtr.p->fileStatus = FileRecord::CLOSED;
12559 filePtr.p->reqStatus = FileRecord::IDLE;
12560 }//for
12561 filePtr.i = cfileFileSize - 1;
12562 ptrAss(filePtr, fileRecord);
12563 filePtr.p->nextFile = RNIL;
12564 cfirstfreeFile = 0;
12565 initRestorableGciFiles();
12566 break;
12568 case 4:
12569 jam();
12570 initialiseFragstore();
12571 break;
12572 case 5:
12574 jam();
12575 /******* NODE GROUP RECORD ******/
12576 /******* NODE RECORD ******/
12577 NodeGroupRecordPtr loopNGPtr;
12578 for (loopNGPtr.i = 0; loopNGPtr.i < MAX_NDB_NODES; loopNGPtr.i++) {
12579 ptrAss(loopNGPtr, nodeGroupRecord);
12580 loopNGPtr.p->nodesInGroup[0] = RNIL;
12581 loopNGPtr.p->nodesInGroup[1] = RNIL;
12582 loopNGPtr.p->nodesInGroup[2] = RNIL;
12583 loopNGPtr.p->nodesInGroup[3] = RNIL;
12584 loopNGPtr.p->nextReplicaNode = 0;
12585 loopNGPtr.p->nodeCount = 0;
12586 loopNGPtr.p->activeTakeOver = false;
12587 }//for
12588 NodeRecordPtr nodePtr;
12589 for (nodePtr.i = 0; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
12590 ptrAss(nodePtr, nodeRecord);
12591 new (nodePtr.p) NodeRecord();
12592 }//for
12593 break;
12595 case 6:
12597 PageRecordPtr pagePtr;
12598 jam();
12599 /******* PAGE RECORD ******/
12600 for (pagePtr.i = 0; pagePtr.i < cpageFileSize; pagePtr.i++) {
12601 refresh_watch_dog();
12602 ptrAss(pagePtr, pageRecord);
12603 pagePtr.p->nextfreepage = pagePtr.i + 1;
12604 }//for
12605 pagePtr.i = cpageFileSize - 1;
12606 ptrAss(pagePtr, pageRecord);
12607 pagePtr.p->nextfreepage = RNIL;
12608 cfirstfreepage = 0;
12609 break;
12611 case 7:
12613 ReplicaRecordPtr initReplicaPtr;
12614 jam();
12615 /******* REPLICA RECORD ******/
12616 for (initReplicaPtr.i = 0; initReplicaPtr.i < creplicaFileSize;
12617 initReplicaPtr.i++) {
12618 refresh_watch_dog();
12619 ptrAss(initReplicaPtr, replicaRecord);
12620 initReplicaPtr.p->lcpIdStarted = 0;
12621 initReplicaPtr.p->lcpOngoingFlag = false;
12622 initReplicaPtr.p->nextReplica = initReplicaPtr.i + 1;
12623 }//for
12624 initReplicaPtr.i = creplicaFileSize - 1;
12625 ptrAss(initReplicaPtr, replicaRecord);
12626 initReplicaPtr.p->nextReplica = RNIL;
12627 cnoFreeReplicaRec = creplicaFileSize;
12628 cfirstfreeReplica = 0;
12629 break;
12631 case 8:
12633 TabRecordPtr loopTabptr;
12634 jam();
12635 /********* TAB-DESCRIPTOR ********/
12636 for (loopTabptr.i = 0; loopTabptr.i < ctabFileSize; loopTabptr.i++) {
12637 ptrAss(loopTabptr, tabRecord);
12638 refresh_watch_dog();
12639 initTable(loopTabptr);
12640 }//for
12641 break;
12643 case 9:
12645 TakeOverRecordPtr takeOverPtr;
12646 jam();
12647 cfirstfreeTakeOver = RNIL;
12648 for (takeOverPtr.i = 0; takeOverPtr.i < MAX_NDB_NODES; takeOverPtr.i++) {
12649 ptrAss(takeOverPtr, takeOverRecord);
12650 initTakeOver(takeOverPtr);
12651 releaseTakeOver(takeOverPtr.i);
12652 }//for
12654 ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend();
12655 conf->senderRef = reference();
12656 conf->senderData = retData;
12657 sendSignal(retRef, GSN_READ_CONFIG_CONF, signal,
12658 ReadConfigConf::SignalLength, JBB);
12659 return;
12660 break;
12662 default:
12663 ndbrequire(false);
12664 break;
12665 }//switch
12666 jam();
12667 /* ---------------------------------------------------------------------- */
12668 /* SEND REAL-TIME BREAK DURING INIT OF VARIABLES DURING SYSTEM RESTART. */
12669 /* ---------------------------------------------------------------------- */
12670 signal->theData[0] = DihContinueB::ZINITIALISE_RECORDS;
12671 signal->theData[1] = stepNo + 1;
12672 signal->theData[2] = retRef;
12673 signal->theData[3] = retData;
12674 sendSignal(reference(), GSN_CONTINUEB, signal, 4, JBB);
12675 }//Dbdih::initialiseRecordsLab()
12677 /*************************************************************************/
12678 /* INSERT THE NODE INTO THE LINKED LIST OF NODES INVOLVED IN ALL */
12679 /* DISTRIBUTED PROTOCOLS (EXCEPT THE GCP PROTOCOL, WHICH USES THE */
12680 /* DIH LINKED LIST INSTEAD). */
12681 /*************************************************************************/
12682 void Dbdih::insertAlive(NodeRecordPtr newNodePtr)
12684 NodeRecordPtr nodePtr;
12686 nodePtr.i = cfirstAliveNode;
12687 if (nodePtr.i == RNIL) {
12688 jam();
12689 cfirstAliveNode = newNodePtr.i;
12690 } else {
12691 do {
12692 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
12693 if (nodePtr.p->nextNode == RNIL) {
12694 jam();
12695 nodePtr.p->nextNode = newNodePtr.i;
12696 break;
12697 } else {
12698 jam();
12699 nodePtr.i = nodePtr.p->nextNode;
12700 }//if
12701 } while (1);
12702 }//if
12703 newNodePtr.p->nextNode = RNIL;
12704 }//Dbdih::insertAlive()
12706 void Dbdih::insertBackup(FragmentstorePtr fragPtr, Uint32 nodeId)
12708 for (Uint32 i = fragPtr.p->fragReplicas; i > 1; i--) {
12709 jam();
12710 ndbrequire(i < MAX_REPLICAS && i > 0);
12711 fragPtr.p->activeNodes[i] = fragPtr.p->activeNodes[i - 1];
12712 }//for
12713 fragPtr.p->activeNodes[1] = nodeId;
12714 fragPtr.p->fragReplicas++;
12715 }//Dbdih::insertBackup()
12717 void Dbdih::insertDeadNode(NodeRecordPtr newNodePtr)
12719 NodeRecordPtr nodePtr;
12721 nodePtr.i = cfirstDeadNode;
12722 if (nodePtr.i == RNIL) {
12723 jam();
12724 cfirstDeadNode = newNodePtr.i;
12725 } else {
12726 do {
12727 jam();
12728 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
12729 if (nodePtr.p->nextNode == RNIL) {
12730 jam();
12731 nodePtr.p->nextNode = newNodePtr.i;
12732 break;
12733 } else {
12734 jam();
12735 nodePtr.i = nodePtr.p->nextNode;
12736 }//if
12737 } while (1);
12738 }//if
12739 newNodePtr.p->nextNode = RNIL;
12740 }//Dbdih::insertDeadNode()
12742 void Dbdih::linkOldStoredReplica(FragmentstorePtr fragPtr,
12743 ReplicaRecordPtr replicatePtr)
12745 ReplicaRecordPtr losReplicaPtr;
12747 replicatePtr.p->nextReplica = RNIL;
12748 fragPtr.p->noOldStoredReplicas++;
12749 losReplicaPtr.i = fragPtr.p->oldStoredReplicas;
12750 if (losReplicaPtr.i == RNIL) {
12751 jam();
12752 fragPtr.p->oldStoredReplicas = replicatePtr.i;
12753 return;
12754 }//if
12755 ptrCheckGuard(losReplicaPtr, creplicaFileSize, replicaRecord);
12756 while (losReplicaPtr.p->nextReplica != RNIL) {
12757 jam();
12758 losReplicaPtr.i = losReplicaPtr.p->nextReplica;
12759 ptrCheckGuard(losReplicaPtr, creplicaFileSize, replicaRecord);
12760 }//if
12761 losReplicaPtr.p->nextReplica = replicatePtr.i;
12762 }//Dbdih::linkOldStoredReplica()
12764 void Dbdih::linkStoredReplica(FragmentstorePtr fragPtr,
12765 ReplicaRecordPtr replicatePtr)
12767 ReplicaRecordPtr lsrReplicaPtr;
12769 fragPtr.p->noStoredReplicas++;
12770 replicatePtr.p->nextReplica = RNIL;
12771 lsrReplicaPtr.i = fragPtr.p->storedReplicas;
12772 if (fragPtr.p->storedReplicas == RNIL) {
12773 jam();
12774 fragPtr.p->storedReplicas = replicatePtr.i;
12775 return;
12776 }//if
12777 ptrCheckGuard(lsrReplicaPtr, creplicaFileSize, replicaRecord);
12778 while (lsrReplicaPtr.p->nextReplica != RNIL) {
12779 jam();
12780 lsrReplicaPtr.i = lsrReplicaPtr.p->nextReplica;
12781 ptrCheckGuard(lsrReplicaPtr, creplicaFileSize, replicaRecord);
12782 }//if
12783 lsrReplicaPtr.p->nextReplica = replicatePtr.i;
12784 }//Dbdih::linkStoredReplica()
12786 /*************************************************************************/
12787 /* MAKE NODE GROUPS BASED ON THE LIST OF NODES RECEIVED FROM CNTR */
12788 /*************************************************************************/
12789 void Dbdih::makeNodeGroups(Uint32 nodeArray[])
12791 NodeRecordPtr mngNodeptr;
12792 Uint32 tmngNode;
12793 Uint32 tmngNodeGroup;
12794 Uint32 tmngLimit;
12795 Uint32 i, j;
12797 /**-----------------------------------------------------------------------
12798 * ASSIGN ALL ACTIVE NODES INTO NODE GROUPS. HOT SPARE NODES ARE ASSIGNED
12799 * TO NODE GROUP ZNIL
12800 *-----------------------------------------------------------------------*/
12801 tmngNodeGroup = 0;
12802 tmngLimit = csystemnodes - cnoHotSpare;
12803 ndbrequire(tmngLimit < MAX_NDB_NODES);
12804 for (i = 0; i < tmngLimit; i++) {
12805 NodeGroupRecordPtr NGPtr;
12806 jam();
12807 tmngNode = nodeArray[i];
12808 mngNodeptr.i = tmngNode;
12809 ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord);
12810 mngNodeptr.p->nodeGroup = tmngNodeGroup;
12811 NGPtr.i = tmngNodeGroup;
12812 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
12813 arrGuard(NGPtr.p->nodeCount, MAX_REPLICAS);
12814 NGPtr.p->nodesInGroup[NGPtr.p->nodeCount++] = mngNodeptr.i;
12815 if (NGPtr.p->nodeCount == cnoReplicas) {
12816 jam();
12817 tmngNodeGroup++;
12818 }//if
12819 }//for
12820 cnoOfNodeGroups = tmngNodeGroup;
12821 ndbrequire(csystemnodes < MAX_NDB_NODES);
12822 for (i = tmngLimit + 1; i < csystemnodes; i++) {
12823 jam();
12824 tmngNode = nodeArray[i];
12825 mngNodeptr.i = tmngNode;
12826 ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord);
12827 mngNodeptr.p->nodeGroup = ZNIL;
12828 }//for
12829 for(i = 0; i < MAX_NDB_NODES; i++){
12830 jam();
12831 Sysfile::setNodeGroup(i, SYSFILE->nodeGroups, NO_NODE_GROUP_ID);
12832 }//for
12833 for (mngNodeptr.i = 1; mngNodeptr.i < MAX_NDB_NODES; mngNodeptr.i++) {
12834 jam();
12835 ptrAss(mngNodeptr, nodeRecord);
12836 if (mngNodeptr.p->nodeGroup != ZNIL) {
12837 jam();
12838 Sysfile::setNodeGroup(mngNodeptr.i, SYSFILE->nodeGroups, mngNodeptr.p->nodeGroup);
12839 }//if
12840 }//for
12842 for (i = 0; i<cnoOfNodeGroups; i++)
12844 jam();
12845 bool alive = false;
12846 NodeGroupRecordPtr NGPtr;
12847 NGPtr.i = i;
12848 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
12849 for (j = 0; j<NGPtr.p->nodeCount; j++)
12851 jam();
12852 mngNodeptr.i = NGPtr.p->nodesInGroup[j];
12853 ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord);
12854 if (checkNodeAlive(NGPtr.p->nodesInGroup[j]))
12856 alive = true;
12857 break;
12861 if (!alive)
12863 char buf[255];
12864 BaseString::snprintf
12865 (buf, sizeof(buf),
12866 "Illegal initial start, no alive node in nodegroup %u", i);
12867 progError(__LINE__,
12868 NDBD_EXIT_INSUFFICENT_NODES,
12869 buf);
12873 }//Dbdih::makeNodeGroups()
12876 * On node failure QMGR asks DIH about node groups. This is
12877 * a direct signal (function call in same process). Input is
12878 * bitmask of surviving nodes. The routine is not concerned
12879 * about node count. Reply is one of:
12880 * 1) win - we can survive, and nobody else can
12881 * 2) lose - we cannot survive
12882 * 3) partition - we can survive but there could be others
12884 void Dbdih::execCHECKNODEGROUPSREQ(Signal* signal)
12886 jamEntry();
12887 CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
12889 bool direct = (sd->requestType & CheckNodeGroups::Direct);
12890 bool ok = false;
12891 switch(sd->requestType & ~CheckNodeGroups::Direct){
12892 case CheckNodeGroups::ArbitCheck:{
12893 ok = true;
12894 jam();
12895 unsigned missall = 0;
12896 unsigned haveall = 0;
12897 for (Uint32 i = 0; i < cnoOfNodeGroups; i++) {
12898 jam();
12899 NodeGroupRecordPtr ngPtr;
12900 ngPtr.i = i;
12901 ptrAss(ngPtr, nodeGroupRecord);
12902 Uint32 count = 0;
12903 for (Uint32 j = 0; j < ngPtr.p->nodeCount; j++) {
12904 jam();
12905 Uint32 nodeId = ngPtr.p->nodesInGroup[j];
12906 if (sd->mask.get(nodeId)) {
12907 jam();
12908 count++;
12909 }//if
12910 }//for
12911 if (count == 0) {
12912 jam();
12913 missall++;
12914 }//if
12915 if (count == ngPtr.p->nodeCount) {
12916 haveall++;
12917 }//if
12918 }//for
12920 if (missall) {
12921 jam();
12922 sd->output = CheckNodeGroups::Lose;
12923 } else if (haveall) {
12924 jam();
12925 sd->output = CheckNodeGroups::Win;
12926 } else {
12927 jam();
12928 sd->output = CheckNodeGroups::Partitioning;
12929 }//if
12931 break;
12932 case CheckNodeGroups::GetNodeGroup:
12933 ok = true;
12934 sd->output = Sysfile::getNodeGroup(getOwnNodeId(), SYSFILE->nodeGroups);
12935 break;
12936 case CheckNodeGroups::GetNodeGroupMembers: {
12937 ok = true;
12938 Uint32 ownNodeGroup =
12939 Sysfile::getNodeGroup(sd->nodeId, SYSFILE->nodeGroups);
12941 sd->output = ownNodeGroup;
12942 sd->mask.clear();
12944 NodeGroupRecordPtr ngPtr;
12945 ngPtr.i = ownNodeGroup;
12946 ptrAss(ngPtr, nodeGroupRecord);
12947 for (Uint32 j = 0; j < ngPtr.p->nodeCount; j++) {
12948 jam();
12949 sd->mask.set(ngPtr.p->nodesInGroup[j]);
12951 #if 0
12952 for (int i = 0; i < MAX_NDB_NODES; i++) {
12953 if (ownNodeGroup ==
12954 Sysfile::getNodeGroup(i, SYSFILE->nodeGroups)) {
12955 sd->mask.set(i);
12958 #endif
12960 break;
12962 ndbrequire(ok);
12964 if (!direct)
12965 sendSignal(sd->blockRef, GSN_CHECKNODEGROUPSCONF, signal,
12966 CheckNodeGroups::SignalLength, JBB);
12967 }//Dbdih::execCHECKNODEGROUPSREQ()
12969 void Dbdih::makePrnList(ReadNodesConf * readNodes, Uint32 nodeArray[])
12971 cfirstAliveNode = RNIL;
12972 ndbrequire(con_lineNodes > 0);
12973 ndbrequire(csystemnodes < MAX_NDB_NODES);
12974 for (Uint32 i = 0; i < csystemnodes; i++) {
12975 NodeRecordPtr nodePtr;
12976 jam();
12977 nodePtr.i = nodeArray[i];
12978 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
12979 new (nodePtr.p) NodeRecord();
12980 if (NodeBitmask::get(readNodes->inactiveNodes, nodePtr.i) == false){
12981 jam();
12982 nodePtr.p->nodeStatus = NodeRecord::ALIVE;
12983 nodePtr.p->useInTransactions = true;
12984 nodePtr.p->copyCompleted = true;
12985 nodePtr.p->m_inclDihLcp = true;
12986 insertAlive(nodePtr);
12987 } else {
12988 jam();
12989 nodePtr.p->nodeStatus = NodeRecord::DEAD;
12990 insertDeadNode(nodePtr);
12991 }//if
12992 }//for
12993 }//Dbdih::makePrnList()
12995 /*************************************************************************/
12996 /* A NEW CRASHED REPLICA IS ADDED BY A NODE FAILURE. */
12997 /*************************************************************************/
12998 void Dbdih::newCrashedReplica(Uint32 nodeId, ReplicaRecordPtr ncrReplicaPtr)
13000 /*----------------------------------------------------------------------*/
13001 /* SET THE REPLICA_LAST_GCI OF THE CRASHED REPLICA TO LAST GCI */
13002 /* EXECUTED BY THE FAILED NODE. */
13003 /*----------------------------------------------------------------------*/
13004 /* WE HAVE A NEW CRASHED REPLICA. INITIATE CREATE GCI TO INDICATE */
13005 /* THAT THE NEW REPLICA IS NOT STARTED YET AND REPLICA_LAST_GCI IS*/
13006 /* SET TO -1 TO INDICATE THAT IT IS NOT DEAD YET. */
13007 /*----------------------------------------------------------------------*/
13008 Uint32 lastGCI = SYSFILE->lastCompletedGCI[nodeId];
13009 arrGuardErr(ncrReplicaPtr.p->noCrashedReplicas + 1, 8,
13010 NDBD_EXIT_MAX_CRASHED_REPLICAS);
13011 ncrReplicaPtr.p->replicaLastGci[ncrReplicaPtr.p->noCrashedReplicas] =
13012 lastGCI;
13013 ncrReplicaPtr.p->noCrashedReplicas = ncrReplicaPtr.p->noCrashedReplicas + 1;
13014 ncrReplicaPtr.p->createGci[ncrReplicaPtr.p->noCrashedReplicas] = 0;
13015 ncrReplicaPtr.p->replicaLastGci[ncrReplicaPtr.p->noCrashedReplicas] =
13016 (Uint32)-1;
13018 if (ncrReplicaPtr.p->noCrashedReplicas == 7 && lastGCI)
13020 jam();
13021 SYSFILE->lastCompletedGCI[nodeId] = 0;
13022 warningEvent("Making filesystem for node %d unusable (need --initial)",
13023 nodeId);
13025 }//Dbdih::newCrashedReplica()
13027 /*************************************************************************/
13028 /* AT NODE FAILURE DURING START OF A NEW NODE WE NEED TO RESET A */
13029 /* SET OF VARIABLES CONTROLLING THE START AND INDICATING ONGOING */
13030 /* START OF A NEW NODE. */
13031 /*************************************************************************/
13032 void Dbdih::nodeResetStart()
13034 jam();
13035 c_nodeStartSlave.nodeId = 0;
13036 c_nodeStartMaster.startNode = RNIL;
13037 c_nodeStartMaster.failNr = cfailurenr;
13038 c_nodeStartMaster.activeState = false;
13039 c_nodeStartMaster.blockGcp = false;
13040 c_nodeStartMaster.blockLcp = false;
13041 c_nodeStartMaster.m_outstandingGsn = 0;
13042 }//Dbdih::nodeResetStart()
13044 void Dbdih::openFileRw(Signal* signal, FileRecordPtr filePtr)
13046 signal->theData[0] = reference();
13047 signal->theData[1] = filePtr.i;
13048 signal->theData[2] = filePtr.p->fileName[0];
13049 signal->theData[3] = filePtr.p->fileName[1];
13050 signal->theData[4] = filePtr.p->fileName[2];
13051 signal->theData[5] = filePtr.p->fileName[3];
13052 signal->theData[6] = FsOpenReq::OM_READWRITE;
13053 sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, 7, JBA);
13054 }//Dbdih::openFileRw()
13056 void Dbdih::openFileRo(Signal* signal, FileRecordPtr filePtr)
13058 signal->theData[0] = reference();
13059 signal->theData[1] = filePtr.i;
13060 signal->theData[2] = filePtr.p->fileName[0];
13061 signal->theData[3] = filePtr.p->fileName[1];
13062 signal->theData[4] = filePtr.p->fileName[2];
13063 signal->theData[5] = filePtr.p->fileName[3];
13064 signal->theData[6] = FsOpenReq::OM_READONLY;
13065 sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, 7, JBA);
13066 }//Dbdih::openFileRw()
13068 /*************************************************************************/
13069 /* REMOVE A CRASHED REPLICA BY PACKING THE ARRAY OF CREATED GCI AND*/
13070 /* THE LAST GCI OF THE CRASHED REPLICA. */
13071 /*************************************************************************/
13072 void Dbdih::packCrashedReplicas(ReplicaRecordPtr replicaPtr)
13074 ndbrequire(replicaPtr.p->noCrashedReplicas > 0);
13075 ndbrequire(replicaPtr.p->noCrashedReplicas <= 8);
13076 for (Uint32 i = 0; i < replicaPtr.p->noCrashedReplicas; i++) {
13077 jam();
13078 replicaPtr.p->createGci[i] = replicaPtr.p->createGci[i + 1];
13079 replicaPtr.p->replicaLastGci[i] = replicaPtr.p->replicaLastGci[i + 1];
13080 }//for
13081 replicaPtr.p->noCrashedReplicas--;
13083 #ifdef VM_TRACE
13084 for (Uint32 i = 0; i < replicaPtr.p->noCrashedReplicas; i++) {
13085 jam();
13086 ndbrequire(replicaPtr.p->createGci[i] != 0xF1F1F1F1);
13087 ndbrequire(replicaPtr.p->replicaLastGci[i] != 0xF1F1F1F1);
13088 }//for
13089 #endif
13090 }//Dbdih::packCrashedReplicas()
13092 void Dbdih::prepareReplicas(FragmentstorePtr fragPtr)
13094 ReplicaRecordPtr prReplicaPtr;
13095 Uint32 prevReplica = RNIL;
13097 /* --------------------------------------------------------------------- */
13098 /* BEGIN BY LINKING ALL REPLICA RECORDS ONTO THE OLD STORED REPLICA*/
13099 /* LIST. */
13100 /* AT A SYSTEM RESTART OBVIOUSLY ALL NODES ARE OLD. */
13101 /* --------------------------------------------------------------------- */
13102 prReplicaPtr.i = fragPtr.p->storedReplicas;
13103 while (prReplicaPtr.i != RNIL) {
13104 jam();
13105 prevReplica = prReplicaPtr.i;
13106 ptrCheckGuard(prReplicaPtr, creplicaFileSize, replicaRecord);
13107 prReplicaPtr.i = prReplicaPtr.p->nextReplica;
13108 }//while
13109 /* --------------------------------------------------------------------- */
13110 /* LIST OF STORED REPLICAS WILL BE EMPTY NOW. */
13111 /* --------------------------------------------------------------------- */
13112 if (prevReplica != RNIL) {
13113 prReplicaPtr.i = prevReplica;
13114 ptrCheckGuard(prReplicaPtr, creplicaFileSize, replicaRecord);
13115 prReplicaPtr.p->nextReplica = fragPtr.p->oldStoredReplicas;
13116 fragPtr.p->oldStoredReplicas = fragPtr.p->storedReplicas;
13117 fragPtr.p->storedReplicas = RNIL;
13118 fragPtr.p->noOldStoredReplicas += fragPtr.p->noStoredReplicas;
13119 fragPtr.p->noStoredReplicas = 0;
13120 }//if
13121 }//Dbdih::prepareReplicas()
13123 void Dbdih::readFragment(RWFragment* rf, FragmentstorePtr fragPtr)
13125 Uint32 TreadFid = readPageWord(rf);
13126 fragPtr.p->preferredPrimary = readPageWord(rf);
13127 fragPtr.p->noStoredReplicas = readPageWord(rf);
13128 fragPtr.p->noOldStoredReplicas = readPageWord(rf);
13129 Uint32 TdistKey = readPageWord(rf);
13131 ndbrequire(fragPtr.p->noStoredReplicas > 0);
13132 ndbrequire(TreadFid == rf->fragId);
13133 ndbrequire(TdistKey < 256);
13134 if ((cstarttype == NodeState::ST_NODE_RESTART) ||
13135 (cstarttype == NodeState::ST_INITIAL_NODE_RESTART)) {
13136 jam();
13137 fragPtr.p->distributionKey = TdistKey;
13138 }//if
13140 fragPtr.p->m_log_part_id = readPageWord(rf);
13141 }//Dbdih::readFragment()
13143 Uint32 Dbdih::readPageWord(RWFragment* rf)
13145 if (rf->wordIndex >= 2048) {
13146 jam();
13147 ndbrequire(rf->wordIndex == 2048);
13148 rf->pageIndex++;
13149 ndbrequire(rf->pageIndex < 8);
13150 rf->rwfPageptr.i = rf->rwfTabPtr.p->pageRef[rf->pageIndex];
13151 ptrCheckGuard(rf->rwfPageptr, cpageFileSize, pageRecord);
13152 rf->wordIndex = 32;
13153 }//if
13154 Uint32 dataWord = rf->rwfPageptr.p->word[rf->wordIndex];
13155 rf->wordIndex++;
13156 return dataWord;
13157 }//Dbdih::readPageWord()
13159 void Dbdih::readReplica(RWFragment* rf, ReplicaRecordPtr readReplicaPtr)
13161 Uint32 i;
13162 readReplicaPtr.p->procNode = readPageWord(rf);
13163 readReplicaPtr.p->initialGci = readPageWord(rf);
13164 readReplicaPtr.p->noCrashedReplicas = readPageWord(rf);
13165 readReplicaPtr.p->nextLcp = readPageWord(rf);
13167 for (i = 0; i < MAX_LCP_STORED; i++) {
13168 readReplicaPtr.p->maxGciCompleted[i] = readPageWord(rf);
13169 readReplicaPtr.p->maxGciStarted[i] = readPageWord(rf);
13170 readReplicaPtr.p->lcpId[i] = readPageWord(rf);
13171 readReplicaPtr.p->lcpStatus[i] = readPageWord(rf);
13172 }//for
13173 const Uint32 noCrashedReplicas = readReplicaPtr.p->noCrashedReplicas;
13174 ndbrequire(noCrashedReplicas < 8);
13175 for (i = 0; i < noCrashedReplicas; i++) {
13176 readReplicaPtr.p->createGci[i] = readPageWord(rf);
13177 readReplicaPtr.p->replicaLastGci[i] = readPageWord(rf);
13178 ndbrequire(readReplicaPtr.p->createGci[i] != 0xF1F1F1F1);
13179 ndbrequire(readReplicaPtr.p->replicaLastGci[i] != 0xF1F1F1F1);
13180 }//for
13181 for(i = noCrashedReplicas; i<8; i++){
13182 readReplicaPtr.p->createGci[i] = readPageWord(rf);
13183 readReplicaPtr.p->replicaLastGci[i] = readPageWord(rf);
13184 // They are not initialized...
13185 readReplicaPtr.p->createGci[i] = 0;
13186 readReplicaPtr.p->replicaLastGci[i] = ~0;
13188 /* ---------------------------------------------------------------------- */
13189 /* IF THE LAST COMPLETED LOCAL CHECKPOINT IS VALID AND LARGER THAN */
13190 /* THE LAST COMPLETED CHECKPOINT THEN WE WILL INVALIDATE THIS LOCAL */
13191 /* CHECKPOINT FOR THIS REPLICA. */
13192 /* ---------------------------------------------------------------------- */
13193 Uint32 trraLcp = prevLcpNo(readReplicaPtr.p->nextLcp);
13194 ndbrequire(trraLcp < MAX_LCP_STORED);
13195 if ((readReplicaPtr.p->lcpStatus[trraLcp] == ZVALID) &&
13196 (readReplicaPtr.p->lcpId[trraLcp] > SYSFILE->latestLCP_ID)) {
13197 jam();
13198 readReplicaPtr.p->lcpStatus[trraLcp] = ZINVALID;
13199 }//if
13200 /* ---------------------------------------------------------------------- */
13201 /* WE ALSO HAVE TO INVALIDATE ANY LOCAL CHECKPOINTS THAT HAVE BEEN */
13202 /* INVALIDATED BY MOVING BACK THE RESTART GCI. */
13203 /* ---------------------------------------------------------------------- */
13204 for (i = 0; i < MAX_LCP_STORED; i++) {
13205 jam();
13206 if ((readReplicaPtr.p->lcpStatus[i] == ZVALID) &&
13207 (readReplicaPtr.p->maxGciStarted[i] > SYSFILE->newestRestorableGCI)) {
13208 jam();
13209 readReplicaPtr.p->lcpStatus[i] = ZINVALID;
13210 }//if
13211 }//for
13212 /* ---------------------------------------------------------------------- */
13213 /* WE WILL REMOVE ANY OCCURRENCES OF REPLICAS THAT HAVE CRASHED */
13214 /* THAT ARE NO LONGER VALID DUE TO MOVING RESTART GCI BACKWARDS. */
13215 /* ---------------------------------------------------------------------- */
13216 removeTooNewCrashedReplicas(readReplicaPtr);
13217 /* ---------------------------------------------------------------------- */
13218 /* WE WILL REMOVE ANY OCCURRENCES OF REPLICAS THAT HAVE CRASHED */
13219 /* THAT ARE NO LONGER VALID SINCE THEY ARE NO LONGER RESTORABLE. */
13220 /* ---------------------------------------------------------------------- */
13221 removeOldCrashedReplicas(readReplicaPtr);
13222 /* --------------------------------------------------------------------- */
13223 // We set the last GCI of the replica that was alive before the node
13224 // crashed last time. We set it to the last GCI which the node participated in.
13225 /* --------------------------------------------------------------------- */
13226 ndbrequire(readReplicaPtr.p->noCrashedReplicas < 8);
13227 readReplicaPtr.p->replicaLastGci[readReplicaPtr.p->noCrashedReplicas] =
13228 SYSFILE->lastCompletedGCI[readReplicaPtr.p->procNode];
13229 /* ---------------------------------------------------------------------- */
13230 /* FIND PROCESSOR RECORD */
13231 /* ---------------------------------------------------------------------- */
13232 }//Dbdih::readReplica()
13234 void Dbdih::readReplicas(RWFragment* rf, FragmentstorePtr fragPtr)
13236 Uint32 i;
13237 ReplicaRecordPtr newReplicaPtr;
13238 Uint32 noStoredReplicas = fragPtr.p->noStoredReplicas;
13239 Uint32 noOldStoredReplicas = fragPtr.p->noOldStoredReplicas;
13240 /* ----------------------------------------------------------------------- */
13241 /* WE CLEAR THE NUMBER OF STORED REPLICAS SINCE IT WILL BE CALCULATED */
13242 /* BY THE LINKING SUBROUTINES. */
13243 /* ----------------------------------------------------------------------- */
13244 fragPtr.p->noStoredReplicas = 0;
13245 fragPtr.p->noOldStoredReplicas = 0;
13246 Uint32 replicaIndex = 0;
13247 ndbrequire(noStoredReplicas + noOldStoredReplicas <= MAX_REPLICAS);
13248 for (i = 0; i < noStoredReplicas; i++) {
13249 seizeReplicaRec(newReplicaPtr);
13250 readReplica(rf, newReplicaPtr);
13251 if (checkNodeAlive(newReplicaPtr.p->procNode)) {
13252 jam();
13253 ndbrequire(replicaIndex < MAX_REPLICAS);
13254 fragPtr.p->activeNodes[replicaIndex] = newReplicaPtr.p->procNode;
13255 replicaIndex++;
13256 linkStoredReplica(fragPtr, newReplicaPtr);
13257 } else {
13258 jam();
13259 linkOldStoredReplica(fragPtr, newReplicaPtr);
13260 }//if
13261 }//for
13262 fragPtr.p->fragReplicas = noStoredReplicas;
13263 for (i = 0; i < noOldStoredReplicas; i++) {
13264 jam();
13265 seizeReplicaRec(newReplicaPtr);
13266 readReplica(rf, newReplicaPtr);
13267 linkOldStoredReplica(fragPtr, newReplicaPtr);
13268 }//for
13269 }//Dbdih::readReplicas()
13271 void Dbdih::readRestorableGci(Signal* signal, FileRecordPtr filePtr)
13273 signal->theData[0] = filePtr.p->fileRef;
13274 signal->theData[1] = reference();
13275 signal->theData[2] = filePtr.i;
13276 signal->theData[3] = ZLIST_OF_PAIRS;
13277 signal->theData[4] = ZVAR_NO_CRESTART_INFO;
13278 signal->theData[5] = 1;
13279 signal->theData[6] = 0;
13280 signal->theData[7] = 0;
13281 sendSignal(NDBFS_REF, GSN_FSREADREQ, signal, 8, JBA);
13282 }//Dbdih::readRestorableGci()
13284 void Dbdih::readTabfile(Signal* signal, TabRecord* tab, FileRecordPtr filePtr)
13286 signal->theData[0] = filePtr.p->fileRef;
13287 signal->theData[1] = reference();
13288 signal->theData[2] = filePtr.i;
13289 signal->theData[3] = ZLIST_OF_PAIRS;
13290 signal->theData[4] = ZVAR_NO_WORD;
13291 signal->theData[5] = tab->noPages;
13292 for (Uint32 i = 0; i < tab->noPages; i++) {
13293 signal->theData[6 + (2 * i)] = tab->pageRef[i];
13294 signal->theData[7 + (2 * i)] = i;
13295 }//for
13296 sendSignal(NDBFS_REF, GSN_FSREADREQ, signal, 22, JBA);
13297 }//Dbdih::readTabfile()
13299 void Dbdih::releasePage(Uint32 pageIndex)
13301 PageRecordPtr pagePtr;
13302 pagePtr.i = pageIndex;
13303 ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
13304 pagePtr.p->nextfreepage = cfirstfreepage;
13305 cfirstfreepage = pagePtr.i;
13306 }//Dbdih::releasePage()
13308 void Dbdih::releaseTabPages(Uint32 tableId)
13310 TabRecordPtr tabPtr;
13311 tabPtr.i = tableId;
13312 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
13313 ndbrequire(tabPtr.p->noPages <= 8);
13314 for (Uint32 i = 0; i < tabPtr.p->noPages; i++) {
13315 jam();
13316 releasePage(tabPtr.p->pageRef[i]);
13317 }//for
13318 tabPtr.p->noPages = 0;
13319 }//Dbdih::releaseTabPages()
13321 /*************************************************************************/
13322 /* REMOVE NODE FROM SET OF ALIVE NODES. */
13323 /*************************************************************************/
13324 void Dbdih::removeAlive(NodeRecordPtr removeNodePtr)
13326 NodeRecordPtr nodePtr;
13328 nodePtr.i = cfirstAliveNode;
13329 if (nodePtr.i == removeNodePtr.i) {
13330 jam();
13331 cfirstAliveNode = removeNodePtr.p->nextNode;
13332 return;
13333 }//if
13334 do {
13335 jam();
13336 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
13337 if (nodePtr.p->nextNode == removeNodePtr.i) {
13338 jam();
13339 nodePtr.p->nextNode = removeNodePtr.p->nextNode;
13340 break;
13341 } else {
13342 jam();
13343 nodePtr.i = nodePtr.p->nextNode;
13344 }//if
13345 } while (1);
13346 }//Dbdih::removeAlive()
13348 /*************************************************************************/
13349 /* REMOVE NODE FROM SET OF DEAD NODES. */
13350 /*************************************************************************/
13351 void Dbdih::removeDeadNode(NodeRecordPtr removeNodePtr)
13353 NodeRecordPtr nodePtr;
13355 nodePtr.i = cfirstDeadNode;
13356 if (nodePtr.i == removeNodePtr.i) {
13357 jam();
13358 cfirstDeadNode = removeNodePtr.p->nextNode;
13359 return;
13360 }//if
13361 do {
13362 jam();
13363 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
13364 if (nodePtr.p->nextNode == removeNodePtr.i) {
13365 jam();
13366 nodePtr.p->nextNode = removeNodePtr.p->nextNode;
13367 break;
13368 } else {
13369 jam();
13370 nodePtr.i = nodePtr.p->nextNode;
13371 }//if
13372 } while (1);
13373 }//Dbdih::removeDeadNode()
13375 /*---------------------------------------------------------------*/
13376 /* REMOVE REPLICAS OF A FAILED NODE FROM LIST OF STORED */
13377 /* REPLICAS AND MOVE IT TO THE LIST OF OLD STORED REPLICAS.*/
13378 /* ALSO UPDATE THE CRASHED REPLICA INFORMATION. */
13379 /*---------------------------------------------------------------*/
13380 void Dbdih::removeNodeFromStored(Uint32 nodeId,
13381 FragmentstorePtr fragPtr,
13382 ReplicaRecordPtr replicatePtr,
13383 bool temporary)
13385 if (!temporary)
13387 jam();
13388 newCrashedReplica(nodeId, replicatePtr);
13390 else
13392 jam();
13394 removeStoredReplica(fragPtr, replicatePtr);
13395 linkOldStoredReplica(fragPtr, replicatePtr);
13396 ndbrequire(fragPtr.p->storedReplicas != RNIL);
13397 }//Dbdih::removeNodeFromStored()
13399 /*************************************************************************/
13400 /* REMOVE ANY OLD CRASHED REPLICAS THAT ARE NOT RESTORABLE ANY MORE*/
13401 /*************************************************************************/
13402 void Dbdih::removeOldCrashedReplicas(ReplicaRecordPtr rocReplicaPtr)
13404 while (rocReplicaPtr.p->noCrashedReplicas > 0) {
13405 jam();
13406 /* --------------------------------------------------------------------- */
13407 /* ONLY IF THERE IS AT LEAST ONE REPLICA THEN CAN WE REMOVE ANY. */
13408 /* --------------------------------------------------------------------- */
13409 if (rocReplicaPtr.p->replicaLastGci[0] < SYSFILE->oldestRestorableGCI){
13410 jam();
13411 /* ------------------------------------------------------------------- */
13412 /* THIS CRASHED REPLICA HAS BECOME EXTINCT AND MUST BE REMOVED TO */
13413 /* GIVE SPACE FOR NEW CRASHED REPLICAS. */
13414 /* ------------------------------------------------------------------- */
13415 packCrashedReplicas(rocReplicaPtr);
13416 } else {
13417 break;
13418 }//if
13419 }//while
13420 if (rocReplicaPtr.p->createGci[0] < SYSFILE->keepGCI){
13421 jam();
13422 /* --------------------------------------------------------------------- */
13423 /* MOVE FORWARD THE CREATE GCI TO A GCI THAT CAN BE USED. WE HAVE */
13424 /* NO CERTAINTY IN FINDING ANY LOG RECORDS FROM OLDER GCI'S. */
13425 /* --------------------------------------------------------------------- */
13426 rocReplicaPtr.p->createGci[0] = SYSFILE->keepGCI;
13427 ndbrequire(SYSFILE->keepGCI != 0xF1F1F1F1);
13428 }//if
13429 }//Dbdih::removeOldCrashedReplicas()
13431 void Dbdih::removeOldStoredReplica(FragmentstorePtr fragPtr,
13432 ReplicaRecordPtr replicatePtr)
13434 ReplicaRecordPtr rosTmpReplicaPtr;
13435 ReplicaRecordPtr rosPrevReplicaPtr;
13437 fragPtr.p->noOldStoredReplicas--;
13438 if (fragPtr.p->oldStoredReplicas == replicatePtr.i) {
13439 jam();
13440 fragPtr.p->oldStoredReplicas = replicatePtr.p->nextReplica;
13441 } else {
13442 rosPrevReplicaPtr.i = fragPtr.p->oldStoredReplicas;
13443 ptrCheckGuard(rosPrevReplicaPtr, creplicaFileSize, replicaRecord);
13444 rosTmpReplicaPtr.i = rosPrevReplicaPtr.p->nextReplica;
13445 while (rosTmpReplicaPtr.i != replicatePtr.i) {
13446 jam();
13447 rosPrevReplicaPtr.i = rosTmpReplicaPtr.i;
13448 ptrCheckGuard(rosPrevReplicaPtr, creplicaFileSize, replicaRecord);
13449 ptrCheckGuard(rosTmpReplicaPtr, creplicaFileSize, replicaRecord);
13450 rosTmpReplicaPtr.i = rosTmpReplicaPtr.p->nextReplica;
13451 }//if
13452 rosPrevReplicaPtr.p->nextReplica = replicatePtr.p->nextReplica;
13453 }//if
13454 }//Dbdih::removeOldStoredReplica()
13456 void Dbdih::removeStoredReplica(FragmentstorePtr fragPtr,
13457 ReplicaRecordPtr replicatePtr)
13459 ReplicaRecordPtr rsrTmpReplicaPtr;
13460 ReplicaRecordPtr rsrPrevReplicaPtr;
13462 fragPtr.p->noStoredReplicas--;
13463 if (fragPtr.p->storedReplicas == replicatePtr.i) {
13464 jam();
13465 fragPtr.p->storedReplicas = replicatePtr.p->nextReplica;
13466 } else {
13467 jam();
13468 rsrPrevReplicaPtr.i = fragPtr.p->storedReplicas;
13469 rsrTmpReplicaPtr.i = fragPtr.p->storedReplicas;
13470 ptrCheckGuard(rsrTmpReplicaPtr, creplicaFileSize, replicaRecord);
13471 rsrTmpReplicaPtr.i = rsrTmpReplicaPtr.p->nextReplica;
13472 while (rsrTmpReplicaPtr.i != replicatePtr.i) {
13473 jam();
13474 rsrPrevReplicaPtr.i = rsrTmpReplicaPtr.i;
13475 ptrCheckGuard(rsrTmpReplicaPtr, creplicaFileSize, replicaRecord);
13476 rsrTmpReplicaPtr.i = rsrTmpReplicaPtr.p->nextReplica;
13477 }//while
13478 ptrCheckGuard(rsrPrevReplicaPtr, creplicaFileSize, replicaRecord);
13479 rsrPrevReplicaPtr.p->nextReplica = replicatePtr.p->nextReplica;
13480 }//if
13481 }//Dbdih::removeStoredReplica()
13483 /*************************************************************************/
13484 /* REMOVE ALL TOO NEW CRASHED REPLICAS THAT IS IN THIS REPLICA. */
13485 /*************************************************************************/
13486 void Dbdih::removeTooNewCrashedReplicas(ReplicaRecordPtr rtnReplicaPtr)
13488 while (rtnReplicaPtr.p->noCrashedReplicas > 0) {
13489 jam();
13490 /* --------------------------------------------------------------------- */
13491 /* REMOVE ALL REPLICAS THAT ONLY LIVED IN A PERIOD THAT HAVE BEEN */
13492 /* REMOVED FROM THE RESTART INFORMATION SINCE THE RESTART FAILED */
13493 /* TOO MANY TIMES. */
13494 /* --------------------------------------------------------------------- */
13495 arrGuard(rtnReplicaPtr.p->noCrashedReplicas - 1, 8);
13496 if (rtnReplicaPtr.p->createGci[rtnReplicaPtr.p->noCrashedReplicas - 1] >
13497 SYSFILE->newestRestorableGCI){
13498 jam();
13499 rtnReplicaPtr.p->createGci[rtnReplicaPtr.p->noCrashedReplicas - 1] =
13500 (Uint32)-1;
13501 rtnReplicaPtr.p->replicaLastGci[rtnReplicaPtr.p->noCrashedReplicas - 1] =
13502 (Uint32)-1;
13503 rtnReplicaPtr.p->noCrashedReplicas--;
13504 } else {
13505 break;
13506 }//if
13507 }//while
13508 }//Dbdih::removeTooNewCrashedReplicas()
13510 /*************************************************************************/
13511 /* */
13512 /* MODULE: SEARCH FOR POSSIBLE REPLICAS THAT CAN HANDLE THE GLOBAL */
13513 /* CHECKPOINT WITHOUT NEEDING ANY EXTRA LOGGING FACILITIES.*/
13514 /* A MAXIMUM OF FOUR NODES IS RETRIEVED. */
13515 /*************************************************************************/
13516 bool
13517 Dbdih::setup_create_replica(FragmentstorePtr fragPtr,
13518 CreateReplicaRecord* createReplicaPtrP,
13519 ConstPtr<ReplicaRecord> replicaPtr)
13521 createReplicaPtrP->dataNodeId = replicaPtr.p->procNode;
13522 createReplicaPtrP->replicaRec = replicaPtr.i;
13524 /* ----------------------------------------------------------------- */
13525 /* WE NEED TO SEARCH FOR A PROPER LOCAL CHECKPOINT TO USE FOR THE */
13526 /* SYSTEM RESTART. */
13527 /* ----------------------------------------------------------------- */
13528 Uint32 startGci;
13529 Uint32 startLcpNo;
13530 Uint32 stopGci = SYSFILE->newestRestorableGCI;
13531 bool result = findStartGci(replicaPtr,
13532 stopGci,
13533 startGci,
13534 startLcpNo);
13535 if (!result)
13537 jam();
13538 /* --------------------------------------------------------------- */
13539 /* WE COULD NOT FIND ANY LOCAL CHECKPOINT. THE FRAGMENT THUS DO NOT*/
13540 /* CONTAIN ANY VALID LOCAL CHECKPOINT. IT DOES HOWEVER CONTAIN A */
13541 /* VALID FRAGMENT LOG. THUS BY FIRST CREATING THE FRAGMENT AND THEN*/
13542 /* EXECUTING THE FRAGMENT LOG WE CAN CREATE THE FRAGMENT AS */
13543 /* DESIRED. THIS SHOULD ONLY OCCUR AFTER CREATING A FRAGMENT. */
13544 /* */
13545 /* TO INDICATE THAT NO LOCAL CHECKPOINT IS TO BE USED WE SET THE */
13546 /* LOCAL CHECKPOINT TO ZNIL. */
13547 /* --------------------------------------------------------------- */
13548 createReplicaPtrP->lcpNo = ZNIL;
13550 else
13552 jam();
13553 /* --------------------------------------------------------------- */
13554 /* WE FOUND A PROPER LOCAL CHECKPOINT TO RESTART FROM. */
13555 /* SET LOCAL CHECKPOINT ID AND LOCAL CHECKPOINT NUMBER. */
13556 /* --------------------------------------------------------------- */
13557 createReplicaPtrP->lcpNo = startLcpNo;
13558 arrGuard(startLcpNo, MAX_LCP_STORED);
13559 createReplicaPtrP->createLcpId = replicaPtr.p->lcpId[startLcpNo];
13560 }//if
13563 /* ----------------------------------------------------------------- */
13564 /* WE HAVE EITHER FOUND A LOCAL CHECKPOINT OR WE ARE PLANNING TO */
13565 /* EXECUTE THE LOG FROM THE INITIAL CREATION OF THE TABLE. IN BOTH */
13566 /* CASES WE NEED TO FIND A SET OF LOGS THAT CAN EXECUTE SUCH THAT */
13567 /* WE RECOVER TO THE SYSTEM RESTART GLOBAL CHECKPOINT. */
13568 /* -_--------------------------------------------------------------- */
13569 return findLogNodes(createReplicaPtrP, fragPtr, startGci, stopGci);
13572 void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr)
13574 Uint32 nextReplicaPtrI;
13575 Ptr<ReplicaRecord> replicaPtr;
13577 replicaPtr.i = fragPtr.p->storedReplicas;
13578 while (replicaPtr.i != RNIL) {
13579 jam();
13580 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
13581 nextReplicaPtrI = replicaPtr.p->nextReplica;
13582 ConstPtr<ReplicaRecord> constReplicaPtr;
13583 constReplicaPtr.i = replicaPtr.i;
13584 constReplicaPtr.p = replicaPtr.p;
13585 NodeRecordPtr nodePtr;
13586 nodePtr.i = replicaPtr.p->procNode;
13587 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
13588 if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
13589 jam();
13590 switch (nodePtr.p->activeStatus) {
13591 case Sysfile::NS_Active:
13592 case Sysfile::NS_ActiveMissed_1:
13593 case Sysfile::NS_ActiveMissed_2:{
13594 /* ----------------------------------------------------------------- */
13595 /* INITIALISE THE CREATE REPLICA STRUCTURE THAT IS USED FOR SENDING*/
13596 /* TO LQH START_FRAGREQ. */
13597 /* SET THE DATA NODE WHERE THE LOCAL CHECKPOINT IS FOUND. ALSO */
13598 /* SET A REFERENCE TO THE REPLICA POINTER OF THAT. */
13599 /* ----------------------------------------------------------------- */
13600 CreateReplicaRecordPtr createReplicaPtr;
13601 createReplicaPtr.i = cnoOfCreateReplicas;
13602 ptrCheckGuard(createReplicaPtr, 4, createReplicaRecord);
13603 cnoOfCreateReplicas++;
13606 * Should have been checked in resetReplicaSr
13608 ndbrequire(setup_create_replica(fragPtr,
13609 createReplicaPtr.p,
13610 constReplicaPtr));
13611 break;
13613 default:
13614 jam();
13615 /*empty*/;
13616 break;
13617 }//switch
13619 replicaPtr.i = nextReplicaPtrI;
13620 }//while
13621 }//Dbdih::searchStoredReplicas()
13623 /*************************************************************************/
13624 /* */
13625 /* MODULE: SEIZE_FILE */
13626 /* DESCRIPTION: THE SUBROUTINE SEIZES A FILE RECORD FROM THE */
13627 /* FREE LIST. */
13628 /*************************************************************************/
13629 void Dbdih::seizeFile(FileRecordPtr& filePtr)
13631 filePtr.i = cfirstfreeFile;
13632 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
13633 cfirstfreeFile = filePtr.p->nextFile;
13634 filePtr.p->nextFile = RNIL;
13635 }//Dbdih::seizeFile()
13637 /*************************************************************************/
13638 /* SEND CREATE_FRAGREQ TO ALL NODES IN THE NDB CLUSTER. */
13639 /*************************************************************************/
13640 /*************************************************************************/
13641 /* */
13642 /* MODULE: FIND THE START GCI AND LOCAL CHECKPOINT TO USE. */
13643 /*************************************************************************/
13644 void Dbdih::sendStartFragreq(Signal* signal,
13645 TabRecordPtr tabPtr, Uint32 fragId)
13647 CreateReplicaRecordPtr replicaPtr;
13648 for (replicaPtr.i = 0; replicaPtr.i < cnoOfCreateReplicas; replicaPtr.i++) {
13649 jam();
13650 ptrAss(replicaPtr, createReplicaRecord);
13651 BlockReference ref = calcLqhBlockRef(replicaPtr.p->dataNodeId);
13652 StartFragReq * const startFragReq = (StartFragReq *)&signal->theData[0];
13653 startFragReq->userPtr = replicaPtr.p->replicaRec;
13654 startFragReq->userRef = reference();
13655 startFragReq->lcpNo = replicaPtr.p->lcpNo;
13656 startFragReq->lcpId = replicaPtr.p->createLcpId;
13657 startFragReq->tableId = tabPtr.i;
13658 startFragReq->fragId = fragId;
13660 if(ERROR_INSERTED(7072) || ERROR_INSERTED(7074)){
13661 jam();
13662 const Uint32 noNodes = replicaPtr.p->noLogNodes;
13663 Uint32 start = replicaPtr.p->logStartGci[noNodes - 1];
13664 const Uint32 stop = replicaPtr.p->logStopGci[noNodes - 1];
13666 for(Uint32 i = noNodes; i < 4 && (stop - start) > 0; i++){
13667 replicaPtr.p->noLogNodes++;
13668 replicaPtr.p->logStopGci[i - 1] = start;
13670 replicaPtr.p->logNodeId[i] = replicaPtr.p->logNodeId[i-1];
13671 replicaPtr.p->logStartGci[i] = start + 1;
13672 replicaPtr.p->logStopGci[i] = stop;
13673 start += 1;
13677 startFragReq->noOfLogNodes = replicaPtr.p->noLogNodes;
13679 for (Uint32 i = 0; i < 4 ; i++) {
13680 startFragReq->lqhLogNode[i] = replicaPtr.p->logNodeId[i];
13681 startFragReq->startGci[i] = replicaPtr.p->logStartGci[i];
13682 startFragReq->lastGci[i] = replicaPtr.p->logStopGci[i];
13683 }//for
13685 sendSignal(ref, GSN_START_FRAGREQ, signal,
13686 StartFragReq::SignalLength, JBB);
13687 }//for
13688 }//Dbdih::sendStartFragreq()
13690 /*************************************************************************/
13691 /* SET THE INITIAL ACTIVE STATUS ON ALL NODES AND PUT INTO LISTS. */
13692 /*************************************************************************/
13693 void Dbdih::setInitialActiveStatus()
13695 NodeRecordPtr siaNodeptr;
13696 Uint32 tsiaNoActiveNodes;
13698 tsiaNoActiveNodes = csystemnodes - cnoHotSpare;
13699 for(Uint32 i = 0; i<Sysfile::NODE_STATUS_SIZE; i++)
13700 SYSFILE->nodeStatus[i] = 0;
13701 for (siaNodeptr.i = 1; siaNodeptr.i < MAX_NDB_NODES; siaNodeptr.i++) {
13702 ptrAss(siaNodeptr, nodeRecord);
13703 switch(siaNodeptr.p->nodeStatus){
13704 case NodeRecord::ALIVE:
13705 case NodeRecord::DEAD:
13706 if (tsiaNoActiveNodes == 0) {
13707 jam();
13708 siaNodeptr.p->activeStatus = Sysfile::NS_HotSpare;
13709 } else {
13710 jam();
13711 tsiaNoActiveNodes = tsiaNoActiveNodes - 1;
13712 if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE)
13714 jam();
13715 siaNodeptr.p->activeStatus = Sysfile::NS_Active;
13717 else
13719 siaNodeptr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
13722 break;
13723 default:
13724 jam();
13725 siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined;
13726 break;
13727 }//if
13728 Sysfile::setNodeStatus(siaNodeptr.i,
13729 SYSFILE->nodeStatus,
13730 siaNodeptr.p->activeStatus);
13731 }//for
13732 }//Dbdih::setInitialActiveStatus()
13734 /*************************************************************************/
13735 /* SET LCP ACTIVE STATUS AT THE END OF A LOCAL CHECKPOINT. */
13736 /*************************************************************************/
13737 void Dbdih::setLcpActiveStatusEnd()
13739 NodeRecordPtr nodePtr;
13741 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
13742 jam();
13743 ptrAss(nodePtr, nodeRecord);
13744 if (c_lcpState.m_participatingLQH.get(nodePtr.i)){
13745 switch (nodePtr.p->activeStatus) {
13746 case Sysfile::NS_Active:
13747 case Sysfile::NS_ActiveMissed_1:
13748 case Sysfile::NS_ActiveMissed_2:
13749 jam();
13750 /*-------------------------------------------------------------------*/
13751 /* THE NODE PARTICIPATED IN THIS CHECKPOINT.
13752 * WE CAN SET ITS STATUS TO ACTIVE */
13753 /*-------------------------------------------------------------------*/
13754 nodePtr.p->activeStatus = Sysfile::NS_Active;
13755 takeOverCompleted(nodePtr.i);
13756 break;
13757 case Sysfile::NS_TakeOver:
13758 jam();
13759 /*-------------------------------------------------------------------*/
13760 /* THE NODE HAS COMPLETED A CHECKPOINT AFTER TAKE OVER. WE CAN NOW */
13761 /* SET ITS STATUS TO ACTIVE. WE CAN ALSO COMPLETE THE TAKE OVER */
13762 /* AND ALSO WE CLEAR THE TAKE OVER NODE IN THE RESTART INFO. */
13763 /*-------------------------------------------------------------------*/
13764 nodePtr.p->activeStatus = Sysfile::NS_Active;
13765 takeOverCompleted(nodePtr.i);
13766 break;
13767 default:
13768 ndbrequire(false);
13769 return;
13770 break;
13771 }//switch
13772 }//if
13773 }//for
13775 if(getNodeState().getNodeRestartInProgress()){
13776 jam();
13777 if(c_lcpState.m_participatingLQH.get(getOwnNodeId())){
13778 nodePtr.i = getOwnNodeId();
13779 ptrAss(nodePtr, nodeRecord);
13780 ndbrequire(nodePtr.p->activeStatus == Sysfile::NS_Active);
13781 g_eventLogger.info("NR: setLcpActiveStatusEnd - m_participatingLQH");
13782 } else {
13783 g_eventLogger.info("NR: setLcpActiveStatusEnd - !m_participatingLQH");
13787 c_lcpState.m_participatingDIH.clear();
13788 c_lcpState.m_participatingLQH.clear();
13789 if (isMaster()) {
13790 jam();
13791 setNodeRestartInfoBits();
13792 }//if
13793 }//Dbdih::setLcpActiveStatusEnd()
13795 void Dbdih::takeOverCompleted(Uint32 aNodeId)
13797 TakeOverRecordPtr takeOverPtr;
13798 takeOverPtr.i = findTakeOver(aNodeId);
13799 if (takeOverPtr.i != RNIL) {
13800 jam();
13801 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
13802 if (takeOverPtr.p->toMasterStatus != TakeOverRecord::WAIT_LCP) {
13803 jam();
13804 ndbrequire(!isMaster());
13805 return;
13806 }//if
13807 ndbrequire(isMaster());
13808 Sysfile::setTakeOverNode(aNodeId, SYSFILE->takeOver, 0);
13809 takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_END_COPY;
13810 cstartGcpNow = true;
13811 }//if
13812 }//Dbdih::takeOverCompleted()
13814 /*************************************************************************/
13815 /* SET LCP ACTIVE STATUS BEFORE STARTING A LOCAL CHECKPOINT. */
13816 /*************************************************************************/
13817 void Dbdih::setLcpActiveStatusStart(Signal* signal)
13819 NodeRecordPtr nodePtr;
13821 c_lcpState.m_participatingLQH.clear();
13822 c_lcpState.m_participatingDIH.clear();
13824 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
13825 ptrAss(nodePtr, nodeRecord);
13826 #if 0
13827 if(nodePtr.p->nodeStatus != NodeRecord::NOT_IN_CLUSTER){
13828 infoEvent("Node %d nodeStatus=%d activeStatus=%d copyCompleted=%d lcp=%d",
13829 nodePtr.i,
13830 nodePtr.p->nodeStatus,
13831 nodePtr.p->activeStatus,
13832 nodePtr.p->copyCompleted,
13833 nodePtr.p->m_inclDihLcp);
13835 #endif
13836 if(nodePtr.p->nodeStatus == NodeRecord::ALIVE && nodePtr.p->m_inclDihLcp){
13837 jam();
13838 c_lcpState.m_participatingDIH.set(nodePtr.i);
13841 if ((nodePtr.p->nodeStatus == NodeRecord::ALIVE) &&
13842 (nodePtr.p->copyCompleted)) {
13843 switch (nodePtr.p->activeStatus) {
13844 case Sysfile::NS_Active:
13845 jam();
13846 /*-------------------------------------------------------------------*/
13847 // The normal case. Starting a LCP for a started node which hasn't
13848 // missed the previous LCP.
13849 /*-------------------------------------------------------------------*/
13850 c_lcpState.m_participatingLQH.set(nodePtr.i);
13851 break;
13852 case Sysfile::NS_ActiveMissed_1:
13853 jam();
13854 /*-------------------------------------------------------------------*/
13855 // The node is starting up and is participating in a local checkpoint
13856 // as the final phase of the start-up. We can still use the checkpoints
13857 // on the node after a system restart.
13858 /*-------------------------------------------------------------------*/
13859 c_lcpState.m_participatingLQH.set(nodePtr.i);
13860 break;
13861 case Sysfile::NS_ActiveMissed_2:
13862 jam();
13863 /*-------------------------------------------------------------------*/
13864 // The node is starting up and is participating in a local checkpoint
13865 // as the final phase of the start-up. We have missed so
13866 // many checkpoints that we no longer can use this node to
13867 // recreate fragments from disk.
13868 // It must be taken over with the copy fragment process after a system
13869 // crash. We indicate this by setting the active status to TAKE_OVER.
13870 /*-------------------------------------------------------------------*/
13871 c_lcpState.m_participatingLQH.set(nodePtr.i);
13872 nodePtr.p->activeStatus = Sysfile::NS_TakeOver;
13873 //break; // Fall through
13874 case Sysfile::NS_TakeOver:{
13875 TakeOverRecordPtr takeOverPtr;
13876 jam();
13877 /*-------------------------------------------------------------------*/
13878 /* THIS NODE IS CURRENTLY TAKING OVER A FAILED NODE. */
13879 /*-------------------------------------------------------------------*/
13880 takeOverPtr.i = findTakeOver(nodePtr.i);
13881 if (takeOverPtr.i != RNIL) {
13882 jam();
13883 ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
13884 if (takeOverPtr.p->toMasterStatus == TakeOverRecord::WAIT_LCP) {
13885 jam();
13886 /*---------------------------------------------------------------
13887 * ALL THE INFORMATION HAVE BEEN REPLICATED TO THE NEW
13888 * NODE AND WE ARE ONLY WAITING FOR A LOCAL CHECKPOINT TO BE
13889 * PERFORMED ON THE NODE TO SET ITS STATUS TO ACTIVE.
13891 infoEvent("Node %d is WAIT_LCP including in LCP", nodePtr.i);
13892 c_lcpState.m_participatingLQH.set(nodePtr.i);
13893 }//if
13894 }//if
13895 break;
13897 default:
13898 jam();
13899 /*empty*/;
13900 break;
13901 }//switch
13902 } else {
13903 switch (nodePtr.p->activeStatus) {
13904 case Sysfile::NS_Active:
13905 jam();
13906 nodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
13907 break;
13908 case Sysfile::NS_ActiveMissed_1:
13909 jam();
13910 nodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_2;
13911 break;
13912 case Sysfile::NS_ActiveMissed_2:
13913 jam();
13914 CRASH_INSERTION(7192);
13915 if ((nodePtr.p->nodeStatus == NodeRecord::ALIVE) &&
13916 (!nodePtr.p->copyCompleted)) {
13917 jam();
13918 /*-----------------------------------------------------------------*/
13919 // The node is currently starting up and has not completed the
13920 // copy phase.
13921 // It will thus be in the TAKE_OVER state.
13922 /*-----------------------------------------------------------------*/
13923 ndbrequire(findTakeOver(nodePtr.i) != RNIL);
13924 nodePtr.p->activeStatus = Sysfile::NS_TakeOver;
13925 } else {
13926 jam();
13927 /*-----------------------------------------------------------------*/
13928 /* THE NODE IS ACTIVE AND HAS NOT COMPLETED ANY OF THE LAST 3
13929 * CHECKPOINTS */
13930 /* WE MUST TAKE IT OUT OF ACTION AND START A NEW NODE TO TAKE OVER.*/
13931 /*-----------------------------------------------------------------*/
13932 nodePtr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
13933 }//if
13934 break;
13935 case Sysfile::NS_TakeOver:
13936 jam();
13937 break;
13938 default:
13939 jam();
13940 /*empty*/;
13941 break;
13942 }//switch
13943 }//if
13944 }//for
13945 if (isMaster()) {
13946 jam();
13947 checkStartTakeOver(signal);
13948 setNodeRestartInfoBits();
13949 }//if
13950 }//Dbdih::setLcpActiveStatusStart()
13952 /*************************************************************************/
13953 /* SET NODE ACTIVE STATUS AT SYSTEM RESTART AND WHEN UPDATED BY MASTER */
13954 /*************************************************************************/
13955 void Dbdih::setNodeActiveStatus()
13957 NodeRecordPtr snaNodeptr;
13959 for (snaNodeptr.i = 1; snaNodeptr.i < MAX_NDB_NODES; snaNodeptr.i++) {
13960 ptrAss(snaNodeptr, nodeRecord);
13961 const Uint32 tsnaNodeBits = Sysfile::getNodeStatus(snaNodeptr.i,
13962 SYSFILE->nodeStatus);
13963 switch (tsnaNodeBits) {
13964 case Sysfile::NS_Active:
13965 jam();
13966 snaNodeptr.p->activeStatus = Sysfile::NS_Active;
13967 break;
13968 case Sysfile::NS_ActiveMissed_1:
13969 jam();
13970 snaNodeptr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
13971 break;
13972 case Sysfile::NS_ActiveMissed_2:
13973 jam();
13974 snaNodeptr.p->activeStatus = Sysfile::NS_ActiveMissed_2;
13975 break;
13976 case Sysfile::NS_TakeOver:
13977 jam();
13978 snaNodeptr.p->activeStatus = Sysfile::NS_TakeOver;
13979 break;
13980 case Sysfile::NS_HotSpare:
13981 jam();
13982 snaNodeptr.p->activeStatus = Sysfile::NS_HotSpare;
13983 break;
13984 case Sysfile::NS_NotActive_NotTakenOver:
13985 jam();
13986 snaNodeptr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
13987 break;
13988 case Sysfile::NS_NotDefined:
13989 jam();
13990 snaNodeptr.p->activeStatus = Sysfile::NS_NotDefined;
13991 break;
13992 default:
13993 ndbrequire(false);
13994 break;
13995 }//switch
13996 }//for
13997 }//Dbdih::setNodeActiveStatus()
13999 /***************************************************************************/
14000 /* SET THE NODE GROUP BASED ON THE RESTART INFORMATION OR AS SET BY MASTER */
14001 /***************************************************************************/
14002 void Dbdih::setNodeGroups()
14004 NodeGroupRecordPtr NGPtr;
14005 NodeRecordPtr sngNodeptr;
14006 Uint32 Ti;
14008 for (Ti = 0; Ti < MAX_NDB_NODES; Ti++) {
14009 NGPtr.i = Ti;
14010 ptrAss(NGPtr, nodeGroupRecord);
14011 NGPtr.p->nodeCount = 0;
14012 }//for
14013 for (sngNodeptr.i = 1; sngNodeptr.i < MAX_NDB_NODES; sngNodeptr.i++) {
14014 ptrAss(sngNodeptr, nodeRecord);
14015 Sysfile::ActiveStatus s =
14016 (Sysfile::ActiveStatus)Sysfile::getNodeStatus(sngNodeptr.i,
14017 SYSFILE->nodeStatus);
14018 switch (s){
14019 case Sysfile::NS_Active:
14020 case Sysfile::NS_ActiveMissed_1:
14021 case Sysfile::NS_ActiveMissed_2:
14022 case Sysfile::NS_NotActive_NotTakenOver:
14023 case Sysfile::NS_TakeOver:
14024 jam();
14025 sngNodeptr.p->nodeGroup = Sysfile::getNodeGroup(sngNodeptr.i,
14026 SYSFILE->nodeGroups);
14027 NGPtr.i = sngNodeptr.p->nodeGroup;
14028 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
14029 NGPtr.p->nodesInGroup[NGPtr.p->nodeCount] = sngNodeptr.i;
14030 NGPtr.p->nodeCount++;
14031 break;
14032 case Sysfile::NS_HotSpare:
14033 case Sysfile::NS_NotDefined:
14034 jam();
14035 sngNodeptr.p->nodeGroup = ZNIL;
14036 break;
14037 default:
14038 ndbrequire(false);
14039 return;
14040 break;
14041 }//switch
14042 }//for
14043 cnoOfNodeGroups = 0;
14044 for (Ti = 0; Ti < MAX_NDB_NODES; Ti++) {
14045 jam();
14046 NGPtr.i = Ti;
14047 ptrAss(NGPtr, nodeGroupRecord);
14048 if (NGPtr.p->nodeCount != 0) {
14049 jam();
14050 cnoOfNodeGroups++;
14051 }//if
14052 }//for
14053 cnoHotSpare = csystemnodes - (cnoOfNodeGroups * cnoReplicas);
14054 }//Dbdih::setNodeGroups()
14056 /*************************************************************************/
14057 /* SET NODE INFORMATION AFTER RECEIVING RESTART INFORMATION FROM MASTER. */
14058 /* WE TAKE THE OPPORTUNITY TO SYNCHRONISE OUR DATA WITH THE MASTER. IT */
14059 /* IS ONLY THE MASTER THAT WILL ACT ON THIS DATA. WE WILL KEEP THEM */
14060 /* UPDATED FOR THE CASE WHEN WE HAVE TO BECOME MASTER. */
14061 /*************************************************************************/
14062 void Dbdih::setNodeInfo(Signal* signal)
14064 setNodeActiveStatus();
14065 setNodeGroups();
14066 sendHOT_SPAREREP(signal);
14067 }//Dbdih::setNodeInfo()
14069 /*************************************************************************/
14070 // Keep also DBDICT informed about the Hot Spare situation in the cluster.
14071 /*************************************************************************/
14072 void Dbdih::sendHOT_SPAREREP(Signal* signal)
14074 NodeRecordPtr locNodeptr;
14075 Uint32 Ti = 0;
14076 HotSpareRep * const hotSpare = (HotSpareRep*)&signal->theData[0];
14077 NodeBitmask::clear(hotSpare->theHotSpareNodes);
14078 for (locNodeptr.i = 1; locNodeptr.i < MAX_NDB_NODES; locNodeptr.i++) {
14079 ptrAss(locNodeptr, nodeRecord);
14080 switch (locNodeptr.p->activeStatus) {
14081 case Sysfile::NS_HotSpare:
14082 jam();
14083 NodeBitmask::set(hotSpare->theHotSpareNodes, locNodeptr.i);
14084 Ti++;
14085 break;
14086 default:
14087 jam();
14088 break;
14089 }//switch
14090 }//for
14091 hotSpare->noHotSpareNodes = Ti;
14092 sendSignal(DBDICT_REF, GSN_HOT_SPAREREP,
14093 signal, HotSpareRep::SignalLength, JBB);
14094 }//Dbdih::sendHOT_SPAREREP()
14096 /*************************************************************************/
14097 /* SET LCP ACTIVE STATUS FOR ALL NODES BASED ON THE INFORMATION IN */
14098 /* THE RESTART INFORMATION. */
14099 /*************************************************************************/
#if 0
/*
 * Disabled code (compiled out by the surrounding #if 0).
 *
 * Rebuilds c_lcpState.m_lcpActiveStatus so that it holds exactly the nodes
 * (1 .. MAX_NDB_NODES-1) whose bit is set in SYSFILE->lcpActive.
 */
void Dbdih::setNodeLcpActiveStatus()
{
  c_lcpState.m_lcpActiveStatus.clear();
  for (Uint32 nodeId = 1; nodeId < MAX_NDB_NODES; nodeId++) {
    if (!NodeBitmask::get(SYSFILE->lcpActive, nodeId)) {
      continue;
    }
    jam();
    c_lcpState.m_lcpActiveStatus.set(nodeId);
  }
}//Dbdih::setNodeLcpActiveStatus()
#endif
14113 /*************************************************************************/
14114 /* SET THE RESTART INFO BITS BASED ON THE NODES ACTIVE STATUS. */
14115 /*************************************************************************/
14116 void Dbdih::setNodeRestartInfoBits()
14118 NodeRecordPtr nodePtr;
14119 Uint32 tsnrNodeGroup;
14120 Uint32 tsnrNodeActiveStatus;
14121 Uint32 i;
14122 for(i = 1; i < MAX_NDB_NODES; i++){
14123 Sysfile::setNodeStatus(i, SYSFILE->nodeStatus, Sysfile::NS_Active);
14124 }//for
14125 for(i = 1; i < Sysfile::NODE_GROUPS_SIZE; i++){
14126 SYSFILE->nodeGroups[i] = 0;
14127 }//for
14128 NdbNodeBitmask::clear(SYSFILE->lcpActive);
14130 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
14131 ptrAss(nodePtr, nodeRecord);
14132 switch (nodePtr.p->activeStatus) {
14133 case Sysfile::NS_Active:
14134 jam();
14135 tsnrNodeActiveStatus = Sysfile::NS_Active;
14136 break;
14137 case Sysfile::NS_ActiveMissed_1:
14138 jam();
14139 tsnrNodeActiveStatus = Sysfile::NS_ActiveMissed_1;
14140 break;
14141 case Sysfile::NS_ActiveMissed_2:
14142 jam();
14143 tsnrNodeActiveStatus = Sysfile::NS_ActiveMissed_2;
14144 break;
14145 case Sysfile::NS_HotSpare:
14146 jam();
14147 tsnrNodeActiveStatus = Sysfile::NS_HotSpare;
14148 break;
14149 case Sysfile::NS_TakeOver:
14150 jam();
14151 tsnrNodeActiveStatus = Sysfile::NS_TakeOver;
14152 break;
14153 case Sysfile::NS_NotActive_NotTakenOver:
14154 jam();
14155 tsnrNodeActiveStatus = Sysfile::NS_NotActive_NotTakenOver;
14156 break;
14157 case Sysfile::NS_NotDefined:
14158 jam();
14159 tsnrNodeActiveStatus = Sysfile::NS_NotDefined;
14160 break;
14161 default:
14162 ndbrequire(false);
14163 tsnrNodeActiveStatus = Sysfile::NS_NotDefined; // remove warning
14164 break;
14165 }//switch
14166 Sysfile::setNodeStatus(nodePtr.i, SYSFILE->nodeStatus,
14167 tsnrNodeActiveStatus);
14168 if (nodePtr.p->nodeGroup == ZNIL) {
14169 jam();
14170 tsnrNodeGroup = NO_NODE_GROUP_ID;
14171 } else {
14172 jam();
14173 tsnrNodeGroup = nodePtr.p->nodeGroup;
14174 }//if
14175 Sysfile::setNodeGroup(nodePtr.i, SYSFILE->nodeGroups, tsnrNodeGroup);
14176 if (c_lcpState.m_participatingLQH.get(nodePtr.i)){
14177 jam();
14178 NodeBitmask::set(SYSFILE->lcpActive, nodePtr.i);
14179 }//if
14180 }//for
14181 }//Dbdih::setNodeRestartInfoBits()
14183 /*************************************************************************/
14184 /* START THE GLOBAL CHECKPOINT PROTOCOL IN MASTER AT START-UP */
14185 /*************************************************************************/
14186 void Dbdih::startGcp(Signal* signal)
14188 cgcpStatus = GCP_READY;
14189 coldGcpStatus = cgcpStatus;
14190 coldGcpId = cnewgcp;
14191 cgcpSameCounter = 0;
14192 signal->theData[0] = DihContinueB::ZSTART_GCP;
14193 signal->theData[1] = 0;
14194 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
14195 signal->theData[0] = DihContinueB::ZCHECK_GCP_STOP;
14196 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 1);
14197 }//Dbdih::startGcp()
14199 void Dbdih::updateNodeInfo(FragmentstorePtr fragPtr)
14201 ReplicaRecordPtr replicatePtr;
14202 Uint32 index = 0;
14203 replicatePtr.i = fragPtr.p->storedReplicas;
14204 do {
14205 jam();
14206 ptrCheckGuard(replicatePtr, creplicaFileSize, replicaRecord);
14207 ndbrequire(index < MAX_REPLICAS);
14208 fragPtr.p->activeNodes[index] = replicatePtr.p->procNode;
14209 index++;
14210 replicatePtr.i = replicatePtr.p->nextReplica;
14211 } while (replicatePtr.i != RNIL);
14212 fragPtr.p->fragReplicas = index;
14214 /* ----------------------------------------------------------------------- */
14215 // We switch primary to the preferred primary if the preferred primary is
14216 // in the list.
14217 /* ----------------------------------------------------------------------- */
14218 const Uint32 prefPrim = fragPtr.p->preferredPrimary;
14219 for (Uint32 i = 1; i < index; i++) {
14220 jam();
14221 ndbrequire(i < MAX_REPLICAS);
14222 if (fragPtr.p->activeNodes[i] == prefPrim){
14223 jam();
14224 Uint32 switchNode = fragPtr.p->activeNodes[0];
14225 fragPtr.p->activeNodes[0] = prefPrim;
14226 fragPtr.p->activeNodes[i] = switchNode;
14227 break;
14228 }//if
14229 }//for
14230 }//Dbdih::updateNodeInfo()
14232 void Dbdih::writeFragment(RWFragment* wf, FragmentstorePtr fragPtr)
14234 writePageWord(wf, wf->fragId);
14235 writePageWord(wf, fragPtr.p->preferredPrimary);
14236 writePageWord(wf, fragPtr.p->noStoredReplicas);
14237 writePageWord(wf, fragPtr.p->noOldStoredReplicas);
14238 writePageWord(wf, fragPtr.p->distributionKey);
14239 writePageWord(wf, fragPtr.p->m_log_part_id);
14240 }//Dbdih::writeFragment()
14242 void Dbdih::writePageWord(RWFragment* wf, Uint32 dataWord)
14244 if (wf->wordIndex >= 2048) {
14245 jam();
14246 ndbrequire(wf->wordIndex == 2048);
14247 allocpage(wf->rwfPageptr);
14248 wf->wordIndex = 32;
14249 wf->pageIndex++;
14250 ndbrequire(wf->pageIndex < 8);
14251 wf->rwfTabPtr.p->pageRef[wf->pageIndex] = wf->rwfPageptr.i;
14252 wf->rwfTabPtr.p->noPages++;
14253 }//if
14254 wf->rwfPageptr.p->word[wf->wordIndex] = dataWord;
14255 wf->wordIndex++;
14256 }//Dbdih::writePageWord()
14258 void Dbdih::writeReplicas(RWFragment* wf, Uint32 replicaStartIndex)
14260 ReplicaRecordPtr wfReplicaPtr;
14261 wfReplicaPtr.i = replicaStartIndex;
14262 while (wfReplicaPtr.i != RNIL) {
14263 jam();
14264 ptrCheckGuard(wfReplicaPtr, creplicaFileSize, replicaRecord);
14265 writePageWord(wf, wfReplicaPtr.p->procNode);
14266 writePageWord(wf, wfReplicaPtr.p->initialGci);
14267 writePageWord(wf, wfReplicaPtr.p->noCrashedReplicas);
14268 writePageWord(wf, wfReplicaPtr.p->nextLcp);
14269 Uint32 i;
14270 for (i = 0; i < MAX_LCP_STORED; i++) {
14271 writePageWord(wf, wfReplicaPtr.p->maxGciCompleted[i]);
14272 writePageWord(wf, wfReplicaPtr.p->maxGciStarted[i]);
14273 writePageWord(wf, wfReplicaPtr.p->lcpId[i]);
14274 writePageWord(wf, wfReplicaPtr.p->lcpStatus[i]);
14275 }//if
14276 for (i = 0; i < 8; i++) {
14277 writePageWord(wf, wfReplicaPtr.p->createGci[i]);
14278 writePageWord(wf, wfReplicaPtr.p->replicaLastGci[i]);
14279 }//if
14281 wfReplicaPtr.i = wfReplicaPtr.p->nextReplica;
14282 }//while
14283 }//Dbdih::writeReplicas()
14285 void Dbdih::writeRestorableGci(Signal* signal, FileRecordPtr filePtr)
14287 for (Uint32 i = 0; i < Sysfile::SYSFILE_SIZE32; i++) {
14288 sysfileDataToFile[i] = sysfileData[i];
14289 }//for
14290 signal->theData[0] = filePtr.p->fileRef;
14291 signal->theData[1] = reference();
14292 signal->theData[2] = filePtr.i;
14293 signal->theData[3] = ZLIST_OF_PAIRS_SYNCH;
14294 signal->theData[4] = ZVAR_NO_CRESTART_INFO_TO_FILE;
14295 signal->theData[5] = 1; /* AMOUNT OF PAGES */
14296 signal->theData[6] = 0; /* MEMORY PAGE = 0 SINCE COMMON STORED VARIABLE */
14297 signal->theData[7] = 0;
14298 sendSignal(NDBFS_REF, GSN_FSWRITEREQ, signal, 8, JBA);
14299 }//Dbdih::writeRestorableGci()
14301 void Dbdih::writeTabfile(Signal* signal, TabRecord* tab, FileRecordPtr filePtr)
14303 signal->theData[0] = filePtr.p->fileRef;
14304 signal->theData[1] = reference();
14305 signal->theData[2] = filePtr.i;
14306 signal->theData[3] = ZLIST_OF_PAIRS_SYNCH;
14307 signal->theData[4] = ZVAR_NO_WORD;
14308 signal->theData[5] = tab->noPages;
14309 for (Uint32 i = 0; i < tab->noPages; i++) {
14310 jam();
14311 signal->theData[6 + (2 * i)] = tab->pageRef[i];
14312 signal->theData[7 + (2 * i)] = i;
14313 }//for
14314 Uint32 length = 6 + (2 * tab->noPages);
14315 sendSignal(NDBFS_REF, GSN_FSWRITEREQ, signal, length, JBA);
14316 }//Dbdih::writeTabfile()
14318 void Dbdih::execDEBUG_SIG(Signal* signal)
14320 signal = signal; //Avoid compiler warnings
14321 }//Dbdih::execDEBUG_SIG()
14323 void
14324 Dbdih::execDUMP_STATE_ORD(Signal* signal)
14326 DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0];
14327 Uint32 arg = dumpState->args[0];
14328 if (arg == DumpStateOrd::DihDumpNodeRestartInfo) {
14329 infoEvent("c_nodeStartMaster.blockLcp = %d, c_nodeStartMaster.blockGcp = %d, c_nodeStartMaster.wait = %d",
14330 c_nodeStartMaster.blockLcp, c_nodeStartMaster.blockGcp, c_nodeStartMaster.wait);
14331 infoEvent("cstartGcpNow = %d, cgcpStatus = %d",
14332 cstartGcpNow, cgcpStatus);
14333 infoEvent("cfirstVerifyQueue = %d, cverifyQueueCounter = %d",
14334 cfirstVerifyQueue, cverifyQueueCounter);
14335 infoEvent("cgcpOrderBlocked = %d, cgcpStartCounter = %d",
14336 cgcpOrderBlocked, cgcpStartCounter);
14337 }//if
14338 if (arg == DumpStateOrd::DihDumpNodeStatusInfo) {
14339 NodeRecordPtr localNodePtr;
14340 infoEvent("Printing nodeStatus of all nodes");
14341 for (localNodePtr.i = 1; localNodePtr.i < MAX_NDB_NODES; localNodePtr.i++) {
14342 ptrAss(localNodePtr, nodeRecord);
14343 if (localNodePtr.p->nodeStatus != NodeRecord::NOT_IN_CLUSTER) {
14344 infoEvent("Node = %d has status = %d",
14345 localNodePtr.i, localNodePtr.p->nodeStatus);
14346 }//if
14347 }//for
14348 }//if
14350 if (arg == DumpStateOrd::DihPrintFragmentation){
14351 infoEvent("Printing fragmentation of all tables --");
14352 for(Uint32 i = 0; i<ctabFileSize; i++){
14353 TabRecordPtr tabPtr;
14354 tabPtr.i = i;
14355 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
14357 if(tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
14358 continue;
14360 for(Uint32 j = 0; j < tabPtr.p->totalfragments; j++){
14361 FragmentstorePtr fragPtr;
14362 getFragstore(tabPtr.p, j, fragPtr);
14364 Uint32 nodeOrder[MAX_REPLICAS];
14365 const Uint32 noOfReplicas = extractNodeInfo(fragPtr.p, nodeOrder);
14366 char buf[100];
14367 BaseString::snprintf(buf, sizeof(buf), " Table %d Fragment %d - ", tabPtr.i, j);
14368 for(Uint32 k = 0; k < noOfReplicas; k++){
14369 char tmp[100];
14370 BaseString::snprintf(tmp, sizeof(tmp), "%d ", nodeOrder[k]);
14371 strcat(buf, tmp);
14373 infoEvent(buf);
14378 if (signal->theData[0] == 7000) {
14379 infoEvent("ctimer = %d, cgcpParticipantState = %d, cgcpStatus = %d",
14380 c_lcpState.ctimer, cgcpParticipantState, cgcpStatus);
14381 infoEvent("coldGcpStatus = %d, coldGcpId = %d, cmasterState = %d",
14382 coldGcpStatus, coldGcpId, cmasterState);
14383 infoEvent("cmasterTakeOverNode = %d, ctcCounter = %d",
14384 cmasterTakeOverNode, c_lcpState.ctcCounter);
14385 }//if
14386 if (signal->theData[0] == 7001) {
14387 infoEvent("c_lcpState.keepGci = %d",
14388 c_lcpState.keepGci);
14389 infoEvent("c_lcpState.lcpStatus = %d, clcpStopGcp = %d",
14390 c_lcpState.lcpStatus,
14391 c_lcpState.lcpStopGcp);
14392 infoEvent("cgcpStartCounter = %d, cimmediateLcpStart = %d",
14393 cgcpStartCounter, c_lcpState.immediateLcpStart);
14394 }//if
14395 if (signal->theData[0] == 7002) {
14396 infoEvent("cnoOfActiveTables = %d, cgcpDelay = %d",
14397 cnoOfActiveTables, cgcpDelay);
14398 infoEvent("cdictblockref = %d, cfailurenr = %d",
14399 cdictblockref, cfailurenr);
14400 infoEvent("con_lineNodes = %d, reference() = %d, creceivedfrag = %d",
14401 con_lineNodes, reference(), creceivedfrag);
14402 }//if
14403 if (signal->theData[0] == 7003) {
14404 infoEvent("cfirstAliveNode = %d, cgckptflag = %d",
14405 cfirstAliveNode, cgckptflag);
14406 infoEvent("clocallqhblockref = %d, clocaltcblockref = %d, cgcpOrderBlocked = %d",
14407 clocallqhblockref, clocaltcblockref, cgcpOrderBlocked);
14408 infoEvent("cstarttype = %d, csystemnodes = %d, currentgcp = %d",
14409 cstarttype, csystemnodes, currentgcp);
14410 }//if
14411 if (signal->theData[0] == 7004) {
14412 infoEvent("cmasterdihref = %d, cownNodeId = %d, cnewgcp = %d",
14413 cmasterdihref, cownNodeId, cnewgcp);
14414 infoEvent("cndbStartReqBlockref = %d, cremainingfrags = %d",
14415 cndbStartReqBlockref, cremainingfrags);
14416 infoEvent("cntrlblockref = %d, cgcpSameCounter = %d, coldgcp = %d",
14417 cntrlblockref, cgcpSameCounter, coldgcp);
14418 }//if
14419 if (signal->theData[0] == 7005) {
14420 infoEvent("crestartGci = %d",
14421 crestartGci);
14422 }//if
14423 if (signal->theData[0] == 7006) {
14424 infoEvent("clcpDelay = %d, cgcpMasterTakeOverState = %d",
14425 c_lcpState.clcpDelay, cgcpMasterTakeOverState);
14426 infoEvent("cmasterNodeId = %d", cmasterNodeId);
14427 infoEvent("cnoHotSpare = %d, c_nodeStartMaster.startNode = %d, c_nodeStartMaster.wait = %d",
14428 cnoHotSpare, c_nodeStartMaster.startNode, c_nodeStartMaster.wait);
14429 }//if
14430 if (signal->theData[0] == 7007) {
14431 infoEvent("c_nodeStartMaster.failNr = %d", c_nodeStartMaster.failNr);
14432 infoEvent("c_nodeStartMaster.startInfoErrorCode = %d",
14433 c_nodeStartMaster.startInfoErrorCode);
14434 infoEvent("c_nodeStartMaster.blockLcp = %d, c_nodeStartMaster.blockGcp = %d",
14435 c_nodeStartMaster.blockLcp, c_nodeStartMaster.blockGcp);
14436 }//if
14437 if (signal->theData[0] == 7008) {
14438 infoEvent("cfirstDeadNode = %d, cstartPhase = %d, cnoReplicas = %d",
14439 cfirstDeadNode, cstartPhase, cnoReplicas);
14440 infoEvent("cwaitLcpSr = %d",cwaitLcpSr);
14441 }//if
14442 if (signal->theData[0] == 7009) {
14443 infoEvent("ccalcOldestRestorableGci = %d, cnoOfNodeGroups = %d",
14444 c_lcpState.oldestRestorableGci, cnoOfNodeGroups);
14445 infoEvent("cstartGcpNow = %d",
14446 cstartGcpNow);
14447 infoEvent("crestartGci = %d",
14448 crestartGci);
14449 }//if
14450 if (signal->theData[0] == 7010) {
14451 infoEvent("cminHotSpareNodes = %d, c_lcpState.lcpStatusUpdatedPlace = %d, cLcpStart = %d",
14452 cminHotSpareNodes, c_lcpState.lcpStatusUpdatedPlace, c_lcpState.lcpStart);
14453 infoEvent("c_blockCommit = %d, c_blockCommitNo = %d",
14454 c_blockCommit, c_blockCommitNo);
14455 }//if
14456 if (signal->theData[0] == 7011){
14457 infoEvent("c_COPY_GCIREQ_Counter = %s",
14458 c_COPY_GCIREQ_Counter.getText());
14459 infoEvent("c_COPY_TABREQ_Counter = %s",
14460 c_COPY_TABREQ_Counter.getText());
14461 infoEvent("c_CREATE_FRAGREQ_Counter = %s",
14462 c_CREATE_FRAGREQ_Counter.getText());
14463 infoEvent("c_DIH_SWITCH_REPLICA_REQ_Counter = %s",
14464 c_DIH_SWITCH_REPLICA_REQ_Counter.getText());
14465 infoEvent("c_EMPTY_LCP_REQ_Counter = %s",c_EMPTY_LCP_REQ_Counter.getText());
14466 infoEvent("c_END_TOREQ_Counter = %s", c_END_TOREQ_Counter.getText());
14467 infoEvent("c_GCP_COMMIT_Counter = %s", c_GCP_COMMIT_Counter.getText());
14468 infoEvent("c_GCP_PREPARE_Counter = %s", c_GCP_PREPARE_Counter.getText());
14469 infoEvent("c_GCP_SAVEREQ_Counter = %s", c_GCP_SAVEREQ_Counter.getText());
14470 infoEvent("c_INCL_NODEREQ_Counter = %s", c_INCL_NODEREQ_Counter.getText());
14471 infoEvent("c_MASTER_GCPREQ_Counter = %s",
14472 c_MASTER_GCPREQ_Counter.getText());
14473 infoEvent("c_MASTER_LCPREQ_Counter = %s",
14474 c_MASTER_LCPREQ_Counter.getText());
14475 infoEvent("c_START_INFOREQ_Counter = %s",
14476 c_START_INFOREQ_Counter.getText());
14477 infoEvent("c_START_RECREQ_Counter = %s", c_START_RECREQ_Counter.getText());
14478 infoEvent("c_START_TOREQ_Counter = %s", c_START_TOREQ_Counter.getText());
14479 infoEvent("c_STOP_ME_REQ_Counter = %s", c_STOP_ME_REQ_Counter.getText());
14480 infoEvent("c_TC_CLOPSIZEREQ_Counter = %s",
14481 c_TC_CLOPSIZEREQ_Counter.getText());
14482 infoEvent("c_TCGETOPSIZEREQ_Counter = %s",
14483 c_TCGETOPSIZEREQ_Counter.getText());
14484 infoEvent("c_UPDATE_TOREQ_Counter = %s", c_UPDATE_TOREQ_Counter.getText());
14487 if(signal->theData[0] == 7012){
14488 char buf[8*_NDB_NODE_BITMASK_SIZE+1];
14489 infoEvent("ParticipatingDIH = %s", c_lcpState.m_participatingDIH.getText(buf));
14490 infoEvent("ParticipatingLQH = %s", c_lcpState.m_participatingLQH.getText(buf));
14491 infoEvent("m_LCP_COMPLETE_REP_Counter_DIH = %s",
14492 c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.getText());
14493 infoEvent("m_LCP_COMPLETE_REP_Counter_LQH = %s",
14494 c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.getText());
14495 infoEvent("m_LAST_LCP_FRAG_ORD = %s",
14496 c_lcpState.m_LAST_LCP_FRAG_ORD.getText());
14497 infoEvent("m_LCP_COMPLETE_REP_From_Master_Received = %d",
14498 c_lcpState.m_LCP_COMPLETE_REP_From_Master_Received);
14500 NodeRecordPtr nodePtr;
14501 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
14502 jam();
14503 ptrAss(nodePtr, nodeRecord);
14504 if(nodePtr.p->nodeStatus == NodeRecord::ALIVE){
14505 Uint32 i;
14506 for(i = 0; i<nodePtr.p->noOfStartedChkpt; i++){
14507 infoEvent("Node %d: started: table=%d fragment=%d replica=%d",
14508 nodePtr.i,
14509 nodePtr.p->startedChkpt[i].tableId,
14510 nodePtr.p->startedChkpt[i].fragId,
14511 nodePtr.p->startedChkpt[i].replicaPtr);
14514 for(i = 0; i<nodePtr.p->noOfQueuedChkpt; i++){
14515 infoEvent("Node %d: queued: table=%d fragment=%d replica=%d",
14516 nodePtr.i,
14517 nodePtr.p->queuedChkpt[i].tableId,
14518 nodePtr.p->queuedChkpt[i].fragId,
14519 nodePtr.p->queuedChkpt[i].replicaPtr);
14525 if(arg == 7019 && signal->getLength() == 2)
14527 char buf2[8+1];
14528 NodeRecordPtr nodePtr;
14529 nodePtr.i = signal->theData[1];
14530 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
14531 infoEvent("NF Node %d tc: %d lqh: %d dih: %d dict: %d recNODE_FAILREP: %d",
14532 nodePtr.i,
14533 nodePtr.p->dbtcFailCompleted,
14534 nodePtr.p->dblqhFailCompleted,
14535 nodePtr.p->dbdihFailCompleted,
14536 nodePtr.p->dbdictFailCompleted,
14537 nodePtr.p->recNODE_FAILREP);
14538 infoEvent(" m_NF_COMPLETE_REP: %s m_nodefailSteps: %s",
14539 nodePtr.p->m_NF_COMPLETE_REP.getText(),
14540 nodePtr.p->m_nodefailSteps.getText(buf2));
14543 if(arg == 7020 && signal->getLength() > 3)
14545 Uint32 gsn= signal->theData[1];
14546 Uint32 block= signal->theData[2];
14547 Uint32 length= signal->length() - 3;
14548 memmove(signal->theData, signal->theData+3, 4*length);
14549 sendSignal(numberToRef(block, getOwnNodeId()), gsn, signal, length, JBB);
14551 warningEvent("-- SENDING CUSTOM SIGNAL --");
14552 char buf[100], buf2[100];
14553 buf2[0]= 0;
14554 for(Uint32 i = 0; i<length; i++)
14556 snprintf(buf, 100, "%s %.8x", buf2, signal->theData[i]);
14557 snprintf(buf2, 100, "%s", buf);
14559 warningEvent("gsn: %d block: %s, length: %d theData: %s",
14560 gsn, getBlockName(block, "UNKNOWN"), length, buf);
14562 g_eventLogger.warning("-- SENDING CUSTOM SIGNAL --");
14563 g_eventLogger.warning("gsn: %d block: %s, length: %d theData: %s",
14564 gsn, getBlockName(block, "UNKNOWN"), length, buf);
14567 if(arg == DumpStateOrd::DihDumpLCPState){
14568 infoEvent("-- Node %d LCP STATE --", getOwnNodeId());
14569 infoEvent("lcpStatus = %d (update place = %d) ",
14570 c_lcpState.lcpStatus, c_lcpState.lcpStatusUpdatedPlace);
14571 infoEvent
14572 ("lcpStart = %d lcpStopGcp = %d keepGci = %d oldestRestorable = %d",
14573 c_lcpState.lcpStart, c_lcpState.lcpStopGcp,
14574 c_lcpState.keepGci, c_lcpState.oldestRestorableGci);
14576 infoEvent
14577 ("immediateLcpStart = %d masterLcpNodeId = %d",
14578 c_lcpState.immediateLcpStart,
14579 refToNode(c_lcpState.m_masterLcpDihRef));
14581 for (Uint32 i = 0; i<10; i++)
14583 infoEvent("%u : status: %u place: %u", i,
14584 c_lcpState.m_saveState[i].m_status,
14585 c_lcpState.m_saveState[i].m_place);
14588 infoEvent("-- Node %d LCP STATE --", getOwnNodeId());
14591 if(arg == DumpStateOrd::DihDumpLCPMasterTakeOver){
14592 infoEvent("-- Node %d LCP MASTER TAKE OVER STATE --", getOwnNodeId());
14593 infoEvent
14594 ("c_lcpMasterTakeOverState.state = %d updatePlace = %d failedNodeId = %d",
14595 c_lcpMasterTakeOverState.state,
14596 c_lcpMasterTakeOverState.updatePlace,
14597 c_lcpMasterTakeOverState.failedNodeId);
14599 infoEvent("c_lcpMasterTakeOverState.minTableId = %u minFragId = %u",
14600 c_lcpMasterTakeOverState.minTableId,
14601 c_lcpMasterTakeOverState.minFragId);
14603 infoEvent("-- Node %d LCP MASTER TAKE OVER STATE --", getOwnNodeId());
14606 if (signal->theData[0] == 7015)
14608 if (signal->getLength() == 1)
14610 signal->theData[1] = 0;
14613 Uint32 tableId = signal->theData[1];
14614 if (tableId < ctabFileSize)
14616 signal->theData[0] = 7021;
14617 execDUMP_STATE_ORD(signal);
14618 signal->theData[0] = 7015;
14619 signal->theData[1] = tableId + 1;
14620 sendSignal(reference(), GSN_DUMP_STATE_ORD, signal, 2, JBB);
14624 if(arg == DumpStateOrd::EnableUndoDelayDataWrite){
14625 g_eventLogger.info("Dbdih:: delay write of datapages for table = %d",
14626 dumpState->args[1]);
14627 // Send this dump to ACC and TUP
14628 EXECUTE_DIRECT(DBACC, GSN_DUMP_STATE_ORD, signal, 2);
14629 EXECUTE_DIRECT(DBTUP, GSN_DUMP_STATE_ORD, signal, 2);
14631 // Start immediate LCP
14632 c_lcpState.ctimer += (1 << c_lcpState.clcpDelay);
14633 return;
14636 if (signal->theData[0] == DumpStateOrd::DihAllAllowNodeStart) {
14637 for (Uint32 i = 1; i < MAX_NDB_NODES; i++)
14638 setAllowNodeStart(i, true);
14639 return;
14640 }//if
14641 if (signal->theData[0] == DumpStateOrd::DihMinTimeBetweenLCP) {
14642 // Set time between LCP to min value
14643 g_eventLogger.info("Set time between LCP to min value");
14644 c_lcpState.clcpDelay = 0; // TimeBetweenLocalCheckpoints.min
14645 return;
14647 if (signal->theData[0] == DumpStateOrd::DihMaxTimeBetweenLCP) {
14648 // Set time between LCP to max value
14649 g_eventLogger.info("Set time between LCP to max value");
14650 c_lcpState.clcpDelay = 31; // TimeBetweenLocalCheckpoints.max
14651 return;
14654 if(arg == 7098){
14655 if(signal->length() == 3){
14656 jam();
14657 infoEvent("startLcpRoundLoopLab(tabel=%d, fragment=%d)",
14658 signal->theData[1], signal->theData[2]);
14659 startLcpRoundLoopLab(signal, signal->theData[1], signal->theData[2]);
14660 return;
14661 } else {
14662 infoEvent("Invalid no of arguments to 7098 - startLcpRoundLoopLab -"
14663 " expected 2 (tableId, fragmentId)");
14667 if(arg == DumpStateOrd::DihStartLcpImmediately){
14668 c_lcpState.ctimer += (1 << c_lcpState.clcpDelay);
14669 return;
14672 if (arg == DumpStateOrd::DihSetTimeBetweenGcp)
14674 if (signal->getLength() == 1)
14676 const ndb_mgm_configuration_iterator * p =
14677 m_ctx.m_config.getOwnConfigIterator();
14678 ndbrequire(p != 0);
14679 ndb_mgm_get_int_parameter(p, CFG_DB_GCP_INTERVAL, &cgcpDelay);
14681 else
14683 cgcpDelay = signal->theData[1];
14685 g_eventLogger.info("Setting time between gcp : %d", cgcpDelay);
14688 if (arg == 7021 && signal->getLength() == 2)
14690 TabRecordPtr tabPtr;
14691 tabPtr.i = signal->theData[1];
14692 if (tabPtr.i >= ctabFileSize)
14693 return;
14695 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
14697 if(tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
14698 return;
14700 infoEvent
14701 ("Table %d: TabCopyStatus: %d TabUpdateStatus: %d TabLcpStatus: %d",
14702 tabPtr.i,
14703 tabPtr.p->tabCopyStatus,
14704 tabPtr.p->tabUpdateState,
14705 tabPtr.p->tabLcpStatus);
14707 FragmentstorePtr fragPtr;
14708 for (Uint32 fid = 0; fid < tabPtr.p->totalfragments; fid++) {
14709 jam();
14710 getFragstore(tabPtr.p, fid, fragPtr);
14712 char buf[100], buf2[100];
14713 BaseString::snprintf(buf, sizeof(buf), " Fragment %d: noLcpReplicas==%d ",
14714 fid, fragPtr.p->noLcpReplicas);
14716 Uint32 num=0;
14717 ReplicaRecordPtr replicaPtr;
14718 replicaPtr.i = fragPtr.p->storedReplicas;
14719 do {
14720 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
14721 BaseString::snprintf(buf2, sizeof(buf2), "%s %d(on %d)=%d(%s)",
14722 buf, num,
14723 replicaPtr.p->procNode,
14724 replicaPtr.p->lcpIdStarted,
14725 replicaPtr.p->lcpOngoingFlag ? "Ongoing" : "Idle");
14726 BaseString::snprintf(buf, sizeof(buf), "%s", buf2);
14728 num++;
14729 replicaPtr.i = replicaPtr.p->nextReplica;
14730 } while (replicaPtr.i != RNIL);
14731 infoEvent(buf);
14735 if (arg == 7022)
14737 jam();
14738 crashSystemAtGcpStop(signal, true);
14740 }//Dbdih::execDUMP_STATE_ORD()
14742 void
14743 Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){
14744 jamEntry();
14746 PrepDropTabReq* req = (PrepDropTabReq*)signal->getDataPtr();
14748 TabRecordPtr tabPtr;
14749 tabPtr.i = req->tableId;
14750 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
14752 Uint32 senderRef = req->senderRef;
14753 Uint32 senderData = req->senderData;
14755 PrepDropTabRef::ErrorCode err = PrepDropTabRef::OK;
14756 { /**
14757 * Check table state
14759 bool ok = false;
14760 switch(tabPtr.p->tabStatus){
14761 case TabRecord::TS_IDLE:
14762 ok = true;
14763 jam();
14764 err = PrepDropTabRef::NoSuchTable;
14765 break;
14766 case TabRecord::TS_DROPPING:
14767 ok = true;
14768 jam();
14769 err = PrepDropTabRef::PrepDropInProgress;
14770 break;
14771 case TabRecord::TS_CREATING:
14772 jam();
14773 ok = true;
14774 break;
14775 case TabRecord::TS_ACTIVE:
14776 ok = true;
14777 jam();
14778 break;
14780 ndbrequire(ok);
14783 if(err != PrepDropTabRef::OK){
14784 jam();
14785 PrepDropTabRef* ref = (PrepDropTabRef*)signal->getDataPtrSend();
14786 ref->senderRef = reference();
14787 ref->senderData = senderData;
14788 ref->tableId = tabPtr.i;
14789 ref->errorCode = err;
14790 sendSignal(senderRef, GSN_PREP_DROP_TAB_REF, signal,
14791 PrepDropTabRef::SignalLength, JBB);
14792 return;
14795 tabPtr.p->tabStatus = TabRecord::TS_DROPPING;
14796 tabPtr.p->m_prepDropTab.senderRef = senderRef;
14797 tabPtr.p->m_prepDropTab.senderData = senderData;
14799 if(isMaster()){
14801 * Remove from queue
14803 NodeRecordPtr nodePtr;
14804 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
14805 jam();
14806 ptrAss(nodePtr, nodeRecord);
14807 if (c_lcpState.m_participatingLQH.get(nodePtr.i)){
14809 Uint32 index = 0;
14810 Uint32 count = nodePtr.p->noOfQueuedChkpt;
14811 while(index < count){
14812 if(nodePtr.p->queuedChkpt[index].tableId == tabPtr.i){
14813 jam();
14814 // g_eventLogger.info("Unqueuing %d", index);
14816 count--;
14817 for(Uint32 i = index; i<count; i++){
14818 jam();
14819 nodePtr.p->queuedChkpt[i] = nodePtr.p->queuedChkpt[i + 1];
14821 } else {
14822 index++;
14825 nodePtr.p->noOfQueuedChkpt = count;
14830 { /**
14831 * Check table lcp state
14834 bool ok = false;
14835 switch(tabPtr.p->tabLcpStatus){
14836 case TabRecord::TLS_COMPLETED:
14837 case TabRecord::TLS_WRITING_TO_FILE:
14838 ok = true;
14839 jam();
14840 break;
14841 return;
14842 case TabRecord::TLS_ACTIVE:
14843 ok = true;
14844 jam();
14846 tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
14849 * First check if all fragments are done
14851 if(checkLcpAllTablesDoneInLqh()){
14852 jam();
14854 g_eventLogger.info("This is the last table");
14857 * Then check if saving of tab info is done for all tables
14859 LcpStatus a = c_lcpState.lcpStatus;
14860 checkLcpCompletedLab(signal);
14862 if(a != c_lcpState.lcpStatus){
14863 g_eventLogger.info("And all tables are written to already written disk");
14866 break;
14868 ndbrequire(ok);
14871 { /**
14872 * Send WaitDropTabReq to all LQH
14874 WaitDropTabReq * req = (WaitDropTabReq*)signal->getDataPtrSend();
14875 req->tableId = tabPtr.i;
14876 req->senderRef = reference();
14878 NodeRecordPtr nodePtr;
14879 nodePtr.i = cfirstAliveNode;
14880 tabPtr.p->m_prepDropTab.waitDropTabCount.clearWaitingFor();
14881 while(nodePtr.i != RNIL){
14882 jam();
14883 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
14885 tabPtr.p->m_prepDropTab.waitDropTabCount.setWaitingFor(nodePtr.i);
14886 sendSignal(calcLqhBlockRef(nodePtr.i), GSN_WAIT_DROP_TAB_REQ,
14887 signal, WaitDropTabReq::SignalLength, JBB);
14889 nodePtr.i = nodePtr.p->nextNode;
14893 waitDropTabWritingToFile(signal, tabPtr);
14896 void
14897 Dbdih::waitDropTabWritingToFile(Signal* signal, TabRecordPtr tabPtr){
14899 if(tabPtr.p->tabLcpStatus == TabRecord::TLS_WRITING_TO_FILE){
14900 jam();
14901 signal->theData[0] = DihContinueB::WAIT_DROP_TAB_WRITING_TO_FILE;
14902 signal->theData[1] = tabPtr.i;
14903 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2);
14904 return;
14907 ndbrequire(tabPtr.p->tabLcpStatus == TabRecord::TLS_COMPLETED);
14908 checkPrepDropTabComplete(signal, tabPtr);
14911 void
14912 Dbdih::checkPrepDropTabComplete(Signal* signal, TabRecordPtr tabPtr){
14914 if(tabPtr.p->tabLcpStatus != TabRecord::TLS_COMPLETED){
14915 jam();
14916 return;
14919 if(!tabPtr.p->m_prepDropTab.waitDropTabCount.done()){
14920 jam();
14921 return;
14924 const Uint32 ref = tabPtr.p->m_prepDropTab.senderRef;
14925 if(ref != 0){
14926 PrepDropTabConf* conf = (PrepDropTabConf*)signal->getDataPtrSend();
14927 conf->tableId = tabPtr.i;
14928 conf->senderRef = reference();
14929 conf->senderData = tabPtr.p->m_prepDropTab.senderData;
14930 sendSignal(tabPtr.p->m_prepDropTab.senderRef, GSN_PREP_DROP_TAB_CONF,
14931 signal, PrepDropTabConf::SignalLength, JBB);
14932 tabPtr.p->m_prepDropTab.senderRef = 0;
14936 void
14937 Dbdih::execWAIT_DROP_TAB_REF(Signal* signal){
14938 jamEntry();
14939 WaitDropTabRef * ref = (WaitDropTabRef*)signal->getDataPtr();
14941 TabRecordPtr tabPtr;
14942 tabPtr.i = ref->tableId;
14943 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
14945 ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_DROPPING);
14946 Uint32 nodeId = refToNode(ref->senderRef);
14948 ndbrequire(ref->errorCode == WaitDropTabRef::NoSuchTable ||
14949 ref->errorCode == WaitDropTabRef::NF_FakeErrorREF);
14951 tabPtr.p->m_prepDropTab.waitDropTabCount.clearWaitingFor(nodeId);
14952 checkPrepDropTabComplete(signal, tabPtr);
14955 void
14956 Dbdih::execWAIT_DROP_TAB_CONF(Signal* signal){
14957 jamEntry();
14958 WaitDropTabConf * conf = (WaitDropTabConf*)signal->getDataPtr();
14960 TabRecordPtr tabPtr;
14961 tabPtr.i = conf->tableId;
14962 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
14964 ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_DROPPING);
14965 Uint32 nodeId = refToNode(conf->senderRef);
14966 tabPtr.p->m_prepDropTab.waitDropTabCount.clearWaitingFor(nodeId);
14967 checkPrepDropTabComplete(signal, tabPtr);
14970 void
14971 Dbdih::checkWaitDropTabFailedLqh(Signal* signal, Uint32 nodeId, Uint32 tableId){
14973 TabRecordPtr tabPtr;
14974 tabPtr.i = tableId;
14976 WaitDropTabConf * conf = (WaitDropTabConf*)signal->getDataPtr();
14977 conf->tableId = tableId;
14979 const Uint32 RT_BREAK = 16;
14980 for(Uint32 i = 0; i<RT_BREAK && tabPtr.i < ctabFileSize; i++, tabPtr.i++){
14981 ptrAss(tabPtr, tabRecord);
14982 if(tabPtr.p->tabStatus == TabRecord::TS_DROPPING){
14983 if(tabPtr.p->m_prepDropTab.waitDropTabCount.isWaitingFor(nodeId)){
14984 conf->senderRef = calcLqhBlockRef(nodeId);
14985 execWAIT_DROP_TAB_CONF(signal);
14986 tabPtr.i++;
14987 break;
14992 if(tabPtr.i == ctabFileSize){
14994 * Finished
14996 jam();
14997 return;
15000 signal->theData[0] = DihContinueB::CHECK_WAIT_DROP_TAB_FAILED_LQH;
15001 signal->theData[1] = nodeId;
15002 signal->theData[2] = tabPtr.i;
15003 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
15007 void
15008 Dbdih::execNDB_TAMPER(Signal* signal)
15010 if ((ERROR_INSERTED(7011)) &&
15011 (signal->theData[0] == 7012)) {
15012 CLEAR_ERROR_INSERT_VALUE;
15013 calculateKeepGciLab(signal, 0, 0);
15014 return;
15015 }//if
15016 SET_ERROR_INSERT_VALUE(signal->theData[0]);
15017 return;
15018 }//Dbdih::execNDB_TAMPER()
15020 void Dbdih::execBLOCK_COMMIT_ORD(Signal* signal){
15021 BlockCommitOrd* const block = (BlockCommitOrd *)&signal->theData[0];
15023 jamEntry();
15024 #if 0
15025 ndbrequire(c_blockCommit == false ||
15026 c_blockCommitNo == block->failNo);
15027 #else
15028 if(!(c_blockCommit == false || c_blockCommitNo == block->failNo)){
15029 infoEvent("Possible bug in Dbdih::execBLOCK_COMMIT_ORD c_blockCommit = %d c_blockCommitNo = %d"
15030 " sig->failNo = %d", c_blockCommit, c_blockCommitNo, block->failNo);
15032 #endif
15033 c_blockCommit = true;
15034 c_blockCommitNo = block->failNo;
15037 void Dbdih::execUNBLOCK_COMMIT_ORD(Signal* signal){
15038 UnblockCommitOrd* const unblock = (UnblockCommitOrd *)&signal->theData[0];
15039 (void)unblock;
15041 jamEntry();
15043 if(c_blockCommit == true){
15044 jam();
15045 // ndbrequire(c_blockCommitNo == unblock->failNo);
15047 c_blockCommit = false;
15048 emptyverificbuffer(signal, true);
15052 void Dbdih::execSTOP_PERM_REQ(Signal* signal){
15054 jamEntry();
15056 StopPermReq* const req = (StopPermReq*)&signal->theData[0];
15057 StopPermRef* const ref = (StopPermRef*)&signal->theData[0];
15059 const Uint32 senderData = req->senderData;
15060 const BlockReference senderRef = req->senderRef;
15061 const NodeId nodeId = refToNode(senderRef);
15063 if (isMaster()) {
15065 * Master
15067 jam();
15068 CRASH_INSERTION(7065);
15069 if (c_stopPermMaster.clientRef != 0) {
15070 jam();
15072 ref->senderData = senderData;
15073 ref->errorCode = StopPermRef::NodeShutdownInProgress;
15074 sendSignal(senderRef, GSN_STOP_PERM_REF, signal,
15075 StopPermRef::SignalLength, JBB);
15076 return;
15077 }//if
15079 if (c_nodeStartMaster.activeState) {
15080 jam();
15081 ref->senderData = senderData;
15082 ref->errorCode = StopPermRef::NodeStartInProgress;
15083 sendSignal(senderRef, GSN_STOP_PERM_REF, signal,
15084 StopPermRef::SignalLength, JBB);
15085 return;
15086 }//if
15089 * Lock
15091 c_nodeStartMaster.activeState = true;
15092 c_stopPermMaster.clientRef = senderRef;
15094 c_stopPermMaster.clientData = senderData;
15095 c_stopPermMaster.returnValue = 0;
15096 c_switchReplicas.clear();
15098 Mutex mutex(signal, c_mutexMgr, c_switchPrimaryMutexHandle);
15099 Callback c = { safe_cast(&Dbdih::switch_primary_stop_node), nodeId };
15100 ndbrequire(mutex.lock(c));
15101 } else {
15102 /**
15103 * Proxy part
15105 jam();
15106 CRASH_INSERTION(7066);
15107 if(c_stopPermProxy.clientRef != 0){
15108 jam();
15109 ref->senderData = senderData;
15110 ref->errorCode = StopPermRef::NodeShutdownInProgress;
15111 sendSignal(senderRef, GSN_STOP_PERM_REF, signal, 2, JBB);
15112 return;
15113 }//if
15115 c_stopPermProxy.clientRef = senderRef;
15116 c_stopPermProxy.masterRef = cmasterdihref;
15117 c_stopPermProxy.clientData = senderData;
15119 req->senderRef = reference();
15120 req->senderData = senderData;
15121 sendSignal(cmasterdihref, GSN_STOP_PERM_REQ, signal,
15122 StopPermReq::SignalLength, JBB);
15123 }//if
15124 }//Dbdih::execSTOP_PERM_REQ()
15126 void
15127 Dbdih::switch_primary_stop_node(Signal* signal, Uint32 node_id, Uint32 ret_val)
15129 ndbrequire(ret_val == 0);
15130 signal->theData[0] = DihContinueB::SwitchReplica;
15131 signal->theData[1] = node_id;
15132 signal->theData[2] = 0; // table id
15133 signal->theData[3] = 0; // fragment id
15134 sendSignal(reference(), GSN_CONTINUEB, signal, 4, JBB);
15137 void Dbdih::execSTOP_PERM_REF(Signal* signal)
15139 jamEntry();
15140 ndbrequire(c_stopPermProxy.clientRef != 0);
15141 ndbrequire(c_stopPermProxy.masterRef == signal->senderBlockRef());
15142 sendSignal(c_stopPermProxy.clientRef, GSN_STOP_PERM_REF, signal, 2, JBB);
15143 c_stopPermProxy.clientRef = 0;
15144 }//Dbdih::execSTOP_PERM_REF()
15146 void Dbdih::execSTOP_PERM_CONF(Signal* signal)
15148 jamEntry();
15149 ndbrequire(c_stopPermProxy.clientRef != 0);
15150 ndbrequire(c_stopPermProxy.masterRef == signal->senderBlockRef());
15151 sendSignal(c_stopPermProxy.clientRef, GSN_STOP_PERM_CONF, signal, 1, JBB);
15152 c_stopPermProxy.clientRef = 0;
15153 }//Dbdih::execSTOP_PERM_CONF()
15155 void Dbdih::execDIH_SWITCH_REPLICA_REQ(Signal* signal)
15157 jamEntry();
15158 DihSwitchReplicaReq* const req = (DihSwitchReplicaReq*)&signal->theData[0];
15159 const Uint32 tableId = req->tableId;
15160 const Uint32 fragNo = req->fragNo;
15161 const BlockReference senderRef = req->senderRef;
15163 CRASH_INSERTION(7067);
15164 TabRecordPtr tabPtr;
15165 tabPtr.i = tableId;
15166 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
15168 ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_ACTIVE);
15169 if (tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE) {
15170 jam();
15171 sendSignal(reference(), GSN_DIH_SWITCH_REPLICA_REQ, signal,
15172 DihSwitchReplicaReq::SignalLength, JBB);
15173 return;
15174 }//if
15175 FragmentstorePtr fragPtr;
15176 getFragstore(tabPtr.p, fragNo, fragPtr);
15179 * Do funky stuff
15181 Uint32 oldOrder[MAX_REPLICAS];
15182 const Uint32 noOfReplicas = extractNodeInfo(fragPtr.p, oldOrder);
15184 if (noOfReplicas < req->noOfReplicas) {
15185 jam();
15186 //---------------------------------------------------------------------
15187 // A crash occurred in the middle of our switch handling.
15188 //---------------------------------------------------------------------
15189 DihSwitchReplicaRef* const ref = (DihSwitchReplicaRef*)&signal->theData[0];
15190 ref->senderNode = cownNodeId;
15191 ref->errorCode = StopPermRef::NF_CausedAbortOfStopProcedure;
15192 sendSignal(senderRef, GSN_DIH_SWITCH_REPLICA_REF, signal,
15193 DihSwitchReplicaRef::SignalLength, JBB);
15194 }//if
15195 for (Uint32 i = 0; i < noOfReplicas; i++) {
15196 jam();
15197 ndbrequire(i < MAX_REPLICAS);
15198 fragPtr.p->activeNodes[i] = req->newNodeOrder[i];
15199 }//for
15201 * Reply
15203 DihSwitchReplicaConf* const conf = (DihSwitchReplicaConf*)&signal->theData[0];
15204 conf->senderNode = cownNodeId;
15205 sendSignal(senderRef, GSN_DIH_SWITCH_REPLICA_CONF, signal,
15206 DihSwitchReplicaConf::SignalLength, JBB);
15207 }//Dbdih::execDIH_SWITCH_REPLICA_REQ()
15209 void Dbdih::execDIH_SWITCH_REPLICA_CONF(Signal* signal)
15211 jamEntry();
15213 * Response to master
15215 CRASH_INSERTION(7068);
15216 DihSwitchReplicaConf* const conf = (DihSwitchReplicaConf*)&signal->theData[0];
15217 switchReplicaReply(signal, conf->senderNode);
15218 }//Dbdih::execDIH_SWITCH_REPLICA_CONF()
15220 void Dbdih::execDIH_SWITCH_REPLICA_REF(Signal* signal)
15222 jamEntry();
15223 DihSwitchReplicaRef* const ref = (DihSwitchReplicaRef*)&signal->theData[0];
15224 if(c_stopPermMaster.returnValue == 0){
15225 jam();
15226 c_stopPermMaster.returnValue = ref->errorCode;
15227 }//if
15228 switchReplicaReply(signal, ref->senderNode);
15229 }//Dbdih::execDIH_SWITCH_REPLICA_REF()
15231 void Dbdih::switchReplicaReply(Signal* signal,
15232 NodeId nodeId){
15233 jam();
15234 receiveLoopMacro(DIH_SWITCH_REPLICA_REQ, nodeId);
15235 //------------------------------------------------------
15236 // We have received all responses from the nodes. Thus
15237 // we have completed switching replica roles. Continue
15238 // with the next fragment.
15239 //------------------------------------------------------
15240 if(c_stopPermMaster.returnValue != 0){
15241 jam();
15242 c_switchReplicas.tableId = ctabFileSize + 1;
15243 }//if
15244 c_switchReplicas.fragNo++;
15246 signal->theData[0] = DihContinueB::SwitchReplica;
15247 signal->theData[1] = c_switchReplicas.nodeId;
15248 signal->theData[2] = c_switchReplicas.tableId;
15249 signal->theData[3] = c_switchReplicas.fragNo;
15250 sendSignal(reference(), GSN_CONTINUEB, signal, 4, JBB);
15251 }//Dbdih::switchReplicaReply()
15253 void
15254 Dbdih::switchReplica(Signal* signal,
15255 Uint32 nodeId,
15256 Uint32 tableId,
15257 Uint32 fragNo){
15258 jam();
15259 DihSwitchReplicaReq* const req = (DihSwitchReplicaReq*)&signal->theData[0];
15261 const Uint32 RT_BREAK = 64;
15263 for (Uint32 i = 0; i < RT_BREAK; i++) {
15264 jam();
15265 if (tableId >= ctabFileSize) {
15266 jam();
15267 StopPermConf* const conf = (StopPermConf*)&signal->theData[0];
15268 StopPermRef* const ref = (StopPermRef*)&signal->theData[0];
15270 * Finished with all tables
15272 if(c_stopPermMaster.returnValue == 0) {
15273 jam();
15274 conf->senderData = c_stopPermMaster.clientData;
15275 sendSignal(c_stopPermMaster.clientRef, GSN_STOP_PERM_CONF,
15276 signal, 1, JBB);
15277 } else {
15278 jam();
15279 ref->senderData = c_stopPermMaster.clientData;
15280 ref->errorCode = c_stopPermMaster.returnValue;
15281 sendSignal(c_stopPermMaster.clientRef, GSN_STOP_PERM_REF, signal, 2,JBB);
15282 }//if
15285 * UnLock
15287 c_nodeStartMaster.activeState = false;
15288 c_stopPermMaster.clientRef = 0;
15289 c_stopPermMaster.clientData = 0;
15290 c_stopPermMaster.returnValue = 0;
15291 Mutex mutex(signal, c_mutexMgr, c_switchPrimaryMutexHandle);
15292 mutex.unlock(); // ignore result
15293 return;
15294 }//if
15296 TabRecordPtr tabPtr;
15297 tabPtr.i = tableId;
15298 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
15300 if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE) {
15301 jam();
15302 tableId++;
15303 fragNo = 0;
15304 continue;
15305 }//if
15306 if (fragNo >= tabPtr.p->totalfragments) {
15307 jam();
15308 tableId++;
15309 fragNo = 0;
15310 continue;
15311 }//if
15312 FragmentstorePtr fragPtr;
15313 getFragstore(tabPtr.p, fragNo, fragPtr);
15315 Uint32 oldOrder[MAX_REPLICAS];
15316 const Uint32 noOfReplicas = extractNodeInfo(fragPtr.p, oldOrder);
15318 if(oldOrder[0] != nodeId) {
15319 jam();
15320 fragNo++;
15321 continue;
15322 }//if
15323 req->tableId = tableId;
15324 req->fragNo = fragNo;
15325 req->noOfReplicas = noOfReplicas;
15326 for (Uint32 i = 0; i < (noOfReplicas - 1); i++) {
15327 req->newNodeOrder[i] = oldOrder[i+1];
15328 }//for
15329 req->newNodeOrder[noOfReplicas-1] = nodeId;
15330 req->senderRef = reference();
15333 * Initialize struct
15335 c_switchReplicas.tableId = tableId;
15336 c_switchReplicas.fragNo = fragNo;
15337 c_switchReplicas.nodeId = nodeId;
15339 sendLoopMacro(DIH_SWITCH_REPLICA_REQ, sendDIH_SWITCH_REPLICA_REQ);
15340 return;
15341 }//for
15343 signal->theData[0] = DihContinueB::SwitchReplica;
15344 signal->theData[1] = nodeId;
15345 signal->theData[2] = tableId;
15346 signal->theData[3] = fragNo;
15347 sendSignal(reference(), GSN_CONTINUEB, signal, 4, JBB);
15348 }//Dbdih::switchReplica()
15350 void Dbdih::execSTOP_ME_REQ(Signal* signal)
15352 jamEntry();
15353 StopMeReq* const req = (StopMeReq*)&signal->theData[0];
15354 const BlockReference senderRef = req->senderRef;
15355 const Uint32 senderData = req->senderData;
15356 const Uint32 nodeId = refToNode(senderRef);
15359 * Set node dead (remove from operations)
15361 NodeRecordPtr nodePtr;
15362 nodePtr.i = nodeId;
15363 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
15364 nodePtr.p->useInTransactions = false;
15366 if (nodeId != getOwnNodeId()) {
15367 jam();
15368 StopMeConf * const stopMeConf = (StopMeConf *)&signal->theData[0];
15369 stopMeConf->senderData = senderData;
15370 stopMeConf->senderRef = reference();
15371 sendSignal(senderRef, GSN_STOP_ME_CONF, signal,
15372 StopMeConf::SignalLength, JBB);
15373 return;
15374 }//if
15377 * Local signal
15379 jam();
15380 ndbrequire(c_stopMe.clientRef == 0);
15382 c_stopMe.clientData = senderData;
15383 c_stopMe.clientRef = senderRef;
15385 req->senderData = senderData;
15386 req->senderRef = reference();
15388 sendLoopMacro(STOP_ME_REQ, sendSTOP_ME_REQ);
15391 * Send conf to self
15393 StopMeConf * const stopMeConf = (StopMeConf *)&signal->theData[0];
15394 stopMeConf->senderData = senderData;
15395 stopMeConf->senderRef = reference();
15396 sendSignal(reference(), GSN_STOP_ME_CONF, signal,
15397 StopMeConf::SignalLength, JBB);
15398 }//Dbdih::execSTOP_ME_REQ()
15400 void Dbdih::execSTOP_ME_REF(Signal* signal)
15402 ndbrequire(false);
15405 void Dbdih::execSTOP_ME_CONF(Signal* signal)
15407 jamEntry();
15408 StopMeConf * const stopMeConf = (StopMeConf *)&signal->theData[0];
15410 const Uint32 senderRef = stopMeConf->senderRef;
15411 const Uint32 senderData = stopMeConf->senderData;
15412 const Uint32 nodeId = refToNode(senderRef);
15414 ndbrequire(c_stopMe.clientRef != 0);
15415 ndbrequire(c_stopMe.clientData == senderData);
15417 receiveLoopMacro(STOP_ME_REQ, nodeId);
15418 //---------------------------------------------------------
15419 // All STOP_ME_REQ have been received. We will send the
15420 // confirmation back to the requesting block.
15421 //---------------------------------------------------------
15423 stopMeConf->senderRef = reference();
15424 stopMeConf->senderData = c_stopMe.clientData;
15425 sendSignal(c_stopMe.clientRef, GSN_STOP_ME_CONF, signal,
15426 StopMeConf::SignalLength, JBB);
15427 c_stopMe.clientRef = 0;
15428 }//Dbdih::execSTOP_ME_CONF()
15430 void Dbdih::execWAIT_GCP_REQ(Signal* signal)
15432 jamEntry();
15433 WaitGCPReq* const req = (WaitGCPReq*)&signal->theData[0];
15434 WaitGCPRef* const ref = (WaitGCPRef*)&signal->theData[0];
15435 WaitGCPConf* const conf = (WaitGCPConf*)&signal->theData[0];
15436 const Uint32 senderData = req->senderData;
15437 const BlockReference senderRef = req->senderRef;
15438 const Uint32 requestType = req->requestType;
15440 if(requestType == WaitGCPReq::CurrentGCI) {
15441 jam();
15442 conf->senderData = senderData;
15443 conf->gcp = cnewgcp;
15444 conf->blockStatus = cgcpOrderBlocked;
15445 sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
15446 WaitGCPConf::SignalLength, JBB);
15447 return;
15448 }//if
15450 if (requestType == WaitGCPReq::BlockStartGcp)
15452 jam();
15453 conf->senderData = senderData;
15454 conf->gcp = cnewgcp;
15455 conf->blockStatus = cgcpOrderBlocked;
15456 sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
15457 WaitGCPConf::SignalLength, JBB);
15458 cgcpOrderBlocked = 1;
15459 return;
15462 if (requestType == WaitGCPReq::UnblockStartGcp)
15464 jam();
15465 conf->senderData = senderData;
15466 conf->gcp = cnewgcp;
15467 conf->blockStatus = cgcpOrderBlocked;
15468 sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
15469 WaitGCPConf::SignalLength, JBB);
15470 cgcpOrderBlocked = 0;
15471 return;
15474 if(isMaster()) {
15476 * Master
15478 jam();
15480 if((requestType == WaitGCPReq::CompleteIfRunning) &&
15481 (cgcpStatus == GCP_READY)) {
15482 jam();
15483 conf->senderData = senderData;
15484 conf->gcp = coldgcp;
15485 conf->blockStatus = cgcpOrderBlocked;
15486 sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
15487 WaitGCPConf::SignalLength, JBB);
15488 return;
15489 }//if
15491 WaitGCPMasterPtr ptr;
15492 if(c_waitGCPMasterList.seize(ptr) == false){
15493 jam();
15494 ref->senderData = senderData;
15495 ref->errorCode = WaitGCPRef::NoWaitGCPRecords;
15496 sendSignal(senderRef, GSN_WAIT_GCP_REF, signal,
15497 WaitGCPRef::SignalLength, JBB);
15498 return;
15499 }//if
15500 ptr.p->clientRef = senderRef;
15501 ptr.p->clientData = senderData;
15503 if((requestType == WaitGCPReq::CompleteForceStart) &&
15504 (cgcpStatus == GCP_READY)) {
15505 jam();
15506 cstartGcpNow = true;
15507 }//if
15508 return;
15509 } else {
15510 /**
15511 * Proxy part
15513 jam();
15514 WaitGCPProxyPtr ptr;
15515 if (c_waitGCPProxyList.seize(ptr) == false) {
15516 jam();
15517 ref->senderData = senderData;
15518 ref->errorCode = WaitGCPRef::NoWaitGCPRecords;
15519 sendSignal(senderRef, GSN_WAIT_GCP_REF, signal,
15520 WaitGCPRef::SignalLength, JBB);
15521 return;
15522 }//if
15523 ptr.p->clientRef = senderRef;
15524 ptr.p->clientData = senderData;
15525 ptr.p->masterRef = cmasterdihref;
15527 req->senderData = ptr.i;
15528 req->senderRef = reference();
15529 req->requestType = requestType;
15531 sendSignal(cmasterdihref, GSN_WAIT_GCP_REQ, signal,
15532 WaitGCPReq::SignalLength, JBB);
15533 return;
15534 }//if
15535 }//Dbdih::execWAIT_GCP_REQ()
15537 void Dbdih::execWAIT_GCP_REF(Signal* signal)
15539 jamEntry();
15540 ndbrequire(!isMaster());
15541 WaitGCPRef* const ref = (WaitGCPRef*)&signal->theData[0];
15543 const Uint32 proxyPtr = ref->senderData;
15544 const Uint32 errorCode = ref->errorCode;
15546 WaitGCPProxyPtr ptr;
15547 ptr.i = proxyPtr;
15548 c_waitGCPProxyList.getPtr(ptr);
15550 ref->senderData = ptr.p->clientData;
15551 ref->errorCode = errorCode;
15552 sendSignal(ptr.p->clientRef, GSN_WAIT_GCP_REF, signal,
15553 WaitGCPRef::SignalLength, JBB);
15555 c_waitGCPProxyList.release(ptr);
15556 }//Dbdih::execWAIT_GCP_REF()
15558 void Dbdih::execWAIT_GCP_CONF(Signal* signal)
15560 jamEntry();
15561 ndbrequire(!isMaster());
15562 WaitGCPConf* const conf = (WaitGCPConf*)&signal->theData[0];
15563 const Uint32 proxyPtr = conf->senderData;
15564 const Uint32 gcp = conf->gcp;
15565 WaitGCPProxyPtr ptr;
15567 ptr.i = proxyPtr;
15568 c_waitGCPProxyList.getPtr(ptr);
15570 conf->senderData = ptr.p->clientData;
15571 conf->gcp = gcp;
15572 conf->blockStatus = cgcpOrderBlocked;
15573 sendSignal(ptr.p->clientRef, GSN_WAIT_GCP_CONF, signal,
15574 WaitGCPConf::SignalLength, JBB);
15576 c_waitGCPProxyList.release(ptr);
15577 }//Dbdih::execWAIT_GCP_CONF()
15579 void Dbdih::checkWaitGCPProxy(Signal* signal, NodeId failedNodeId)
15581 jam();
15582 WaitGCPRef* const ref = (WaitGCPRef*)&signal->theData[0];
15583 ref->errorCode = WaitGCPRef::NF_CausedAbortOfProcedure;
15585 WaitGCPProxyPtr ptr;
15586 c_waitGCPProxyList.first(ptr);
15587 while(ptr.i != RNIL) {
15588 jam();
15589 const Uint32 i = ptr.i;
15590 const Uint32 clientData = ptr.p->clientData;
15591 const BlockReference clientRef = ptr.p->clientRef;
15592 const BlockReference masterRef = ptr.p->masterRef;
15594 c_waitGCPProxyList.next(ptr);
15595 if(refToNode(masterRef) == failedNodeId) {
15596 jam();
15597 c_waitGCPProxyList.release(i);
15598 ref->senderData = clientData;
15599 sendSignal(clientRef, GSN_WAIT_GCP_REF, signal,
15600 WaitGCPRef::SignalLength, JBB);
15601 }//if
15602 }//while
15603 }//Dbdih::checkWaitGCPProxy()
15605 void Dbdih::checkWaitGCPMaster(Signal* signal, NodeId failedNodeId)
15607 jam();
15608 WaitGCPMasterPtr ptr;
15609 c_waitGCPMasterList.first(ptr);
15611 while (ptr.i != RNIL) {
15612 jam();
15613 const Uint32 i = ptr.i;
15614 const NodeId nodeId = refToNode(ptr.p->clientRef);
15616 c_waitGCPMasterList.next(ptr);
15617 if (nodeId == failedNodeId) {
15618 jam()
15619 c_waitGCPMasterList.release(i);
15620 }//if
15621 }//while
15622 }//Dbdih::checkWaitGCPMaster()
15624 void Dbdih::emptyWaitGCPMasterQueue(Signal* signal)
15626 jam();
15627 WaitGCPConf* const conf = (WaitGCPConf*)&signal->theData[0];
15628 conf->gcp = coldgcp;
15630 WaitGCPMasterPtr ptr;
15631 c_waitGCPMasterList.first(ptr);
15632 while(ptr.i != RNIL) {
15633 jam();
15634 const Uint32 i = ptr.i;
15635 const Uint32 clientData = ptr.p->clientData;
15636 const BlockReference clientRef = ptr.p->clientRef;
15638 c_waitGCPMasterList.next(ptr);
15639 conf->senderData = clientData;
15640 conf->blockStatus = cgcpOrderBlocked;
15641 sendSignal(clientRef, GSN_WAIT_GCP_CONF, signal,
15642 WaitGCPConf::SignalLength, JBB);
15644 c_waitGCPMasterList.release(i);
15645 }//while
15646 }//Dbdih::emptyWaitGCPMasterQueue()
15648 void Dbdih::setNodeStatus(Uint32 nodeId, NodeRecord::NodeStatus newStatus)
15650 NodeRecordPtr nodePtr;
15651 nodePtr.i = nodeId;
15652 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
15653 nodePtr.p->nodeStatus = newStatus;
15654 }//Dbdih::setNodeStatus()
15656 Dbdih::NodeRecord::NodeStatus Dbdih::getNodeStatus(Uint32 nodeId)
15658 NodeRecordPtr nodePtr;
15659 nodePtr.i = nodeId;
15660 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
15661 return nodePtr.p->nodeStatus;
15662 }//Dbdih::getNodeStatus()
15664 Sysfile::ActiveStatus
15665 Dbdih::getNodeActiveStatus(Uint32 nodeId)
15667 NodeRecordPtr nodePtr;
15668 nodePtr.i = nodeId;
15669 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
15670 return nodePtr.p->activeStatus;
15671 }//Dbdih::getNodeActiveStatus()
15674 void
15675 Dbdih::setNodeActiveStatus(Uint32 nodeId, Sysfile::ActiveStatus newStatus)
15677 NodeRecordPtr nodePtr;
15678 nodePtr.i = nodeId;
15679 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
15680 nodePtr.p->activeStatus = newStatus;
15681 }//Dbdih::setNodeActiveStatus()
15683 void Dbdih::setAllowNodeStart(Uint32 nodeId, bool newState)
15685 NodeRecordPtr nodePtr;
15686 nodePtr.i = nodeId;
15687 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
15688 nodePtr.p->allowNodeStart = newState;
15689 }//Dbdih::setAllowNodeStart()
15691 void Dbdih::setNodeCopyCompleted(Uint32 nodeId, bool newState)
15693 NodeRecordPtr nodePtr;
15694 nodePtr.i = nodeId;
15695 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
15696 nodePtr.p->copyCompleted = newState;
15697 }//Dbdih::setNodeCopyCompleted()
15699 bool Dbdih::getAllowNodeStart(Uint32 nodeId)
15701 NodeRecordPtr nodePtr;
15702 nodePtr.i = nodeId;
15703 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
15704 return nodePtr.p->allowNodeStart;
15705 }//Dbdih::getAllowNodeStart()
15707 bool Dbdih::getNodeCopyCompleted(Uint32 nodeId)
15709 NodeRecordPtr nodePtr;
15710 nodePtr.i = nodeId;
15711 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
15712 return nodePtr.p->copyCompleted;
15713 }//Dbdih::getNodeCopyCompleted()
15715 bool Dbdih::checkNodeAlive(Uint32 nodeId)
15717 NodeRecordPtr nodePtr;
15718 nodePtr.i = nodeId;
15719 ndbrequire(nodeId > 0);
15720 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
15721 if (nodePtr.p->nodeStatus != NodeRecord::ALIVE) {
15722 return false;
15723 } else {
15724 return true;
15725 }//if
15726 }//Dbdih::checkNodeAlive()
15728 bool Dbdih::isMaster()
15730 return (reference() == cmasterdihref);
15731 }//Dbdih::isMaster()
15733 bool Dbdih::isActiveMaster()
15735 return ((reference() == cmasterdihref) && (cmasterState == MASTER_ACTIVE));
15736 }//Dbdih::isActiveMaster()
15738 Dbdih::NodeRecord::NodeRecord(){
15739 m_nodefailSteps.clear();
15740 gcpstate = NodeRecord::READY;
15742 activeStatus = Sysfile::NS_NotDefined;
15743 recNODE_FAILREP = ZFALSE;
15744 nodeGroup = ZNIL;
15745 dbtcFailCompleted = ZTRUE;
15746 dbdictFailCompleted = ZTRUE;
15747 dbdihFailCompleted = ZTRUE;
15748 dblqhFailCompleted = ZTRUE;
15749 noOfStartedChkpt = 0;
15750 noOfQueuedChkpt = 0;
15751 lcpStateAtTakeOver = (MasterLCPConf::State)255;
15753 activeTabptr = RNIL;
15754 nodeStatus = NodeRecord::NOT_IN_CLUSTER;
15755 useInTransactions = false;
15756 copyCompleted = false;
15757 allowNodeStart = true;
15760 // DICT lock slave
15762 void
15763 Dbdih::sendDictLockReq(Signal* signal, Uint32 lockType, Callback c)
15765 DictLockReq* req = (DictLockReq*)&signal->theData[0];
15766 DictLockSlavePtr lockPtr;
15768 c_dictLockSlavePool.seize(lockPtr);
15769 ndbrequire(lockPtr.i != RNIL);
15771 req->userPtr = lockPtr.i;
15772 req->lockType = lockType;
15773 req->userRef = reference();
15775 lockPtr.p->lockPtr = RNIL;
15776 lockPtr.p->lockType = lockType;
15777 lockPtr.p->locked = false;
15778 lockPtr.p->callback = c;
15780 // handle rolling upgrade
15782 Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version;
15784 const unsigned int get_major = getMajor(masterVersion);
15785 const unsigned int get_minor = getMinor(masterVersion);
15786 const unsigned int get_build = getBuild(masterVersion);
15787 ndbrequire(get_major >= 4);
15789 if (masterVersion < NDBD_DICT_LOCK_VERSION_5 ||
15790 masterVersion < NDBD_DICT_LOCK_VERSION_5_1 &&
15791 get_major == 5 && get_minor == 1 ||
15792 ERROR_INSERTED(7176)) {
15793 jam();
15795 infoEvent("DIH: detect upgrade: master node %u old version %u.%u.%u",
15796 (unsigned int)cmasterNodeId, get_major, get_minor, get_build);
15798 DictLockConf* conf = (DictLockConf*)&signal->theData[0];
15799 conf->userPtr = lockPtr.i;
15800 conf->lockType = lockType;
15801 conf->lockPtr = ZNIL;
15803 sendSignal(reference(), GSN_DICT_LOCK_CONF, signal,
15804 DictLockConf::SignalLength, JBB);
15805 return;
15809 BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId);
15810 sendSignal(dictMasterRef, GSN_DICT_LOCK_REQ, signal,
15811 DictLockReq::SignalLength, JBB);
15814 void
15815 Dbdih::execDICT_LOCK_CONF(Signal* signal)
15817 jamEntry();
15818 recvDictLockConf(signal);
15821 void
15822 Dbdih::execDICT_LOCK_REF(Signal* signal)
15824 jamEntry();
15825 ndbrequire(false);
15828 void
15829 Dbdih::recvDictLockConf(Signal* signal)
15831 const DictLockConf* conf = (const DictLockConf*)&signal->theData[0];
15833 DictLockSlavePtr lockPtr;
15834 c_dictLockSlavePool.getPtr(lockPtr, conf->userPtr);
15836 lockPtr.p->lockPtr = conf->lockPtr;
15837 ndbrequire(lockPtr.p->lockType == conf->lockType);
15838 ndbrequire(lockPtr.p->locked == false);
15839 lockPtr.p->locked = true;
15841 lockPtr.p->callback.m_callbackData = lockPtr.i;
15842 execute(signal, lockPtr.p->callback, 0);
15845 void
15846 Dbdih::sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI)
15848 DictUnlockOrd* ord = (DictUnlockOrd*)&signal->theData[0];
15850 DictLockSlavePtr lockPtr;
15851 c_dictLockSlavePool.getPtr(lockPtr, lockSlavePtrI);
15853 ord->lockPtr = lockPtr.p->lockPtr;
15854 ord->lockType = lockPtr.p->lockType;
15856 c_dictLockSlavePool.release(lockPtr);
15858 // handle rolling upgrade
15860 Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version;
15862 const unsigned int get_major = getMajor(masterVersion);
15863 const unsigned int get_minor = getMinor(masterVersion);
15864 ndbrequire(get_major >= 4);
15866 if (masterVersion < NDBD_DICT_LOCK_VERSION_5 ||
15867 masterVersion < NDBD_DICT_LOCK_VERSION_5_1 &&
15868 get_major == 5 && get_minor == 1 ||
15869 ERROR_INSERTED(7176)) {
15870 return;
15874 BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId);
15875 sendSignal(dictMasterRef, GSN_DICT_UNLOCK_ORD, signal,
15876 DictUnlockOrd::SignalLength, JBB);