From eb767412c6cc725473a7a23844c039a740f28983 Mon Sep 17 00:00:00 2001 From: aschwinn <al.schwinn@gsi.de> Date: Mon, 25 Sep 2017 15:58:17 +0200 Subject: [PATCH] #33 - Refactor isTimeToReconnect --- .../communication/SilecsConnection.cpp | 195 ++++++++---------- .../communication/SilecsConnection.h | 25 ++- 2 files changed, 95 insertions(+), 125 deletions(-) diff --git a/silecs-communication-cpp/src/silecs-communication/interface/communication/SilecsConnection.cpp b/silecs-communication-cpp/src/silecs-communication/interface/communication/SilecsConnection.cpp index aa261e9..5893a30 100644 --- a/silecs-communication-cpp/src/silecs-communication/interface/communication/SilecsConnection.cpp +++ b/silecs-communication-cpp/src/silecs-communication/interface/communication/SilecsConnection.cpp @@ -27,7 +27,6 @@ namespace Silecs // static definition bool Connection::isAlive_ = false; -const unsigned int Connection::numberConn_ = 3; //------------------------------------------------------------------------------------------------------------------------------------------------ Connection::Connection(PLC* thePLC) @@ -42,18 +41,14 @@ Connection::Connection(PLC* thePLC) isConnected_ = false; //Reset the Reconnection mechanism - timeConn_ = NULL; - delayConn_ = UrgentConnection; //initial reconnection delay - remainConn_ = numberConn_; //initial number of attempt + lastReconnectionAttempt_ = 0; + reconnectDelay_ = shortDelay; //initial reconnection delay + reconnectAttempts_ = 0; } //------------------------------------------------------------------------------------------------------------------------------------------------ Connection::~Connection() { - //Connection has been closed before from the concrete connection object - if (timeConn_ != NULL) - delete timeConn_; - delete connMux_; delete writeMux_; delete readMux_; @@ -94,11 +89,6 @@ void Connection::disable(PLC* thePLC) //------------------------------------------------------------------------------------------------------------------------------------------------ bool Connection::doOpen(PLC* thePLC) { - //LOG((COMM|DIAG)) << "Start attempt to open connection ..."; .. commented, seem like this creates to many log-entries for KIBANA - bool isReachable = false; - bool isOpen = false; - bool justConnected = false; - { Lock lock(connMux_); if (isConnected_) @@ -117,75 +107,74 @@ bool Connection::doOpen(PLC* thePLC) return isConnected_; } - if (isTimeToReconnect()) + if (!isTimeToReconnect()) + { // Do nothing, just wait a bit to dont pullute the log + return isConnected_; + } + + if (ping(thePLC->getName().c_str(), NULL)) { - isReachable = (ping((char *)thePLC->getName().c_str(), NULL) == 0); - if (isReachable) - { - LOG((COMM|DIAG)) << "It's time to reconnect"; - - // It's time to open the connection according to the (re)connection timing - // Let's try several times with limited delay (some ms). - // It allows wake-up frozen PLC (SIEMENS in particular) after long stop period. - bool isOpen = false; - unsigned int nbConn = 2; //for fast low-level iteration - for (unsigned int i = 0; i < nbConn; i++) - { - LOG((COMM|DIAG)) << "Attempt to open PLC connection ...."; - isOpen = open(thePLC); - if (isOpen) - { - LOG((COMM|DIAG)) << "Connection opened successfully"; - break; - } - usleep(100000); // wait 100ms - } - } //pingable? - - if (!isOpen) - { - logError(thePLC, isReachable); - return isConnected_; - } + logError(thePLC, false); + return isConnected_; + } - if (thePLC->isSharedConnection()) - { - std::ostringstream os; - os << "Shared connection with " << thePLC->getName() << " is established."; - if (thePLC->theHeader_ != NULL) - TRACE("info") << os.str(); - LOG(COMM) << os.str(); - } - else + // It's time to open the connection according to the (re)connection timing + // Let's try several times with limited delay (some ms). + // It allows wake-up frozen PLC (SIEMENS in particular) after long stop period. + bool isOpen = false; + unsigned int nbConn = 2; //for fast low-level iteration + for (unsigned int i = 0; i < nbConn; i++) + { + LOG((COMM|DIAG)) << "Attempt to open PLC connection ...."; + isOpen = open(thePLC); + if (isOpen) { - std::ostringstream os; - os << "Connection with " << thePLC->getName() << ":" << thePLC->theCluster_->getClassName() << "/v" << thePLC->theCluster_->getClassVersion() << " is established."; - if (thePLC->theHeader_ != NULL) - TRACE("info") << os.str(); - LOG(COMM) << os.str(); + //reset reconnection settings + reconnectDelay_ = shortDelay; + reconnectAttempts_ = 0; + + isAlive_ = true; + isConnected_ = true; + LOG((COMM|DIAG)) << "Connection opened successfully"; + break; } + usleep(100000); // wait 100ms + } - isAlive_ = true; - isConnected_ = true; - justConnected = true; //retentive registers synchronization is required! - - //Connection status has changed: update the diagnostic variables - LOG((COMM|DIAG)) << "Updating PLC status"; - updateStatus(thePLC); + if (!isOpen) + { + logError(thePLC, true); + return isConnected_; + } + if (thePLC->isSharedConnection()) + { + std::ostringstream os; + os << "Shared connection with " << thePLC->getName() << " is established."; + if (thePLC->theHeader_ != NULL) + TRACE("info") << os.str(); + LOG(COMM) << os.str(); + } + else + { + std::ostringstream os; + os << "Connection with " << thePLC->getName() << ":" << thePLC->theCluster_->getClassName() << "/v" << thePLC->theCluster_->getClassVersion() << " is established."; + if (thePLC->theHeader_ != NULL) + TRACE("info") << os.str(); + LOG(COMM) << os.str(); } + + //Connection status has changed: update the diagnostic variables + LOG((COMM|DIAG)) << "Updating PLC status"; + updateStatus(thePLC); } //release lock /* Process the Retentive registers synchronization each time the PLC is just (re)connected. * This is a recursive call: performs task::execute method from doOpen that is * called into execute itself. The recursion is terminated when SilecsHeader connection is closed finally. */ - if (justConnected) - { - LOG((COMM|DIAG)) << "First connection - performing registers synchronization"; - thePLC->updateLocalData(); - } - LOG((COMM|DIAG)) << "isConnected_:" << isConnected_; + LOG((COMM|DIAG)) << "First connection - performing registers synchronization"; + thePLC->updateLocalData(); return isConnected_; } @@ -304,13 +293,6 @@ void Connection::updateStatus(PLC* thePLC) //Connection status has changed (opened/closed) //Update the PLC diagnostic variables thePLC->updateStatus(); - - //Reset reconnection mecanisme in case of connection succeed - if (isConnected_) - { - delayConn_ = UrgentConnection; //initial reconnection delay - remainConn_ = numberConn_; //initial number of attempt - } } //------------------------------------------------------------------------------------------------------------------------------------------------ @@ -318,16 +300,15 @@ void Connection::logError(PLC* thePLC, bool isReachable) { std::string errorMsg = isReachable ? "Connection with " + thePLC->getName() + ":" + thePLC->theCluster_->getClassName() + "/v" + thePLC->theCluster_->getClassVersion() + " has failed.\n" : "Controller " + thePLC->getName() + " does not respond to ping, might be OFF!\n"; - if (delayConn_ == LongTermConnection) + if (reconnectDelay_ == longDelay) { - if (remainConn_ > 0) + if (reconnectAttempts_ < MAX_CONNECTION_ATTEMPTS_PER_DELAY) { std::ostringstream os; - os << errorMsg << "Periodic attempt to reconnect, delay " << delayConn_ << "s (tracing off)."; + os << errorMsg << "Periodic attempt to reconnect, delay " << RECONNECTION_DELAYS[reconnectDelay_] << " seconds (tracing off)."; if (thePLC->theHeader_ != NULL) TRACE("error") << os.str(); LOG(COMM) << os.str(); - remainConn_ = 1; //Try to reconnect again and again (=1 means disable tracing). } /*else PLC does not respond anymore. It's probably stopped for a long time. @@ -337,7 +318,7 @@ void Connection::logError(PLC* thePLC, bool isReachable) else { std::ostringstream os; - os << errorMsg << "Next attempt to reconnect in " << delayConn_ << "s if requested. Remains: " << remainConn_; + os << errorMsg << "Next attempt to reconnect in " << RECONNECTION_DELAYS[reconnectDelay_] << " seconds."; if (thePLC->theHeader_ != NULL) TRACE("error") << os.str(); LOG(COMM) << os.str(); @@ -347,39 +328,25 @@ void Connection::logError(PLC* thePLC, bool isReachable) //------------------------------------------------------------------------------------------------------------------------------------------------ bool Connection::isTimeToReconnect() { - bool toBeReconnected = false; - if (timeConn_ != NULL) - { //how many time from the last connect attempt - double delay = timeConn_->getDelay(S_UNIT); - if (delay >= double(delayConn_)) - { - timeConn_->getValue(S_UNIT); //restart delay counting from now - toBeReconnected = true; + time_t now; + time(&now); + double secondsElapsed = difftime(now, lastReconnectionAttempt_); - if (remainConn_ > 0) - --remainConn_; - if (remainConn_ == 0) - { - if (delayConn_ != LongTermConnection) - { - if (delayConn_ == UrgentConnection) - delayConn_ = ShortTermConnection; - else - //(delayConn_ == ShortTermConnection) - delayConn_ = LongTermConnection; - remainConn_ = numberConn_; - } - } - } - } - else - { - //This is the first connection attempt, just start the Time counter. - timeConn_ = new TsCounter(true); //using hardware clock - timeConn_->getValue(S_UNIT); //start delay counting from now - toBeReconnected = true; - } - return toBeReconnected; + if ( secondsElapsed < RECONNECTION_DELAYS[reconnectDelay_]) + return false; + + lastReconnectionAttempt_ = now; + reconnectAttempts_ ++; + + if( reconnectAttempts_ < MAX_CONNECTION_ATTEMPTS_PER_DELAY) + return true; + + if( reconnectDelay_ < longDelay) + { + reconnectAttempts_ = 0; + reconnectDelay_ = static_cast<ReconnectionDelay>(reconnectDelay_+ 1); + } + return true; } /* This macro is used to trap the unexpected broken pipe and @@ -498,7 +465,7 @@ int rfcPing(char *ip, long ts) } /*..........................................................*/ -int Connection::ping(char *hostName, char *plcIP) +int Connection::ping(const char *hostName, char *plcIP) { struct in_addr addr; struct hostent *hp; diff --git a/silecs-communication-cpp/src/silecs-communication/interface/communication/SilecsConnection.h b/silecs-communication-cpp/src/silecs-communication/interface/communication/SilecsConnection.h index 7c668b0..ffc6e65 100644 --- a/silecs-communication-cpp/src/silecs-communication/interface/communication/SilecsConnection.h +++ b/silecs-communication-cpp/src/silecs-communication/interface/communication/SilecsConnection.h @@ -24,13 +24,18 @@ namespace Silecs { class PLC; -// Time to wait (second) before next reconnection attempt typedef enum { - UrgentConnection = 2, - ShortTermConnection = 20, - LongTermConnection = 60 -} ConnectionDelay; + shortDelay = 0, + mediumDelay = 1, + longDelay = 2 +} ReconnectionDelay; + +// Time to wait (second) before next reconnection attempt +const static double RECONNECTION_DELAYS[3] = {2, 20, 60}; + +// Maximum muber of connection attempts before using longer delay +const static unsigned int MAX_CONNECTION_ATTEMPTS_PER_DELAY = 3; #ifdef __x86_64__ typedef long ChannelID; //pointer is 64bits word @@ -138,11 +143,9 @@ protected: Mutex* writeMux_; //Mutex used to protect the PLC write-channel resource Mutex* connMux_; //Mutex used to protect the global PLC connection resource (for open/close, etc.) - // Time counter to manage the automatic reconnection - TsCounter* timeConn_; - ConnectionDelay delayConn_; //current delay between two connection - unsigned long remainConn_; //number of attempt before next slow-down - static const unsigned int numberConn_; //number of connection attempt for each connection delay + time_t lastReconnectionAttempt_; + ReconnectionDelay reconnectDelay_; // time to wait before invoking reconnect (Seconds) + unsigned int reconnectAttempts_; // number of reconnection attempts ( will be cleared on successfull connect) /* ping function is a function used to check * if PLC is ON (~ping) before trying to connect it. @@ -161,7 +164,7 @@ protected: * intermediate connect_nonb() function is used to perform a non-blockant connection. * intermediate rfcPing() function is used to check if PLC is ON */ - static int ping(char *hostName, char *plcIP); + static int ping(const char *hostName, char *plcIP); // Communication Diagnostic & Monitoring static bool isAlive_; // PLC has repliyed to the ping: true/false -- GitLab