From eb767412c6cc725473a7a23844c039a740f28983 Mon Sep 17 00:00:00 2001
From: aschwinn <al.schwinn@gsi.de>
Date: Mon, 25 Sep 2017 15:58:17 +0200
Subject: [PATCH] #33 - Refactor isTimeToReconnect

---
 .../communication/SilecsConnection.cpp        | 195 ++++++++----------
 .../communication/SilecsConnection.h          |  25 ++-
 2 files changed, 95 insertions(+), 125 deletions(-)

diff --git a/silecs-communication-cpp/src/silecs-communication/interface/communication/SilecsConnection.cpp b/silecs-communication-cpp/src/silecs-communication/interface/communication/SilecsConnection.cpp
index aa261e9..5893a30 100644
--- a/silecs-communication-cpp/src/silecs-communication/interface/communication/SilecsConnection.cpp
+++ b/silecs-communication-cpp/src/silecs-communication/interface/communication/SilecsConnection.cpp
@@ -27,7 +27,6 @@ namespace Silecs
 
 // static definition
 bool Connection::isAlive_ = false;
-const unsigned int Connection::numberConn_ = 3;
 
 //------------------------------------------------------------------------------------------------------------------------------------------------
 Connection::Connection(PLC* thePLC)
@@ -42,18 +41,14 @@ Connection::Connection(PLC* thePLC)
     isConnected_ = false;
 
     //Reset the Reconnection mechanism
-    timeConn_ = NULL;
-    delayConn_ = UrgentConnection; //initial reconnection delay
-    remainConn_ = numberConn_; //initial number of attempt
+    lastReconnectionAttempt_ = 0;
+    reconnectDelay_ = shortDelay; //initial reconnection delay
+    reconnectAttempts_ = 0;
 }
 
 //------------------------------------------------------------------------------------------------------------------------------------------------
 Connection::~Connection()
 {
-    //Connection has been closed before from the concrete connection object
-    if (timeConn_ != NULL)
-        delete timeConn_;
-
     delete connMux_;
     delete writeMux_;
     delete readMux_;
@@ -94,11 +89,6 @@ void Connection::disable(PLC* thePLC)
 //------------------------------------------------------------------------------------------------------------------------------------------------
 bool Connection::doOpen(PLC* thePLC)
 {
-    //LOG((COMM|DIAG)) << "Start attempt to open connection ..."; .. commented, seem like this creates to many log-entries for KIBANA
-    bool isReachable = false;
-    bool isOpen = false;
-    bool justConnected = false;
-
     {
         Lock lock(connMux_);
         if (isConnected_)
@@ -117,75 +107,74 @@ bool Connection::doOpen(PLC* thePLC)
             return isConnected_;
         }
 
-        if (isTimeToReconnect())
+        if (!isTimeToReconnect())
+        { // Do nothing, just wait a bit to dont pullute the log
+          return isConnected_;
+        }
+
+        if (ping(thePLC->getName().c_str(), NULL))
         {
-            isReachable = (ping((char *)thePLC->getName().c_str(), NULL) == 0);
-            if (isReachable)
-            {
-                LOG((COMM|DIAG)) << "It's time to reconnect";
-
-                // It's time to open the connection according to the (re)connection timing
-                // Let's try several times with limited delay (some ms).
-                // It allows wake-up frozen PLC (SIEMENS in particular) after long stop period.
-                bool isOpen = false;
-                unsigned int nbConn = 2; //for fast low-level iteration
-                for (unsigned int i = 0; i < nbConn; i++)
-                {
-                    LOG((COMM|DIAG)) << "Attempt to open PLC connection ....";
-                    isOpen = open(thePLC);
-                    if (isOpen)
-                    {
-                        LOG((COMM|DIAG)) << "Connection opened successfully";
-                        break;
-                    }
-                    usleep(100000); // wait 100ms
-                }
-            } //pingable?
-
-            if (!isOpen)
-            {
-                logError(thePLC, isReachable);
-                return isConnected_;
-            }
+          logError(thePLC, false);
+          return isConnected_;
+        }
 
-            if (thePLC->isSharedConnection())
-            {
-                std::ostringstream os;
-                os << "Shared connection with " << thePLC->getName() << " is established.";
-                if (thePLC->theHeader_ != NULL)
-                    TRACE("info") << os.str();
-                LOG(COMM) << os.str();
-            }
-            else
+        // It's time to open the connection according to the (re)connection timing
+        // Let's try several times with limited delay (some ms).
+        // It allows wake-up frozen PLC (SIEMENS in particular) after long stop period.
+        bool isOpen = false;
+        unsigned int nbConn = 2; //for fast low-level iteration
+        for (unsigned int i = 0; i < nbConn; i++)
+        {
+            LOG((COMM|DIAG)) << "Attempt to open PLC connection ....";
+            isOpen = open(thePLC);
+            if (isOpen)
             {
-                std::ostringstream os;
-                os << "Connection with " << thePLC->getName() << ":" << thePLC->theCluster_->getClassName() << "/v" << thePLC->theCluster_->getClassVersion() << " is established.";
-                if (thePLC->theHeader_ != NULL)
-                    TRACE("info") << os.str();
-                LOG(COMM) << os.str();
+               //reset reconnection settings
+                reconnectDelay_ = shortDelay;
+                reconnectAttempts_ = 0;
+
+                isAlive_ = true;
+                isConnected_ = true;
+                LOG((COMM|DIAG)) << "Connection opened successfully";
+                break;
             }
+            usleep(100000); // wait 100ms
+        }
 
-            isAlive_ = true;
-            isConnected_ = true;
-            justConnected = true; //retentive registers synchronization is required!
-
-            //Connection status has changed: update the diagnostic variables
-            LOG((COMM|DIAG)) << "Updating PLC status";
-            updateStatus(thePLC);
+        if (!isOpen)
+        {
+            logError(thePLC, true);
+            return isConnected_;
+        }
 
+        if (thePLC->isSharedConnection())
+        {
+            std::ostringstream os;
+            os << "Shared connection with " << thePLC->getName() << " is established.";
+            if (thePLC->theHeader_ != NULL)
+                TRACE("info") << os.str();
+            LOG(COMM) << os.str();
+        }
+        else
+        {
+            std::ostringstream os;
+            os << "Connection with " << thePLC->getName() << ":" << thePLC->theCluster_->getClassName() << "/v" << thePLC->theCluster_->getClassVersion() << " is established.";
+            if (thePLC->theHeader_ != NULL)
+                TRACE("info") << os.str();
+            LOG(COMM) << os.str();
         }
+
+        //Connection status has changed: update the diagnostic variables
+        LOG((COMM|DIAG)) << "Updating PLC status";
+        updateStatus(thePLC);
     } //release lock
 
     /* Process the Retentive registers synchronization each time the PLC is just (re)connected.
      * This is a recursive call: performs task::execute method from doOpen that is
      * called into execute itself. The recursion is terminated when SilecsHeader connection is closed finally.
      */
-    if (justConnected)
-    {
-        LOG((COMM|DIAG)) << "First connection - performing registers synchronization";
-        thePLC->updateLocalData();
-    }
-    LOG((COMM|DIAG)) << "isConnected_:" << isConnected_;
+    LOG((COMM|DIAG)) << "First connection - performing registers synchronization";
+    thePLC->updateLocalData();
     return isConnected_;
 }
 
@@ -304,13 +293,6 @@ void Connection::updateStatus(PLC* thePLC)
     //Connection status has changed (opened/closed)
     //Update the PLC diagnostic variables
     thePLC->updateStatus();
-
-    //Reset reconnection mecanisme in case of connection succeed
-    if (isConnected_)
-    {
-        delayConn_ = UrgentConnection; //initial reconnection delay
-        remainConn_ = numberConn_; //initial number of attempt
-    }
 }
 
 //------------------------------------------------------------------------------------------------------------------------------------------------
@@ -318,16 +300,15 @@ void Connection::logError(PLC* thePLC, bool isReachable)
 {
     std::string errorMsg = isReachable ? "Connection with " + thePLC->getName() + ":" + thePLC->theCluster_->getClassName() + "/v" + thePLC->theCluster_->getClassVersion() + " has failed.\n" : "Controller " + thePLC->getName() + " does not respond to ping, might be OFF!\n";
 
-    if (delayConn_ == LongTermConnection)
+    if (reconnectDelay_ == longDelay)
     {
-        if (remainConn_ > 0)
+        if (reconnectAttempts_ <  MAX_CONNECTION_ATTEMPTS_PER_DELAY)
         {
             std::ostringstream os;
-            os << errorMsg << "Periodic attempt to reconnect, delay " << delayConn_ << "s (tracing off).";
+            os << errorMsg << "Periodic attempt to reconnect, delay " << RECONNECTION_DELAYS[reconnectDelay_] << " seconds (tracing off).";
             if (thePLC->theHeader_ != NULL)
                 TRACE("error") << os.str();
             LOG(COMM) << os.str();
-            remainConn_ = 1; //Try to reconnect again and again (=1 means disable tracing).
         }
         /*else
          PLC does not respond anymore. It's probably stopped for a long time.
@@ -337,7 +318,7 @@ void Connection::logError(PLC* thePLC, bool isReachable)
     else
     {
         std::ostringstream os;
-        os << errorMsg << "Next attempt to reconnect in " << delayConn_ << "s if requested. Remains: " << remainConn_;
+        os << errorMsg << "Next attempt to reconnect in " << RECONNECTION_DELAYS[reconnectDelay_] << " seconds.";
         if (thePLC->theHeader_ != NULL)
             TRACE("error") << os.str();
         LOG(COMM) << os.str();
@@ -347,39 +328,25 @@ void Connection::logError(PLC* thePLC, bool isReachable)
 //------------------------------------------------------------------------------------------------------------------------------------------------
 bool Connection::isTimeToReconnect()
 {
-    bool toBeReconnected = false;
-    if (timeConn_ != NULL)
-    { //how many time from the last connect attempt
-        double delay = timeConn_->getDelay(S_UNIT);
-        if (delay >= double(delayConn_))
-        {
-            timeConn_->getValue(S_UNIT); //restart delay counting from now
-            toBeReconnected = true;
+  time_t now;
+  time(&now);
+  double secondsElapsed = difftime(now, lastReconnectionAttempt_);
 
-            if (remainConn_ > 0)
-                --remainConn_;
-            if (remainConn_ == 0)
-            {
-                if (delayConn_ != LongTermConnection)
-                {
-                    if (delayConn_ == UrgentConnection)
-                        delayConn_ = ShortTermConnection;
-                    else
-                        //(delayConn_ == ShortTermConnection)
-                        delayConn_ = LongTermConnection;
-                    remainConn_ = numberConn_;
-                }
-            }
-        }
-    }
-    else
-    {
-        //This is the first connection attempt, just start the Time counter.
-        timeConn_ = new TsCounter(true); //using hardware clock
-        timeConn_->getValue(S_UNIT); //start delay counting from now
-        toBeReconnected = true;
-    }
-    return toBeReconnected;
+  if ( secondsElapsed < RECONNECTION_DELAYS[reconnectDelay_])
+    return false;
+
+  lastReconnectionAttempt_ = now;
+  reconnectAttempts_ ++;
+
+  if( reconnectAttempts_ < MAX_CONNECTION_ATTEMPTS_PER_DELAY)
+    return true;
+
+  if( reconnectDelay_ < longDelay)
+  {
+    reconnectAttempts_ = 0;
+    reconnectDelay_ =  static_cast<ReconnectionDelay>(reconnectDelay_+ 1);
+  }
+  return true;
 }
 
 /* This macro is used to trap the unexpected broken pipe and
@@ -498,7 +465,7 @@ int rfcPing(char *ip, long ts)
 }
 
 /*..........................................................*/
-int Connection::ping(char *hostName, char *plcIP)
+int Connection::ping(const char *hostName, char *plcIP)
 {
     struct in_addr addr;
     struct hostent *hp;
diff --git a/silecs-communication-cpp/src/silecs-communication/interface/communication/SilecsConnection.h b/silecs-communication-cpp/src/silecs-communication/interface/communication/SilecsConnection.h
index 7c668b0..ffc6e65 100644
--- a/silecs-communication-cpp/src/silecs-communication/interface/communication/SilecsConnection.h
+++ b/silecs-communication-cpp/src/silecs-communication/interface/communication/SilecsConnection.h
@@ -24,13 +24,18 @@ namespace Silecs
 {
 class PLC;
 
-// Time to wait (second) before next reconnection attempt
 typedef enum
 {
-    UrgentConnection = 2,
-    ShortTermConnection = 20,
-    LongTermConnection = 60
-} ConnectionDelay;
+  shortDelay  = 0,
+  mediumDelay = 1,
+  longDelay   = 2
+} ReconnectionDelay;
+
+// Time to wait (second) before next reconnection attempt
+const static double RECONNECTION_DELAYS[3] = {2, 20, 60};
+
+// Maximum muber of connection attempts before using longer delay
+const static unsigned int MAX_CONNECTION_ATTEMPTS_PER_DELAY = 3;
 
 #ifdef __x86_64__
 typedef long ChannelID; //pointer is 64bits word
@@ -138,11 +143,9 @@ protected:
     Mutex* writeMux_; //Mutex used to protect the PLC write-channel resource
     Mutex* connMux_; //Mutex used to protect the global PLC connection resource (for open/close, etc.)
 
-    // Time counter to manage the automatic reconnection
-    TsCounter* timeConn_;
-    ConnectionDelay delayConn_; //current delay between two connection
-    unsigned long remainConn_; //number of attempt before next slow-down
-    static const unsigned int numberConn_; //number of connection attempt for each connection delay
+    time_t lastReconnectionAttempt_;
+    ReconnectionDelay reconnectDelay_; // time to wait before invoking reconnect (Seconds)
+    unsigned int reconnectAttempts_; // number of reconnection attempts ( will be cleared on successfull connect)
 
     /* ping function is a function used to check
      * if PLC is ON (~ping) before trying to connect it.
@@ -161,7 +164,7 @@ protected:
      * intermediate connect_nonb() function is used to perform a non-blockant connection.
      * intermediate rfcPing() function is used to check if PLC is ON
      */
-    static int ping(char *hostName, char *plcIP);
+    static int ping(const char *hostName, char *plcIP);
 
     // Communication Diagnostic & Monitoring
     static bool isAlive_; // PLC has repliyed to the ping: true/false
-- 
GitLab