Discussion:
[Linuxptp-devel] [PATCH v3 RFC 1/3] Fix fault-clearing timer being erroneously re-armed
Richard Cochran
2013-02-01 04:22:43 UTC
Permalink
From: Delio Brignoli <***@audioscience.com>

Arm the fault-clearing timer only when an event causes a port to change state
to PS_FAULTY. Previously, if poll() returned because of an fd event other than
the fault-clearing timeout, the fault clearing timer would re-arm for
each port in PS_FAULTY state.

Signed-off-by: Delio Brignoli <***@audioscience.com>
---
clock.c | 9 ++++-----
1 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/clock.c b/clock.c
index b0a4de6..5d5ccbf 100644
--- a/clock.c
+++ b/clock.c
@@ -719,6 +719,10 @@ int clock_poll(struct clock *c)
if (EV_ANNOUNCE_RECEIPT_TIMEOUT_EXPIRES == event)
lost = 1;
port_dispatch(c->port[i], event, 0);
+ /* Clear any fault after a little while. */
+ if (PS_FAULTY == port_state(c->port[i])) {
+ clock_fault_timeout(c, i, 1);
+ }
}
}

@@ -728,11 +732,6 @@ int clock_poll(struct clock *c)
clock_fault_timeout(c, i, 0);
port_dispatch(c->port[i], EV_FAULT_CLEARED, 0);
}
-
- /* Clear any fault after a little while. */
- if (PS_FAULTY == port_state(c->port[i])) {
- clock_fault_timeout(c, i, 1);
- }
}

/* Check the UDS port. */
--
1.7.2.5
Richard Cochran
2013-02-01 04:22:45 UTC
Permalink
If the port resets itself after detecting a fault, then the polling events
for that port are no longer valid. This patch fixes a latent bug that
would appear if a fault and another event were to happen simultaneously.

Signed-off-by: Richard Cochran <***@gmail.com>
---
clock.c | 6 +++---
port.c | 7 ++++---
port.h | 4 +++-
3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/clock.c b/clock.c
index 880effc..704fcdc 100644
--- a/clock.c
+++ b/clock.c
@@ -691,7 +691,7 @@ struct PortIdentity clock_parent_identity(struct clock *c)

int clock_poll(struct clock *c)
{
- int cnt, i, j, k, lost = 0, sde = 0;
+ int cnt, err, i, j, k, lost = 0, sde = 0;
enum fsm_event event;

cnt = poll(c->pollfd, ARRAY_SIZE(c->pollfd), -1);
@@ -709,7 +709,7 @@ int clock_poll(struct clock *c)
for (i = 0; i < c->nports; i++) {

/* Let the ports handle their events. */
- for (j = 0; j < N_POLLFD; j++) {
+ for (j = err = 0; j < N_POLLFD && !err; j++) {
k = N_CLOCK_PFD * i + j;
if (c->pollfd[k].revents & (POLLIN|POLLPRI)) {
event = port_event(c->port[i], j);
@@ -717,7 +717,7 @@ int clock_poll(struct clock *c)
sde = 1;
if (EV_ANNOUNCE_RECEIPT_TIMEOUT_EXPIRES == event)
lost = 1;
- port_dispatch(c->port[i], event, 0);
+ err = port_dispatch(c->port[i], event, 0);
/* Clear any fault after a little while. */
if (PS_FAULTY == port_state(c->port[i])) {
clock_fault_timeout(c, i, 1);
diff --git a/port.c b/port.c
index c486812..9184ea0 100644
--- a/port.c
+++ b/port.c
@@ -1406,7 +1406,7 @@ struct foreign_clock *port_compute_best(struct port *p)
return p->best;
}

-void port_dispatch(struct port *p, enum fsm_event event, int mdiff)
+int port_dispatch(struct port *p, enum fsm_event event, int mdiff)
{
enum port_state next;

@@ -1432,11 +1432,11 @@ void port_dispatch(struct port *p, enum fsm_event event, int mdiff)
next = port_initialize(p) ? PS_FAULTY : PS_LISTENING;
port_show_transition(p, next, event);
p->state = next;
- return;
+ return 1;
}

if (next == p->state)
- return;
+ return 0;

port_show_transition(p, next, event);

@@ -1493,6 +1493,7 @@ void port_dispatch(struct port *p, enum fsm_event event, int mdiff)
};
}
p->state = next;
+ return 0;
}

enum fsm_event port_event(struct port *p, int fd_index)
diff --git a/port.h b/port.h
index 2fc71e4..55c15e7 100644
--- a/port.h
+++ b/port.h
@@ -68,8 +68,10 @@ struct foreign_clock *port_compute_best(struct port *port);
* @param port A pointer previously obtained via port_open().
* @param event One of the @a fsm_event codes.
* @param mdiff Whether a new master has been selected.
+ * @return Zero if the port's file descriptor array is still valid,
+ * and non-zero if it has become invalid.
*/
-void port_dispatch(struct port *p, enum fsm_event event, int mdiff);
+int port_dispatch(struct port *p, enum fsm_event event, int mdiff);

/**
* Generates state machine events based on activity on a port's file
--
1.7.2.5
Richard Cochran
2013-02-01 04:22:44 UTC
Permalink
From: Delio Brignoli <***@audioscience.com>

A timeout of 15 seconds is not always acceptable, make it configurable.

[RC - moved the handling of special case tmo=0 and added a break out
of the fd event loop in case the fds have been closed.]

Signed-off-by: Delio Brignoli <***@audioscience.com>
Signed-off-by: Richard Cochran <***@gmail.com>
---
clock.c | 10 +++++-----
config.c | 5 +++++
default.cfg | 1 +
ds.h | 1 +
gPTP.cfg | 1 +
port.c | 3 ++-
ptp4l.8 | 5 +++++
ptp4l.c | 1 +
8 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/clock.c b/clock.c
index 5d5ccbf..880effc 100644
--- a/clock.c
+++ b/clock.c
@@ -37,7 +37,6 @@
#include "util.h"

#define CLK_N_PORTS (MAX_PORTS + 1) /* plus one for the UDS interface */
-#define FAULT_RESET_SECONDS 15
#define N_CLOCK_PFD (N_POLLFD + 1) /* one extra per port, for the fault timer */
#define MAVE_LENGTH 10
#define POW2_41 ((double)(1ULL << 41))
@@ -65,7 +64,7 @@ struct clock {
struct port *port[CLK_N_PORTS];
struct pollfd pollfd[CLK_N_PORTS*N_CLOCK_PFD];
int fault_fd[CLK_N_PORTS];
- time_t fault_timeout;
+ time_t fault_timeout[CLK_N_PORTS];
int nports; /* does not include the UDS port */
int free_running;
int freq_est_interval;
@@ -115,8 +114,8 @@ static int clock_fault_timeout(struct clock *c, int index, int set)
};
if (set) {
pr_debug("waiting %d seconds to clear fault on port %d",
- c->fault_timeout, index);
- tmo.it_value.tv_sec = c->fault_timeout;
+ c->fault_timeout[index], index);
+ tmo.it_value.tv_sec = c->fault_timeout[index];
} else {
pr_debug("clearing fault on port %d", index);
}
@@ -479,10 +478,10 @@ struct clock *clock_create(int phc_index, struct interface *iface, int count,
c->pollfd[i].events = 0;
}

- c->fault_timeout = FAULT_RESET_SECONDS;
c->fest.max_count = 2;

for (i = 0; i < count; i++) {
+ c->fault_timeout[i] = iface[i].pod.fault_reset_seconds;
c->port[i] = port_open(phc_index, timestamping, 1+i, &iface[i], c);
if (!c->port[i]) {
pr_err("failed to open port %s", iface[i].name);
@@ -722,6 +721,7 @@ int clock_poll(struct clock *c)
/* Clear any fault after a little while. */
if (PS_FAULTY == port_state(c->port[i])) {
clock_fault_timeout(c, i, 1);
+ break;
}
}
}
diff --git a/config.c b/config.c
index a4a6261..379733f 100644
--- a/config.c
+++ b/config.c
@@ -106,6 +106,11 @@ static enum parser_result parse_pod_setting(const char *option,
return BAD_VALUE;
pod->follow_up_info = val ? 1 : 0;

+ } else if (!strcmp(option, "fault_reset_seconds")) {
+ if (1 != sscanf(value, "%u", &val))
+ return BAD_VALUE;
+ pod->fault_reset_seconds = val;
+
} else
return NOT_PARSED;

diff --git a/default.cfg b/default.cfg
index ee2700d..87ace08 100644
--- a/default.cfg
+++ b/default.cfg
@@ -21,6 +21,7 @@ logMinDelayReqInterval 0
logMinPdelayReqInterval 0
announceReceiptTimeout 3
delayAsymmetry 0
+fault_reset_seconds 15
#
# Run time options
#
diff --git a/ds.h b/ds.h
index 514679b..43cd4a6 100644
--- a/ds.h
+++ b/ds.h
@@ -112,6 +112,7 @@ struct port_defaults {
int path_trace_enabled;
int follow_up_info;
int freq_est_interval; /*log seconds*/
+ int fault_reset_seconds;
};

#endif
diff --git a/gPTP.cfg b/gPTP.cfg
index f186e78..90a55ac 100644
--- a/gPTP.cfg
+++ b/gPTP.cfg
@@ -20,6 +20,7 @@ logSyncInterval -3
logMinPdelayReqInterval 0
announceReceiptTimeout 3
delayAsymmetry 0
+fault_reset_seconds 15
#
# Run time options
#
diff --git a/port.c b/port.c
index 8b03bd9..c486812 100644
--- a/port.c
+++ b/port.c
@@ -1419,7 +1419,8 @@ void port_dispatch(struct port *p, enum fsm_event event, int mdiff)
next = ptp_fsm(p->state, event, mdiff);
}

- if (PS_INITIALIZING == next) {
+ if (PS_INITIALIZING == next ||
+ (PS_FAULTY == next && 0 == p->pod.fault_reset_seconds)) {
/*
* This is a special case. Since we initialize the
* port immediately, we can skip right to listening
diff --git a/ptp4l.8 b/ptp4l.8
index fadf854..791236c 100644
--- a/ptp4l.8
+++ b/ptp4l.8
@@ -181,6 +181,11 @@ The default is 0 (disabled).
Include the 802.1AS data in the Follow_Up messages if enabled.
The default is 0 (disabled).
.TP
+.B fault_reset_seconds
+The time in seconds between the detection of a port's fault and the fault
+being reset.
+The default is 15 (15 seconds).
+.TP
.B delay_mechanism
Select the delay mechanism. Possible values are E2E, P2P and Auto.
The default is E2E.
diff --git a/ptp4l.c b/ptp4l.c
index 4fc0c88..0ce6a5c 100644
--- a/ptp4l.c
+++ b/ptp4l.c
@@ -64,6 +64,7 @@ static struct config cfg_settings = {
.path_trace_enabled = 0,
.follow_up_info = 0,
.freq_est_interval = 1,
+ .fault_reset_seconds = 15,
},

.timestamping = TS_HARDWARE,
--
1.7.2.5
Loading...