Discussion:
[Linuxptp-devel] [PATCH] Do not qualify announce messages with stepsRemoved >= 255
Delio Brignoli
2013-08-21 12:09:51 UTC
Permalink
See IEEE1588-2008 section 9.3.2.5 (d)

Signed-off-by: Delio Brignoli <***@audioscience.com>
---
port.c | 7 +++++++
1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/port.c b/port.c
index 3e7a6c3..6644685 100644
--- a/port.c
+++ b/port.c
@@ -1382,6 +1382,13 @@ struct dataset *port_best_foreign(struct port *port)
static int process_announce(struct port *p, struct ptp_message *m)
{
int result = 0;
+
+ /* Do not qualify announce messages with stepsRemoved >= 255, see
+ * IEEE1588-2008 section 9.3.2.5 (d)
+ */
+ if (m->announce.stepsRemoved >= 255)
+ return result;
+
switch (p->state) {
case PS_INITIALIZING:
case PS_FAULTY:
--
1.7.0.4
Delio Brignoli
2013-08-21 12:09:52 UTC
Permalink
The previous behaviour was to assume a stale socket under the above
conditions and re-initialize the transport. Now the socket is checked
using getsockopt() and the transport re-initialized in case of an error.

UDP and UDP6 implementations are untested.

Signed-off-by: Delio Brignoli <***@audioscience.com>
---
port.c | 13 +++++++++++--
raw.c | 12 ++++++++++++
transport.c | 5 +++++
transport.h | 2 ++
transport_private.h | 2 ++
udp.c | 12 ++++++++++++
udp6.c | 12 ++++++++++++
7 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/port.c b/port.c
index 6644685..65f45d6 100644
--- a/port.c
+++ b/port.c
@@ -1331,6 +1331,14 @@ static int port_renew_transport(struct port *p)
return 0;
}

+static int port_check_transport(struct port *p)
+{
+ if (!port_is_enabled(p)) {
+ return 0;
+ }
+ return transport_check(p->trp, &p->fda);;
+}
+
/*
* Returns non-zero if the announce message is different than last.
*/
@@ -1991,8 +1999,9 @@ enum fsm_event port_event(struct port *p, int fd_index)
if (p->best)
fc_clear(p->best);
port_set_announce_tmo(p);
- if (clock_slave_only(p->clock) && port_renew_transport(p)) {
- return EV_FAULT_DETECTED;
+ if (clock_slave_only(p->clock) && port_check_transport(p)) {
+ if (port_renew_transport(p))
+ return EV_FAULT_DETECTED;
}
return EV_ANNOUNCE_RECEIPT_TIMEOUT_EXPIRES;

diff --git a/raw.c b/raw.c
index 326ccd2..18bae2c 100644
--- a/raw.c
+++ b/raw.c
@@ -138,6 +138,17 @@ static int raw_configure(int fd, int event, int index,
return -1;
}

+static int raw_check(struct transport *t, struct fdarray *fda)
+{
+ int sockerror = 0;
+ socklen_t len = sizeof(sockerror);
+ int err1 = getsockopt(fda->fd[FD_EVENT], SOL_SOCKET, SO_ERROR,
+ &sockerror, &len);
+ int err2 = getsockopt(fda->fd[FD_GENERAL], SOL_SOCKET, SO_ERROR,
+ &sockerror, &len);
+ return err1 || err2;
+}
+
static int raw_close(struct transport *t, struct fdarray *fda)
{
close(fda->fd[0]);
@@ -315,6 +326,7 @@ struct transport *raw_transport_create(void)
raw = calloc(1, sizeof(*raw));
if (!raw)
return NULL;
+ raw->t.check = raw_check;
raw->t.close = raw_close;
raw->t.open = raw_open;
raw->t.recv = raw_recv;
diff --git a/transport.c b/transport.c
index 3c70b2b..8af2f9d 100644
--- a/transport.c
+++ b/transport.c
@@ -24,6 +24,11 @@
#include "udp6.h"
#include "uds.h"

+int transport_check(struct transport *t, struct fdarray *fda)
+{
+ return t->check(t, fda);
+}
+
int transport_close(struct transport *t, struct fdarray *fda)
{
return t->close(t, fda);
diff --git a/transport.h b/transport.h
index 46af456..8581436 100644
--- a/transport.h
+++ b/transport.h
@@ -61,6 +61,8 @@ struct hw_timestamp {

struct transport;

+int transport_check(struct transport *t, struct fdarray *fda);
+
int transport_close(struct transport *t, struct fdarray *fda);

int transport_open(struct transport *t, char *name,
diff --git a/transport_private.h b/transport_private.h
index 0c553e1..5fca9f2 100644
--- a/transport_private.h
+++ b/transport_private.h
@@ -28,6 +28,8 @@
struct transport {
enum transport_type type;

+ int (*check)(struct transport *t, struct fdarray *fda);
+
int (*close)(struct transport *t, struct fdarray *fda);

int (*open)(struct transport *t, char *name, struct fdarray *fda,
diff --git a/udp.c b/udp.c
index fea0122..6274a31 100644
--- a/udp.c
+++ b/udp.c
@@ -86,6 +86,17 @@ static int mcast_join(int fd, int index, const struct sockaddr *grp,
return 0;
}

+static int udp_check(struct transport *t, struct fdarray *fda)
+{
+ int sockerror = 0;
+ socklen_t len = sizeof(sockerror);
+ int err1 = getsockopt(fda->fd[FD_EVENT], SOL_SOCKET, SO_ERROR,
+ &sockerror, &len);
+ int err2 = getsockopt(fda->fd[FD_GENERAL], SOL_SOCKET, SO_ERROR,
+ &sockerror, &len);
+ return err1 || err2;
+}
+
static int udp_close(struct transport *t, struct fdarray *fda)
{
close(fda->fd[0]);
@@ -256,6 +267,7 @@ struct transport *udp_transport_create(void)
struct udp *udp = calloc(1, sizeof(*udp));
if (!udp)
return NULL;
+ udp->t.check = udp_check;
udp->t.close = udp_close;
udp->t.open = udp_open;
udp->t.recv = udp_recv;
diff --git a/udp6.c b/udp6.c
index 2cb0179..a3ea6e5 100644
--- a/udp6.c
+++ b/udp6.c
@@ -93,6 +93,17 @@ static int mc_join(int fd, int index, const struct sockaddr *grp, socklen_t grpl
return 0;
}

+static int udp6_check(struct transport *t, struct fdarray *fda)
+{
+ int sockerror = 0;
+ socklen_t len = sizeof(sockerror);
+ int err1 = getsockopt(fda->fd[FD_EVENT], SOL_SOCKET, SO_ERROR,
+ &sockerror, &len);
+ int err2 = getsockopt(fda->fd[FD_GENERAL], SOL_SOCKET, SO_ERROR,
+ &sockerror, &len);
+ return err1 || err2;
+}
+
static int udp6_close(struct transport *t, struct fdarray *fda)
{
close(fda->fd[0]);
@@ -268,6 +279,7 @@ struct transport *udp6_transport_create(void)
udp6 = calloc(1, sizeof(*udp6));
if (!udp6)
return NULL;
+ udp6->t.check = udp6_check;
udp6->t.close = udp6_close;
udp6->t.open = udp6_open;
udp6->t.recv = udp6_recv;
--
1.7.0.4
Richard Cochran
2013-08-26 09:40:53 UTC
Permalink
Post by Delio Brignoli
The previous behaviour was to assume a stale socket under the above
conditions and re-initialize the transport. Now the socket is checked
using getsockopt() and the transport re-initialized in case of an error.
UDP and UDP6 implementations are untested.
I tried this patch with UDP and L2 transport and with the patch found
below. I never saw that the function port_renew_transport was called.
I tried four different platforms.

1. x86 using SW time stamping
2. xscale ixp using SW time stamping
3. beaglebone
4. coldfire MCF5234 (uClibc)

On the first three platforms, the socket still worked after the link
went down and up again. On the coldfire, the socket did *not* work,
and the call to port_check_transport() did not detect an error.

I am beginning to wonder whether what the problem was in the first
place, since I can't get the "normal" systems to trigger the
problem. (The coldfire does a lot of flaky things, so I don't really
trust it.)

Looking back over the mailing list, I see that Jacob also reported the
UDP sockets going "stale" after link loss. IIRC, I also saw this on
the normal platforms, but now I can't reproduce this issue. Maybe the
problem is related to joining the multicast group (which is handled by
the kernel in a fairly opaque manner).

In any case, this new patch does not work on my coldfire, and perhaps
others as well. I would like to play it safe and keep the existing
behavior of re-opening the sockets. It really should not hurt to do
so, even if it seems a bit silly.

Thanks,
Richard

---
diff --git a/port.c b/port.c
index 65f45d6..68bc244 100644
--- a/port.c
+++ b/port.c
@@ -2000,6 +2000,7 @@ enum fsm_event port_event(struct port *p, int fd_index)
fc_clear(p->best);
port_set_announce_tmo(p);
if (clock_slave_only(p->clock) && port_check_transport(p)) {
+ pr_err("port %hu: transport has errors", portnum(p));
if (port_renew_transport(p))
return EV_FAULT_DETECTED;
}

Delio Brignoli
2013-08-26 08:58:19 UTC
Permalink
Hello Richard,

Did you consider this for merging?

Thanks
--
Delio
Post by Delio Brignoli
See IEEE1588-2008 section 9.3.2.5 (d)
---
port.c | 7 +++++++
1 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/port.c b/port.c
index 3e7a6c3..6644685 100644
--- a/port.c
+++ b/port.c
@@ -1382,6 +1382,13 @@ struct dataset *port_best_foreign(struct port *port)
static int process_announce(struct port *p, struct ptp_message *m)
{
int result = 0;
+
+ /* Do not qualify announce messages with stepsRemoved >= 255, see
+ * IEEE1588-2008 section 9.3.2.5 (d)
+ */
+ if (m->announce.stepsRemoved >= 255)
+ return result;
+
switch (p->state) {
--
1.7.0.4
------------------------------------------------------------------------------
Introducing Performance Central, a new site from SourceForge and
AppDynamics. Performance Central is your source for news, insights,
analysis and resources for efficient Application Performance Management.
Visit us today!
http://pubads.g.doubleclick.net/gampad/clk?id=48897511&iu=/4140/ostg.clktrk
_______________________________________________
Linuxptp-devel mailing list
https://lists.sourceforge.net/lists/listinfo/linuxptp-devel
Richard Cochran
2013-08-26 09:03:33 UTC
Permalink
Post by Delio Brignoli
Hello Richard,
Did you consider this for merging?
Yes, I just pushed out patch #1.

It took me a while to test #2, and now I see that it really doesn't
work right. I will explain what I mean in another email.

Thanks,
Richard
Loading...