Jacob Keller
2013-04-05 15:52:14 UTC
This patch modifies sk_receive in order to use poll() on POLLERR instead of the
tryagain loop as this resolves issues with drivers who do not return timestamps
quickly enough. It also resolves the issue of wasting time repeating every
microsecond. It lets the kernel sleep our application until the data or timeout arrives.
This change also replaces the old tx_timestamp_retries config value with
tx_timestamp_timeout specified in milliseconds (the smallest length of time poll
accepts). This does have the side effect of increasing the minimum delay before
missing a timestamp by up to 1ms, but the poll should return sooner in the
normal case where a packet timestamp was not dropped.
This change vastly improves some devices and cleans the code up by simplifying a
race condition window due to drivers returning tx timestamp on the error queue.
Signed-off-by: Jacob Keller <***@intel.com>
---
config.c | 4 ++--
config.h | 2 +-
default.cfg | 2 +-
gPTP.cfg | 2 +-
ptp4l.8 | 8 ++++----
ptp4l.c | 2 +-
sk.c | 37 ++++++++++++++++---------------------
sk.h | 6 +++---
8 files changed, 29 insertions(+), 34 deletions(-)
diff --git a/config.c b/config.c
index 4c4a7b1..08a134f 100644
--- a/config.c
+++ b/config.c
@@ -270,10 +270,10 @@ static enum parser_result parse_global_setting(const char *option,
return BAD_VALUE;
*cfg->assume_two_step = val ? 1 : 0;
- } else if (!strcmp(option, "tx_timestamp_retries")) {
+ } else if (!strcmp(option, "tx_timestamp_timeout")) {
if (1 != sscanf(value, "%u", &val) || !(val > 0))
return BAD_VALUE;
- *cfg->tx_timestamp_retries = val;
+ *cfg->tx_timestamp_timeout = val;
} else if (!strcmp(option, "pi_proportional_const")) {
if (1 != sscanf(value, "%lf", &df) || !(df >= 0.0 && df < 1.0))
diff --git a/config.h b/config.h
index 497d683..e7f1f50 100644
--- a/config.h
+++ b/config.h
@@ -61,7 +61,7 @@ struct config {
struct default_ds dds;
struct port_defaults pod;
int *assume_two_step;
- int *tx_timestamp_retries;
+ int *tx_timestamp_timeout;
enum servo_type clock_servo;
diff --git a/default.cfg b/default.cfg
index 84cc0df..4694e32 100644
--- a/default.cfg
+++ b/default.cfg
@@ -30,7 +30,7 @@ assume_two_step 0
logging_level 6
path_trace_enabled 0
follow_up_info 0
-tx_timestamp_retries 100
+tx_timestamp_timeout 1
use_syslog 1
verbose 0
summary_interval 0
diff --git a/gPTP.cfg b/gPTP.cfg
index f14990b..83a44bf 100644
--- a/gPTP.cfg
+++ b/gPTP.cfg
@@ -29,7 +29,7 @@ assume_two_step 1
logging_level 6
path_trace_enabled 1
follow_up_info 1
-tx_timestamp_retries 100
+tx_timestamp_timeout 1
use_syslog 1
verbose 0
summary_interval 0
diff --git a/ptp4l.8 b/ptp4l.8
index 45ef6f3..ee9cccd 100644
--- a/ptp4l.8
+++ b/ptp4l.8
@@ -264,10 +264,10 @@ Treat one-step responses as two-step if enabled. It is used to work around
buggy 802.1AS switches.
The default is 0 (disabled).
.TP
-.B tx_timestamp_retries
-The number of retries to fetch the tx time stamp from the kernel when a message
-is sent.
-The default is 100.
+.B tx_timestamp_timeout
+The number of milliseconds to poll waiting for the tx time stamp from the kernel
+when a message has recently been sent.
+The default is 1.
.TP
.B clock_servo
The servo which is used to synchronize the local clock. Currently only one
diff --git a/ptp4l.c b/ptp4l.c
index 1312cd6..3bf1f86 100644
--- a/ptp4l.c
+++ b/ptp4l.c
@@ -89,7 +89,7 @@ static struct config cfg_settings = {
.transport = TRANS_UDP_IPV4,
.assume_two_step = &assume_two_step,
- .tx_timestamp_retries = &sk_tx_retries,
+ .tx_timestamp_timeout = &sk_tx_timeout,
.clock_servo = CLOCK_SERVO_PI,
diff --git a/sk.c b/sk.c
index 786d4d3..841bc89 100644
--- a/sk.c
+++ b/sk.c
@@ -28,13 +28,14 @@
#include <unistd.h>
#include <ifaddrs.h>
#include <stdlib.h>
+#include <poll.h>
#include "print.h"
#include "sk.h"
/* globals */
-int sk_tx_retries = 100;
+int sk_tx_timeout = 1;
/* private methods */
@@ -200,7 +201,7 @@ int sk_receive(int fd, void *buf, int buflen,
struct hw_timestamp *hwts, int flags)
{
char control[256];
- int cnt = 0, level, try_again, type;
+ int cnt = 0, res = 0, level, try_again, type;
struct cmsghdr *cm;
struct iovec iov = { buf, buflen };
struct msghdr msg;
@@ -213,28 +214,22 @@ int sk_receive(int fd, void *buf, int buflen,
msg.msg_control = control;
msg.msg_controllen = sizeof(control);
- try_again = flags == MSG_ERRQUEUE ? sk_tx_retries : 1;
-
- for ( ; try_again; try_again--) {
- cnt = recvmsg(fd, &msg, flags);
- if (cnt >= 0) {
- break;
- }
- if (errno == EINTR) {
- try_again++;
- } else if (errno == EAGAIN) {
- usleep(1);
- } else {
- break;
+ if (flags == MSG_ERRQUEUE) {
+ struct pollfd pfd = { fd, 0, 0 };
+ res = poll(&pfd, 1, sk_tx_timeout);
+ if (res < 1) {
+ pr_err("poll tx timestamp failed: %m");
+ return res;
+ } else if (!(pfd.revents & POLLERR)) {
+ pr_err("poll tx woke up on non ERR event");
+ return -1;
}
}
- if (cnt < 1) {
- if (flags == MSG_ERRQUEUE)
- pr_err("recvmsg tx timestamp failed: %m");
- else
- pr_err("recvmsg failed: %m");
- }
+ cnt = recvmsg(fd, &msg, flags);
+ if (cnt < 1)
+ pr_err("recvmsg%sfailed: %m",
+ flags == MSG_ERRQUEUE ? " tx timestamp " : " ");
for (cm = CMSG_FIRSTHDR(&msg); cm != NULL; cm = CMSG_NXTHDR(&msg, cm)) {
level = cm->cmsg_level;
diff --git a/sk.h b/sk.h
index 1923742..6070537 100644
--- a/sk.h
+++ b/sk.h
@@ -97,9 +97,9 @@ int sk_timestamping_init(int fd, char *device, enum timestamp_type type,
enum transport_type transport);
/**
- * Limits the number of RECVMSG(2) calls when attempting to obtain a
- * transmit time stamp on an event message.
+ * Limits the time that RECVMSG(2) will poll while waiting for the tx timestamp
+ * if MSG_ERRQUEUE is set. Specified in milliseconds.
*/
-extern int sk_tx_retries;
+extern int sk_tx_timeout;
#endif
tryagain loop as this resolves issues with drivers who do not return timestamps
quickly enough. It also resolves the issue of wasting time repeating every
microsecond. It lets the kernel sleep our application until the data or timeout arrives.
This change also replaces the old tx_timestamp_retries config value with
tx_timestamp_timeout specified in milliseconds (the smallest length of time poll
accepts). This does have the side effect of increasing the minimum delay before
missing a timestamp by up to 1ms, but the poll should return sooner in the
normal case where a packet timestamp was not dropped.
This change vastly improves some devices and cleans the code up by simplifying a
race condition window due to drivers returning tx timestamp on the error queue.
Signed-off-by: Jacob Keller <***@intel.com>
---
config.c | 4 ++--
config.h | 2 +-
default.cfg | 2 +-
gPTP.cfg | 2 +-
ptp4l.8 | 8 ++++----
ptp4l.c | 2 +-
sk.c | 37 ++++++++++++++++---------------------
sk.h | 6 +++---
8 files changed, 29 insertions(+), 34 deletions(-)
diff --git a/config.c b/config.c
index 4c4a7b1..08a134f 100644
--- a/config.c
+++ b/config.c
@@ -270,10 +270,10 @@ static enum parser_result parse_global_setting(const char *option,
return BAD_VALUE;
*cfg->assume_two_step = val ? 1 : 0;
- } else if (!strcmp(option, "tx_timestamp_retries")) {
+ } else if (!strcmp(option, "tx_timestamp_timeout")) {
if (1 != sscanf(value, "%u", &val) || !(val > 0))
return BAD_VALUE;
- *cfg->tx_timestamp_retries = val;
+ *cfg->tx_timestamp_timeout = val;
} else if (!strcmp(option, "pi_proportional_const")) {
if (1 != sscanf(value, "%lf", &df) || !(df >= 0.0 && df < 1.0))
diff --git a/config.h b/config.h
index 497d683..e7f1f50 100644
--- a/config.h
+++ b/config.h
@@ -61,7 +61,7 @@ struct config {
struct default_ds dds;
struct port_defaults pod;
int *assume_two_step;
- int *tx_timestamp_retries;
+ int *tx_timestamp_timeout;
enum servo_type clock_servo;
diff --git a/default.cfg b/default.cfg
index 84cc0df..4694e32 100644
--- a/default.cfg
+++ b/default.cfg
@@ -30,7 +30,7 @@ assume_two_step 0
logging_level 6
path_trace_enabled 0
follow_up_info 0
-tx_timestamp_retries 100
+tx_timestamp_timeout 1
use_syslog 1
verbose 0
summary_interval 0
diff --git a/gPTP.cfg b/gPTP.cfg
index f14990b..83a44bf 100644
--- a/gPTP.cfg
+++ b/gPTP.cfg
@@ -29,7 +29,7 @@ assume_two_step 1
logging_level 6
path_trace_enabled 1
follow_up_info 1
-tx_timestamp_retries 100
+tx_timestamp_timeout 1
use_syslog 1
verbose 0
summary_interval 0
diff --git a/ptp4l.8 b/ptp4l.8
index 45ef6f3..ee9cccd 100644
--- a/ptp4l.8
+++ b/ptp4l.8
@@ -264,10 +264,10 @@ Treat one-step responses as two-step if enabled. It is used to work around
buggy 802.1AS switches.
The default is 0 (disabled).
.TP
-.B tx_timestamp_retries
-The number of retries to fetch the tx time stamp from the kernel when a message
-is sent.
-The default is 100.
+.B tx_timestamp_timeout
+The number of milliseconds to poll waiting for the tx time stamp from the kernel
+when a message has recently been sent.
+The default is 1.
.TP
.B clock_servo
The servo which is used to synchronize the local clock. Currently only one
diff --git a/ptp4l.c b/ptp4l.c
index 1312cd6..3bf1f86 100644
--- a/ptp4l.c
+++ b/ptp4l.c
@@ -89,7 +89,7 @@ static struct config cfg_settings = {
.transport = TRANS_UDP_IPV4,
.assume_two_step = &assume_two_step,
- .tx_timestamp_retries = &sk_tx_retries,
+ .tx_timestamp_timeout = &sk_tx_timeout,
.clock_servo = CLOCK_SERVO_PI,
diff --git a/sk.c b/sk.c
index 786d4d3..841bc89 100644
--- a/sk.c
+++ b/sk.c
@@ -28,13 +28,14 @@
#include <unistd.h>
#include <ifaddrs.h>
#include <stdlib.h>
+#include <poll.h>
#include "print.h"
#include "sk.h"
/* globals */
-int sk_tx_retries = 100;
+int sk_tx_timeout = 1;
/* private methods */
@@ -200,7 +201,7 @@ int sk_receive(int fd, void *buf, int buflen,
struct hw_timestamp *hwts, int flags)
{
char control[256];
- int cnt = 0, level, try_again, type;
+ int cnt = 0, res = 0, level, try_again, type;
struct cmsghdr *cm;
struct iovec iov = { buf, buflen };
struct msghdr msg;
@@ -213,28 +214,22 @@ int sk_receive(int fd, void *buf, int buflen,
msg.msg_control = control;
msg.msg_controllen = sizeof(control);
- try_again = flags == MSG_ERRQUEUE ? sk_tx_retries : 1;
-
- for ( ; try_again; try_again--) {
- cnt = recvmsg(fd, &msg, flags);
- if (cnt >= 0) {
- break;
- }
- if (errno == EINTR) {
- try_again++;
- } else if (errno == EAGAIN) {
- usleep(1);
- } else {
- break;
+ if (flags == MSG_ERRQUEUE) {
+ struct pollfd pfd = { fd, 0, 0 };
+ res = poll(&pfd, 1, sk_tx_timeout);
+ if (res < 1) {
+ pr_err("poll tx timestamp failed: %m");
+ return res;
+ } else if (!(pfd.revents & POLLERR)) {
+ pr_err("poll tx woke up on non ERR event");
+ return -1;
}
}
- if (cnt < 1) {
- if (flags == MSG_ERRQUEUE)
- pr_err("recvmsg tx timestamp failed: %m");
- else
- pr_err("recvmsg failed: %m");
- }
+ cnt = recvmsg(fd, &msg, flags);
+ if (cnt < 1)
+ pr_err("recvmsg%sfailed: %m",
+ flags == MSG_ERRQUEUE ? " tx timestamp " : " ");
for (cm = CMSG_FIRSTHDR(&msg); cm != NULL; cm = CMSG_NXTHDR(&msg, cm)) {
level = cm->cmsg_level;
diff --git a/sk.h b/sk.h
index 1923742..6070537 100644
--- a/sk.h
+++ b/sk.h
@@ -97,9 +97,9 @@ int sk_timestamping_init(int fd, char *device, enum timestamp_type type,
enum transport_type transport);
/**
- * Limits the number of RECVMSG(2) calls when attempting to obtain a
- * transmit time stamp on an event message.
+ * Limits the time that RECVMSG(2) will poll while waiting for the tx timestamp
+ * if MSG_ERRQUEUE is set. Specified in milliseconds.
*/
-extern int sk_tx_retries;
+extern int sk_tx_timeout;
#endif