Discussion:
[Linuxptp-devel] [-v2 PATCH] ptp4l: enable using poll for tx timestamps on kernels which support it.
Jacob Keller
2013-04-02 23:31:23 UTC
Permalink
A recent patch on netdev has added a new socket option which enables an
application to select on timestamps in the error queue only. This patch will be
able to clean up the sk_receive routine to use select instead of polling every
microsecond. This change should reduce number of errors due to sk_tx_retries
count failures, and vastly improve the overall sequence of code. It may be
possible to further optimize the path by using select farther up the chain
instead of inside sk_timestamp but this initial patch should at least fix some issues.

This patch also introduces a new option for the configuration file
tx_timestamp_timeout which will automatically set the tx_timestamp_retries value
if that setting is not there. It is specified in milliseconds (as the smallest
wait length in poll). This is intended to supersede tx_timestamp_retries option,
but leave support in if the user directly sets the tx_timestamp_retries option.

-v2
* Use poll instead of select
* Create tx_timestamp_timeout configuration option, specified in milliseconds
* Move the SO_SELECT_ERR_QUEUE option to missing.h

Signed-off-by: Jacob Keller <***@intel.com>
---
config.c | 14 ++++++++
config.h | 1 +
default.cfg | 2 +
gPTP.cfg | 2 +
missing.h | 4 ++
ptp4l.8 | 13 +++++++-
ptp4l.c | 1 +
raw.c | 4 +-
sk.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++-------
sk.h | 20 +++++++++++-
udp.c | 4 +-
udp6.c | 4 +-
12 files changed, 145 insertions(+), 23 deletions(-)

diff --git a/config.c b/config.c
index 4727942..5ef98ef 100644
--- a/config.c
+++ b/config.c
@@ -24,6 +24,9 @@
#include "print.h"
#include "util.h"

+/* global to check whether tx_timestamp_retries was set */
+int config_set_tx_retries = 0;
+
enum config_section {
GLOBAL_SECTION,
PORT_SECTION,
@@ -274,6 +277,17 @@ static enum parser_result parse_global_setting(const char *option,
if (1 != sscanf(value, "%u", &val) || !(val > 0))
return BAD_VALUE;
*cfg->tx_timestamp_retries = val;
+ config_set_tx_retries = 1;
+
+ } else if (!strcmp(option, "tx_timestamp_timeout")) {
+ if (1 != sscanf(value, "%u", &val) || (!val > 0))
+ return BAD_VALUE;
+ *cfg->tx_timestamp_timeout = val;
+
+ /* automatically set the tx_retries count for fallback purposes,
+ * but only if the user hasn't set tx_retries explicitely.*/
+ if (!config_set_tx_retries)
+ *cfg->tx_timestamp_retries = (val * 1000);

} else if (!strcmp(option, "pi_proportional_const")) {
if (1 != sscanf(value, "%lf", &df) || !(df >= 0.0 && df < 1.0))
diff --git a/config.h b/config.h
index 497d683..3a1b866 100644
--- a/config.h
+++ b/config.h
@@ -62,6 +62,7 @@ struct config {
struct port_defaults pod;
int *assume_two_step;
int *tx_timestamp_retries;
+ int *tx_timestamp_timeout;

enum servo_type clock_servo;

diff --git a/default.cfg b/default.cfg
index 84cc0df..4694e32 100644
--- a/default.cfg
+++ b/default.cfg
@@ -30,7 +30,7 @@ assume_two_step 0
logging_level 6
path_trace_enabled 0
follow_up_info 0
-tx_timestamp_retries 100
+tx_timestamp_timeout 1
use_syslog 1
verbose 0
summary_interval 0
diff --git a/gPTP.cfg b/gPTP.cfg
index f14990b..83a44bf 100644
--- a/gPTP.cfg
+++ b/gPTP.cfg
@@ -29,7 +29,7 @@ assume_two_step 1
logging_level 6
path_trace_enabled 1
follow_up_info 1
-tx_timestamp_retries 100
+tx_timestamp_timeout 1
use_syslog 1
verbose 0
summary_interval 0
diff --git a/missing.h b/missing.h
index fbd57d4..141a71b 100644
--- a/missing.h
+++ b/missing.h
@@ -36,6 +36,10 @@
#define ADJ_SETOFFSET 0x0100
#endif

+#ifndef SO_SELECT_ERR_QUEUE
+#define SO_SELECT_ERR_QUEUE 45
+#endif
+
#ifndef CLOCK_INVALID
#define CLOCK_INVALID -1
#endif
diff --git a/ptp4l.8 b/ptp4l.8
index 45ef6f3..dc57cc3 100644
--- a/ptp4l.8
+++ b/ptp4l.8
@@ -266,8 +266,17 @@ The default is 0 (disabled).
.TP
.B tx_timestamp_retries
The number of retries to fetch the tx time stamp from the kernel when a message
-is sent.
-The default is 100.
+is sent. This option will be set to tx_timestamp_timeoutx1000 if not directly
+specified in the configuration file.
+The default is tx_timestamp_timeout*1000.
+.TP
+.B tx_timestamp_timeout
+The number of milliseconds to poll the socket for a new tx timestamp. It
+requires kernel support of the SO_SELECT_ERR_QUEUE socket option, and will
+fallback to using tx_timestamp_retries method if polling is unsupported. This
+option automatically sets tx_timestamp_retries=(tx_timestamp_timeout*1000) if
+it is not specified directly.
+The default is 1.
.TP
.B clock_servo
The servo which is used to synchronize the local clock. Currently only one
diff --git a/ptp4l.c b/ptp4l.c
index 1312cd6..c2b3a02 100644
--- a/ptp4l.c
+++ b/ptp4l.c
@@ -90,6 +90,7 @@ static struct config cfg_settings = {

.assume_two_step = &assume_two_step,
.tx_timestamp_retries = &sk_tx_retries,
+ .tx_timestamp_timeout = &sk_tx_timeout,

.clock_servo = CLOCK_SERVO_PI,

diff --git a/raw.c b/raw.c
index 326ccd2..0d5bf5d 100644
--- a/raw.c
+++ b/raw.c
@@ -239,7 +239,7 @@ static int raw_recv(struct transport *t, int fd, void *buf, int buflen,
buflen += hlen;
hdr = (struct eth_hdr *) ptr;

- cnt = sk_receive(fd, ptr, buflen, hwts, 0);
+ cnt = sk_receive(fd, ptr, buflen, hwts);

if (raw->vlan) {
if (ETH_P_1588 == ntohs(hdr->type)) {
@@ -286,7 +286,7 @@ static int raw_send(struct transport *t, struct fdarray *fda, int event, int pee
/*
* Get the time stamp right away.
*/
- return event == TRANS_EVENT ? sk_receive(fd, pkt, len, hwts, MSG_ERRQUEUE) : cnt;
+ return event == TRANS_EVENT ? sk_timestamp(fd, pkt, len, hwts) : cnt;
}

static void raw_release(struct transport *t)
diff --git a/sk.c b/sk.c
index e09d459..9c9e05a 100644
--- a/sk.c
+++ b/sk.c
@@ -28,13 +28,18 @@
#include <unistd.h>
#include <ifaddrs.h>
#include <stdlib.h>
+#include <poll.h>

#include "print.h"
#include "sk.h"
+#include "missing.h"

/* globals */

-int sk_tx_retries = 100;
+int sk_tx_retries = 1000;
+int sk_tx_timeout = 1;
+
+int sk_use_select = 1;

/* private methods */

@@ -196,11 +201,12 @@ int sk_interface_addr(char *name, int family, uint8_t *addr, int len)
return result;
}

-int sk_receive(int fd, void *buf, int buflen,
- struct hw_timestamp *hwts, int flags)
+int sk_timestamp(int fd, void *buf, int buflen,
+ struct hw_timestamp *hwts)
{
char control[256];
- int cnt, level, try_again, type;
+ int res, cnt, level, try_again, type;
+ int flags = MSG_ERRQUEUE;
struct cmsghdr *cm;
struct iovec iov = { buf, buflen };
struct msghdr msg;
@@ -213,7 +219,19 @@ int sk_receive(int fd, void *buf, int buflen,
msg.msg_control = control;
msg.msg_controllen = sizeof(control);

- try_again = flags == MSG_ERRQUEUE ? sk_tx_retries : 1;
+ /* poll using select here if it is supported */
+ if (sk_use_select) {
+ struct pollfd errorfd;
+
+ errorfd.fd = fd;
+ errorfd.events = POLLPRI;
+
+ res = poll(&errorfd, 1, sk_tx_timeout);
+ if (res <= 0)
+ pr_err("select timed out: %m");
+ }
+
+ try_again = sk_use_select ? 1 : sk_tx_retries;

for ( ; try_again; try_again--) {
cnt = recvmsg(fd, &msg, flags);
@@ -229,13 +247,64 @@ int sk_receive(int fd, void *buf, int buflen,
}
}

- if (cnt < 1) {
- if (flags == MSG_ERRQUEUE)
- pr_err("recvmsg tx timestamp failed: %m");
- else
- pr_err("recvmsg failed: %m");
+ if (cnt < 1)
+ pr_err("tx timestamp failed: %m");
+
+ for (cm = CMSG_FIRSTHDR(&msg); cm != NULL; cm = CMSG_NXTHDR(&msg, cm)) {
+ level = cm->cmsg_level;
+ type = cm->cmsg_type;
+ if (SOL_SOCKET == level && SO_TIMESTAMPING == type) {
+ if (cm->cmsg_len < sizeof(*ts) * 3) {
+ pr_warning("short SO_TIMESTAMPING message");
+ return -1;
+ }
+ ts = (struct timespec *) CMSG_DATA(cm);
+ break;
+ }
+ }
+
+ if (!ts) {
+ memset(&hwts->ts, 0, sizeof(hwts->ts));
+ return cnt;
}

+ switch (hwts->type) {
+ case TS_SOFTWARE:
+ hwts->ts = ts[0];
+ break;
+ case TS_HARDWARE:
+ case TS_ONESTEP:
+ hwts->ts = ts[2];
+ break;
+ case TS_LEGACY_HW:
+ hwts->ts = ts[1];
+ break;
+ }
+ return cnt;
+}
+
+
+int sk_receive(int fd, void *buf, int buflen,
+ struct hw_timestamp *hwts)
+{
+ char control[256];
+ int cnt, level, try_again, type;
+ struct cmsghdr *cm;
+ struct iovec iov = { buf, buflen };
+ struct msghdr msg;
+ struct timespec *ts = NULL;
+
+ memset(control, 0, sizeof(control));
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ cnt = recvmsg(fd, &msg, 0);
+ if (cnt < 1)
+ pr_err("recvmsg failed: %m");
+
for (cm = CMSG_FIRSTHDR(&msg); cm != NULL; cm = CMSG_NXTHDR(&msg, cm)) {
level = cm->cmsg_level;
type = cm->cmsg_type;
@@ -272,7 +341,7 @@ int sk_receive(int fd, void *buf, int buflen,
int sk_timestamping_init(int fd, char *device, enum timestamp_type type,
enum transport_type transport)
{
- int err, filter1, filter2, flags, one_step;
+ int err, filter1, filter2, flags, one_step, enabled;

switch (type) {
case TS_SOFTWARE:
@@ -329,5 +398,13 @@ int sk_timestamping_init(int fd, char *device, enum timestamp_type type,
return -1;
}

+ enabled = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_SELECT_ERR_QUEUE,
+ &enabled, sizeof(enabled)) < 0) {
+ pr_warning("ioctl SO_SELECT_ERR_QUEUE failed: %m");
+ pr_warning("falling back to recvmsg retry method");
+ sk_use_select = 0;
+ }
+
return 0;
}
diff --git a/sk.h b/sk.h
index 1923742..9c88844 100644
--- a/sk.h
+++ b/sk.h
@@ -74,16 +74,26 @@ int sk_interface_macaddr(char *name, unsigned char *mac, int len);
int sk_interface_addr(char *name, int family, uint8_t *addr, int len);

/**
+ * Read a timestamp from a socket.
+ * @param fd An open socket.
+ * @param buf Buffer to receive the message.
+ * @param buflen Size of 'buf' in bytes.
+ * @param hwts Pointer to a buffer to receive the message's time stamp.
+ * @return
+ */
+int sk_timestamp(int fd, void *buf, int buflen,
+ struct hw_timestamp *hwts);
+
+/**
* Read a message from a socket.
* @param fd An open socket.
* @param buf Buffer to receive the message.
* @param buflen Size of 'buf' in bytes.
* @param hwts Pointer to a buffer to receive the message's time stamp.
- * @param flags Flags to pass to RECV(2).
* @return
*/
int sk_receive(int fd, void *buf, int buflen,
- struct hw_timestamp *hwts, int flags);
+ struct hw_timestamp *hwts);

/**
* Enable time stamping on a given network interface.
@@ -102,4 +112,10 @@ int sk_timestamping_init(int fd, char *device, enum timestamp_type type,
*/
extern int sk_tx_retries;

+/**
+ * Limits the length of time that poll will be used to wait for a socket
+ * timestamp to be returned. (specified in milliseconds)
+ */
+extern int sk_tx_timeout;
+
#endif
diff --git a/udp.c b/udp.c
index fea0122..384071b 100644
--- a/udp.c
+++ b/udp.c
@@ -194,7 +194,7 @@ no_event:
static int udp_recv(struct transport *t, int fd, void *buf, int buflen,
struct hw_timestamp *hwts)
{
- return sk_receive(fd, buf, buflen, hwts, 0);
+ return sk_receive(fd, buf, buflen, hwts);
}

static int udp_send(struct transport *t, struct fdarray *fda, int event, int peer,
@@ -226,7 +226,7 @@ static int udp_send(struct transport *t, struct fdarray *fda, int event, int pee
/*
* Get the time stamp right away.
*/
- return event == TRANS_EVENT ? sk_receive(fd, junk, len, hwts, MSG_ERRQUEUE) : cnt;
+ return event == TRANS_EVENT ? sk_timestamp(fd, junk, len, hwts) : cnt;
}

static void udp_release(struct transport *t)
diff --git a/udp6.c b/udp6.c
index 2cb0179..7d22030 100644
--- a/udp6.c
+++ b/udp6.c
@@ -206,7 +206,7 @@ no_event:
static int udp6_recv(struct transport *t, int fd, void *buf, int buflen,
struct hw_timestamp *hwts)
{
- return sk_receive(fd, buf, buflen, hwts, 0);
+ return sk_receive(fd, buf, buflen, hwts);
}

static int udp6_send(struct transport *t, struct fdarray *fda, int event, int peer,
@@ -237,7 +237,7 @@ static int udp6_send(struct transport *t, struct fdarray *fda, int event, int pe
/*
* Get the time stamp right away.
*/
- return event == TRANS_EVENT ? sk_receive(fd, junk, len, hwts, MSG_ERRQUEUE) : cnt;
+ return event == TRANS_EVENT ? sk_timestamp(fd, junk, len, hwts) : cnt;
}

static void udp6_release(struct transport *t)
Libor Pechacek
2013-04-03 06:16:24 UTC
Permalink
On Tue 02-04-13 16:31:23, Jacob Keller wrote:
[...]
Post by Jacob Keller
--- a/ptp4l.8
+++ b/ptp4l.8
@@ -266,8 +266,17 @@ The default is 0 (disabled).
.TP
.B tx_timestamp_retries
The number of retries to fetch the tx time stamp from the kernel when a message
-is sent.
-The default is 100.
+is sent. This option will be set to tx_timestamp_timeoutx1000 if not directly
^
I'd use asterisk instead of 'x' here. Otherwise the patch looks good to me.

Libor

Loading...