Jacob Keller
2013-04-01 22:39:15 UTC
A recent patch on netdev has added a new socket option which enables an
application to select on timestamps in the error queue only. This patch will be
able to clean up the sk_receive routine to use select instead of polling every
microsecond. This change should reduce number of errors due to sk_tx_retries
count failures, and vastly improve the overall sequence of code. It may be
possible to further optimize the path by using select farther up the chain
instead of inside sk_timestamp but this initial patch should at least fix some issues.
Signed-off-by: Jacob Keller <***@intel.com>
---
raw.c | 4 +--
sk.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
sk.h | 21 +++++++++++++-
udp.c | 4 +--
udp6.c | 4 +--
5 files changed, 111 insertions(+), 18 deletions(-)
diff --git a/raw.c b/raw.c
index 326ccd2..0d5bf5d 100644
--- a/raw.c
+++ b/raw.c
@@ -239,7 +239,7 @@ static int raw_recv(struct transport *t, int fd, void *buf, int buflen,
buflen += hlen;
hdr = (struct eth_hdr *) ptr;
- cnt = sk_receive(fd, ptr, buflen, hwts, 0);
+ cnt = sk_receive(fd, ptr, buflen, hwts);
if (raw->vlan) {
if (ETH_P_1588 == ntohs(hdr->type)) {
@@ -286,7 +286,7 @@ static int raw_send(struct transport *t, struct fdarray *fda, int event, int pee
/*
* Get the time stamp right away.
*/
- return event == TRANS_EVENT ? sk_receive(fd, pkt, len, hwts, MSG_ERRQUEUE) : cnt;
+ return event == TRANS_EVENT ? sk_timestamp(fd, pkt, len, hwts) : cnt;
}
static void raw_release(struct transport *t)
diff --git a/sk.c b/sk.c
index e09d459..a407bb2 100644
--- a/sk.c
+++ b/sk.c
@@ -35,6 +35,7 @@
/* globals */
int sk_tx_retries = 100;
+int use_select = 1;
/* private methods */
@@ -196,11 +197,12 @@ int sk_interface_addr(char *name, int family, uint8_t *addr, int len)
return result;
}
-int sk_receive(int fd, void *buf, int buflen,
- struct hw_timestamp *hwts, int flags)
+int sk_timestamp(int fd, void *buf, int buflen,
+ struct hw_timestamp *hwts)
{
char control[256];
- int cnt, level, try_again, type;
+ int res, cnt, level, try_again, type;
+ int flags = MSG_ERRQUEUE;
struct cmsghdr *cm;
struct iovec iov = { buf, buflen };
struct msghdr msg;
@@ -213,7 +215,22 @@ int sk_receive(int fd, void *buf, int buflen,
msg.msg_control = control;
msg.msg_controllen = sizeof(control);
- try_again = flags == MSG_ERRQUEUE ? sk_tx_retries : 1;
+ /* poll using select here if it is supported */
+ if (use_select) {
+ struct timeval delta;
+ fd_set errorfs;
+ delta.tv_sec = 1;
+ delta.tv_usec = 0;
+
+ FD_ZERO(&errorfs);
+ FD_SET(fd, &errorfs);
+ res = select(fd + 1, 0, 0, &errorfs, &delta);
+ if (res <= 0)
+ pr_err("select timed out: %m");
+
+ }
+
+ try_again = use_select ? 1 : sk_tx_retries;
for ( ; try_again; try_again--) {
cnt = recvmsg(fd, &msg, flags);
@@ -229,12 +246,63 @@ int sk_receive(int fd, void *buf, int buflen,
}
}
- if (cnt < 1) {
- if (flags == MSG_ERRQUEUE)
- pr_err("recvmsg tx timestamp failed: %m");
- else
- pr_err("recvmsg failed: %m");
+ if (cnt < 1)
+ pr_err("tx timestamp failed: %m");
+
+ for (cm = CMSG_FIRSTHDR(&msg); cm != NULL; cm = CMSG_NXTHDR(&msg, cm)) {
+ level = cm->cmsg_level;
+ type = cm->cmsg_type;
+ if (SOL_SOCKET == level && SO_TIMESTAMPING == type) {
+ if (cm->cmsg_len < sizeof(*ts) * 3) {
+ pr_warning("short SO_TIMESTAMPING message");
+ return -1;
+ }
+ ts = (struct timespec *) CMSG_DATA(cm);
+ break;
+ }
+ }
+
+ if (!ts) {
+ memset(&hwts->ts, 0, sizeof(hwts->ts));
+ return cnt;
+ }
+
+ switch (hwts->type) {
+ case TS_SOFTWARE:
+ hwts->ts = ts[0];
+ break;
+ case TS_HARDWARE:
+ case TS_ONESTEP:
+ hwts->ts = ts[2];
+ break;
+ case TS_LEGACY_HW:
+ hwts->ts = ts[1];
+ break;
}
+ return cnt;
+}
+
+
+int sk_receive(int fd, void *buf, int buflen,
+ struct hw_timestamp *hwts)
+{
+ char control[256];
+ int cnt, level, try_again, type;
+ struct cmsghdr *cm;
+ struct iovec iov = { buf, buflen };
+ struct msghdr msg;
+ struct timespec *ts = NULL;
+
+ memset(control, 0, sizeof(control));
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ cnt = recvmsg(fd, &msg, 0);
+ if (cnt < 1)
+ pr_err("recvmsg failed: %m");
for (cm = CMSG_FIRSTHDR(&msg); cm != NULL; cm = CMSG_NXTHDR(&msg, cm)) {
level = cm->cmsg_level;
@@ -272,7 +340,7 @@ int sk_receive(int fd, void *buf, int buflen,
int sk_timestamping_init(int fd, char *device, enum timestamp_type type,
enum transport_type transport)
{
- int err, filter1, filter2, flags, one_step;
+ int err, filter1, filter2, flags, one_step, enabled;
switch (type) {
case TS_SOFTWARE:
@@ -329,5 +397,13 @@ int sk_timestamping_init(int fd, char *device, enum timestamp_type type,
return -1;
}
+ enabled = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_SELECT_ERR_QUEUE,
+ &enabled, sizeof(enabled)) < 0) {
+ pr_warning("ioctl SO_SELECT_ERR_QUEUE failed: %m");
+ pr_warning("falling back to polling method");
+ use_select = 0;
+ }
+
return 0;
}
diff --git a/sk.h b/sk.h
index 1923742..b06a795 100644
--- a/sk.h
+++ b/sk.h
@@ -22,6 +22,13 @@
#include "transport.h"
+/* older kernels do not define this. We need the define but old kernels will
+ * always fail the socket option, so we will simply disable the feature if it
+ * is not supported. */
+#ifndef SO_SELECT_ERR_QUEUE
+#define SO_SELECT_ERR_QUEUE 45
+#endif
+
/**
* Contains timestamping information returned by the GET_TS_INFO ioctl.
* @valid: set to non-zero when the info struct contains valid data.
@@ -74,16 +81,26 @@ int sk_interface_macaddr(char *name, unsigned char *mac, int len);
int sk_interface_addr(char *name, int family, uint8_t *addr, int len);
/**
+ * Read a timestamp from a socket.
+ * @param fd An open socket.
+ * @param buf Buffer to receive the message.
+ * @param buflen Size of 'buf' in bytes.
+ * @param hwts Pointer to a buffer to receive the message's time stamp.
+ * @return
+ */
+int sk_timestamp(int fd, void *buf, int buflen,
+ struct hw_timestamp *hwts);
+
+/**
* Read a message from a socket.
* @param fd An open socket.
* @param buf Buffer to receive the message.
* @param buflen Size of 'buf' in bytes.
* @param hwts Pointer to a buffer to receive the message's time stamp.
- * @param flags Flags to pass to RECV(2).
* @return
*/
int sk_receive(int fd, void *buf, int buflen,
- struct hw_timestamp *hwts, int flags);
+ struct hw_timestamp *hwts);
/**
* Enable time stamping on a given network interface.
diff --git a/udp.c b/udp.c
index fea0122..384071b 100644
--- a/udp.c
+++ b/udp.c
@@ -194,7 +194,7 @@ no_event:
static int udp_recv(struct transport *t, int fd, void *buf, int buflen,
struct hw_timestamp *hwts)
{
- return sk_receive(fd, buf, buflen, hwts, 0);
+ return sk_receive(fd, buf, buflen, hwts);
}
static int udp_send(struct transport *t, struct fdarray *fda, int event, int peer,
@@ -226,7 +226,7 @@ static int udp_send(struct transport *t, struct fdarray *fda, int event, int pee
/*
* Get the time stamp right away.
*/
- return event == TRANS_EVENT ? sk_receive(fd, junk, len, hwts, MSG_ERRQUEUE) : cnt;
+ return event == TRANS_EVENT ? sk_timestamp(fd, junk, len, hwts) : cnt;
}
static void udp_release(struct transport *t)
diff --git a/udp6.c b/udp6.c
index 2cb0179..7d22030 100644
--- a/udp6.c
+++ b/udp6.c
@@ -206,7 +206,7 @@ no_event:
static int udp6_recv(struct transport *t, int fd, void *buf, int buflen,
struct hw_timestamp *hwts)
{
- return sk_receive(fd, buf, buflen, hwts, 0);
+ return sk_receive(fd, buf, buflen, hwts);
}
static int udp6_send(struct transport *t, struct fdarray *fda, int event, int peer,
@@ -237,7 +237,7 @@ static int udp6_send(struct transport *t, struct fdarray *fda, int event, int pe
/*
* Get the time stamp right away.
*/
- return event == TRANS_EVENT ? sk_receive(fd, junk, len, hwts, MSG_ERRQUEUE) : cnt;
+ return event == TRANS_EVENT ? sk_timestamp(fd, junk, len, hwts) : cnt;
}
static void udp6_release(struct transport *t)
application to select on timestamps in the error queue only. This patch will be
able to clean up the sk_receive routine to use select instead of polling every
microsecond. This change should reduce number of errors due to sk_tx_retries
count failures, and vastly improve the overall sequence of code. It may be
possible to further optimize the path by using select farther up the chain
instead of inside sk_timestamp but this initial patch should at least fix some issues.
Signed-off-by: Jacob Keller <***@intel.com>
---
raw.c | 4 +--
sk.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
sk.h | 21 +++++++++++++-
udp.c | 4 +--
udp6.c | 4 +--
5 files changed, 111 insertions(+), 18 deletions(-)
diff --git a/raw.c b/raw.c
index 326ccd2..0d5bf5d 100644
--- a/raw.c
+++ b/raw.c
@@ -239,7 +239,7 @@ static int raw_recv(struct transport *t, int fd, void *buf, int buflen,
buflen += hlen;
hdr = (struct eth_hdr *) ptr;
- cnt = sk_receive(fd, ptr, buflen, hwts, 0);
+ cnt = sk_receive(fd, ptr, buflen, hwts);
if (raw->vlan) {
if (ETH_P_1588 == ntohs(hdr->type)) {
@@ -286,7 +286,7 @@ static int raw_send(struct transport *t, struct fdarray *fda, int event, int pee
/*
* Get the time stamp right away.
*/
- return event == TRANS_EVENT ? sk_receive(fd, pkt, len, hwts, MSG_ERRQUEUE) : cnt;
+ return event == TRANS_EVENT ? sk_timestamp(fd, pkt, len, hwts) : cnt;
}
static void raw_release(struct transport *t)
diff --git a/sk.c b/sk.c
index e09d459..a407bb2 100644
--- a/sk.c
+++ b/sk.c
@@ -35,6 +35,7 @@
/* globals */
int sk_tx_retries = 100;
+int use_select = 1;
/* private methods */
@@ -196,11 +197,12 @@ int sk_interface_addr(char *name, int family, uint8_t *addr, int len)
return result;
}
-int sk_receive(int fd, void *buf, int buflen,
- struct hw_timestamp *hwts, int flags)
+int sk_timestamp(int fd, void *buf, int buflen,
+ struct hw_timestamp *hwts)
{
char control[256];
- int cnt, level, try_again, type;
+ int res, cnt, level, try_again, type;
+ int flags = MSG_ERRQUEUE;
struct cmsghdr *cm;
struct iovec iov = { buf, buflen };
struct msghdr msg;
@@ -213,7 +215,22 @@ int sk_receive(int fd, void *buf, int buflen,
msg.msg_control = control;
msg.msg_controllen = sizeof(control);
- try_again = flags == MSG_ERRQUEUE ? sk_tx_retries : 1;
+ /* poll using select here if it is supported */
+ if (use_select) {
+ struct timeval delta;
+ fd_set errorfs;
+ delta.tv_sec = 1;
+ delta.tv_usec = 0;
+
+ FD_ZERO(&errorfs);
+ FD_SET(fd, &errorfs);
+ res = select(fd + 1, 0, 0, &errorfs, &delta);
+ if (res <= 0)
+ pr_err("select timed out: %m");
+
+ }
+
+ try_again = use_select ? 1 : sk_tx_retries;
for ( ; try_again; try_again--) {
cnt = recvmsg(fd, &msg, flags);
@@ -229,12 +246,63 @@ int sk_receive(int fd, void *buf, int buflen,
}
}
- if (cnt < 1) {
- if (flags == MSG_ERRQUEUE)
- pr_err("recvmsg tx timestamp failed: %m");
- else
- pr_err("recvmsg failed: %m");
+ if (cnt < 1)
+ pr_err("tx timestamp failed: %m");
+
+ for (cm = CMSG_FIRSTHDR(&msg); cm != NULL; cm = CMSG_NXTHDR(&msg, cm)) {
+ level = cm->cmsg_level;
+ type = cm->cmsg_type;
+ if (SOL_SOCKET == level && SO_TIMESTAMPING == type) {
+ if (cm->cmsg_len < sizeof(*ts) * 3) {
+ pr_warning("short SO_TIMESTAMPING message");
+ return -1;
+ }
+ ts = (struct timespec *) CMSG_DATA(cm);
+ break;
+ }
+ }
+
+ if (!ts) {
+ memset(&hwts->ts, 0, sizeof(hwts->ts));
+ return cnt;
+ }
+
+ switch (hwts->type) {
+ case TS_SOFTWARE:
+ hwts->ts = ts[0];
+ break;
+ case TS_HARDWARE:
+ case TS_ONESTEP:
+ hwts->ts = ts[2];
+ break;
+ case TS_LEGACY_HW:
+ hwts->ts = ts[1];
+ break;
}
+ return cnt;
+}
+
+
+int sk_receive(int fd, void *buf, int buflen,
+ struct hw_timestamp *hwts)
+{
+ char control[256];
+ int cnt, level, try_again, type;
+ struct cmsghdr *cm;
+ struct iovec iov = { buf, buflen };
+ struct msghdr msg;
+ struct timespec *ts = NULL;
+
+ memset(control, 0, sizeof(control));
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ cnt = recvmsg(fd, &msg, 0);
+ if (cnt < 1)
+ pr_err("recvmsg failed: %m");
for (cm = CMSG_FIRSTHDR(&msg); cm != NULL; cm = CMSG_NXTHDR(&msg, cm)) {
level = cm->cmsg_level;
@@ -272,7 +340,7 @@ int sk_receive(int fd, void *buf, int buflen,
int sk_timestamping_init(int fd, char *device, enum timestamp_type type,
enum transport_type transport)
{
- int err, filter1, filter2, flags, one_step;
+ int err, filter1, filter2, flags, one_step, enabled;
switch (type) {
case TS_SOFTWARE:
@@ -329,5 +397,13 @@ int sk_timestamping_init(int fd, char *device, enum timestamp_type type,
return -1;
}
+ enabled = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_SELECT_ERR_QUEUE,
+ &enabled, sizeof(enabled)) < 0) {
+ pr_warning("ioctl SO_SELECT_ERR_QUEUE failed: %m");
+ pr_warning("falling back to polling method");
+ use_select = 0;
+ }
+
return 0;
}
diff --git a/sk.h b/sk.h
index 1923742..b06a795 100644
--- a/sk.h
+++ b/sk.h
@@ -22,6 +22,13 @@
#include "transport.h"
+/* older kernels do not define this. We need the define but old kernels will
+ * always fail the socket option, so we will simply disable the feature if it
+ * is not supported. */
+#ifndef SO_SELECT_ERR_QUEUE
+#define SO_SELECT_ERR_QUEUE 45
+#endif
+
/**
* Contains timestamping information returned by the GET_TS_INFO ioctl.
* @valid: set to non-zero when the info struct contains valid data.
@@ -74,16 +81,26 @@ int sk_interface_macaddr(char *name, unsigned char *mac, int len);
int sk_interface_addr(char *name, int family, uint8_t *addr, int len);
/**
+ * Read a timestamp from a socket.
+ * @param fd An open socket.
+ * @param buf Buffer to receive the message.
+ * @param buflen Size of 'buf' in bytes.
+ * @param hwts Pointer to a buffer to receive the message's time stamp.
+ * @return
+ */
+int sk_timestamp(int fd, void *buf, int buflen,
+ struct hw_timestamp *hwts);
+
+/**
* Read a message from a socket.
* @param fd An open socket.
* @param buf Buffer to receive the message.
* @param buflen Size of 'buf' in bytes.
* @param hwts Pointer to a buffer to receive the message's time stamp.
- * @param flags Flags to pass to RECV(2).
* @return
*/
int sk_receive(int fd, void *buf, int buflen,
- struct hw_timestamp *hwts, int flags);
+ struct hw_timestamp *hwts);
/**
* Enable time stamping on a given network interface.
diff --git a/udp.c b/udp.c
index fea0122..384071b 100644
--- a/udp.c
+++ b/udp.c
@@ -194,7 +194,7 @@ no_event:
static int udp_recv(struct transport *t, int fd, void *buf, int buflen,
struct hw_timestamp *hwts)
{
- return sk_receive(fd, buf, buflen, hwts, 0);
+ return sk_receive(fd, buf, buflen, hwts);
}
static int udp_send(struct transport *t, struct fdarray *fda, int event, int peer,
@@ -226,7 +226,7 @@ static int udp_send(struct transport *t, struct fdarray *fda, int event, int pee
/*
* Get the time stamp right away.
*/
- return event == TRANS_EVENT ? sk_receive(fd, junk, len, hwts, MSG_ERRQUEUE) : cnt;
+ return event == TRANS_EVENT ? sk_timestamp(fd, junk, len, hwts) : cnt;
}
static void udp_release(struct transport *t)
diff --git a/udp6.c b/udp6.c
index 2cb0179..7d22030 100644
--- a/udp6.c
+++ b/udp6.c
@@ -206,7 +206,7 @@ no_event:
static int udp6_recv(struct transport *t, int fd, void *buf, int buflen,
struct hw_timestamp *hwts)
{
- return sk_receive(fd, buf, buflen, hwts, 0);
+ return sk_receive(fd, buf, buflen, hwts);
}
static int udp6_send(struct transport *t, struct fdarray *fda, int event, int peer,
@@ -237,7 +237,7 @@ static int udp6_send(struct transport *t, struct fdarray *fda, int event, int pe
/*
* Get the time stamp right away.
*/
- return event == TRANS_EVENT ? sk_receive(fd, junk, len, hwts, MSG_ERRQUEUE) : cnt;
+ return event == TRANS_EVENT ? sk_timestamp(fd, junk, len, hwts) : cnt;
}
static void udp6_release(struct transport *t)