Discussion:
[PATCH net] ipv6: introduce tcp_v6_iif()
Eric Dumazet
2014-10-17 16:17:20 UTC
Permalink
From: Eric Dumazet <***@google.com>

Commit 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line
misses") added a regression for SO_BINDTODEVICE on IPv6.

This is because we still use inet6_iif() which expects that IP6 control
block is still at the beginning of skb->cb[]

This patch adds tcp_v6_iif() helper and uses it where necessary.

Because __inet6_lookup_skb() is used by TCP and DCCP, we add an iif
parameter to it.

Signed-off-by: Eric Dumazet <***@google.com>
Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses")
---
include/net/inet6_hashtables.h | 5 +++--
include/net/tcp.h | 9 +++++++++
net/dccp/ipv6.c | 3 ++-
net/ipv6/syncookies.c | 2 +-
net/ipv6/tcp_ipv6.c | 26 +++++++++++++++-----------
5 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index ae0613544308..d1d272843b3b 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -80,7 +80,8 @@ static inline struct sock *__inet6_lookup(struct net *net,
static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb,
const __be16 sport,
- const __be16 dport)
+ const __be16 dport,
+ int iif)
{
struct sock *sk = skb_steal_sock(skb);

@@ -90,7 +91,7 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo,
&ipv6_hdr(skb)->saddr, sport,
&ipv6_hdr(skb)->daddr, ntohs(dport),
- inet6_iif(skb));
+ iif);
}

struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 74efeda994b3..12e895c52139 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -730,6 +730,15 @@ struct tcp_skb_cb {

#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))

+
+/* This is the variant of inet6_iif() that must be used by TCP,
+ * as TCP moves IP6CB into a different location in skb->cb[]
+ */
+static inline int tcp_v6_iif(const struct sk_buff *skb)
+{
+ return TCP_SKB_CB(skb)->header.h6.iif;
+}
+
/* Due to TSO, an SKB can be composed of multiple actual
* packets. To keep these tracked properly, we use this.
*/
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index ad2acfe1ca61..6bcaa33cd804 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -757,7 +757,8 @@ static int dccp_v6_rcv(struct sk_buff *skb)
/* Step 2:
* Look up flow ID in table and get corresponding socket */
sk = __inet6_lookup_skb(&dccp_hashinfo, skb,
- dh->dccph_sport, dh->dccph_dport);
+ dh->dccph_sport, dh->dccph_dport,
+ inet6_iif(skb));
/*
* Step 2:
* If no socket ...
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 9a2838e93cc5..2a86a0f00f2b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -214,7 +214,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
- ireq->ir_iif = inet6_iif(skb);
+ ireq->ir_iif = tcp_v6_iif(skb);

ireq->ir_mark = inet_request_mark(sk, skb);

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index cf2e45ab2fa4..831495529b82 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -424,6 +424,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (sock_owned_by_user(sk))
goto out;

+ /* Note : We use inet6_iif() here, not tcp_v6_iif() */
req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
&hdr->saddr, inet6_iif(skb));
if (!req)
@@ -738,7 +739,7 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk,
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
- ireq->ir_iif = inet6_iif(skb);
+ ireq->ir_iif = tcp_v6_iif(skb);

if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
(ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
@@ -860,7 +861,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,

fl6.flowi6_proto = IPPROTO_TCP;
if (rt6_need_strict(&fl6.daddr) && !oif)
- fl6.flowi6_oif = inet6_iif(skb);
+ fl6.flowi6_oif = tcp_v6_iif(skb);
else
fl6.flowi6_oif = oif;
fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
@@ -918,7 +919,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
&tcp_hashinfo, &ipv6h->saddr,
th->source, &ipv6h->daddr,
- ntohs(th->source), inet6_iif(skb));
+ ntohs(th->source), tcp_v6_iif(skb));
if (!sk1)
return;

@@ -1000,13 +1001,14 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
/* Find possible connection requests. */
req = inet6_csk_search_req(sk, &prev, th->source,
&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, inet6_iif(skb));
+ &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb));
if (req)
return tcp_check_req(sk, skb, req, prev, false);

nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
- &ipv6_hdr(skb)->saddr, th->source,
- &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
+ &ipv6_hdr(skb)->saddr, th->source,
+ &ipv6_hdr(skb)->daddr, ntohs(th->dest),
+ tcp_v6_iif(skb));

if (nsk) {
if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -1090,7 +1092,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newnp->ipv6_fl_list = NULL;
newnp->pktoptions = NULL;
newnp->opt = NULL;
- newnp->mcast_oif = inet6_iif(skb);
+ newnp->mcast_oif = tcp_v6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
if (np->repflow)
@@ -1174,7 +1176,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
skb_set_owner_r(newnp->pktoptions, newsk);
}
newnp->opt = NULL;
- newnp->mcast_oif = inet6_iif(skb);
+ newnp->mcast_oif = tcp_v6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
if (np->repflow)
@@ -1360,7 +1362,7 @@ ipv6_pktoptions:
if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
- np->mcast_oif = inet6_iif(opt_skb);
+ np->mcast_oif = tcp_v6_iif(opt_skb);
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
@@ -1427,7 +1429,8 @@ static int tcp_v6_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0;

- sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
+ sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
+ tcp_v6_iif(skb));
if (!sk)
goto no_tcp_socket;

@@ -1514,7 +1517,7 @@ do_time_wait:
sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr,
- ntohs(th->dest), inet6_iif(skb));
+ ntohs(th->dest), tcp_v6_iif(skb));
if (sk2 != NULL) {
struct inet_timewait_sock *tw = inet_twsk(sk);
inet_twsk_deschedule(tw, &tcp_death_row);
@@ -1553,6 +1556,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
if (th->doff < sizeof(struct tcphdr) / 4)
return;

+ /* Note : We use inet6_iif() here, not tcp_v6_iif() */
sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
&hdr->saddr, th->source,
&hdr->daddr, ntohs(th->dest),
David Laight
2014-10-17 16:26:51 UTC
Permalink
From: Eric Dumazet
Post by Eric Dumazet
Commit 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line
misses") added a regression for SO_BINDTODEVICE on IPv6.
This is because we still use inet6_iif() which expects that IP6 control
block is still at the beginning of skb->cb[]
This patch adds tcp_v6_iif() helper and uses it where necessary.
Because __inet6_lookup_skb() is used by TCP and DCCP, we add an iif
parameter to it.
...
Post by Eric Dumazet
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index cf2e45ab2fa4..831495529b82 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -424,6 +424,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (sock_owned_by_user(sk))
goto out;
+ /* Note : We use inet6_iif() here, not tcp_v6_iif() */
req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
&hdr->saddr, inet6_iif(skb));
if (!req)
That comment isn'
Eric Dumazet
2014-10-17 16:52:38 UTC
Permalink
Post by Eric Dumazet
From: Eric Dumazet
Post by Eric Dumazet
Commit 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line
misses") added a regression for SO_BINDTODEVICE on IPv6.
This is because we still use inet6_iif() which expects that IP6 control
block is still at the beginning of skb->cb[]
This patch adds tcp_v6_iif() helper and uses it where necessary.
Because __inet6_lookup_skb() is used by TCP and DCCP, we add an iif
parameter to it.
...
Post by Eric Dumazet
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index cf2e45ab2fa4..831495529b82 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -424,6 +424,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (sock_owned_by_user(sk))
goto out;
+ /* Note : We use inet6_iif() here, not tcp_v6_iif() */
req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
&hdr->saddr, inet6_iif(skb));
if (!req)
That comment isn't particularly informative....
It is showing I considered this spot, and the right thing here is to use
inet6_iif()

The commit changelog will give to curious people the reasons.

Adding fat comments is superseded with good changelog.
Cong Wang
2014-10-17 16:58:24 UTC
Permalink
Post by Eric Dumazet
Commit 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line
misses") added a regression for SO_BINDTODEVICE on IPv6.
This is because we still use inet6_iif() which expects that IP6 control
block is still at the beginning of skb->cb[]
This patch adds tcp_v6_iif() helper and uses it where necessary.
Because __inet6_lookup_skb() is used by TCP and DCCP, we add an iif
parameter to it.
I doubt we still need to store iif in IP6CB() since we have skb->skb_iif,
we can probably just make inet6_iif() be like inet_iif() so that IP6CB()->iif
can be just removed? Does this make any sense to you?

(I have a patch locally since I thought it should be target for net-next.)

Thanks.
Eric Dumazet
2014-10-17 18:19:56 UTC
Permalink
Post by Cong Wang
I doubt we still need to store iif in IP6CB() since we have skb->skb_iif,
we can probably just make inet6_iif() be like inet_iif() so that IP6CB()->iif
can be just removed? Does this make any sense to you?
This makes sense and would gain 4 bytes in skb->cb[], so definitely
worth it.
Post by Cong Wang
(I have a patch locally since I thought it should be target for net-next.)
Cong Wang
2014-10-18 00:00:31 UTC
Permalink
Post by Eric Dumazet
Post by Cong Wang
I doubt we still need to store iif in IP6CB() since we have skb->skb_iif,
we can probably just make inet6_iif() be like inet_iif() so that IP6CB()->iif
can be just removed? Does this make any sense to you?
This makes sense and would gain 4 bytes in skb->cb[], so definitely
worth it.
Hmm, after testing my patch, I found that some IPv6 code (scope_id) calls
inet6_iif() at socket layer too, where skb dst is already dropped. So we can't
simply use skb_dst() for all inet6_iif(). This is also why IPv4 is different.

Therefore, I think your patch is fine at least for net.
Cong Wang
2014-10-18 00:02:04 UTC
Permalink
Post by Eric Dumazet
Commit 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line
misses") added a regression for SO_BINDTODEVICE on IPv6.
This is because we still use inet6_iif() which expects that IP6 control
block is still at the beginning of skb->cb[]
This patch adds tcp_v6_iif() helper and uses it where necessary.
Because __inet6_lookup_skb() is used by TCP and DCCP, we add an iif
parameter to it.
Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses")
As I already explained in previous reply, this is probably the best solution
for net, so:

Acked-by: Cong Wang <***@twopensource.com>
David Miller
2014-10-18 03:48:17 UTC
Permalink
From: Eric Dumazet <***@gmail.com>
Date: Fri, 17 Oct 2014 09:17:20 -0700
Post by Eric Dumazet
Commit 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line
misses") added a regression for SO_BINDTODEVICE on IPv6.
This is because we still use inet6_iif() which expects that IP6 control
block is still at the beginning of skb->cb[]
This patch adds tcp_v6_iif() helper and uses it where necessary.
Because __inet6_lookup_skb() is used by TCP and DCCP, we add an iif
parameter to it.
Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses")
Applied.
Eric Dumazet
2014-10-18 15:30:07 UTC
Permalink
Post by David Miller
Date: Fri, 17 Oct 2014 09:17:20 -0700
=20
=20
Commit 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache=
line
Post by David Miller
misses") added a regression for SO_BINDTODEVICE on IPv6.
=20
This is because we still use inet6_iif() which expects that IP6 con=
trol
Post by David Miller
block is still at the beginning of skb->cb[]
=20
This patch adds tcp_v6_iif() helper and uses it where necessary.
=20
Because __inet6_lookup_skb() is used by TCP and DCCP, we add an iif
parameter to it.
=20
Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache=
line misses")
Post by David Miller
=20
Applied.
Oh well, I forgot IPV6 could be not present in .config

Will send a patch, sorry for this.

$ make M=3Dnet/ipv4
CC net/ipv4/route.o
In file included from net/ipv4/route.c:102:0:
include/net/tcp.h: In function =E2=80=98tcp_v6_iif=E2=80=99:
include/net/tcp.h:738:32: error: =E2=80=98union <anonymous>=E2=80=99 ha=
s no member named =E2=80=98h6=E2=80=99
return TCP_SKB_CB(skb)->header.h6.iif;

Loading...