From 76a9ffcad156592e905b992edaa9339d2941cf43 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Wed, 21 Dec 2016 22:08:19 +0800 Subject: [PATCH] ip: Set mbuf hash for output IP packets. This paves the way to implement Flow-Queue-Codel. --- sys/netinet/in_pcb.h | 7 ++++--- sys/netinet/ip_demux.c | 15 +++++++++++++++ sys/netinet/tcp_output.c | 3 +++ sys/netinet/tcp_subr.c | 22 ++++++++++++---------- sys/netinet/tcp_syncache.c | 16 +++++++++++++++- sys/netinet/tcp_usrreq.c | 12 +++++++++--- sys/netinet/tcp_var.h | 6 +++++- sys/netinet/udp_usrreq.c | 22 +++++++++++++++++----- sys/netinet/udp_var.h | 2 ++ 9 files changed, 82 insertions(+), 23 deletions(-) diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index cdb36b78e9..fa198b0504 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -196,7 +196,9 @@ struct inpcb { struct inpcbinfo *inp_pcbinfo; /* PCB list info */ struct socket *inp_socket; /* back pointer to socket */ /* list for this PCB's local port */ - int inp_flags; /* generic IP/datagram flags */ + int inp_flags; /* generic IP/datagram flags */ + uint16_t inp_hashval; /* valid iff INP_HASH */ + uint16_t inp_pad; /* explicit padding */ struct inpcbpolicy *inp_sp; /* for IPSEC */ u_char inp_af; /* AF_INET or AF_INET6 */ @@ -340,8 +342,7 @@ struct inpcbinfo { /* XXX documentation, prefixes */ #define INP_CONNECTED 0x1000 /* exact match */ #define INP_FLAG_PROTO1 0x2000 /* protocol specific */ #define INP_PLACEMARKER 0x4000 /* skip this pcb, its a placemarker */ - -/* 0x008000 unused */ +#define INP_HASH 0x8000 /* inp_hashval is valid */ #define IN6P_PKTINFO 0x010000 /* receive IP6 dst and I/F */ #define IN6P_HOPLIMIT 0x020000 /* receive hoplimit */ diff --git a/sys/netinet/ip_demux.c b/sys/netinet/ip_demux.c index 0c3f8769c5..34dbb5113c 100644 --- a/sys/netinet/ip_demux.c +++ b/sys/netinet/ip_demux.c @@ -91,6 +91,21 @@ INP_MPORT_HASH_TCP(in_addr_t faddr, in_addr_t laddr, } /* + * Hash for the network address. + */ +int +tcp_addrhash(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) +{ + return (INP_MPORT_HASH_TCP(faddr, laddr, fport, lport)); +} + +int +udp_addrhash(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) +{ + return (INP_MPORT_HASH_UDP(faddr, laddr, fport, lport)); +} + +/* * Map a network address to a processor. */ int diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 52bcf0bb5c..d3021c2a07 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -1255,6 +1255,9 @@ after_th: !(rt->rt_rmx.rmx_locks & RTV_MTU)) ip->ip_off |= IP_DF; + KASSERT(inp->inp_flags & INP_HASH, + ("inpcb has no hash")); + m_sethash(m, inp->inp_hashval); error = ip_output(m, inp->inp_options, &inp->inp_route, (so->so_options & SO_DONTROUTE) | IP_DEBUGROUTE, NULL, inp); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index acbbfa0bfb..154557b339 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -561,6 +561,7 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, struct route_in6 *ro6 = NULL; struct route_in6 sro6; struct ip6_hdr *ip6 = ipgen; + struct inpcb *inp = NULL; boolean_t use_tmpro = TRUE; #ifdef INET6 boolean_t isipv6 = (IP_VHL_V(ip->ip_vhl) == 6); @@ -569,8 +570,9 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, #endif if (tp != NULL) { + inp = tp->t_inpcb; if (!(flags & TH_RST)) { - win = ssb_space(&tp->t_inpcb->inp_socket->so_rcv); + win = ssb_space(&inp->inp_socket->so_rcv); if (win < 0) win = 0; if (win > (long)TCP_MAXWIN << tp->rcv_scale) @@ -582,9 +584,9 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, */ if (tp->t_state != TCPS_LISTEN) { if (isipv6) - ro6 = &tp->t_inpcb->in6p_route; + ro6 = &inp->in6p_route; else - ro = &tp->t_inpcb->inp_route; + ro = &inp->inp_route; use_tmpro = FALSE; } } @@ -669,9 +671,8 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, nth->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr), tlen - sizeof(struct ip6_hdr)); - ip6->ip6_hlim = in6_selecthlim(tp ? tp->t_inpcb : NULL, - (ro6 && ro6->ro_rt) ? - ro6->ro_rt->rt_ifp : NULL); + ip6->ip6_hlim = in6_selecthlim(inp, + (ro6 && ro6->ro_rt) ? ro6->ro_rt->rt_ifp : NULL); } else { nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); @@ -680,19 +681,20 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, m->m_pkthdr.csum_thlen = sizeof(struct tcphdr); } #ifdef TCPDEBUG - if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) + if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0); #endif if (isipv6) { - ip6_output(m, NULL, ro6, ipflags, NULL, NULL, - tp ? tp->t_inpcb : NULL); + ip6_output(m, NULL, ro6, ipflags, NULL, NULL, inp); if ((ro6 == &sro6) && (ro6->ro_rt != NULL)) { RTFREE(ro6->ro_rt); ro6->ro_rt = NULL; } } else { + if (inp != NULL && (inp->inp_flags & INP_HASH)) + m_sethash(m, inp->inp_hashval); ipflags |= IP_DEBUGROUTE; - ip_output(m, NULL, ro, ipflags, NULL, tp ? tp->t_inpcb : NULL); + ip_output(m, NULL, ro, ipflags, NULL, inp); if ((ro == &sro) && (ro->ro_rt != NULL)) { RTFREE(ro->ro_rt); ro->ro_rt = NULL; diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 0ed40673b6..7f856a8edf 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -710,6 +710,8 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) struct sockaddr_in6 sin6_faddr; struct sockaddr *faddr; + KASSERT(m->m_flags & M_HASH, ("mbuf has no hash")); + if (isipv6) { faddr = (struct sockaddr *)&sin6_faddr; sin6_faddr.sin6_family = AF_INET6; @@ -817,6 +819,9 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) inp->inp_laddr = laddr; goto abort; } + + inp->inp_flags |= INP_HASH; + inp->inp_hashval = m->m_pkthdr.hash; } /* @@ -975,6 +980,8 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, struct mbuf *ipopts = NULL; int win; + KASSERT(m->m_flags & M_HASH, ("mbuf has no hash")); + syncache_percpu = &tcp_syncache_percpu[mycpu->gd_cpuid]; tp = sototcpcb(so); @@ -996,6 +1003,10 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, */ sc = syncache_lookup(inc, &sch); if (sc != NULL) { + KASSERT(sc->sc_flags & SCF_HASH, ("syncache has no hash")); + KASSERT(sc->sc_hashval == m->m_pkthdr.hash, + ("syncache/mbuf hash mismatches")); + tcpstat.tcps_sc_dupsyn++; if (ipopts) { /* @@ -1059,7 +1070,8 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, sc->sc_route.ro_rt = NULL; } sc->sc_irs = th->th_seq; - sc->sc_flags = 0; + sc->sc_flags = SCF_HASH; + sc->sc_hashval = m->m_pkthdr.hash; sc->sc_peer_mss = to->to_flags & TOF_MSS ? to->to_mss : 0; if (tcp_syncookies) sc->sc_iss = syncookie_generate(sc); @@ -1312,6 +1324,8 @@ no_options: m->m_pkthdr.csum_flags = CSUM_TCP; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); m->m_pkthdr.csum_thlen = sizeof(struct tcphdr) + optlen; + KASSERT(sc->sc_flags & SCF_HASH, ("syncache has no hash")); + m_sethash(m, sc->sc_hashval); error = ip_output(m, sc->sc_ipopts, &sc->sc_route, IP_DEBUGROUTE, NULL, sc->sc_tp->t_inpcb); } diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 5c7f9a7c78..b013c4f19e 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -1027,7 +1027,8 @@ struct pr_usrreqs tcp6_usrreqs = { static int tcp_connect_oncpu(struct tcpcb *tp, int flags, struct mbuf *m, - struct sockaddr_in *sin, struct sockaddr_in *if_sin) + struct sockaddr_in *sin, struct sockaddr_in *if_sin, + uint16_t hash) { struct inpcb *inp = tp->t_inpcb, *oinp; struct socket *so = inp->inp_socket; @@ -1051,6 +1052,9 @@ tcp_connect_oncpu(struct tcpcb *tp, int flags, struct mbuf *m, inp->inp_fport = sin->sin_port; in_pcbinsconnhash(inp); + inp->inp_flags |= INP_HASH; + inp->inp_hashval = hash; + /* * We are now on the inpcb's owner CPU, if the cached route was * freed because the rtentry's owner CPU is not the current CPU @@ -1132,6 +1136,7 @@ tcp_connect(netmsg_t msg) struct inpcb *inp; struct tcpcb *tp; int error; + uint16_t hash; lwkt_port_t port; COMMON_START(so, inp, 0); @@ -1182,10 +1187,11 @@ tcp_connect(netmsg_t msg) } KKASSERT(inp->inp_socket == so); - port = tcp_addrport(sin->sin_addr.s_addr, sin->sin_port, + hash = tcp_addrhash(sin->sin_addr.s_addr, sin->sin_port, (inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr.s_addr : if_sin->sin_addr.s_addr), inp->inp_lport); + port = netisr_hashport(hash); if (port != &curthread->td_msgport) { lwkt_msg_t lmsg = &msg->connect.base.lmsg; @@ -1258,7 +1264,7 @@ tcp_connect(netmsg_t msg) msg->connect.nm_flags &= ~PRUC_HELDTD; } error = tcp_connect_oncpu(tp, msg->connect.nm_sndflags, - msg->connect.nm_m, sin, if_sin); + msg->connect.nm_m, sin, if_sin, hash); msg->connect.nm_m = NULL; out: if (msg->connect.nm_m) { diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 4f27cec820..ef708ccec0 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -504,6 +504,8 @@ struct syncache { #define sc_route sc_inc.inc_route #define sc_route6 sc_inc.inc6_route u_int32_t sc_tsrecent; + uint16_t sc_hashval; /* connection hash */ + uint16_t sc_pad; /* explicit padding */ tcp_seq sc_irs; /* seq from peer */ tcp_seq sc_iss; /* our ISS */ u_long sc_rxttime; /* retransmit time */ @@ -516,7 +518,7 @@ struct syncache { #define SCF_NOOPT 0x01 /* no TCP options */ #define SCF_WINSCALE 0x02 /* negotiated window scaling */ #define SCF_TIMESTAMP 0x04 /* negotiated timestamps */ -#define SCF_UNUSED 0x08 /* unused */ +#define SCF_HASH 0x08 /* sc_hashval is valid */ #define SCF_UNREACH 0x10 /* icmp unreachable received */ #define SCF_SACK_PERMITTED 0x20 /* saw SACK permitted option */ #define SCF_SIGNATURE 0x40 /* send MD5 digests */ @@ -645,6 +647,8 @@ union netmsg; int tcp_addrcpu(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport); +int tcp_addrhash(in_addr_t faddr, in_port_t fport, + in_addr_t laddr, in_port_t lport); struct lwkt_port * tcp_addrport(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport); diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 103a5f8c94..20877f528b 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -189,7 +189,7 @@ static void udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off, struct sockaddr_in *udp_in); static int udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, - struct sockaddr_in *if_sin); + struct sockaddr_in *if_sin, uint16_t hash); static boolean_t udp_inswildcardhash(struct inpcb *inp, struct netmsg_base *msg, int error); @@ -931,6 +931,7 @@ udp_send(netmsg_t msg) int pru_flags = msg->send.nm_flags; struct inpcb *inp = so->so_pcb; struct thread *td = msg->send.nm_td; + uint16_t hash; int flags; struct udpiphdr *ui; @@ -1096,11 +1097,16 @@ udp_send(netmsg_t msg) * For connected socket, this datagram has already * been in the correct netisr; no need to rehash. */ + KASSERT(inp->inp_flags & INP_HASH, ("inpcb has no hash")); + m_sethash(m, inp->inp_hashval); goto sendit; } - cpu = udp_addrcpu(ui->ui_dst.s_addr, ui->ui_dport, + hash = udp_addrhash(ui->ui_dst.s_addr, ui->ui_dport, ui->ui_src.s_addr, ui->ui_sport); + m_sethash(m, hash); + + cpu = netisr_hashcpu(hash); if (cpu != mycpuid) { struct mbuf *m_opt = NULL; struct netmsg_pru_send *smsg; @@ -1489,6 +1495,7 @@ udp_connect(netmsg_t msg) struct sockaddr_in *sin = (struct sockaddr_in *)nam; struct sockaddr_in *if_sin; struct lwkt_port *port; + uint16_t hash; int error; KKASSERT(msg->connect.nm_m == NULL); @@ -1531,9 +1538,10 @@ udp_connect(netmsg_t msg) goto out; } - port = udp_addrport(sin->sin_addr.s_addr, sin->sin_port, + hash = udp_addrhash(sin->sin_addr.s_addr, sin->sin_port, inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr.s_addr : if_sin->sin_addr.s_addr, inp->inp_lport); + port = netisr_hashport(hash); if (port != &curthread->td_msgport) { lwkt_msg_t lmsg = &msg->connect.base.lmsg; int nm_flags = PRUC_RECONNECT; @@ -1586,7 +1594,7 @@ udp_connect(netmsg_t msg) /* msg invalid now */ return; } - error = udp_connect_oncpu(inp, sin, if_sin); + error = udp_connect_oncpu(inp, sin, if_sin, hash); out: if (msg->connect.nm_flags & PRUC_HELDTD) lwkt_rele(td); @@ -1652,7 +1660,7 @@ udp_remwildcardhash(struct inpcb *inp) static int udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, - struct sockaddr_in *if_sin) + struct sockaddr_in *if_sin, uint16_t hash) { struct socket *so = inp->inp_socket; struct inpcb *oinp; @@ -1680,6 +1688,9 @@ udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, inp->inp_fport = sin->sin_port; in_pcbinsconnhash(inp); + inp->inp_flags |= INP_HASH; + inp->inp_hashval = hash; + soisconnected(so); return 0; @@ -1852,6 +1863,7 @@ udp_disconnect(netmsg_t msg) soclrstate(so, SS_ISCONNECTED); /* XXX */ in_pcbdisconnect(inp); + inp->inp_flags &= ~INP_HASH; /* * Follow traditional BSD behavior and retain the local port diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h index 2dca2f86ad..d427d6c613 100644 --- a/sys/netinet/udp_var.h +++ b/sys/netinet/udp_var.h @@ -154,6 +154,8 @@ extern int log_in_vain; int udp_addrcpu (in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport); +int udp_addrhash (in_addr_t faddr, in_port_t fport, + in_addr_t laddr, in_port_t lport); struct lwkt_port *udp_addrport (in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport); void udp_ctlinput(netmsg_t msg); -- 2.11.4.GIT