fix broken dependency
[openadk.git] target/linux/patches/3.14.45/mptcp.patch
blob af2dc7837e597a9ad69e1304a1cb3b4729938e03
1 diff -Nur linux-3.14.45.orig/drivers/infiniband/hw/cxgb4/cm.c linux-3.14.45/drivers/infiniband/hw/cxgb4/cm.c
2 --- linux-3.14.45.orig/drivers/infiniband/hw/cxgb4/cm.c 2015-06-23 02:01:36.000000000 +0200
3 +++ linux-3.14.45/drivers/infiniband/hw/cxgb4/cm.c 2015-06-24 14:15:48.871862463 +0200
4 @@ -3162,7 +3162,7 @@
5 */
6 memset(&tmp_opt, 0, sizeof(tmp_opt));
7 tcp_clear_options(&tmp_opt);
8 - tcp_parse_options(skb, &tmp_opt, 0, NULL);
9 + tcp_parse_options(skb, &tmp_opt, NULL, 0, NULL);
11 req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req));
12 memset(req, 0, sizeof(*req));
13 diff -Nur linux-3.14.45.orig/include/linux/ipv6.h linux-3.14.45/include/linux/ipv6.h
14 --- linux-3.14.45.orig/include/linux/ipv6.h 2015-06-23 02:01:36.000000000 +0200
15 +++ linux-3.14.45/include/linux/ipv6.h 2015-06-24 14:15:48.871862463 +0200
16 @@ -309,12 +309,6 @@
17 return NULL;
20 -static inline struct inet6_request_sock *
21 - inet6_rsk(const struct request_sock *rsk)
23 - return NULL;
26 static inline struct raw6_sock *raw6_sk(const struct sock *sk)
28 return NULL;
29 diff -Nur linux-3.14.45.orig/include/linux/tcp.h linux-3.14.45/include/linux/tcp.h
30 --- linux-3.14.45.orig/include/linux/tcp.h 2015-06-23 02:01:36.000000000 +0200
31 +++ linux-3.14.45/include/linux/tcp.h 2015-06-24 14:15:48.871862463 +0200
32 @@ -72,6 +72,53 @@
33 u32 end_seq;
36 +struct tcp_out_options {
37 + u16 options; /* bit field of OPTION_* */
38 + u8 ws; /* window scale, 0 to disable */
39 + u8 num_sack_blocks;/* number of SACK blocks to include */
40 + u8 hash_size; /* bytes in hash_location */
41 + u16 mss; /* 0 to disable */
42 + __u8 *hash_location; /* temporary pointer, overloaded */
43 + __u32 tsval, tsecr; /* need to include OPTION_TS */
44 + struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
45 +#ifdef CONFIG_MPTCP
46 + u16 mptcp_options; /* bit field of MPTCP related OPTION_* */
47 + u8 dss_csum:1,
48 + add_addr_v4:1,
49 + add_addr_v6:1; /* dss-checksum required? */
51 + __u32 data_seq; /* data sequence number, for MPTCP */
52 + __u32 data_ack; /* data ack, for MPTCP */
54 + union {
55 + struct {
56 + __u64 sender_key; /* sender's key for mptcp */
57 + __u64 receiver_key; /* receiver's key for mptcp */
58 + } mp_capable;
60 + struct {
61 + __u64 sender_truncated_mac;
62 + __u32 sender_nonce;
63 + /* random number of the sender */
64 + __u32 token; /* token for mptcp */
65 + } mp_join_syns;
66 + };
68 + struct {
69 + struct in_addr addr;
70 + u8 addr_id;
71 + } add_addr4;
73 + struct {
74 + struct in6_addr addr;
75 + u8 addr_id;
76 + } add_addr6;
78 + u16 remove_addrs; /* list of address id */
79 + u8 addr_id; /* address id (mp_join or add_address) */
80 +#endif /* CONFIG_MPTCP */
81 +};
83 /*These are used to set the sack_ok field in struct tcp_options_received */
84 #define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */
85 #define TCP_FACK_ENABLED (1 << 1) /*1 = FACK is enabled locally*/
86 @@ -95,6 +142,9 @@
87 u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
90 +struct mptcp_cb;
91 +struct mptcp_tcp_sock;
93 static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
95 rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
96 @@ -123,6 +173,7 @@
97 * FastOpen it's the seq#
98 * after data-in-SYN.
100 + u8 saw_mpc:1;
103 static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
104 @@ -130,6 +181,8 @@
105 return (struct tcp_request_sock *)req;
108 +struct tcp_md5sig_key;
110 struct tcp_sock {
111 /* inet_connection_sock has to be the first member of tcp_sock */
112 struct inet_connection_sock inet_conn;
113 @@ -323,6 +376,45 @@
114 * socket. Used to retransmit SYNACKs etc.
116 struct request_sock *fastopen_rsk;
119 + struct mptcp_cb *mpcb;
120 + struct sock *meta_sk;
121 + /* We keep these flags even if CONFIG_MPTCP is not checked, because
122 + * it allows checking MPTCP capability just by checking the mpc flag,
123 + * rather than adding ifdefs everywhere.
124 + */
125 + u16 mpc:1, /* Other end is multipath capable */
126 + inside_tk_table:1, /* Is the tcp_sock inside the token-table? */
127 + send_mp_fclose:1,
128 + request_mptcp:1, /* Did we send out an MP_CAPABLE?
129 + * (this speeds up mptcp_doit() in tcp_recvmsg)
130 + */
131 + mptcp_enabled:1, /* Is MPTCP enabled from the application ? */
132 + pf:1, /* Potentially Failed state: when this flag is set, we
133 + * stop using the subflow
134 + */
135 + mp_killed:1, /* Killed with a tcp_done in mptcp? */
136 + was_meta_sk:1, /* This was a meta sk (in case of reuse) */
137 + close_it:1, /* Must close socket in mptcp_data_ready? */
138 + closing:1;
139 + struct mptcp_tcp_sock *mptcp;
140 +#ifdef CONFIG_MPTCP
141 + struct hlist_nulls_node tk_table;
142 + u32 mptcp_loc_token;
143 + u64 mptcp_loc_key;
144 +#endif /* CONFIG_MPTCP */
146 + /* Functions that depend on the value of the mpc flag */
147 + u32 (*__select_window)(struct sock *sk);
148 + u16 (*select_window)(struct sock *sk);
149 + void (*select_initial_window)(int __space, __u32 mss, __u32 *rcv_wnd,
150 + __u32 *window_clamp, int wscale_ok,
151 + __u8 *rcv_wscale, __u32 init_rcv_wnd,
152 + const struct sock *sk);
153 + void (*init_buffer_space)(struct sock *sk);
154 + void (*set_rto)(struct sock *sk);
155 + bool (*should_expand_sndbuf)(const struct sock *sk);
158 enum tsq_flags {
159 @@ -334,6 +426,8 @@
160 TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call
161 * tcp_v{4|6}_mtu_reduced()
163 + MPTCP_PATH_MANAGER, /* MPTCP deferred creation of new subflows */
164 + MPTCP_SUB_DEFERRED, /* A subflow got deferred - process them */
167 static inline struct tcp_sock *tcp_sk(const struct sock *sk)
168 @@ -352,6 +446,7 @@
169 #ifdef CONFIG_TCP_MD5SIG
170 struct tcp_md5sig_key *tw_md5_key;
171 #endif
172 + struct mptcp_tw *mptcp_tw;
175 static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
176 diff -Nur linux-3.14.45.orig/include/net/inet6_connection_sock.h linux-3.14.45/include/net/inet6_connection_sock.h
177 --- linux-3.14.45.orig/include/net/inet6_connection_sock.h 2015-06-23 02:01:36.000000000 +0200
178 +++ linux-3.14.45/include/net/inet6_connection_sock.h 2015-06-24 14:15:48.871862463 +0200
179 @@ -27,6 +27,8 @@
181 struct dst_entry *inet6_csk_route_req(struct sock *sk, struct flowi6 *fl6,
182 const struct request_sock *req);
183 +u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
184 + const u32 rnd, const u32 synq_hsize);
186 struct request_sock *inet6_csk_search_req(const struct sock *sk,
187 struct request_sock ***prevp,
188 diff -Nur linux-3.14.45.orig/include/net/inet_common.h linux-3.14.45/include/net/inet_common.h
189 --- linux-3.14.45.orig/include/net/inet_common.h 2015-06-23 02:01:36.000000000 +0200
190 +++ linux-3.14.45/include/net/inet_common.h 2015-06-24 14:15:48.871862463 +0200
191 @@ -1,6 +1,8 @@
192 #ifndef _INET_COMMON_H
193 #define _INET_COMMON_H
195 +#include <net/sock.h>
197 extern const struct proto_ops inet_stream_ops;
198 extern const struct proto_ops inet_dgram_ops;
200 @@ -13,6 +15,8 @@
201 struct sockaddr;
202 struct socket;
204 +int inet_create(struct net *net, struct socket *sock, int protocol, int kern);
205 +int inet6_create(struct net *net, struct socket *sock, int protocol, int kern);
206 int inet_release(struct socket *sock);
207 int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
208 int addr_len, int flags);
209 diff -Nur linux-3.14.45.orig/include/net/inet_connection_sock.h linux-3.14.45/include/net/inet_connection_sock.h
210 --- linux-3.14.45.orig/include/net/inet_connection_sock.h 2015-06-23 02:01:36.000000000 +0200
211 +++ linux-3.14.45/include/net/inet_connection_sock.h 2015-06-24 14:15:48.871862463 +0200
212 @@ -244,6 +244,9 @@
214 struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
216 +u32 inet_synq_hash(const __be32 raddr, const __be16 rport, const u32 rnd,
217 + const u32 synq_hsize);
219 struct request_sock *inet_csk_search_req(const struct sock *sk,
220 struct request_sock ***prevp,
221 const __be16 rport,
222 diff -Nur linux-3.14.45.orig/include/net/mptcp.h linux-3.14.45/include/net/mptcp.h
223 --- linux-3.14.45.orig/include/net/mptcp.h 1970-01-01 01:00:00.000000000 +0100
224 +++ linux-3.14.45/include/net/mptcp.h 2015-06-24 14:15:48.871862463 +0200
225 @@ -0,0 +1,1471 @@
227 + * MPTCP implementation
229 + * Initial Design & Implementation:
230 + * Sébastien Barré <sebastien.barre@uclouvain.be>
232 + * Current Maintainer & Author:
233 + * Christoph Paasch <christoph.paasch@uclouvain.be>
235 + * Additional authors:
236 + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
237 + * Gregory Detal <gregory.detal@uclouvain.be>
238 + * Fabien Duchêne <fabien.duchene@uclouvain.be>
239 + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
240 + * Lavkesh Lahngir <lavkesh51@gmail.com>
241 + * Andreas Ripke <ripke@neclab.eu>
242 + * Vlad Dogaru <vlad.dogaru@intel.com>
243 + * Octavian Purdila <octavian.purdila@intel.com>
244 + * John Ronan <jronan@tssg.org>
245 + * Catalin Nicutar <catalin.nicutar@gmail.com>
246 + * Brandon Heller <brandonh@stanford.edu>
249 + * This program is free software; you can redistribute it and/or
250 + * modify it under the terms of the GNU General Public License
251 + * as published by the Free Software Foundation; either version
252 + * 2 of the License, or (at your option) any later version.
253 + */
255 +#ifndef _MPTCP_H
256 +#define _MPTCP_H
258 +#include <linux/inetdevice.h>
259 +#include <linux/ipv6.h>
260 +#include <linux/list.h>
261 +#include <linux/net.h>
262 +#include <linux/netpoll.h>
263 +#include <linux/skbuff.h>
264 +#include <linux/socket.h>
265 +#include <linux/tcp.h>
266 +#include <linux/kernel.h>
268 +#include <asm/byteorder.h>
269 +#include <asm/unaligned.h>
270 +#include <crypto/hash.h>
271 +#include <net/tcp.h>
273 +#if defined(__LITTLE_ENDIAN_BITFIELD)
274 + #define ntohll(x) be64_to_cpu(x)
275 + #define htonll(x) cpu_to_be64(x)
276 +#elif defined(__BIG_ENDIAN_BITFIELD)
277 + #define ntohll(x) (x)
278 + #define htonll(x) (x)
279 +#endif
281 +/* Max number of local or remote addresses we can store.
282 + * When changing, see the bitfield below in mptcp_loc4/6. */
283 +#define MPTCP_MAX_ADDR 8
285 +#define MPTCP_SUBFLOW_RETRY_DELAY 1000
287 +struct mptcp_loc4 {
288 + u8 loc4_id;
289 + u8 low_prio:1;
290 + struct in_addr addr;
293 +struct mptcp_rem4 {
294 + u8 rem4_id;
295 + u8 bitfield;
296 + u8 retry_bitfield;
297 + __be16 port;
298 + struct in_addr addr;
301 +struct mptcp_loc6 {
302 + u8 loc6_id;
303 + u8 low_prio:1;
304 + struct in6_addr addr;
307 +struct mptcp_rem6 {
308 + u8 rem6_id;
309 + u8 bitfield;
310 + u8 retry_bitfield;
311 + __be16 port;
312 + struct in6_addr addr;
315 +struct mptcp_request_sock {
316 + struct tcp_request_sock req;
317 + struct mptcp_cb *mpcb;
318 + /* Collision list in the tuple hashtable. We need to find
319 + * the req sock when receiving the third msg of the 3-way handshake,
320 + * since that one does not contain the token. If this makes
321 + * the request sock too long, we can use kmalloc'ed specific entries for
322 + * that tuple hashtable. At the moment, though, I extend the
323 + * request_sock.
324 + */
325 + struct list_head collide_tuple;
326 + struct hlist_nulls_node collide_tk;
327 + u32 mptcp_rem_nonce;
328 + u32 mptcp_loc_token;
329 + u64 mptcp_loc_key;
330 + u64 mptcp_rem_key;
331 + u64 mptcp_hash_tmac;
332 + u32 mptcp_loc_nonce;
333 + u8 loc_id;
334 + u8 rem_id; /* Address-id in the MP_JOIN */
335 + u8 dss_csum:1,
336 + low_prio:1;
339 +struct mptcp_options_received {
340 + u16 saw_mpc:1,
341 + dss_csum:1,
342 + drop_me:1,
344 + is_mp_join:1,
345 + join_ack:1,
347 + saw_low_prio:2, /* 0x1 - low-prio set for this subflow
348 + * 0x2 - low-prio set for another subflow
349 + */
350 + low_prio:1,
352 + saw_add_addr:2, /* Saw at least one add_addr option:
353 + * 0x1: IPv4 - 0x2: IPv6
354 + */
355 + more_add_addr:1, /* Saw one more add-addr. */
357 + saw_rem_addr:1, /* Saw at least one rem_addr option */
358 + more_rem_addr:1, /* Saw one more rem-addr. */
360 + mp_fail:1,
361 + mp_fclose:1;
362 + u8 rem_id; /* Address-id in the MP_JOIN */
363 + u8 prio_addr_id; /* Address-id in the MP_PRIO */
365 + const unsigned char *add_addr_ptr; /* Pointer to add-address option */
366 + const unsigned char *rem_addr_ptr; /* Pointer to rem-address option */
368 + u32 data_ack;
369 + u32 data_seq;
370 + u16 data_len;
372 + u32 mptcp_rem_token;/* Remote token */
374 + /* Key inside the option (from mp_capable or fast_close) */
375 + u64 mptcp_key;
377 + u32 mptcp_recv_nonce;
378 + u64 mptcp_recv_tmac;
379 + u8 mptcp_recv_mac[20];
382 +struct mptcp_tcp_sock {
383 + struct tcp_sock *next; /* Next subflow socket */
384 + struct list_head cb_list;
385 + struct mptcp_options_received rx_opt;
387 + /* Those three fields record the current mapping */
388 + u64 map_data_seq;
389 + u32 map_subseq;
390 + u16 map_data_len;
391 + u16 slave_sk:1,
392 + fully_established:1,
393 + establish_increased:1,
394 + second_packet:1,
395 + attached:1,
396 + send_mp_fail:1,
397 + include_mpc:1,
398 + mapping_present:1,
399 + map_data_fin:1,
400 + low_prio:1, /* use this socket as backup */
401 + rcv_low_prio:1, /* Peer sent low-prio option to us */
402 + send_mp_prio:1, /* Trigger to send mp_prio on this socket */
403 + pre_established:1; /* State between sending 3rd ACK and
404 + * receiving the fourth ack of new subflows.
405 + */
407 + /* isn: needed to translate abs to relative subflow seqnums */
408 + u32 snt_isn;
409 + u32 rcv_isn;
410 + u32 last_data_seq;
411 + u8 path_index;
412 + u8 loc_id;
413 + u8 rem_id;
415 + u32 last_rbuf_opti; /* Timestamp of last rbuf optimization */
416 + unsigned int sent_pkts;
418 + struct sk_buff *shortcut_ofoqueue; /* Shortcut to the current modified
419 + * skb in the ofo-queue.
420 + */
422 + int init_rcv_wnd;
423 + u32 infinite_cutoff_seq;
424 + struct delayed_work work;
425 + u32 mptcp_loc_nonce;
426 + struct tcp_sock *tp; /* Where is my daddy? */
427 + u32 last_end_data_seq;
429 + /* MP_JOIN subflow: timer for retransmitting the 3rd ack */
430 + struct timer_list mptcp_ack_timer;
432 + /* HMAC of the third ack */
433 + char sender_mac[20];
436 +struct mptcp_tw {
437 + struct list_head list;
438 + u64 loc_key;
439 + u64 rcv_nxt;
440 + struct mptcp_cb __rcu *mpcb;
441 + u8 meta_tw:1,
442 + in_list:1;
445 +#define MPTCP_PM_NAME_MAX 16
446 +struct mptcp_pm_ops {
447 + struct list_head list;
449 + /* Signal the creation of a new MPTCP-session. */
450 + void (*new_session)(struct sock *meta_sk, int index);
451 + void (*release_sock)(struct sock *meta_sk);
452 + void (*fully_established)(struct sock *meta_sk);
453 + void (*new_remote_address)(struct sock *meta_sk);
454 + int (*get_local_index)(sa_family_t family, union inet_addr *addr,
455 + struct net *net);
456 + int (*get_local_id)(sa_family_t family, union inet_addr *addr,
457 + struct net *net);
458 + void (*addr_signal)(struct sock *sk, unsigned *size,
459 + struct tcp_out_options *opts, struct sk_buff *skb);
461 + char name[MPTCP_PM_NAME_MAX];
462 + struct module *owner;
465 +struct mptcp_cb {
466 + struct sock *meta_sk;
468 + /* list of sockets in this multipath connection */
469 + struct tcp_sock *connection_list;
470 + /* list of sockets that need a call to release_cb */
471 + struct list_head callback_list;
473 + spinlock_t tw_lock;
474 + struct list_head tw_list;
475 + unsigned char mptw_state;
477 + atomic_t mpcb_refcnt;
479 + /* High-order bits of 64-bit sequence numbers */
480 + u32 snd_high_order[2];
481 + u32 rcv_high_order[2];
483 + u16 send_infinite_mapping:1,
484 + in_time_wait:1,
485 + list_rcvd:1, /* XXX TO REMOVE */
486 + dss_csum:1,
487 + server_side:1,
488 + infinite_mapping_rcv:1,
489 + infinite_mapping_snd:1,
490 + dfin_combined:1, /* Was the DFIN combined with subflow-fin? */
491 + passive_close:1,
492 + snd_hiseq_index:1, /* Index in snd_high_order of snd_nxt */
493 + rcv_hiseq_index:1; /* Index in rcv_high_order of rcv_nxt */
495 + /* socket count in this connection */
496 + u8 cnt_subflows;
497 + u8 cnt_established;
499 + u32 noneligible; /* Path mask of temporarily non
500 + * eligible subflows by the scheduler
501 + */
503 + struct sk_buff_head reinject_queue;
505 + u8 dfin_path_index;
507 +#define MPTCP_PM_SIZE 320
508 + u8 mptcp_pm[MPTCP_PM_SIZE] __aligned(8);
509 + struct mptcp_pm_ops *pm_ops;
511 + /* Mutex needed, because otherwise mptcp_close will complain that the
512 + * socket is owned by the user.
513 + * E.g., mptcp_sub_close_wq is taking the meta-lock.
514 + */
515 + struct mutex mpcb_mutex;
517 + /* Master socket, also part of the connection_list, this
518 + * socket is the one that the application sees.
519 + */
520 + struct sock *master_sk;
522 + u64 csum_cutoff_seq;
524 + __u64 mptcp_loc_key;
525 + __u32 mptcp_loc_token;
526 + __u64 mptcp_rem_key;
527 + __u32 mptcp_rem_token;
529 + /* Create a new subflow - necessary because the meta-sk may be IPv4, but
530 + * the new subflow can be IPv6
531 + */
532 + struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb,
533 + struct request_sock *req,
534 + struct dst_entry *dst);
536 + /* Remote addresses */
537 + struct mptcp_rem4 remaddr4[MPTCP_MAX_ADDR];
538 + u8 rem4_bits;
540 + struct mptcp_rem6 remaddr6[MPTCP_MAX_ADDR];
541 + u8 rem6_bits;
543 + u32 path_index_bits;
544 + /* Next pi to pick up in case a new path becomes available */
545 + u8 next_path_index;
547 + /* Original snd/rcvbuf of the initial subflow.
548 + * Used for the new subflows on the server-side to allow correct
549 + * autotuning
550 + */
551 + int orig_sk_rcvbuf;
552 + int orig_sk_sndbuf;
553 + u32 orig_window_clamp;
556 +#define MPTCP_SUB_CAPABLE 0
557 +#define MPTCP_SUB_LEN_CAPABLE_SYN 12
558 +#define MPTCP_SUB_LEN_CAPABLE_SYN_ALIGN 12
559 +#define MPTCP_SUB_LEN_CAPABLE_ACK 20
560 +#define MPTCP_SUB_LEN_CAPABLE_ACK_ALIGN 20
562 +#define MPTCP_SUB_JOIN 1
563 +#define MPTCP_SUB_LEN_JOIN_SYN 12
564 +#define MPTCP_SUB_LEN_JOIN_SYN_ALIGN 12
565 +#define MPTCP_SUB_LEN_JOIN_SYNACK 16
566 +#define MPTCP_SUB_LEN_JOIN_SYNACK_ALIGN 16
567 +#define MPTCP_SUB_LEN_JOIN_ACK 24
568 +#define MPTCP_SUB_LEN_JOIN_ACK_ALIGN 24
570 +#define MPTCP_SUB_DSS 2
571 +#define MPTCP_SUB_LEN_DSS 4
572 +#define MPTCP_SUB_LEN_DSS_ALIGN 4
574 +/* Lengths for seq and ack are the ones without the generic MPTCP-option header,
575 + * as they are part of the DSS-option.
576 + * To get the total length, just add the different options together.
577 + */
578 +#define MPTCP_SUB_LEN_SEQ 10
579 +#define MPTCP_SUB_LEN_SEQ_CSUM 12
580 +#define MPTCP_SUB_LEN_SEQ_ALIGN 12
582 +#define MPTCP_SUB_LEN_SEQ_64 14
583 +#define MPTCP_SUB_LEN_SEQ_CSUM_64 16
584 +#define MPTCP_SUB_LEN_SEQ_64_ALIGN 16
586 +#define MPTCP_SUB_LEN_ACK 4
587 +#define MPTCP_SUB_LEN_ACK_ALIGN 4
589 +#define MPTCP_SUB_LEN_ACK_64 8
590 +#define MPTCP_SUB_LEN_ACK_64_ALIGN 8
592 +/* This is the "default" option-length we will send out most often.
593 + * MPTCP DSS-header
594 + * 32-bit data sequence number
595 + * 32-bit data ack
597 + * It is necessary to calculate the effective MSS we will be using when
598 + * sending data.
599 + */
600 +#define MPTCP_SUB_LEN_DSM_ALIGN (MPTCP_SUB_LEN_DSS_ALIGN + \
601 + MPTCP_SUB_LEN_SEQ_ALIGN + \
602 + MPTCP_SUB_LEN_ACK_ALIGN)
604 +#define MPTCP_SUB_ADD_ADDR 3
605 +#define MPTCP_SUB_LEN_ADD_ADDR4 8
606 +#define MPTCP_SUB_LEN_ADD_ADDR6 20
607 +#define MPTCP_SUB_LEN_ADD_ADDR4_ALIGN 8
608 +#define MPTCP_SUB_LEN_ADD_ADDR6_ALIGN 20
610 +#define MPTCP_SUB_REMOVE_ADDR 4
611 +#define MPTCP_SUB_LEN_REMOVE_ADDR 4
613 +#define MPTCP_SUB_PRIO 5
614 +#define MPTCP_SUB_LEN_PRIO 3
615 +#define MPTCP_SUB_LEN_PRIO_ADDR 4
616 +#define MPTCP_SUB_LEN_PRIO_ALIGN 4
618 +#define MPTCP_SUB_FAIL 6
619 +#define MPTCP_SUB_LEN_FAIL 12
620 +#define MPTCP_SUB_LEN_FAIL_ALIGN 12
622 +#define MPTCP_SUB_FCLOSE 7
623 +#define MPTCP_SUB_LEN_FCLOSE 12
624 +#define MPTCP_SUB_LEN_FCLOSE_ALIGN 12
627 +#define OPTION_MPTCP (1 << 5)
629 +static inline void reset_mpc(struct tcp_sock *tp)
631 + tp->mpc = 0;
633 + tp->__select_window = __tcp_select_window;
634 + tp->select_window = tcp_select_window;
635 + tp->select_initial_window = tcp_select_initial_window;
636 + tp->init_buffer_space = tcp_init_buffer_space;
637 + tp->set_rto = tcp_set_rto;
638 + tp->should_expand_sndbuf = tcp_should_expand_sndbuf;
641 +/* Initializes MPTCP flags in tcp_sock (and other tcp_sock members that depend
642 + * on those flags).
643 + */
644 +static inline void mptcp_init_tcp_sock(struct tcp_sock *tp)
646 + reset_mpc(tp);
649 +#ifdef CONFIG_MPTCP
651 +/* Used for checking if the mptcp initialization has been successful */
652 +extern bool mptcp_init_failed;
654 +/* MPTCP options */
655 +#define OPTION_TYPE_SYN (1 << 0)
656 +#define OPTION_TYPE_SYNACK (1 << 1)
657 +#define OPTION_TYPE_ACK (1 << 2)
658 +#define OPTION_MP_CAPABLE (1 << 3)
659 +#define OPTION_DATA_ACK (1 << 4)
660 +#define OPTION_ADD_ADDR (1 << 5)
661 +#define OPTION_MP_JOIN (1 << 6)
662 +#define OPTION_MP_FAIL (1 << 7)
663 +#define OPTION_MP_FCLOSE (1 << 8)
664 +#define OPTION_REMOVE_ADDR (1 << 9)
665 +#define OPTION_MP_PRIO (1 << 10)
667 +/* MPTCP flags */
668 +#define MPTCPHDR_ACK 0x01
669 +#define MPTCPHDR_SEQ 0x02
670 +#define MPTCPHDR_FIN 0x04
671 +#define MPTCPHDR_INF 0x08
672 +#define MPTCPHDR_SEQ64_SET 0x10 /* Did we receive a 64-bit seq number */
673 +#define MPTCPHDR_SEQ64_OFO 0x20 /* Is it not in our circular array? */
674 +#define MPTCPHDR_SEQ64_INDEX 0x40 /* Index of seq in mpcb->snd_high_order */
675 +#define MPTCPHDR_DSS_CSUM 0x80
677 +/* It is impossible that all 8 bits of mptcp_flags are set to 1 with the above
678 + * Thus, defining MPTCPHDR_JOIN as 0xFF is safe.
679 + */
680 +#define MPTCPHDR_JOIN 0xFF
682 +struct mptcp_option {
683 + __u8 kind;
684 + __u8 len;
685 +#if defined(__LITTLE_ENDIAN_BITFIELD)
686 + __u8 ver:4,
687 + sub:4;
688 +#elif defined(__BIG_ENDIAN_BITFIELD)
689 + __u8 sub:4,
690 + ver:4;
691 +#else
692 +#error "Adjust your <asm/byteorder.h> defines"
693 +#endif
696 +struct mp_capable {
697 + __u8 kind;
698 + __u8 len;
699 +#if defined(__LITTLE_ENDIAN_BITFIELD)
700 + __u8 ver:4,
701 + sub:4;
702 + __u8 h:1,
703 + rsv:5,
704 + b:1,
705 + a:1;
706 +#elif defined(__BIG_ENDIAN_BITFIELD)
707 + __u8 sub:4,
708 + ver:4;
709 + __u8 a:1,
710 + b:1,
711 + rsv:5,
712 + h:1;
713 +#else
714 +#error "Adjust your <asm/byteorder.h> defines"
715 +#endif
716 + __u64 sender_key;
717 + __u64 receiver_key;
718 +} __attribute__((__packed__));
720 +struct mp_join {
721 + __u8 kind;
722 + __u8 len;
723 +#if defined(__LITTLE_ENDIAN_BITFIELD)
724 + __u8 b:1,
725 + rsv:3,
726 + sub:4;
727 +#elif defined(__BIG_ENDIAN_BITFIELD)
728 + __u8 sub:4,
729 + rsv:3,
730 + b:1;
731 +#else
732 +#error "Adjust your <asm/byteorder.h> defines"
733 +#endif
734 + __u8 addr_id;
735 + union {
736 + struct {
737 + u32 token;
738 + u32 nonce;
739 + } syn;
740 + struct {
741 + __u64 mac;
742 + u32 nonce;
743 + } synack;
744 + struct {
745 + __u8 mac[20];
746 + } ack;
747 + } u;
748 +} __attribute__((__packed__));
750 +struct mp_dss {
751 + __u8 kind;
752 + __u8 len;
753 +#if defined(__LITTLE_ENDIAN_BITFIELD)
754 + __u16 rsv1:4,
755 + sub:4,
756 + A:1,
757 + a:1,
758 + M:1,
759 + m:1,
760 + F:1,
761 + rsv2:3;
762 +#elif defined(__BIG_ENDIAN_BITFIELD)
763 + __u16 sub:4,
764 + rsv1:4,
765 + rsv2:3,
766 + F:1,
767 + m:1,
768 + M:1,
769 + a:1,
770 + A:1;
771 +#else
772 +#error "Adjust your <asm/byteorder.h> defines"
773 +#endif
776 +struct mp_add_addr {
777 + __u8 kind;
778 + __u8 len;
779 +#if defined(__LITTLE_ENDIAN_BITFIELD)
780 + __u8 ipver:4,
781 + sub:4;
782 +#elif defined(__BIG_ENDIAN_BITFIELD)
783 + __u8 sub:4,
784 + ipver:4;
785 +#else
786 +#error "Adjust your <asm/byteorder.h> defines"
787 +#endif
788 + __u8 addr_id;
789 + union {
790 + struct {
791 + struct in_addr addr;
792 + __be16 port;
793 + } v4;
794 + struct {
795 + struct in6_addr addr;
796 + __be16 port;
797 + } v6;
798 + } u;
799 +} __attribute__((__packed__));
801 +struct mp_remove_addr {
802 + __u8 kind;
803 + __u8 len;
804 +#if defined(__LITTLE_ENDIAN_BITFIELD)
805 + __u8 rsv:4,
806 + sub:4;
807 +#elif defined(__BIG_ENDIAN_BITFIELD)
808 + __u8 sub:4,
809 + rsv:4;
810 +#else
811 +#error "Adjust your <asm/byteorder.h> defines"
812 +#endif
813 + /* list of addr_id */
814 + __u8 addrs_id;
817 +struct mp_fail {
818 + __u8 kind;
819 + __u8 len;
820 +#if defined(__LITTLE_ENDIAN_BITFIELD)
821 + __u16 rsv1:4,
822 + sub:4,
823 + rsv2:8;
824 +#elif defined(__BIG_ENDIAN_BITFIELD)
825 + __u16 sub:4,
826 + rsv1:4,
827 + rsv2:8;
828 +#else
829 +#error "Adjust your <asm/byteorder.h> defines"
830 +#endif
831 + __be64 data_seq;
832 +} __attribute__((__packed__));
834 +struct mp_fclose {
835 + __u8 kind;
836 + __u8 len;
837 +#if defined(__LITTLE_ENDIAN_BITFIELD)
838 + __u16 rsv1:4,
839 + sub:4,
840 + rsv2:8;
841 +#elif defined(__BIG_ENDIAN_BITFIELD)
842 + __u16 sub:4,
843 + rsv1:4,
844 + rsv2:8;
845 +#else
846 +#error "Adjust your <asm/byteorder.h> defines"
847 +#endif
848 + __u64 key;
849 +} __attribute__((__packed__));
851 +struct mp_prio {
852 + __u8 kind;
853 + __u8 len;
854 +#if defined(__LITTLE_ENDIAN_BITFIELD)
855 + __u8 b:1,
856 + rsv:3,
857 + sub:4;
858 +#elif defined(__BIG_ENDIAN_BITFIELD)
859 + __u8 sub:4,
860 + rsv:3,
861 + b:1;
862 +#else
863 +#error "Adjust your <asm/byteorder.h> defines"
864 +#endif
865 + __u8 addr_id;
866 +} __attribute__((__packed__));
868 +static inline int mptcp_sub_len_dss(struct mp_dss *m, int csum)
870 + return 4 + m->A * (4 + m->a * 4) + m->M * (10 + m->m * 4 + csum * 2);
873 +#define MPTCP_APP 2
875 +extern int sysctl_mptcp_enabled;
876 +extern int sysctl_mptcp_checksum;
877 +extern int sysctl_mptcp_debug;
878 +extern int sysctl_mptcp_syn_retries;
880 +extern struct workqueue_struct *mptcp_wq;
882 +#define mptcp_debug(fmt, args...) \
883 + do { \
884 + if (unlikely(sysctl_mptcp_debug)) \
885 + pr_err(__FILE__ ": " fmt, ##args); \
886 + } while (0)
888 +/* Iterates over all subflows */
889 +#define mptcp_for_each_tp(mpcb, tp) \
890 + for ((tp) = (mpcb)->connection_list; (tp); (tp) = (tp)->mptcp->next)
892 +#define mptcp_for_each_sk(mpcb, sk) \
893 + for ((sk) = (struct sock *)(mpcb)->connection_list; \
894 + sk; \
895 + sk = (struct sock *)tcp_sk(sk)->mptcp->next)
897 +#define mptcp_for_each_sk_safe(__mpcb, __sk, __temp) \
898 + for (__sk = (struct sock *)(__mpcb)->connection_list, \
899 + __temp = __sk ? (struct sock *)tcp_sk(__sk)->mptcp->next : NULL; \
900 + __sk; \
901 + __sk = __temp, \
902 + __temp = __sk ? (struct sock *)tcp_sk(__sk)->mptcp->next : NULL)
904 +/* Iterates over all bits set to 1 in a bitset */
905 +#define mptcp_for_each_bit_set(b, i) \
906 + for (i = ffs(b) - 1; i >= 0; i = ffs(b >> (i + 1) << (i + 1)) - 1)
908 +#define mptcp_for_each_bit_unset(b, i) \
909 + mptcp_for_each_bit_set(~b, i)
911 +extern struct lock_class_key meta_key;
912 +extern struct lock_class_key meta_slock_key;
913 +extern u32 mptcp_secret[MD5_MESSAGE_BYTES / 4];
915 +/* This is needed to ensure that two subsequent key-generation result in
916 + * different keys if the IPs and ports are the same.
917 + */
918 +extern u32 mptcp_key_seed;
920 +#define MPTCP_HASH_SIZE 1024
922 +extern struct hlist_nulls_head tk_hashtable[MPTCP_HASH_SIZE];
924 +/* This second hashtable is needed to retrieve request socks
925 + * created as a result of a join request. While the SYN contains
926 + * the token, the final ack does not, so we need a separate hashtable
927 + * to retrieve the mpcb.
928 + */
929 +extern struct list_head mptcp_reqsk_htb[MPTCP_HASH_SIZE];
930 +extern spinlock_t mptcp_reqsk_hlock; /* hashtable protection */
932 +/* Lock, protecting the two hash-tables that hold the token. Namely,
933 + * mptcp_reqsk_tk_htb and tk_hashtable
934 + */
935 +extern spinlock_t mptcp_tk_hashlock; /* hashtable protection */
937 +void mptcp_data_ready(struct sock *sk, int bytes);
938 +void mptcp_write_space(struct sock *sk);
940 +void mptcp_add_meta_ofo_queue(struct sock *meta_sk, struct sk_buff *skb,
941 + struct sock *sk);
942 +void mptcp_ofo_queue(struct sock *meta_sk);
943 +void mptcp_purge_ofo_queue(struct tcp_sock *meta_tp);
944 +void mptcp_cleanup_rbuf(struct sock *meta_sk, int copied);
945 +int mptcp_alloc_mpcb(struct sock *master_sk, __u64 remote_key, u32 window);
946 +int mptcp_add_sock(struct sock *meta_sk, struct sock *sk, u8 loc_id, u8 rem_id,
947 + gfp_t flags);
948 +void mptcp_del_sock(struct sock *sk);
949 +void mptcp_update_metasocket(struct sock *sock, struct sock *meta_sk);
950 +void mptcp_reinject_data(struct sock *orig_sk, int clone_it);
951 +void mptcp_update_sndbuf(struct mptcp_cb *mpcb);
952 +struct sk_buff *mptcp_next_segment(struct sock *sk, int *reinject);
953 +void mptcp_send_fin(struct sock *meta_sk);
954 +void mptcp_send_active_reset(struct sock *meta_sk, gfp_t priority);
955 +int mptcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
956 + int push_one, gfp_t gfp);
957 +void mptcp_parse_options(const uint8_t *ptr, int opsize,
958 + struct tcp_options_received *opt_rx,
959 + struct mptcp_options_received *mopt,
960 + const struct sk_buff *skb);
961 +void mptcp_syn_options(struct sock *sk, struct tcp_out_options *opts,
962 + unsigned *remaining);
963 +void mptcp_synack_options(struct request_sock *req,
964 + struct tcp_out_options *opts,
965 + unsigned *remaining);
966 +void mptcp_established_options(struct sock *sk, struct sk_buff *skb,
967 + struct tcp_out_options *opts, unsigned *size);
968 +void mptcp_options_write(__be32 *ptr, struct tcp_sock *tp,
969 + struct tcp_out_options *opts,
970 + struct sk_buff *skb);
971 +void mptcp_close(struct sock *meta_sk, long timeout);
972 +int mptcp_doit(struct sock *sk);
973 +int mptcp_create_master_sk(struct sock *meta_sk, __u64 remote_key, u32 window);
974 +int mptcp_check_req_master(struct sock *sk, struct sock *child,
975 + struct request_sock *req,
976 + struct request_sock **prev,
977 + struct mptcp_options_received *mopt);
978 +struct sock *mptcp_check_req_child(struct sock *sk, struct sock *child,
979 + struct request_sock *req,
980 + struct request_sock **prev,
981 + struct mptcp_options_received *mopt);
982 +u32 __mptcp_select_window(struct sock *sk);
983 +void mptcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd,
984 + __u32 *window_clamp, int wscale_ok,
985 + __u8 *rcv_wscale, __u32 init_rcv_wnd,
986 + const struct sock *sk);
987 +unsigned int mptcp_current_mss(struct sock *meta_sk);
988 +int mptcp_select_size(const struct sock *meta_sk, bool sg);
989 +void mptcp_key_sha1(u64 key, u32 *token, u64 *idsn);
990 +void mptcp_hmac_sha1(u8 *key_1, u8 *key_2, u8 *rand_1, u8 *rand_2,
991 + u32 *hash_out);
992 +void mptcp_clean_rtx_infinite(struct sk_buff *skb, struct sock *sk);
993 +void mptcp_fin(struct sock *meta_sk);
994 +void mptcp_retransmit_timer(struct sock *meta_sk);
995 +int mptcp_write_wakeup(struct sock *meta_sk);
996 +void mptcp_sub_close_wq(struct work_struct *work);
997 +void mptcp_sub_close(struct sock *sk, unsigned long delay);
998 +struct sock *mptcp_select_ack_sock(const struct sock *meta_sk, int copied);
999 +void mptcp_fallback_meta_sk(struct sock *meta_sk);
1000 +int mptcp_backlog_rcv(struct sock *meta_sk, struct sk_buff *skb);
1001 +struct sock *mptcp_sk_clone(const struct sock *sk, int family, const gfp_t priority);
1002 +void mptcp_ack_handler(unsigned long);
1003 +int mptcp_check_rtt(const struct tcp_sock *tp, int time);
1004 +int mptcp_check_snd_buf(const struct tcp_sock *tp);
1005 +int mptcp_handle_options(struct sock *sk, const struct tcphdr *th, struct sk_buff *skb);
1006 +void __init mptcp_init(void);
1007 +int mptcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len);
1008 +int mptcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1009 + unsigned int mss_now, int reinject);
1010 +int mptso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1011 + unsigned int mss_now, gfp_t gfp, int reinject);
1012 +void mptcp_destroy_sock(struct sock *sk);
1013 +int mptcp_rcv_synsent_state_process(struct sock *sk, struct sock **skptr,
1014 + struct sk_buff *skb,
1015 + struct mptcp_options_received *mopt);
1016 +unsigned int mptcp_xmit_size_goal(struct sock *meta_sk, u32 mss_now,
1017 + int large_allowed);
1018 +int mptcp_time_wait(struct sock *sk, struct tcp_timewait_sock *tw);
1019 +void mptcp_twsk_destructor(struct tcp_timewait_sock *tw);
1020 +void mptcp_update_tw_socks(const struct tcp_sock *tp, int state);
1021 +void mptcp_disconnect(struct sock *sk);
1022 +bool mptcp_should_expand_sndbuf(const struct sock *sk);
1023 +int mptcp_retransmit_skb(struct sock *meta_sk, struct sk_buff *skb);
1024 +void mptcp_tsq_flags(struct sock *sk);
1025 +void mptcp_tsq_sub_deferred(struct sock *meta_sk);
1026 +struct mp_join *mptcp_find_join(struct sk_buff *skb);
1027 +void mptcp_hash_remove_bh(struct tcp_sock *meta_tp);
1028 +void mptcp_hash_remove(struct tcp_sock *meta_tp);
1029 +struct sock *mptcp_hash_find(struct net *net, u32 token);
1030 +int mptcp_lookup_join(struct sk_buff *skb, struct inet_timewait_sock *tw);
1031 +int mptcp_do_join_short(struct sk_buff *skb, struct mptcp_options_received *mopt,
1032 + struct tcp_options_received *tmp_opt, struct net *net);
1033 +void mptcp_reqsk_destructor(struct request_sock *req);
1034 +void mptcp_reqsk_new_mptcp(struct request_sock *req,
1035 + const struct tcp_options_received *rx_opt,
1036 + const struct mptcp_options_received *mopt,
1037 + const struct sk_buff *skb);
1038 +int mptcp_check_req(struct sk_buff *skb, struct net *net);
1039 +void mptcp_connect_init(struct sock *sk);
1040 +void mptcp_sub_force_close(struct sock *sk);
1041 +int mptcp_sub_len_remove_addr_align(u16 bitfield);
1042 +void mptcp_remove_shortcuts(const struct mptcp_cb *mpcb,
1043 + const struct sk_buff *skb);
1044 +void mptcp_init_buffer_space(struct sock *sk);
1046 +/* MPTCP-path-manager registration/initialization functions */
1047 +int mptcp_register_path_manager(struct mptcp_pm_ops *pm);
1048 +void mptcp_unregister_path_manager(struct mptcp_pm_ops *pm);
1049 +void mptcp_init_path_manager(struct mptcp_cb *mpcb);
1050 +void mptcp_cleanup_path_manager(struct mptcp_cb *mpcb);
1051 +void mptcp_fallback_default(struct mptcp_cb *mpcb);
1052 +void mptcp_get_default_path_manager(char *name);
1053 +int mptcp_set_default_path_manager(const char *name);
1054 +extern struct mptcp_pm_ops mptcp_pm_default;
1056 +static inline
1057 +struct mptcp_request_sock *mptcp_rsk(const struct request_sock *req)
1059 + return (struct mptcp_request_sock *)req;
1062 +static inline
1063 +struct request_sock *rev_mptcp_rsk(const struct mptcp_request_sock *req)
1065 + return (struct request_sock *)req;
1068 +static inline bool mptcp_can_sendpage(struct sock *sk)
1070 + struct sock *sk_it;
1072 + if (tcp_sk(sk)->mpcb->dss_csum)
1073 + return false;
1075 + mptcp_for_each_sk(tcp_sk(sk)->mpcb, sk_it) {
1076 + if (!(sk_it->sk_route_caps & NETIF_F_SG) ||
1077 + !(sk_it->sk_route_caps & NETIF_F_ALL_CSUM))
1078 + return false;
1081 + return true;
1084 +static inline void mptcp_push_pending_frames(struct sock *meta_sk)
1086 + if (mptcp_next_segment(meta_sk, NULL)) {
1087 + struct tcp_sock *tp = tcp_sk(meta_sk);
1089 + /* We don't care about the MSS, because it will be set in
1090 + * mptcp_write_xmit.
1091 + */
1092 + __tcp_push_pending_frames(meta_sk, 0, tp->nonagle);
1096 +static inline void mptcp_send_reset(struct sock *sk)
1098 + tcp_send_active_reset(sk, GFP_ATOMIC);
1099 + mptcp_sub_force_close(sk);
1102 +static inline int mptcp_is_data_seq(const struct sk_buff *skb)
1104 + return TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_SEQ;
1107 +static inline int mptcp_is_data_fin(const struct sk_buff *skb)
1109 + return mptcp_is_data_seq(skb) &&
1110 + (TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_FIN);
1113 +/* Is it a data-fin while in infinite mapping mode?
1114 + * In infinite mode, a subflow-fin is in fact a data-fin.
1115 + */
1116 +static inline int mptcp_is_data_fin2(const struct sk_buff *skb,
1117 + const struct tcp_sock *tp)
1119 + return mptcp_is_data_fin(skb) ||
1120 + (tp->mpcb->infinite_mapping_rcv && tcp_hdr(skb)->fin);
1123 +static inline void mptcp_skb_entail_init(const struct tcp_sock *tp,
1124 + struct sk_buff *skb)
1126 + TCP_SKB_CB(skb)->mptcp_flags = MPTCPHDR_SEQ;
1129 +static inline u8 mptcp_get_64_bit(u64 data_seq, struct mptcp_cb *mpcb)
1131 + u64 data_seq_high = (u32)(data_seq >> 32);
1133 + if (mpcb->rcv_high_order[0] == data_seq_high)
1134 + return 0;
1135 + else if (mpcb->rcv_high_order[1] == data_seq_high)
1136 + return MPTCPHDR_SEQ64_INDEX;
1137 + else
1138 + return MPTCPHDR_SEQ64_OFO;
1141 +/* Sets the data_seq and returns pointer to the in-skb field of the data_seq.
1142 + * If the packet has a 64-bit dseq, the pointer points to the last 32 bits.
1143 + */
1144 +static inline __u32 *mptcp_skb_set_data_seq(const struct sk_buff *skb,
1145 + u32 *data_seq,
1146 + struct mptcp_cb *mpcb)
1148 + __u32 *ptr = (__u32 *)(skb_transport_header(skb) + TCP_SKB_CB(skb)->dss_off);
1150 + if (TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_SEQ64_SET) {
1151 + u64 data_seq64 = get_unaligned_be64(ptr);
1153 + if (mpcb)
1154 + TCP_SKB_CB(skb)->mptcp_flags |= mptcp_get_64_bit(data_seq64, mpcb);
1156 + *data_seq = (u32)data_seq64;
1157 + ptr++;
1158 + } else {
1159 + *data_seq = get_unaligned_be32(ptr);
1162 + return ptr;
1165 +static inline struct sock *mptcp_meta_sk(const struct sock *sk)
1167 + return tcp_sk(sk)->meta_sk;
1170 +static inline struct tcp_sock *mptcp_meta_tp(const struct tcp_sock *tp)
1172 + return tcp_sk(tp->meta_sk);
1175 +static inline int is_meta_tp(const struct tcp_sock *tp)
1177 + return tp->mpcb && mptcp_meta_tp(tp) == tp;
1180 +static inline int is_meta_sk(const struct sock *sk)
1182 + return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP &&
1183 + tcp_sk(sk)->mpc && mptcp_meta_sk(sk) == sk;
1186 +static inline int is_master_tp(const struct tcp_sock *tp)
1188 + return !tp->mpc || (!tp->mptcp->slave_sk && !is_meta_tp(tp));
1191 +static inline void mptcp_hash_request_remove(struct request_sock *req)
1193 + int in_softirq = 0;
1195 + if (list_empty(&mptcp_rsk(req)->collide_tuple))
1196 + return;
1198 + if (in_softirq()) {
1199 + spin_lock(&mptcp_reqsk_hlock);
1200 + in_softirq = 1;
1201 + } else {
1202 + spin_lock_bh(&mptcp_reqsk_hlock);
1205 + list_del(&mptcp_rsk(req)->collide_tuple);
1207 + if (in_softirq)
1208 + spin_unlock(&mptcp_reqsk_hlock);
1209 + else
1210 + spin_unlock_bh(&mptcp_reqsk_hlock);
1213 +static inline void mptcp_init_mp_opt(struct mptcp_options_received *mopt)
1215 + mopt->saw_mpc = 0;
1216 + mopt->dss_csum = 0;
1217 + mopt->drop_me = 0;
1219 + mopt->is_mp_join = 0;
1220 + mopt->join_ack = 0;
1222 + mopt->saw_low_prio = 0;
1223 + mopt->low_prio = 0;
1225 + mopt->saw_add_addr = 0;
1226 + mopt->more_add_addr = 0;
1228 + mopt->saw_rem_addr = 0;
1229 + mopt->more_rem_addr = 0;
1231 + mopt->mp_fail = 0;
1232 + mopt->mp_fclose = 0;
1235 +static inline void mptcp_reset_mopt(struct tcp_sock *tp)
1237 + struct mptcp_options_received *mopt = &tp->mptcp->rx_opt;
1239 + mopt->saw_low_prio = 0;
1240 + mopt->saw_add_addr = 0;
1241 + mopt->more_add_addr = 0;
1242 + mopt->saw_rem_addr = 0;
1243 + mopt->more_rem_addr = 0;
1244 + mopt->join_ack = 0;
1245 + mopt->mp_fail = 0;
1246 + mopt->mp_fclose = 0;
1249 +static inline __be32 mptcp_get_highorder_sndbits(const struct sk_buff *skb,
1250 + const struct mptcp_cb *mpcb)
1252 + return htonl(mpcb->snd_high_order[(TCP_SKB_CB(skb)->mptcp_flags &
1253 + MPTCPHDR_SEQ64_INDEX) ? 1 : 0]);
1256 +static inline u64 mptcp_get_data_seq_64(const struct mptcp_cb *mpcb, int index,
1257 + u32 data_seq_32)
1259 + return ((u64)mpcb->rcv_high_order[index] << 32) | data_seq_32;
1262 +static inline u64 mptcp_get_rcv_nxt_64(const struct tcp_sock *meta_tp)
1264 + struct mptcp_cb *mpcb = meta_tp->mpcb;
1265 + return mptcp_get_data_seq_64(mpcb, mpcb->rcv_hiseq_index,
1266 + meta_tp->rcv_nxt);
1269 +static inline void mptcp_check_sndseq_wrap(struct tcp_sock *meta_tp, int inc)
1271 + if (unlikely(meta_tp->snd_nxt > meta_tp->snd_nxt + inc)) {
1272 + struct mptcp_cb *mpcb = meta_tp->mpcb;
1273 + mpcb->snd_hiseq_index = mpcb->snd_hiseq_index ? 0 : 1;
1274 + mpcb->snd_high_order[mpcb->snd_hiseq_index] += 2;
1278 +static inline void mptcp_check_rcvseq_wrap(struct tcp_sock *meta_tp,
1279 + u32 old_rcv_nxt)
1281 + if (unlikely(old_rcv_nxt > meta_tp->rcv_nxt)) {
1282 + struct mptcp_cb *mpcb = meta_tp->mpcb;
1283 + mpcb->rcv_high_order[mpcb->rcv_hiseq_index] += 2;
1284 + mpcb->rcv_hiseq_index = mpcb->rcv_hiseq_index ? 0 : 1;
1288 +static inline int mptcp_sk_can_send(const struct sock *sk)
1290 + return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
1291 + !tcp_sk(sk)->mptcp->pre_established;
1294 +static inline int mptcp_sk_can_recv(const struct sock *sk)
1296 + return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCP_FIN_WAIT1 | TCP_FIN_WAIT2);
1299 +static inline int mptcp_sk_can_send_ack(const struct sock *sk)
1301 + return !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV |
1302 + TCPF_CLOSE | TCPF_LISTEN)) &&
1303 + !tcp_sk(sk)->mptcp->pre_established;
1306 +/* Only support GSO if all subflows support it */
1307 +static inline bool mptcp_sk_can_gso(const struct sock *meta_sk)
1309 + struct sock *sk;
1311 + if (tcp_sk(meta_sk)->mpcb->dss_csum)
1312 + return 0;
1314 + mptcp_for_each_sk(tcp_sk(meta_sk)->mpcb, sk) {
1315 + if (!mptcp_sk_can_send(sk))
1316 + continue;
1317 + if (!sk_can_gso(sk))
1318 + return false;
1320 + return true;
1323 +static inline bool mptcp_can_sg(const struct sock *meta_sk)
1325 + struct sock *sk;
1327 + if (tcp_sk(meta_sk)->mpcb->dss_csum)
1328 + return 0;
1330 + mptcp_for_each_sk(tcp_sk(meta_sk)->mpcb, sk) {
1331 + if (!mptcp_sk_can_send(sk))
1332 + continue;
1333 + if (!(sk->sk_route_caps & NETIF_F_SG))
1334 + return false;
1336 + return true;
1339 +static inline void mptcp_set_rto(struct sock *sk)
1341 + struct tcp_sock *tp = tcp_sk(sk);
1342 + struct sock *sk_it;
1343 + struct inet_connection_sock *micsk = inet_csk(mptcp_meta_sk(sk));
1344 + __u32 max_rto = 0;
1346 + /* We are in recovery-phase on the MPTCP-level. Do not update the
1347 + * RTO, because this would kill exponential backoff.
1348 + */
1349 + if (micsk->icsk_retransmits)
1350 + return;
1352 + mptcp_for_each_sk(tp->mpcb, sk_it) {
1353 + if (mptcp_sk_can_send(sk_it) &&
1354 + inet_csk(sk_it)->icsk_rto > max_rto)
1355 + max_rto = inet_csk(sk_it)->icsk_rto;
1357 + if (max_rto) {
1358 + micsk->icsk_rto = max_rto << 1;
1360 + /* A successful rto-measurement - reset backoff counter */
1361 + micsk->icsk_backoff = 0;
1365 +static inline int mptcp_sysctl_syn_retries(void)
1367 + return sysctl_mptcp_syn_retries;
1370 +static inline void mptcp_sub_close_passive(struct sock *sk)
1372 + struct sock *meta_sk = mptcp_meta_sk(sk);
1373 + struct tcp_sock *tp = tcp_sk(sk), *meta_tp = tcp_sk(meta_sk);
1375 + /* Only close, if the app did a send-shutdown (passive close), and we
1376 + * received the data-ack of the data-fin.
1377 + */
1378 + if (tp->mpcb->passive_close && meta_tp->snd_una == meta_tp->write_seq)
1379 + mptcp_sub_close(sk, 0);
1382 +static inline bool mptcp_fallback_infinite(struct sock *sk, int flag)
1384 + struct tcp_sock *tp = tcp_sk(sk);
1386 + /* If data has been acknowledged on the meta-level, fully_established
1387 + * will have been set before and thus we will not fall back to infinite
1388 + * mapping.
1389 + */
1390 + if (likely(tp->mptcp->fully_established))
1391 + return false;
1393 + if (!(flag & MPTCP_FLAG_DATA_ACKED))
1394 + return false;
1396 + /* Don't fallback twice ;) */
1397 + if (tp->mpcb->infinite_mapping_snd)
1398 + return false;
1400 + pr_err("%s %#x will fallback - pi %d, src %pI4 dst %pI4 from %pS\n",
1401 + __func__, tp->mpcb->mptcp_loc_token, tp->mptcp->path_index,
1402 + &inet_sk(sk)->inet_saddr, &inet_sk(sk)->inet_daddr,
1403 + __builtin_return_address(0));
1404 + if (!is_master_tp(tp))
1405 + return true;
1407 + tp->mpcb->infinite_mapping_snd = 1;
1408 + tp->mpcb->infinite_mapping_rcv = 1;
1409 + tp->mptcp->fully_established = 1;
1411 + return false;
1414 +/* Find the first free index in the bitfield */
1415 +static inline int __mptcp_find_free_index(u8 bitfield, int j, u8 base)
1417 + int i;
1418 + mptcp_for_each_bit_unset(bitfield >> base, i) {
1419 + /* We wrapped at the bitfield - try from 0 on */
1420 + if (i + base >= sizeof(bitfield) * 8) {
1421 + mptcp_for_each_bit_unset(bitfield, i) {
1422 + if (i >= sizeof(bitfield) * 8)
1423 + goto exit;
1425 + if (i != j)
1426 + return i;
1428 + goto exit;
1430 + if (i + base >= sizeof(bitfield) * 8)
1431 + break;
1433 + if (i + base != j)
1434 + return i + base;
1436 +exit:
1437 + return -1;
1440 +static inline int mptcp_find_free_index(u8 bitfield)
1442 + return __mptcp_find_free_index(bitfield, -1, 0);
1445 +/* Find the first index whose bit in the bit-field == 0 */
1446 +static inline u8 mptcp_set_new_pathindex(struct mptcp_cb *mpcb)
1448 + u8 base = mpcb->next_path_index;
1449 + int i;
1451 + /* Start at 1, because 0 is reserved for the meta-sk */
1452 + mptcp_for_each_bit_unset(mpcb->path_index_bits >> base, i) {
1453 + if (i + base < 1)
1454 + continue;
1455 + if (i + base >= sizeof(mpcb->path_index_bits) * 8)
1456 + break;
1457 + i += base;
1458 + mpcb->path_index_bits |= (1 << i);
1459 + mpcb->next_path_index = i + 1;
1460 + return i;
1462 + mptcp_for_each_bit_unset(mpcb->path_index_bits, i) {
1463 + if (i >= sizeof(mpcb->path_index_bits) * 8)
1464 + break;
1465 + if (i < 1)
1466 + continue;
1467 + mpcb->path_index_bits |= (1 << i);
1468 + mpcb->next_path_index = i + 1;
1469 + return i;
1472 + return 0;
1475 +static inline int mptcp_v6_is_v4_mapped(struct sock *sk)
1477 + return sk->sk_family == AF_INET6 &&
1478 + ipv6_addr_type(&inet6_sk(sk)->saddr) == IPV6_ADDR_MAPPED;
1481 +/* TCP and MPTCP mpc flag-depending functions */
1482 +u16 mptcp_select_window(struct sock *sk);
1483 +void mptcp_init_buffer_space(struct sock *sk);
1484 +void mptcp_tcp_set_rto(struct sock *sk);
1486 +static inline void set_mpc(struct tcp_sock *tp)
1488 + tp->mpc = 1;
1490 + tp->__select_window = __mptcp_select_window;
1491 + tp->select_window = mptcp_select_window;
1492 + tp->select_initial_window = mptcp_select_initial_window;
1493 + tp->init_buffer_space = mptcp_init_buffer_space;
1494 + tp->set_rto = mptcp_tcp_set_rto;
1495 + tp->should_expand_sndbuf = mptcp_should_expand_sndbuf;
1498 +#else /* CONFIG_MPTCP */
1499 +#define mptcp_debug(fmt, args...) \
1500 + do { \
1501 + } while (0)
1503 +/* Without MPTCP, we just do one iteration
1504 + * over the only socket available. This assumes that
1505 + * the sk/tp arg is the socket in that case.
1506 + */
1507 +#define mptcp_for_each_sk(mpcb, sk)
1508 +#define mptcp_for_each_sk_safe(__mpcb, __sk, __temp)
1510 +static inline int mptcp_is_data_fin(const struct sk_buff *skb)
1512 + return 0;
1514 +static inline int mptcp_is_data_seq(const struct sk_buff *skb)
1516 + return 0;
1518 +static inline struct sock *mptcp_meta_sk(const struct sock *sk)
1520 + return NULL;
1522 +static inline struct tcp_sock *mptcp_meta_tp(const struct tcp_sock *tp)
1524 + return NULL;
1526 +static inline int is_meta_sk(const struct sock *sk)
1528 + return 0;
1530 +static inline int is_master_tp(const struct tcp_sock *tp)
1532 + return 0;
1534 +static inline void mptcp_purge_ofo_queue(struct tcp_sock *meta_tp) {}
1535 +static inline void mptcp_cleanup_rbuf(const struct sock *meta_sk, int copied) {}
1536 +static inline void mptcp_del_sock(const struct sock *sk) {}
1537 +static inline void mptcp_reinject_data(struct sock *orig_sk, int clone_it) {}
1538 +static inline void mptcp_update_sndbuf(const struct mptcp_cb *mpcb) {}
1539 +static inline void mptcp_skb_entail_init(const struct tcp_sock *tp,
1540 + const struct sk_buff *skb) {}
1541 +static inline void mptcp_clean_rtx_infinite(const struct sk_buff *skb,
1542 + const struct sock *sk) {}
1543 +static inline void mptcp_retransmit_timer(const struct sock *meta_sk) {}
1544 +static inline int mptcp_write_wakeup(struct sock *meta_sk)
1546 + return 0;
1548 +static inline void mptcp_sub_close(struct sock *sk, unsigned long delay) {}
1549 +static inline void mptcp_set_rto(const struct sock *sk) {}
1550 +static inline void mptcp_send_fin(const struct sock *meta_sk) {}
1551 +static inline void mptcp_parse_options(const uint8_t *ptr, const int opsize,
1552 + const struct tcp_options_received *opt_rx,
1553 + const struct mptcp_options_received *mopt,
1554 + const struct sk_buff *skb) {}
1555 +static inline void mptcp_syn_options(struct sock *sk,
1556 + struct tcp_out_options *opts,
1557 + unsigned *remaining) {}
1558 +static inline void mptcp_synack_options(struct request_sock *req,
1559 + struct tcp_out_options *opts,
1560 + unsigned *remaining) {}
1562 +static inline void mptcp_established_options(struct sock *sk,
1563 + struct sk_buff *skb,
1564 + struct tcp_out_options *opts,
1565 + unsigned *size) {}
1566 +static inline void mptcp_options_write(__be32 *ptr, struct tcp_sock *tp,
1567 + struct tcp_out_options *opts,
1568 + struct sk_buff *skb) {}
1569 +static inline void mptcp_close(struct sock *meta_sk, long timeout) {}
1570 +static inline int mptcp_doit(struct sock *sk)
1572 + return 0;
1574 +static inline int mptcp_check_req_master(const struct sock *sk,
1575 + const struct sock *child,
1576 + struct request_sock *req,
1577 + struct request_sock **prev,
1578 + const struct mptcp_options_received *mopt)
1580 + return 1;
1582 +static inline struct sock *mptcp_check_req_child(struct sock *sk,
1583 + struct sock *child,
1584 + struct request_sock *req,
1585 + struct request_sock **prev,
1586 + struct mptcp_options_received *mopt)
1588 + return NULL;
1590 +static inline unsigned int mptcp_current_mss(struct sock *meta_sk)
1592 + return 0;
1594 +static inline int mptcp_select_size(const struct sock *meta_sk, bool sg)
1596 + return 0;
1598 +static inline void mptcp_sub_close_passive(struct sock *sk) {}
1599 +static inline bool mptcp_fallback_infinite(const struct sock *sk, int flag)
1601 + return false;
1603 +static inline void mptcp_init_mp_opt(const struct mptcp_options_received *mopt) {}
1604 +static inline int mptcp_check_rtt(const struct tcp_sock *tp, int time)
1606 + return 0;
1608 +static inline int mptcp_check_snd_buf(const struct tcp_sock *tp)
1610 + return 0;
1612 +static inline int mptcp_sysctl_syn_retries(void)
1614 + return 0;
1616 +static inline void mptcp_send_reset(const struct sock *sk) {}
1617 +static inline void mptcp_send_active_reset(struct sock *meta_sk,
1618 + gfp_t priority) {}
1619 +static inline int mptcp_write_xmit(struct sock *sk, unsigned int mss_now,
1620 + int nonagle, int push_one, gfp_t gfp)
1622 + return 0;
1624 +static inline struct sock *mptcp_sk_clone(const struct sock *sk, int family,
1625 + const gfp_t priority)
1627 + return NULL;
1629 +static inline int mptcp_handle_options(struct sock *sk,
1630 + const struct tcphdr *th,
1631 + struct sk_buff *skb)
1633 + return 0;
1635 +static inline void mptcp_reset_mopt(struct tcp_sock *tp) {}
1636 +static inline void __init mptcp_init(void) {}
1637 +static inline int mptcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1639 + return 0;
1641 +static inline int mptcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1642 + unsigned int mss_now, int reinject)
1644 + return 0;
1646 +static inline int mptso_fragment(struct sock *sk, struct sk_buff *skb,
1647 + unsigned int len, unsigned int mss_now,
1648 + gfp_t gfp, int reinject)
1650 + return 0;
1652 +static inline bool mptcp_sk_can_gso(const struct sock *sk)
1654 + return false;
1656 +static inline bool mptcp_can_sg(const struct sock *meta_sk)
1658 + return false;
1660 +static inline unsigned int mptcp_xmit_size_goal(struct sock *meta_sk,
1661 + u32 mss_now, int large_allowed)
1663 + return 0;
1665 +static inline void mptcp_destroy_sock(struct sock *sk) {}
1666 +static inline int mptcp_rcv_synsent_state_process(struct sock *sk,
1667 + struct sock **skptr,
1668 + struct sk_buff *skb,
1669 + struct mptcp_options_received *mopt)
1671 + return 0;
1673 +static inline bool mptcp_can_sendpage(struct sock *sk)
1675 + return false;
1677 +static inline int mptcp_time_wait(struct sock *sk, struct tcp_timewait_sock *tw)
1679 + return 0;
1681 +static inline void mptcp_twsk_destructor(struct tcp_timewait_sock *tw) {}
1682 +static inline void mptcp_update_tw_socks(const struct tcp_sock *tp, int state) {}
1683 +static inline void mptcp_disconnect(struct sock *sk) {}
1684 +static inline void mptcp_tsq_flags(struct sock *sk) {}
1685 +static inline void mptcp_tsq_sub_deferred(struct sock *meta_sk) {}
1686 +static inline void mptcp_hash_remove_bh(struct tcp_sock *meta_tp) {}
1687 +static inline void mptcp_hash_remove(struct tcp_sock *meta_tp) {}
1688 +static inline void mptcp_reqsk_new_mptcp(struct request_sock *req,
1689 + const struct tcp_options_received *rx_opt,
1690 + const struct mptcp_options_received *mopt,
1691 + const struct sk_buff *skb) {}
1692 +static inline void mptcp_remove_shortcuts(const struct mptcp_cb *mpcb,
1693 + const struct sk_buff *skb) {}
1694 +#endif /* CONFIG_MPTCP */
1696 +#endif /* _MPTCP_H */
1697 diff -Nur linux-3.14.45.orig/include/net/mptcp_v4.h linux-3.14.45/include/net/mptcp_v4.h
1698 --- linux-3.14.45.orig/include/net/mptcp_v4.h 1970-01-01 01:00:00.000000000 +0100
1699 +++ linux-3.14.45/include/net/mptcp_v4.h 2015-06-24 14:15:48.871862463 +0200
1700 @@ -0,0 +1,69 @@
1702 + * MPTCP implementation
1704 + * Initial Design & Implementation:
1705 + * Sébastien Barré <sebastien.barre@uclouvain.be>
1707 + * Current Maintainer & Author:
1708 + * Christoph Paasch <christoph.paasch@uclouvain.be>
1710 + * Additional authors:
1711 + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
1712 + * Gregory Detal <gregory.detal@uclouvain.be>
1713 + * Fabien Duchêne <fabien.duchene@uclouvain.be>
1714 + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
1715 + * Lavkesh Lahngir <lavkesh51@gmail.com>
1716 + * Andreas Ripke <ripke@neclab.eu>
1717 + * Vlad Dogaru <vlad.dogaru@intel.com>
1718 + * Octavian Purdila <octavian.purdila@intel.com>
1719 + * John Ronan <jronan@tssg.org>
1720 + * Catalin Nicutar <catalin.nicutar@gmail.com>
1721 + * Brandon Heller <brandonh@stanford.edu>
1724 + * This program is free software; you can redistribute it and/or
1725 + * modify it under the terms of the GNU General Public License
1726 + * as published by the Free Software Foundation; either version
1727 + * 2 of the License, or (at your option) any later version.
1728 + */
1730 +#ifndef MPTCP_V4_H_
1731 +#define MPTCP_V4_H_
1734 +#include <linux/in.h>
1735 +#include <linux/skbuff.h>
1736 +#include <net/mptcp.h>
1737 +#include <net/request_sock.h>
1738 +#include <net/sock.h>
1740 +extern struct request_sock_ops mptcp_request_sock_ops;
1742 +#ifdef CONFIG_MPTCP
1744 +int mptcp_v4_do_rcv(struct sock *meta_sk, struct sk_buff *skb);
1745 +int mptcp_v4_rem_raddress(struct mptcp_cb *mpcb, u8 id);
1746 +int mptcp_v4_add_raddress(struct mptcp_cb *mpcb, const struct in_addr *addr,
1747 + __be16 port, u8 id);
1748 +void mptcp_v4_set_init_addr_bit(struct mptcp_cb *mpcb, __be32 daddr, int index);
1749 +struct sock *mptcp_v4_search_req(const __be16 rport, const __be32 raddr,
1750 + const __be32 laddr, const struct net *net);
1751 +int mptcp_init4_subsockets(struct sock *meta_sk, const struct mptcp_loc4 *loc,
1752 + struct mptcp_rem4 *rem);
1753 +int mptcp_pm_v4_init(void);
1754 +void mptcp_pm_v4_undo(void);
1755 +u32 mptcp_v4_get_nonce(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
1756 + u32 seq);
1757 +u64 mptcp_v4_get_key(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport);
1759 +#else
1761 +static inline int mptcp_v4_do_rcv(const struct sock *meta_sk,
1762 + const struct sk_buff *skb)
1764 + return 0;
1767 +#endif /* CONFIG_MPTCP */
1769 +#endif /* MPTCP_V4_H_ */
1770 diff -Nur linux-3.14.45.orig/include/net/mptcp_v6.h linux-3.14.45/include/net/mptcp_v6.h
1771 --- linux-3.14.45.orig/include/net/mptcp_v6.h 1970-01-01 01:00:00.000000000 +0100
1772 +++ linux-3.14.45/include/net/mptcp_v6.h 2015-06-24 14:15:48.871862463 +0200
1773 @@ -0,0 +1,72 @@
1775 + * MPTCP implementation
1777 + * Initial Design & Implementation:
1778 + * Sébastien Barré <sebastien.barre@uclouvain.be>
1780 + * Current Maintainer & Author:
1781 + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
1783 + * Additional authors:
1784 + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
1785 + * Gregory Detal <gregory.detal@uclouvain.be>
1786 + * Fabien Duchêne <fabien.duchene@uclouvain.be>
1787 + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
1788 + * Lavkesh Lahngir <lavkesh51@gmail.com>
1789 + * Andreas Ripke <ripke@neclab.eu>
1790 + * Vlad Dogaru <vlad.dogaru@intel.com>
1791 + * Octavian Purdila <octavian.purdila@intel.com>
1792 + * John Ronan <jronan@tssg.org>
1793 + * Catalin Nicutar <catalin.nicutar@gmail.com>
1794 + * Brandon Heller <brandonh@stanford.edu>
1797 + * This program is free software; you can redistribute it and/or
1798 + * modify it under the terms of the GNU General Public License
1799 + * as published by the Free Software Foundation; either version
1800 + * 2 of the License, or (at your option) any later version.
1801 + */
1803 +#ifndef _MPTCP_V6_H
1804 +#define _MPTCP_V6_H
1806 +#include <linux/in6.h>
1807 +#include <net/if_inet6.h>
1809 +#include <net/mptcp.h>
1811 +extern struct request_sock_ops mptcp6_request_sock_ops;
1812 +extern struct proto mptcpv6_prot;
1814 +#ifdef CONFIG_MPTCP
1816 +int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb);
1817 +int mptcp_v6_rem_raddress(struct mptcp_cb *mpcb, u8 id);
1818 +int mptcp_v6_add_raddress(struct mptcp_cb *mpcb, const struct in6_addr *addr,
1819 + __be16 port, u8 id);
1820 +void mptcp_v6_set_init_addr_bit(struct mptcp_cb *mpcb,
1821 + const struct in6_addr *daddr, int index);
1822 +struct sock *mptcp_v6_search_req(const __be16 rport, const struct in6_addr *raddr,
1823 + const struct in6_addr *laddr, const struct net *net);
1824 +int mptcp_init6_subsockets(struct sock *meta_sk, const struct mptcp_loc6 *loc,
1825 + struct mptcp_rem6 *rem);
1826 +int mptcp_pm_v6_init(void);
1827 +void mptcp_pm_v6_undo(void);
1828 +struct sock *mptcp_v6v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1829 + struct request_sock *req,
1830 + struct dst_entry *dst);
1831 +__u32 mptcp_v6_get_nonce(const __be32 *saddr, const __be32 *daddr,
1832 + __be16 sport, __be16 dport, u32 seq);
1833 +u64 mptcp_v6_get_key(const __be32 *saddr, const __be32 *daddr,
1834 + __be16 sport, __be16 dport);
1836 +#else /* CONFIG_MPTCP */
1838 +static inline int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb)
1839 +{
1840 +	return 0;
1841 +}
1843 +#endif /* CONFIG_MPTCP */
1845 +#endif /* _MPTCP_V6_H */
1846 diff -Nur linux-3.14.45.orig/include/net/net_namespace.h linux-3.14.45/include/net/net_namespace.h
1847 --- linux-3.14.45.orig/include/net/net_namespace.h 2015-06-23 02:01:36.000000000 +0200
1848 +++ linux-3.14.45/include/net/net_namespace.h 2015-06-24 14:15:48.871862463 +0200
1849 @@ -15,6 +15,7 @@
1850 #include <net/netns/packet.h>
1851 #include <net/netns/ipv4.h>
1852 #include <net/netns/ipv6.h>
1853 +#include <net/netns/mptcp.h>
1854 #include <net/netns/sctp.h>
1855 #include <net/netns/dccp.h>
1856 #include <net/netns/netfilter.h>
1857 @@ -90,6 +91,9 @@
1858 #if IS_ENABLED(CONFIG_IPV6)
1859 struct netns_ipv6 ipv6;
1860 #endif
1861 +#if IS_ENABLED(CONFIG_MPTCP)
1862 + struct netns_mptcp mptcp;
1863 +#endif
1864 #if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE)
1865 struct netns_sctp sctp;
1866 #endif
1867 diff -Nur linux-3.14.45.orig/include/net/netns/mptcp.h linux-3.14.45/include/net/netns/mptcp.h
1868 --- linux-3.14.45.orig/include/net/netns/mptcp.h 1970-01-01 01:00:00.000000000 +0100
1869 +++ linux-3.14.45/include/net/netns/mptcp.h 2015-06-24 14:15:48.871862463 +0200
1870 @@ -0,0 +1,44 @@
1871 +/*
1872 + * MPTCP implementation - MPTCP namespace
1874 + * Initial Design & Implementation:
1875 + * Sébastien Barré <sebastien.barre@uclouvain.be>
1877 + * Current Maintainer:
1878 + * Christoph Paasch <christoph.paasch@uclouvain.be>
1880 + * Additional authors:
1881 + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
1882 + * Gregory Detal <gregory.detal@uclouvain.be>
1883 + * Fabien Duchêne <fabien.duchene@uclouvain.be>
1884 + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
1885 + * Lavkesh Lahngir <lavkesh51@gmail.com>
1886 + * Andreas Ripke <ripke@neclab.eu>
1887 + * Vlad Dogaru <vlad.dogaru@intel.com>
1888 + * Octavian Purdila <octavian.purdila@intel.com>
1889 + * John Ronan <jronan@tssg.org>
1890 + * Catalin Nicutar <catalin.nicutar@gmail.com>
1891 + * Brandon Heller <brandonh@stanford.edu>
1894 + * This program is free software; you can redistribute it and/or
1895 + * modify it under the terms of the GNU General Public License
1896 + * as published by the Free Software Foundation; either version
1897 + * 2 of the License, or (at your option) any later version.
1898 + */
1900 +#ifndef __NETNS_MPTCP_H__
1901 +#define __NETNS_MPTCP_H__
1903 +#include <linux/compiler.h>
1905 +enum {
1906 + MPTCP_PM_FULLMESH = 0,
1907 +	MPTCP_PM_MAX
1908 +};
1910 +struct netns_mptcp {
1911 +	void *path_managers[MPTCP_PM_MAX];
1912 +};
1914 +#endif /* __NETNS_MPTCP_H__ */
1915 diff -Nur linux-3.14.45.orig/include/net/request_sock.h linux-3.14.45/include/net/request_sock.h
1916 --- linux-3.14.45.orig/include/net/request_sock.h 2015-06-23 02:01:36.000000000 +0200
1917 +++ linux-3.14.45/include/net/request_sock.h 2015-06-24 14:15:48.871862463 +0200
1918 @@ -164,7 +164,7 @@
1921 int reqsk_queue_alloc(struct request_sock_queue *queue,
1922 - unsigned int nr_table_entries);
1923 + unsigned int nr_table_entries, gfp_t flags);
1925 void __reqsk_queue_destroy(struct request_sock_queue *queue);
1926 void reqsk_queue_destroy(struct request_sock_queue *queue);
1927 diff -Nur linux-3.14.45.orig/include/net/sock.h linux-3.14.45/include/net/sock.h
1928 --- linux-3.14.45.orig/include/net/sock.h 2015-06-23 02:01:36.000000000 +0200
1929 +++ linux-3.14.45/include/net/sock.h 2015-06-24 14:15:48.871862463 +0200
1930 @@ -899,6 +899,16 @@
1932 int sk_wait_data(struct sock *sk, long *timeo);
1934 +/* START - needed for MPTCP */
1935 +extern void sock_def_error_report(struct sock *sk);
1936 +extern struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1937 + int family);
1938 +extern void sock_lock_init(struct sock *sk);
1940 +extern struct lock_class_key af_callback_keys[AF_MAX];
1941 +extern char *const af_family_clock_key_strings[AF_MAX+1];
1942 +/* END - needed for MPTCP */
1944 struct request_sock_ops;
1945 struct timewait_sock_ops;
1946 struct inet_hashinfo;
1947 diff -Nur linux-3.14.45.orig/include/net/tcp.h linux-3.14.45/include/net/tcp.h
1948 --- linux-3.14.45.orig/include/net/tcp.h 2015-06-23 02:01:36.000000000 +0200
1949 +++ linux-3.14.45/include/net/tcp.h 2015-06-24 14:15:48.875862469 +0200
1950 @@ -176,6 +176,7 @@
1951 #define TCPOPT_SACK 5 /* SACK Block */
1952 #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
1953 #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
1954 +#define TCPOPT_MPTCP 30
1955 #define TCPOPT_EXP 254 /* Experimental */
1956 /* Magic number to be after the option value for sharing TCP
1957 * experimental options. See draft-ietf-tcpm-experimental-options-00.txt
1958 @@ -234,6 +235,27 @@
1960 #define TFO_SERVER_ALWAYS 0x1000
1962 +/* Flags from tcp_input.c for tcp_ack */
1963 +#define FLAG_DATA 0x01 /* Incoming frame contained data. */
1964 +#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
1965 +#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */
1966 +#define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */
1967 +#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */
1968 +#define FLAG_DATA_SACKED 0x20 /* New SACK. */
1969 +#define FLAG_ECE 0x40 /* ECE in this ACK */
1970 +#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
1971 +#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
1972 +#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
1973 +#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
1974 +#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
1975 +#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
1976 +#define MPTCP_FLAG_DATA_ACKED 0x8000
1978 +#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
1979 +#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
1980 +#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
1981 +#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
1983 extern struct inet_timewait_death_row tcp_death_row;
1985 /* sysctl variables for tcp */
1986 @@ -349,6 +371,112 @@
1987 #define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val)
1988 #define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
1990 +/**** START - Exports needed for MPTCP ****/
1991 +extern const struct inet_connection_sock_af_ops ipv4_specific;
1992 +extern const struct inet_connection_sock_af_ops ipv6_specific;
1993 +extern const struct inet_connection_sock_af_ops ipv6_mapped;
1994 +extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
1995 +extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops;
1997 +struct mptcp_options_received;
1999 +int tcp_close_state(struct sock *sk);
2000 +void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle, int
2001 + size_goal);
2002 +void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
2003 + const struct sk_buff *skb);
2004 +int tcp_xmit_probe_skb(struct sock *sk, int urgent);
2005 +void tcp_cwnd_validate(struct sock *sk);
2006 +void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb);
2007 +int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
2008 + gfp_t gfp_mask);
2009 +unsigned int tcp_mss_split_point(const struct sock *sk,
2010 + const struct sk_buff *skb,
2011 + unsigned int mss_now,
2012 + unsigned int max_segs,
2013 + int nonagle);
2014 +bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb);
2015 +bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
2016 + unsigned int cur_mss, int nonagle);
2017 +bool tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb,
2018 + unsigned int cur_mss);
2019 +unsigned int tcp_cwnd_test(const struct tcp_sock *tp, const struct sk_buff *skb);
2020 +int tcp_mtu_probe(struct sock *sk);
2021 +int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
2022 + unsigned int mss_now);
2023 +void __pskb_trim_head(struct sk_buff *skb, int len);
2024 +void tcp_queue_skb(struct sock *sk, struct sk_buff *skb);
2025 +void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags);
2026 +void tcp_reset(struct sock *sk);
2027 +bool tcp_may_update_window(const struct tcp_sock *tp, const u32 ack,
2028 + const u32 ack_seq, const u32 nwin);
2029 +bool tcp_urg_mode(const struct tcp_sock *tp);
2030 +void tcp_ack_probe(struct sock *sk);
2031 +void tcp_rearm_rto(struct sock *sk);
2032 +int tcp_write_timeout(struct sock *sk);
2033 +bool retransmits_timed_out(struct sock *sk, unsigned int boundary,
2034 + unsigned int timeout, bool syn_set);
2035 +void tcp_write_err(struct sock *sk);
2036 +void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr);
2037 +void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
2038 + unsigned int mss_now);
2040 +int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req);
2041 +void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
2042 + struct request_sock *req);
2043 +__u32 tcp_v4_init_sequence(const struct sk_buff *skb);
2044 +int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
2045 + struct request_sock *req,
2046 + u16 queue_mapping);
2047 +void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb);
2048 +struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb);
2049 +struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb);
2050 +void tcp_v4_reqsk_destructor(struct request_sock *req);
2052 +int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req);
2053 +void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
2054 + struct request_sock *req);
2055 +__u32 tcp_v6_init_sequence(const struct sk_buff *skb);
2056 +int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
2057 + struct flowi6 *fl6, struct request_sock *req,
2058 + u16 queue_mapping);
2059 +void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
2060 +int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
2061 +int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
2062 +void tcp_v6_destroy_sock(struct sock *sk);
2063 +void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);
2064 +void tcp_v6_hash(struct sock *sk);
2065 +struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb);
2066 +struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
2067 + struct request_sock *req,
2068 + struct dst_entry *dst);
2069 +void tcp_v6_reqsk_destructor(struct request_sock *req);
2071 +void sock_valbool_flag(struct sock *sk, int bit, int valbool);
2072 +unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
2073 + int large_allowed);
2074 +u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb);
2076 +void skb_clone_fraglist(struct sk_buff *skb);
2077 +void copy_skb_header(struct sk_buff *new, const struct sk_buff *old);
2079 +void inet_twsk_free(struct inet_timewait_sock *tw);
2080 +/* These states need RST on ABORT according to RFC793 */
2081 +static inline bool tcp_need_reset(int state)
2082 +{
2083 +	return (1 << state) &
2084 +		(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
2085 +		 TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
2086 +}
2088 +bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
2089 + int hlen);
2090 +int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
2091 + bool *fragstolen);
2092 +bool tcp_try_coalesce(struct sock *sk, struct sk_buff *to,
2093 + struct sk_buff *from, bool *fragstolen);
2094 +/**** END - Exports needed for MPTCP ****/
2096 void tcp_tasklet_init(void);
2098 void tcp_v4_err(struct sk_buff *skb, u32);
2099 @@ -445,6 +573,7 @@
2100 size_t len, int nonblock, int flags, int *addr_len);
2101 void tcp_parse_options(const struct sk_buff *skb,
2102 struct tcp_options_received *opt_rx,
2103 + struct mptcp_options_received *mopt_rx,
2104 int estab, struct tcp_fastopen_cookie *foc);
2105 const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
2107 @@ -558,11 +687,15 @@
2108 void tcp_send_loss_probe(struct sock *sk);
2109 bool tcp_schedule_loss_probe(struct sock *sk);
2111 +u16 tcp_select_window(struct sock *sk);
2113 /* tcp_input.c */
2114 void tcp_cwnd_application_limited(struct sock *sk);
2115 void tcp_resume_early_retransmit(struct sock *sk);
2116 void tcp_rearm_rto(struct sock *sk);
2117 void tcp_reset(struct sock *sk);
2118 +void tcp_set_rto(struct sock *sk);
2119 +bool tcp_should_expand_sndbuf(const struct sock *sk);
2121 /* tcp_timer.c */
2122 void tcp_init_xmit_timers(struct sock *);
2123 @@ -706,14 +839,24 @@
2125 struct tcp_skb_cb {
2126 union {
2127 - struct inet_skb_parm h4;
2128 + union {
2129 + struct inet_skb_parm h4;
2130 #if IS_ENABLED(CONFIG_IPV6)
2131 - struct inet6_skb_parm h6;
2132 + struct inet6_skb_parm h6;
2133 +#endif
2134 + } header; /* For incoming frames */
2135 +#ifdef CONFIG_MPTCP
2136 + __u32 path_mask; /* path indices that tried to send this skb */
2137 #endif
2138 - } header; /* For incoming frames */
2139 + };
2140 __u32 seq; /* Starting sequence number */
2141 __u32 end_seq; /* SEQ + FIN + SYN + datalen */
2142 __u32 when; /* used to compute rtt's */
2143 +#ifdef CONFIG_MPTCP
2144 + __u8 mptcp_flags; /* flags for the MPTCP layer */
2145 + __u8 dss_off; /* Number of 4-byte words until
2146 + * seq-number */
2147 +#endif
2148 __u8 tcp_flags; /* TCP header flags. (tcp[13]) */
2150 __u8 sacked; /* State flags for SACK/FACK. */
2151 @@ -1061,7 +1204,8 @@
2152 /* Determine a window scaling and initial window to offer. */
2153 void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd,
2154 __u32 *window_clamp, int wscale_ok,
2155 - __u8 *rcv_wscale, __u32 init_rcv_wnd);
2156 + __u8 *rcv_wscale, __u32 init_rcv_wnd,
2157 + const struct sock *sk);
2159 static inline int tcp_win_from_space(int space)
2161 @@ -1073,12 +1217,18 @@
2162 /* Note: caller must be prepared to deal with negative returns */
2163 static inline int tcp_space(const struct sock *sk)
2164 {
2165 +	if (tcp_sk(sk)->mpc)
2166 +		sk = tcp_sk(sk)->meta_sk;
2167 +
2168 	return tcp_win_from_space(sk->sk_rcvbuf -
2169 				  atomic_read(&sk->sk_rmem_alloc));
2170 }
2171 
2172 static inline int tcp_full_space(const struct sock *sk)
2173 {
2174 +	if (tcp_sk(sk)->mpc)
2175 +		sk = tcp_sk(sk)->meta_sk;
2176 +
2177 	return tcp_win_from_space(sk->sk_rcvbuf);
2178 }
2180 @@ -1093,6 +1243,7 @@
2181 tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
2182 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
2183 tcp_rsk(req)->snt_synack = 0;
2184 + tcp_rsk(req)->saw_mpc = 0;
2185 req->mss = rx_opt->mss_clamp;
2186 req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
2187 ireq->tstamp_ok = rx_opt->tstamp_ok;
2188 diff -Nur linux-3.14.45.orig/include/uapi/linux/if.h linux-3.14.45/include/uapi/linux/if.h
2189 --- linux-3.14.45.orig/include/uapi/linux/if.h 2015-06-23 02:01:36.000000000 +0200
2190 +++ linux-3.14.45/include/uapi/linux/if.h 2015-06-24 14:15:48.875862469 +0200
2191 @@ -53,6 +53,9 @@
2193 #define IFF_ECHO 0x40000 /* echo sent packets */
2195 +#define IFF_NOMULTIPATH 0x80000 /* Disable for MPTCP */
2196 +#define IFF_MPBACKUP 0x100000 /* Use as backup path for MPTCP */
2198 #define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\
2199 IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT)
2201 diff -Nur linux-3.14.45.orig/include/uapi/linux/tcp.h linux-3.14.45/include/uapi/linux/tcp.h
2202 --- linux-3.14.45.orig/include/uapi/linux/tcp.h 2015-06-23 02:01:36.000000000 +0200
2203 +++ linux-3.14.45/include/uapi/linux/tcp.h 2015-06-24 14:15:48.875862469 +0200
2204 @@ -112,6 +112,7 @@
2205 #define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */
2206 #define TCP_TIMESTAMP 24
2207 #define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */
2208 +#define MPTCP_ENABLED 26
2210 struct tcp_repair_opt {
2211 __u32 opt_code;
2212 diff -Nur linux-3.14.45.orig/net/Kconfig linux-3.14.45/net/Kconfig
2213 --- linux-3.14.45.orig/net/Kconfig 2015-06-23 02:01:36.000000000 +0200
2214 +++ linux-3.14.45/net/Kconfig 2015-06-24 14:15:48.875862469 +0200
2215 @@ -79,6 +79,7 @@
2216 source "net/ipv4/Kconfig"
2217 source "net/ipv6/Kconfig"
2218 source "net/netlabel/Kconfig"
2219 +source "net/mptcp/Kconfig"
2221 endif # if INET
2223 diff -Nur linux-3.14.45.orig/net/Makefile linux-3.14.45/net/Makefile
2224 --- linux-3.14.45.orig/net/Makefile 2015-06-23 02:01:36.000000000 +0200
2225 +++ linux-3.14.45/net/Makefile 2015-06-24 14:15:48.875862469 +0200
2226 @@ -20,6 +20,7 @@
2227 obj-$(CONFIG_XFRM) += xfrm/
2228 obj-$(CONFIG_UNIX) += unix/
2229 obj-$(CONFIG_NET) += ipv6/
2230 +obj-$(CONFIG_MPTCP) += mptcp/
2231 obj-$(CONFIG_PACKET) += packet/
2232 obj-$(CONFIG_NET_KEY) += key/
2233 obj-$(CONFIG_BRIDGE) += bridge/
2234 diff -Nur linux-3.14.45.orig/net/core/dev.c linux-3.14.45/net/core/dev.c
2235 --- linux-3.14.45.orig/net/core/dev.c 2015-06-23 02:01:36.000000000 +0200
2236 +++ linux-3.14.45/net/core/dev.c 2015-06-24 14:15:48.875862469 +0200
2237 @@ -5399,7 +5399,7 @@
2239 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2240 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2241 - IFF_AUTOMEDIA)) |
2242 + IFF_AUTOMEDIA | IFF_NOMULTIPATH | IFF_MPBACKUP)) |
2243 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2244 IFF_ALLMULTI));
2246 diff -Nur linux-3.14.45.orig/net/core/request_sock.c linux-3.14.45/net/core/request_sock.c
2247 --- linux-3.14.45.orig/net/core/request_sock.c 2015-06-23 02:01:36.000000000 +0200
2248 +++ linux-3.14.45/net/core/request_sock.c 2015-06-24 14:15:48.875862469 +0200
2249 @@ -38,7 +38,8 @@
2250 EXPORT_SYMBOL(sysctl_max_syn_backlog);
2252 int reqsk_queue_alloc(struct request_sock_queue *queue,
2253 - unsigned int nr_table_entries)
2254 + unsigned int nr_table_entries,
2255 + gfp_t flags)
2257 size_t lopt_size = sizeof(struct listen_sock);
2258 struct listen_sock *lopt;
2259 @@ -48,9 +49,11 @@
2260 nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
2261 lopt_size += nr_table_entries * sizeof(struct request_sock *);
2262 if (lopt_size > PAGE_SIZE)
2263 - lopt = vzalloc(lopt_size);
2264 + lopt = __vmalloc(lopt_size,
2265 + flags | __GFP_HIGHMEM | __GFP_ZERO,
2266 + PAGE_KERNEL);
2267 else
2268 - lopt = kzalloc(lopt_size, GFP_KERNEL);
2269 + lopt = kzalloc(lopt_size, flags);
2270 if (lopt == NULL)
2271 return -ENOMEM;
2273 diff -Nur linux-3.14.45.orig/net/core/skbuff.c linux-3.14.45/net/core/skbuff.c
2274 --- linux-3.14.45.orig/net/core/skbuff.c 2015-06-23 02:01:36.000000000 +0200
2275 +++ linux-3.14.45/net/core/skbuff.c 2015-06-24 14:15:48.875862469 +0200
2276 @@ -491,7 +491,7 @@
2277 skb_drop_list(&skb_shinfo(skb)->frag_list);
2280 -static void skb_clone_fraglist(struct sk_buff *skb)
2281 +void skb_clone_fraglist(struct sk_buff *skb)
2283 struct sk_buff *list;
2285 @@ -913,7 +913,7 @@
2286 skb->inner_mac_header += off;
2289 -static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
2290 +void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
2292 __copy_skb_header(new, old);
2294 diff -Nur linux-3.14.45.orig/net/core/sock.c linux-3.14.45/net/core/sock.c
2295 --- linux-3.14.45.orig/net/core/sock.c 2015-06-23 02:01:36.000000000 +0200
2296 +++ linux-3.14.45/net/core/sock.c 2015-06-24 14:15:48.875862469 +0200
2297 @@ -280,7 +280,7 @@
2298 "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
2299 "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX"
2301 -static const char *const af_family_clock_key_strings[AF_MAX+1] = {
2302 +char *const af_family_clock_key_strings[AF_MAX+1] = {
2303 "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
2304 "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK",
2305 "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" ,
2306 @@ -301,7 +301,7 @@
2307 * sk_callback_lock locking rules are per-address-family,
2308 * so split the lock classes by using a per-AF key:
2310 -static struct lock_class_key af_callback_keys[AF_MAX];
2311 +struct lock_class_key af_callback_keys[AF_MAX];
2313 /* Take into consideration the size of the struct sk_buff overhead in the
2314 * determination of these values, since that is non-constant across
2315 @@ -651,7 +651,7 @@
2316 return ret;
2319 -static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
2320 +void sock_valbool_flag(struct sock *sk, int bit, int valbool)
2322 if (valbool)
2323 sock_set_flag(sk, bit);
2324 @@ -1272,7 +1272,7 @@
2326 * (We also register the sk_lock with the lock validator.)
2328 -static inline void sock_lock_init(struct sock *sk)
2329 +void sock_lock_init(struct sock *sk)
2331 sock_lock_init_class_and_name(sk,
2332 af_family_slock_key_strings[sk->sk_family],
2333 @@ -1320,7 +1320,7 @@
2335 EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
2337 -static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
2338 +struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
2339 int family)
2341 struct sock *sk;
2342 @@ -2252,7 +2252,7 @@
2343 rcu_read_unlock();
2346 -static void sock_def_error_report(struct sock *sk)
2347 +void sock_def_error_report(struct sock *sk)
2349 struct socket_wq *wq;
2351 diff -Nur linux-3.14.45.orig/net/ipv4/Kconfig linux-3.14.45/net/ipv4/Kconfig
2352 --- linux-3.14.45.orig/net/ipv4/Kconfig 2015-06-23 02:01:36.000000000 +0200
2353 +++ linux-3.14.45/net/ipv4/Kconfig 2015-06-24 14:15:48.875862469 +0200
2354 @@ -556,6 +556,30 @@
2355 For further details see:
2356 http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
2358 +config TCP_CONG_COUPLED
2359 + tristate "MPTCP COUPLED CONGESTION CONTROL"
2360 + depends on MPTCP
2361 + default n
2362 + ---help---
2363 + MultiPath TCP Coupled Congestion Control
2364 + To enable it, just put 'coupled' in tcp_congestion_control
2366 +config TCP_CONG_OLIA
2367 + tristate "MPTCP Opportunistic Linked Increase"
2368 + depends on MPTCP
2369 + default n
2370 + ---help---
2371 + MultiPath TCP Opportunistic Linked Increase Congestion Control
2372 + To enable it, just put 'olia' in tcp_congestion_control
2374 +config TCP_CONG_WVEGAS
2375 + tristate "MPTCP WVEGAS CONGESTION CONTROL"
2376 + depends on MPTCP
2377 + default n
2378 + ---help---
2379 + wVegas congestion control for MPTCP
2380 + To enable it, just put 'wvegas' in tcp_congestion_control
2382 choice
2383 prompt "Default TCP congestion control"
2384 default DEFAULT_CUBIC
2385 @@ -584,6 +608,15 @@
2386 config DEFAULT_WESTWOOD
2387 bool "Westwood" if TCP_CONG_WESTWOOD=y
2389 + config DEFAULT_COUPLED
2390 + bool "Coupled" if TCP_CONG_COUPLED=y
2392 + config DEFAULT_OLIA
2393 + bool "Olia" if TCP_CONG_OLIA=y
2395 + config DEFAULT_WVEGAS
2396 + bool "Wvegas" if TCP_CONG_WVEGAS=y
2398 config DEFAULT_RENO
2399 bool "Reno"
2401 @@ -605,6 +638,8 @@
2402 default "vegas" if DEFAULT_VEGAS
2403 default "westwood" if DEFAULT_WESTWOOD
2404 default "veno" if DEFAULT_VENO
2405 + default "coupled" if DEFAULT_COUPLED
2406 + default "wvegas" if DEFAULT_WVEGAS
2407 default "reno" if DEFAULT_RENO
2408 default "cubic"
2410 diff -Nur linux-3.14.45.orig/net/ipv4/af_inet.c linux-3.14.45/net/ipv4/af_inet.c
2411 --- linux-3.14.45.orig/net/ipv4/af_inet.c 2015-06-23 02:01:36.000000000 +0200
2412 +++ linux-3.14.45/net/ipv4/af_inet.c 2015-06-24 14:15:48.875862469 +0200
2413 @@ -104,6 +104,7 @@
2414 #include <net/ip_fib.h>
2415 #include <net/inet_connection_sock.h>
2416 #include <net/tcp.h>
2417 +#include <net/mptcp.h>
2418 #include <net/udp.h>
2419 #include <net/udplite.h>
2420 #include <net/ping.h>
2421 @@ -246,8 +247,7 @@
2422 * Create an inet socket.
2425 -static int inet_create(struct net *net, struct socket *sock, int protocol,
2426 - int kern)
2427 +int inet_create(struct net *net, struct socket *sock, int protocol, int kern)
2429 struct sock *sk;
2430 struct inet_protosw *answer;
2431 @@ -679,6 +679,23 @@
2432 lock_sock(sk2);
2434 sock_rps_record_flow(sk2);
2436 + if (sk2->sk_protocol == IPPROTO_TCP && tcp_sk(sk2)->mpc) {
2437 + struct sock *sk_it = sk2;
2439 + mptcp_for_each_sk(tcp_sk(sk2)->mpcb, sk_it)
2440 + sock_rps_record_flow(sk_it);
2442 + if (tcp_sk(sk2)->mpcb->master_sk) {
2443 + sk_it = tcp_sk(sk2)->mpcb->master_sk;
2445 + write_lock_bh(&sk_it->sk_callback_lock);
2446 + sk_it->sk_wq = newsock->wq;
2447 + sk_it->sk_socket = newsock;
2448 +			write_unlock_bh(&sk_it->sk_callback_lock);
2449 +		}
2450 +	}
2451 +
2452 WARN_ON(!((1 << sk2->sk_state) &
2453 (TCPF_ESTABLISHED | TCPF_SYN_RECV |
2454 TCPF_CLOSE_WAIT | TCPF_CLOSE)));
2455 @@ -1770,6 +1787,9 @@
2457 ip_init();
2459 + /* We must initialize MPTCP before TCP. */
2460 + mptcp_init();
2462 tcp_v4_init();
2464 /* Setup TCP slab cache for open requests. */
2465 diff -Nur linux-3.14.45.orig/net/ipv4/inet_connection_sock.c linux-3.14.45/net/ipv4/inet_connection_sock.c
2466 --- linux-3.14.45.orig/net/ipv4/inet_connection_sock.c 2015-06-23 02:01:36.000000000 +0200
2467 +++ linux-3.14.45/net/ipv4/inet_connection_sock.c 2015-06-24 14:15:48.875862469 +0200
2468 @@ -23,6 +23,7 @@
2469 #include <net/route.h>
2470 #include <net/tcp_states.h>
2471 #include <net/xfrm.h>
2472 +#include <net/mptcp.h>
2474 #ifdef INET_CSK_DEBUG
2475 const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
2476 @@ -468,8 +469,8 @@
2478 EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
2480 -static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
2481 - const u32 rnd, const u32 synq_hsize)
2482 +u32 inet_synq_hash(const __be32 raddr, const __be16 rport, const u32 rnd,
2483 + const u32 synq_hsize)
2485 return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1);
2487 @@ -667,7 +668,12 @@
2488 const struct request_sock *req,
2489 const gfp_t priority)
2491 - struct sock *newsk = sk_clone_lock(sk, priority);
2492 + struct sock *newsk;
2494 + if (sk->sk_protocol == IPPROTO_TCP && tcp_sk(sk)->mpc)
2495 + newsk = mptcp_sk_clone(sk, req->rsk_ops->family, priority);
2496 + else
2497 + newsk = sk_clone_lock(sk, priority);
2499 if (newsk != NULL) {
2500 struct inet_connection_sock *newicsk = inet_csk(newsk);
2501 @@ -744,7 +750,8 @@
2503 struct inet_sock *inet = inet_sk(sk);
2504 struct inet_connection_sock *icsk = inet_csk(sk);
2505 - int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
2506 + int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries,
2507 + GFP_KERNEL);
2509 if (rc != 0)
2510 return rc;
2511 @@ -802,9 +809,14 @@
2513 while ((req = acc_req) != NULL) {
2514 struct sock *child = req->sk;
2515 + bool mutex_taken = false;
2517 acc_req = req->dl_next;
2519 + if (is_meta_sk(child)) {
2520 + mutex_lock(&tcp_sk(child)->mpcb->mpcb_mutex);
2521 +			mutex_taken = true;
2522 +		}
2523 local_bh_disable();
2524 bh_lock_sock(child);
2525 WARN_ON(sock_owned_by_user(child));
2526 @@ -833,6 +845,8 @@
2528 bh_unlock_sock(child);
2529 local_bh_enable();
2530 + if (mutex_taken)
2531 + mutex_unlock(&tcp_sk(child)->mpcb->mpcb_mutex);
2532 sock_put(child);
2534 sk_acceptq_removed(sk);
2535 diff -Nur linux-3.14.45.orig/net/ipv4/syncookies.c linux-3.14.45/net/ipv4/syncookies.c
2536 --- linux-3.14.45.orig/net/ipv4/syncookies.c 2015-06-23 02:01:36.000000000 +0200
2537 +++ linux-3.14.45/net/ipv4/syncookies.c 2015-06-24 14:15:48.875862469 +0200
2538 @@ -284,7 +284,7 @@
2540 /* check for timestamp cookie support */
2541 memset(&tcp_opt, 0, sizeof(tcp_opt));
2542 - tcp_parse_options(skb, &tcp_opt, 0, NULL);
2543 + tcp_parse_options(skb, &tcp_opt, NULL, 0, NULL);
2545 if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
2546 goto out;
2547 @@ -354,10 +354,10 @@
2548 /* Try to redo what tcp_v4_send_synack did. */
2549 req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
2551 - tcp_select_initial_window(tcp_full_space(sk), req->mss,
2552 + tp->select_initial_window(tcp_full_space(sk), req->mss,
2553 &req->rcv_wnd, &req->window_clamp,
2554 ireq->wscale_ok, &rcv_wscale,
2555 - dst_metric(&rt->dst, RTAX_INITRWND));
2556 + dst_metric(&rt->dst, RTAX_INITRWND), sk);
2558 ireq->rcv_wscale = rcv_wscale;
2560 diff -Nur linux-3.14.45.orig/net/ipv4/tcp.c linux-3.14.45/net/ipv4/tcp.c
2561 --- linux-3.14.45.orig/net/ipv4/tcp.c 2015-06-23 02:01:36.000000000 +0200
2562 +++ linux-3.14.45/net/ipv4/tcp.c 2015-06-24 14:15:48.879862472 +0200
2563 @@ -271,6 +271,7 @@
2565 #include <net/icmp.h>
2566 #include <net/inet_common.h>
2567 +#include <net/mptcp.h>
2568 #include <net/tcp.h>
2569 #include <net/xfrm.h>
2570 #include <net/ip.h>
2571 @@ -419,6 +420,9 @@
2572 sk->sk_sndbuf = sysctl_tcp_wmem[1];
2573 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2575 + /* Set function pointers in tcp_sock to tcp functions. */
2576 + mptcp_init_tcp_sock(tp);
2578 local_bh_disable();
2579 sock_update_memcg(sk);
2580 sk_sockets_allocated_inc(sk);
2581 @@ -607,6 +611,8 @@
2582 tcb->seq = tcb->end_seq = tp->write_seq;
2583 tcb->tcp_flags = TCPHDR_ACK;
2584 tcb->sacked = 0;
2585 + if (tp->mpc)
2586 + mptcp_skb_entail_init(tp, skb);
2587 skb_header_release(skb);
2588 tcp_add_write_queue_tail(sk, skb);
2589 sk->sk_wmem_queued += skb->truesize;
2590 @@ -640,8 +646,8 @@
2591 atomic_read(&sk->sk_wmem_alloc) > skb->truesize;
2594 -static void tcp_push(struct sock *sk, int flags, int mss_now,
2595 - int nonagle, int size_goal)
2596 +void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
2597 + int size_goal)
2599 struct tcp_sock *tp = tcp_sk(sk);
2600 struct sk_buff *skb;
2601 @@ -726,6 +732,14 @@
2602 int ret;
2604 sock_rps_record_flow(sk);
2606 +#ifdef CONFIG_MPTCP
2607 + if (tcp_sk(sk)->mpc) {
2608 + struct sock *sk_it;
2609 + mptcp_for_each_sk(tcp_sk(sk)->mpcb, sk_it)
2610 +			sock_rps_record_flow(sk_it);
2611 +	}
2612 +#endif
2614 * We can't seek on a socket input
2616 @@ -821,8 +835,7 @@
2617 return NULL;
2620 -static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
2621 - int large_allowed)
2622 +unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, int large_allowed)
2624 struct tcp_sock *tp = tcp_sk(sk);
2625 u32 xmit_size_goal, old_size_goal;
2626 @@ -872,8 +885,13 @@
2628 int mss_now;
2630 - mss_now = tcp_current_mss(sk);
2631 - *size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
2632 + if (tcp_sk(sk)->mpc) {
2633 + mss_now = mptcp_current_mss(sk);
2634 + *size_goal = mptcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
2635 + } else {
2636 + mss_now = tcp_current_mss(sk);
2637 +		*size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
2638 +	}
2640 return mss_now;
2642 @@ -897,6 +915,26 @@
2643 goto out_err;
2646 + if (tp->mpc) {
2647 + struct sock *sk_it = sk;
2649 +		/* We must check this with the socket lock held because we iterate
2650 + * over the subflows.
2651 + */
2652 + if (!mptcp_can_sendpage(sk)) {
2653 + ssize_t ret;
2655 + release_sock(sk);
2656 + ret = sock_no_sendpage(sk->sk_socket, page, offset,
2657 + size, flags);
2658 + lock_sock(sk);
2659 +			return ret;
2660 +		}
2661 +
2662 + mptcp_for_each_sk(tp->mpcb, sk_it)
2663 +			sock_rps_record_flow(sk_it);
2664 +	}
2665 +
2666 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2668 mss_now = tcp_send_mss(sk, &size_goal, flags);
2669 @@ -1001,8 +1039,9 @@
2671 ssize_t res;
2673 - if (!(sk->sk_route_caps & NETIF_F_SG) ||
2674 - !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
2675 + /* If MPTCP is enabled, we check it later after establishment */
2676 + if (!tcp_sk(sk)->mpc && (!(sk->sk_route_caps & NETIF_F_SG) ||
2677 + !(sk->sk_route_caps & NETIF_F_ALL_CSUM)))
2678 return sock_no_sendpage(sk->sk_socket, page, offset, size,
2679 flags);
2681 @@ -1018,6 +1057,9 @@
2682 const struct tcp_sock *tp = tcp_sk(sk);
2683 int tmp = tp->mss_cache;
2685 + if (tp->mpc)
2686 + return mptcp_select_size(sk, sg);
2688 if (sg) {
2689 if (sk_can_gso(sk)) {
2690 /* Small frames wont use a full page:
2691 @@ -1105,6 +1147,12 @@
2692 goto do_error;
2695 + if (tp->mpc) {
2696 + struct sock *sk_it = sk;
2697 + mptcp_for_each_sk(tp->mpcb, sk_it)
2698 +			sock_rps_record_flow(sk_it);
2699 +	}
2700 +
2701 if (unlikely(tp->repair)) {
2702 if (tp->repair_queue == TCP_RECV_QUEUE) {
2703 copied = tcp_send_rcvq(sk, msg, size);
2704 @@ -1132,7 +1180,10 @@
2705 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
2706 goto out_err;
2708 - sg = !!(sk->sk_route_caps & NETIF_F_SG);
2709 + if (tp->mpc)
2710 + sg = mptcp_can_sg(sk);
2711 + else
2712 + sg = !!(sk->sk_route_caps & NETIF_F_SG);
2714 while (--iovlen >= 0) {
2715 size_t seglen = iov->iov_len;
2716 @@ -1176,8 +1227,15 @@
2719 * Check whether we can use HW checksum.
2721 + * If dss-csum is enabled, we do not do hw-csum.
2722 + * In case of non-mptcp we check the
2723 + * device-capabilities.
2724 +			 * In case of mptcp, hw-csums will be handled
2725 + * later in mptcp_write_xmit.
2727 - if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
2728 + if (((tp->mpc && !tp->mpcb->dss_csum) || !tp->mpc) &&
2729 + (tp->mpc || sk->sk_route_caps & NETIF_F_ALL_CSUM))
2730 skb->ip_summed = CHECKSUM_PARTIAL;
2732 skb_entail(sk, skb);
2733 @@ -1386,6 +1444,11 @@
2735 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
2737 + if (is_meta_sk(sk)) {
2738 + mptcp_cleanup_rbuf(sk, copied);
2739 +		return;
2740 +	}
2741 +
2742 WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
2743 "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
2744 tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
2745 @@ -1422,7 +1485,7 @@
2747 /* Optimize, __tcp_select_window() is not cheap. */
2748 if (2*rcv_window_now <= tp->window_clamp) {
2749 - __u32 new_window = __tcp_select_window(sk);
2750 + __u32 new_window = tp->__select_window(sk);
2752 /* Send ACK now, if this read freed lots of space
2753 * in our buffer. Certainly, new_window is new window.
2754 @@ -1623,6 +1686,14 @@
2756 lock_sock(sk);
2758 +#ifdef CONFIG_MPTCP
2759 + if (tp->mpc) {
2760 + struct sock *sk_it;
2761 + mptcp_for_each_sk(tp->mpcb, sk_it)
2762 +			sock_rps_record_flow(sk_it);
2763 +	}
2764 +#endif
2766 err = -ENOTCONN;
2767 if (sk->sk_state == TCP_LISTEN)
2768 goto out;
2769 @@ -2070,7 +2141,7 @@
2770 /* TCP_CLOSING */ TCP_CLOSING,
2773 -static int tcp_close_state(struct sock *sk)
2774 +int tcp_close_state(struct sock *sk)
2776 int next = (int)new_state[sk->sk_state];
2777 int ns = next & TCP_STATE_MASK;
2778 @@ -2099,8 +2170,12 @@
2779 (TCPF_ESTABLISHED | TCPF_SYN_SENT |
2780 TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
2781 /* Clear out any half completed packets. FIN if needed. */
2782 - if (tcp_close_state(sk))
2783 - tcp_send_fin(sk);
2784 + if (tcp_close_state(sk)) {
2785 + if (!is_meta_sk(sk))
2786 + tcp_send_fin(sk);
2787 + else
2788 +				mptcp_send_fin(sk);
2789 +		}
2790 	}
2791 }
2792 EXPORT_SYMBOL(tcp_shutdown);
2793 @@ -2125,6 +2200,11 @@
2794 int data_was_unread = 0;
2795 int state;
2797 + if (is_meta_sk(sk)) {
2798 + mptcp_close(sk, timeout);
2799 +		return;
2800 +	}
2801 +
2802 lock_sock(sk);
2803 sk->sk_shutdown = SHUTDOWN_MASK;
2805 @@ -2291,15 +2371,6 @@
2807 EXPORT_SYMBOL(tcp_close);
2809 -/* These states need RST on ABORT according to RFC793 */
2811 -static inline bool tcp_need_reset(int state)
2813 - return (1 << state) &
2814 - (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
2815 - TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
2818 int tcp_disconnect(struct sock *sk, int flags)
2820 struct inet_sock *inet = inet_sk(sk);
2821 @@ -2340,6 +2411,13 @@
2822 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
2823 inet_reset_saddr(sk);
2825 + if (is_meta_sk(sk)) {
2826 + mptcp_disconnect(sk);
2827 + } else {
2828 + if (tp->inside_tk_table)
2829 +			mptcp_hash_remove_bh(tp);
2830 +	}
2831 +
2832 sk->sk_shutdown = 0;
2833 sock_reset_flag(sk, SOCK_DONE);
2834 tp->srtt = 0;
2835 @@ -2699,6 +2777,18 @@
2836 tp->notsent_lowat = val;
2837 sk->sk_write_space(sk);
2838 break;
2839 +#ifdef CONFIG_MPTCP
2840 + case MPTCP_ENABLED:
2841 + if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_LISTEN) {
2842 + if (val)
2843 + tp->mptcp_enabled = 1;
2844 + else
2845 + tp->mptcp_enabled = 0;
2846 + } else {
2847 +			err = -EPERM;
2848 +		}
2849 + break;
2850 +#endif
2851 default:
2852 err = -ENOPROTOOPT;
2853 break;
2854 @@ -2918,6 +3008,11 @@
2855 case TCP_NOTSENT_LOWAT:
2856 val = tp->notsent_lowat;
2857 break;
2858 +#ifdef CONFIG_MPTCP
2859 + case MPTCP_ENABLED:
2860 + val = tp->mptcp_enabled;
2861 + break;
2862 +#endif
2863 default:
2864 return -ENOPROTOOPT;
2866 @@ -3088,8 +3183,11 @@
2867 if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
2868 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
2870 + WARN_ON(sk->sk_state == TCP_CLOSE);
2871 tcp_set_state(sk, TCP_CLOSE);
2873 tcp_clear_xmit_timers(sk);
2875 if (req != NULL)
2876 reqsk_fastopen_remove(sk, req, false);
2878 diff -Nur linux-3.14.45.orig/net/ipv4/tcp_input.c linux-3.14.45/net/ipv4/tcp_input.c
2879 --- linux-3.14.45.orig/net/ipv4/tcp_input.c 2015-06-23 02:01:36.000000000 +0200
2880 +++ linux-3.14.45/net/ipv4/tcp_input.c 2015-06-24 14:15:48.883862476 +0200
2881 @@ -74,6 +74,9 @@
2882 #include <linux/ipsec.h>
2883 #include <asm/unaligned.h>
2884 #include <net/netdma.h>
2885 +#include <net/mptcp.h>
2886 +#include <net/mptcp_v4.h>
2887 +#include <net/mptcp_v6.h>
2889 int sysctl_tcp_timestamps __read_mostly = 1;
2890 int sysctl_tcp_window_scaling __read_mostly = 1;
2891 @@ -99,25 +102,6 @@
2892 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
2893 int sysctl_tcp_early_retrans __read_mostly = 3;
2895 -#define FLAG_DATA 0x01 /* Incoming frame contained data. */
2896 -#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
2897 -#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */
2898 -#define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */
2899 -#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */
2900 -#define FLAG_DATA_SACKED 0x20 /* New SACK. */
2901 -#define FLAG_ECE 0x40 /* ECE in this ACK */
2902 -#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
2903 -#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
2904 -#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
2905 -#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
2906 -#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
2907 -#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
2909 -#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
2910 -#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
2911 -#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
2912 -#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
2914 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
2915 #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
2917 @@ -283,8 +267,12 @@
2918 per_mss = roundup_pow_of_two(per_mss) +
2919 SKB_DATA_ALIGN(sizeof(struct sk_buff));
2921 - nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
2922 - nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
2923 + if (tp->mpc) {
2924 + nr_segs = mptcp_check_snd_buf(tp);
2925 + } else {
2926 + nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
2927 +		nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
2928 +	}
2930 /* Fast Recovery (RFC 5681 3.2) :
2931 * Cubic needs 1.7 factor, rounded to 2 to include
2932 @@ -292,8 +280,16 @@
2934 sndmem = 2 * nr_segs * per_mss;
2936 - if (sk->sk_sndbuf < sndmem)
2937 + /* MPTCP: after this sndmem is the new contribution of the
2938 + * current subflow to the aggregated sndbuf */
2939 + if (sk->sk_sndbuf < sndmem) {
2940 + int old_sndbuf = sk->sk_sndbuf;
2941 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
2942 + /* MPTCP: ok, the subflow sndbuf has grown, reflect
2943 + * this in the aggregate buffer.*/
2944 + if (tp->mpc && old_sndbuf != sk->sk_sndbuf)
2945 +			mptcp_update_sndbuf(tp->mpcb);
2946 +	}
2947 }
2949 /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
2950 @@ -342,10 +338,12 @@
2951 static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
2953 struct tcp_sock *tp = tcp_sk(sk);
2954 + struct sock *meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk;
2955 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
2957 /* Check #1 */
2958 - if (tp->rcv_ssthresh < tp->window_clamp &&
2959 - (int)tp->rcv_ssthresh < tcp_space(sk) &&
2960 + if (meta_tp->rcv_ssthresh < meta_tp->window_clamp &&
2961 + (int)meta_tp->rcv_ssthresh < tcp_space(sk) &&
2962 !sk_under_memory_pressure(sk)) {
2963 int incr;
2965 @@ -353,14 +351,14 @@
2966 * will fit to rcvbuf in future.
2968 if (tcp_win_from_space(skb->truesize) <= skb->len)
2969 - incr = 2 * tp->advmss;
2970 + incr = 2 * meta_tp->advmss;
2971 else
2972 - incr = __tcp_grow_window(sk, skb);
2973 + incr = __tcp_grow_window(meta_sk, skb);
2975 if (incr) {
2976 incr = max_t(int, incr, 2 * skb->len);
2977 - tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
2978 - tp->window_clamp);
2979 + meta_tp->rcv_ssthresh = min(meta_tp->rcv_ssthresh + incr,
2980 + meta_tp->window_clamp);
2981 inet_csk(sk)->icsk_ack.quick |= 1;
2984 @@ -543,7 +541,10 @@
2985 int copied;
2987 time = tcp_time_stamp - tp->rcvq_space.time;
2988 - if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
2989 + if (tp->mpc) {
2990 + if (mptcp_check_rtt(tp, time))
2991 + return;
2992 + } else if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
2993 return;
2995 /* Number of bytes copied to user in last RTT */
2996 @@ -768,7 +769,7 @@
2997 /* Calculate rto without backoff. This is the second half of Van Jacobson's
2998 * routine referred to above.
3000 -static void tcp_set_rto(struct sock *sk)
3001 +void tcp_set_rto(struct sock *sk)
3003 const struct tcp_sock *tp = tcp_sk(sk);
3004 /* Old crap is replaced with new one. 8)
3005 @@ -2909,7 +2910,7 @@
3006 return false;
3008 tcp_rtt_estimator(sk, seq_rtt);
3009 - tcp_set_rto(sk);
3010 + tp->set_rto(sk);
3012 /* RFC6298: only reset backoff on valid RTT measurement. */
3013 inet_csk(sk)->icsk_backoff = 0;
3014 @@ -2993,7 +2994,7 @@
3017 /* If we get here, the whole TSO packet has not been acked. */
3018 -static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3019 +u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3021 struct tcp_sock *tp = tcp_sk(sk);
3022 u32 packets_acked;
3023 @@ -3088,6 +3089,8 @@
3025 if (!(scb->tcp_flags & TCPHDR_SYN)) {
3026 flag |= FLAG_DATA_ACKED;
3027 + if (tp->mpc && mptcp_is_data_seq(skb))
3028 + flag |= MPTCP_FLAG_DATA_ACKED;
3029 } else {
3030 flag |= FLAG_SYN_ACKED;
3031 tp->retrans_stamp = 0;
3032 @@ -3190,7 +3193,7 @@
3033 return flag;
3036 -static void tcp_ack_probe(struct sock *sk)
3037 +void tcp_ack_probe(struct sock *sk)
3039 const struct tcp_sock *tp = tcp_sk(sk);
3040 struct inet_connection_sock *icsk = inet_csk(sk);
3041 @@ -3237,9 +3240,8 @@
3042 /* Check that window update is acceptable.
3043 * The function assumes that snd_una<=ack<=snd_next.
3045 -static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3046 - const u32 ack, const u32 ack_seq,
3047 - const u32 nwin)
3048 +bool tcp_may_update_window(const struct tcp_sock *tp, const u32 ack,
3049 + const u32 ack_seq, const u32 nwin)
3051 return after(ack, tp->snd_una) ||
3052 after(ack_seq, tp->snd_wl1) ||
3053 @@ -3358,7 +3360,7 @@
3056 /* This routine deals with incoming acks, but not outgoing ones. */
3057 -static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3058 +static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3060 struct inet_connection_sock *icsk = inet_csk(sk);
3061 struct tcp_sock *tp = tcp_sk(sk);
3062 @@ -3453,6 +3455,16 @@
3063 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt);
3064 acked -= tp->packets_out;
3066 + if (tp->mpc) {
3067 + if (mptcp_fallback_infinite(sk, flag)) {
3068 + pr_err("%s resetting flow\n", __func__);
3069 + mptcp_send_reset(sk);
3070 +			goto invalid_ack;
3071 +		}
3072 +
3073 +		mptcp_clean_rtx_infinite(skb, sk);
3074 +	}
3075 +
3076 /* Advance cwnd if state allows */
3077 if (tcp_may_raise_cwnd(sk, flag))
3078 tcp_cong_avoid(sk, ack, acked, prior_in_flight);
3079 @@ -3517,8 +3529,9 @@
3080 * the fast version below fails.
3082 void tcp_parse_options(const struct sk_buff *skb,
3083 - struct tcp_options_received *opt_rx, int estab,
3084 - struct tcp_fastopen_cookie *foc)
3085 + struct tcp_options_received *opt_rx,
3086 + struct mptcp_options_received *mopt,
3087 + int estab, struct tcp_fastopen_cookie *foc)
3089 const unsigned char *ptr;
3090 const struct tcphdr *th = tcp_hdr(skb);
3091 @@ -3601,6 +3614,10 @@
3093 break;
3094 #endif
3095 + case TCPOPT_MPTCP:
3096 + mptcp_parse_options(ptr - 2, opsize, opt_rx,
3097 + mopt, skb);
3098 + break;
3099 case TCPOPT_EXP:
3100 /* Fast Open option shares code 254 using a
3101 * 16 bits magic number. It's valid only in
3102 @@ -3662,8 +3679,8 @@
3103 if (tcp_parse_aligned_timestamp(tp, th))
3104 return true;
3107 - tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
3108 + tcp_parse_options(skb, &tp->rx_opt, tp->mpc ? &tp->mptcp->rx_opt : NULL,
3109 + 1, NULL);
3110 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3111 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
3113 @@ -3836,6 +3853,8 @@
3114 dst = __sk_dst_get(sk);
3115 if (!dst || !dst_metric(dst, RTAX_QUICKACK))
3116 inet_csk(sk)->icsk_ack.pingpong = 1;
3117 + if (tp->mpc)
3118 + mptcp_sub_close_passive(sk);
3119 break;
3121 case TCP_CLOSE_WAIT:
3122 @@ -3857,6 +3876,13 @@
3123 tcp_set_state(sk, TCP_CLOSING);
3124 break;
3125 case TCP_FIN_WAIT2:
3126 + if (tp->mpc) {
3127 + /* The socket will get closed by mptcp_data_ready.
3128 + * We first have to process all data-sequences.
3129 + */
3130 + tp->close_it = 1;
3131 +			break;
3132 +		}
3133 /* Received a FIN -- send ACK and enter TIME_WAIT. */
3134 tcp_send_ack(sk);
3135 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
3136 @@ -3881,6 +3907,10 @@
3137 if (!sock_flag(sk, SOCK_DEAD)) {
3138 sk->sk_state_change(sk);
3140 + /* Don't wake up MPTCP-subflows */
3141 + if (tp->mpc)
3142 + return;
3144 /* Do not send POLL_HUP for half duplex close. */
3145 if (sk->sk_shutdown == SHUTDOWN_MASK ||
3146 sk->sk_state == TCP_CLOSE)
3147 @@ -4078,7 +4108,11 @@
3148 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
3151 - if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
3152 + /* In case of MPTCP, the segment may be empty if it's a
3153 + * non-data DATA_FIN. (see beginning of tcp_data_queue)
3154 + */
3155 + if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt) &&
3156 + !(tp->mpc && TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq)) {
3157 SOCK_DEBUG(sk, "ofo packet was already received\n");
3158 __skb_unlink(skb, &tp->out_of_order_queue);
3159 __kfree_skb(skb);
3160 @@ -4102,6 +4136,9 @@
3161 static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
3162 unsigned int size)
3164 + if (tcp_sk(sk)->mpc)
3165 + sk = mptcp_meta_sk(sk);
3167 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
3168 !sk_rmem_schedule(sk, skb, size)) {
3170 @@ -4132,15 +4169,16 @@
3171 * Better try to coalesce them right now to avoid future collapses.
3172 * Returns true if caller should free @from instead of queueing it
3174 -static bool tcp_try_coalesce(struct sock *sk,
3175 - struct sk_buff *to,
3176 - struct sk_buff *from,
3177 - bool *fragstolen)
3178 +bool tcp_try_coalesce(struct sock *sk, struct sk_buff *to, struct sk_buff *from,
3179 + bool *fragstolen)
3181 int delta;
3183 *fragstolen = false;
3185 + if (tcp_sk(sk)->mpc && !is_meta_sk(sk))
3186 + return false;
3188 if (tcp_hdr(from)->fin)
3189 return false;
3191 @@ -4230,7 +4268,9 @@
3193 /* Do skb overlap to previous one? */
3194 if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
3195 - if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
3196 + /* MPTCP allows non-data data-fin to be in the ofo-queue */
3197 + if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq) &&
3198 + !(tp->mpc && end_seq == seq)) {
3199 /* All the bits are present. Drop. */
3200 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
3201 __kfree_skb(skb);
3202 @@ -4268,6 +4308,9 @@
3203 end_seq);
3204 break;
3206 + /* MPTCP allows non-data data-fin to be in the ofo-queue */
3207 + if (tp->mpc && TCP_SKB_CB(skb1)->seq == TCP_SKB_CB(skb1)->end_seq)
3208 + continue;
3209 __skb_unlink(skb1, &tp->out_of_order_queue);
3210 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
3211 TCP_SKB_CB(skb1)->end_seq);
3212 @@ -4285,8 +4328,8 @@
3216 -static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
3217 - bool *fragstolen)
3218 +int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
3219 + bool *fragstolen)
3221 int eaten;
3222 struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
3223 @@ -4348,7 +4391,10 @@
3224 int eaten = -1;
3225 bool fragstolen = false;
3227 - if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
3228 + /* If no data is present, but a data_fin is in the options, we still
3229 + * have to call mptcp_queue_skb later on. */
3230 + if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq &&
3231 + !(tp->mpc && mptcp_is_data_fin(skb)))
3232 goto drop;
3234 skb_dst_drop(skb);
3235 @@ -4394,7 +4440,7 @@
3236 eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
3238 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
3239 - if (skb->len)
3240 + if (skb->len || mptcp_is_data_fin(skb))
3241 tcp_event_data_recv(sk, skb);
3242 if (th->fin)
3243 tcp_fin(sk);
3244 @@ -4416,7 +4462,11 @@
3246 if (eaten > 0)
3247 kfree_skb_partial(skb, fragstolen);
3248 - if (!sock_flag(sk, SOCK_DEAD))
3249 + if (!sock_flag(sk, SOCK_DEAD) || tp->mpc)
3250 + /* MPTCP: we always have to call data_ready, because
3251 + * we may be about to receive a data-fin, which still
3252 + * must get queued.
3253 + */
3254 sk->sk_data_ready(sk, 0);
3255 return;
3257 @@ -4468,6 +4518,8 @@
3258 next = skb_queue_next(list, skb);
3260 __skb_unlink(skb, list);
3261 + if (tcp_sk(sk)->mpc)
3262 + mptcp_remove_shortcuts(tcp_sk(sk)->mpcb, skb);
3263 __kfree_skb(skb);
3264 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
3266 @@ -4640,6 +4692,18 @@
3267 struct tcp_sock *tp = tcp_sk(sk);
3268 bool res = false;
3270 + if (is_meta_sk(sk)) {
3271 + if (!skb_queue_empty(&tp->out_of_order_queue)) {
3272 + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
3273 +			mptcp_purge_ofo_queue(tp);
3274 +
3275 +			/* No sack at the mptcp-level */
3276 +			sk_mem_reclaim(sk);
3277 +			res = true;
3278 +		}
3279 +		return res;
3280 +	}
3281 +
3282 if (!skb_queue_empty(&tp->out_of_order_queue)) {
3283 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
3284 __skb_queue_purge(&tp->out_of_order_queue);
3285 @@ -4729,7 +4793,7 @@
3286 tp->snd_cwnd_stamp = tcp_time_stamp;
3289 -static bool tcp_should_expand_sndbuf(const struct sock *sk)
3290 +bool tcp_should_expand_sndbuf(const struct sock *sk)
3292 const struct tcp_sock *tp = tcp_sk(sk);
3294 @@ -4764,7 +4828,7 @@
3296 struct tcp_sock *tp = tcp_sk(sk);
3298 - if (tcp_should_expand_sndbuf(sk)) {
3299 + if (tp->should_expand_sndbuf(sk)) {
3300 tcp_sndbuf_expand(sk);
3301 tp->snd_cwnd_stamp = tcp_time_stamp;
3303 @@ -4776,8 +4840,9 @@
3305 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
3306 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
3307 - if (sk->sk_socket &&
3308 - test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
3309 + if (tcp_sk(sk)->mpc ||
3310 + (sk->sk_socket &&
3311 + test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)))
3312 tcp_new_space(sk);
3315 @@ -4800,7 +4865,7 @@
3316 /* ... and right edge of window advances far enough.
3317 * (tcp_recvmsg() will send ACK otherwise). Or...
3319 - __tcp_select_window(sk) >= tp->rcv_wnd) ||
3320 + tp->__select_window(sk) >= tp->rcv_wnd) ||
3321 /* We ACK each frame or... */
3322 tcp_in_quickack_mode(sk) ||
3323 /* We have out of order data. */
3324 @@ -4902,6 +4967,10 @@
3326 struct tcp_sock *tp = tcp_sk(sk);
3328 + /* MPTCP urgent data is not yet supported */
3329 + if (tp->mpc)
3330 + return;
3332 /* Check if we get a new urgent pointer - normally not. */
3333 if (th->urg)
3334 tcp_check_urg(sk, th);
3335 @@ -4969,8 +5038,7 @@
3338 #ifdef CONFIG_NET_DMA
3339 -static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
3340 - int hlen)
3341 +bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen)
3343 struct tcp_sock *tp = tcp_sk(sk);
3344 int chunk = skb->len - hlen;
3345 @@ -5079,9 +5147,15 @@
3346 goto discard;
3349 + /* If valid: post process the received MPTCP options. */
3350 + if (tp->mpc && mptcp_handle_options(sk, th, skb))
3351 + goto discard;
3353 return true;
3355 discard:
3356 + if (tp->mpc)
3357 + mptcp_reset_mopt(tp);
3358 __kfree_skb(skb);
3359 return false;
3361 @@ -5133,6 +5207,10 @@
3363 tp->rx_opt.saw_tstamp = 0;
3365 + /* MPTCP: force slowpath. */
3366 + if (tp->mpc)
3367 + goto slow_path;
3369 /* pred_flags is 0xS?10 << 16 + snd_wnd
3370 * if header_prediction is to be made
3371 * 'S' will always be tp->tcp_header_len >> 2
3372 @@ -5347,7 +5425,7 @@
3374 tp->lsndtime = tcp_time_stamp;
3376 - tcp_init_buffer_space(sk);
3377 + tp->init_buffer_space(sk);
3379 if (sock_flag(sk, SOCK_KEEPOPEN))
3380 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
3381 @@ -5377,7 +5455,7 @@
3382 /* Get original SYNACK MSS value if user MSS sets mss_clamp */
3383 tcp_clear_options(&opt);
3384 opt.user_mss = opt.mss_clamp = 0;
3385 - tcp_parse_options(synack, &opt, 0, NULL);
3386 + tcp_parse_options(synack, &opt, NULL, 0, NULL);
3387 mss = opt.mss_clamp;
3390 @@ -5412,8 +5490,11 @@
3391 struct tcp_sock *tp = tcp_sk(sk);
3392 struct tcp_fastopen_cookie foc = { .len = -1 };
3393 int saved_clamp = tp->rx_opt.mss_clamp;
3394 + struct mptcp_options_received mopt;
3395 + mptcp_init_mp_opt(&mopt);
3397 - tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
3398 + tcp_parse_options(skb, &tp->rx_opt,
3399 + tp->mpc ? &tp->mptcp->rx_opt : &mopt, 0, &foc);
3400 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3401 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
3403 @@ -5460,6 +5541,21 @@
3404 if (!th->syn)
3405 goto discard_and_undo;
3407 + if (tp->request_mptcp || tp->mpc) {
3408 + int ret;
3409 + ret = mptcp_rcv_synsent_state_process(sk, &sk,
3410 + skb, &mopt);
3412 + /* May have changed if we support MPTCP */
3413 + tp = tcp_sk(sk);
3414 + icsk = inet_csk(sk);
3416 + if (ret == 1)
3417 + goto reset_and_undo;
3418 + if (ret == 2)
3419 +			goto discard;
3420 +	}
3421 +
3422 /* rfc793:
3423 * "If the SYN bit is on ...
3424 * are acceptable then ...
3425 @@ -5472,6 +5568,15 @@
3426 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
3427 tcp_ack(sk, skb, FLAG_SLOWPATH);
3429 + if (tp->mpc && !is_master_tp(tp)) {
3430 + /* Timer for repeating the ACK until an answer
3431 + * arrives. Used only when establishing an additional
3432 + * subflow inside of an MPTCP connection.
3433 + */
3434 + sk_reset_timer(sk, &tp->mptcp->mptcp_ack_timer,
3435 +				       jiffies + icsk->icsk_rto);
3436 +	}
3437 +
3438 /* Ok.. it's good. Set up sequence numbers and
3439 * move to established.
3441 @@ -5498,6 +5603,11 @@
3442 tp->tcp_header_len = sizeof(struct tcphdr);
3445 + if (tp->mpc) {
3446 + tp->tcp_header_len += MPTCP_SUB_LEN_DSM_ALIGN;
3447 +		tp->advmss -= MPTCP_SUB_LEN_DSM_ALIGN;
3448 +	}
3449 +
3450 if (tcp_is_sack(tp) && sysctl_tcp_fack)
3451 tcp_enable_fack(tp);
3453 @@ -5518,7 +5628,9 @@
3454 tcp_rcv_fastopen_synack(sk, skb, &foc))
3455 return -1;
3457 - if (sk->sk_write_pending ||
3458 + /* With MPTCP we cannot send data on the third ack due to the
3459 + * lack of option-space */
3460 + if ((sk->sk_write_pending && !tp->mpc) ||
3461 icsk->icsk_accept_queue.rskq_defer_accept ||
3462 icsk->icsk_ack.pingpong) {
3463 /* Save one ACK. Data will be ready after
3464 @@ -5560,6 +5672,7 @@
3465 tcp_paws_reject(&tp->rx_opt, 0))
3466 goto discard_and_undo;
3468 + /* TODO - check this here for MPTCP */
3469 if (th->syn) {
3470 /* We see SYN without ACK. It is attempt of
3471 * simultaneous connect with crossed SYNs.
3472 @@ -5576,6 +5689,11 @@
3473 tp->tcp_header_len = sizeof(struct tcphdr);
3476 + if (tp->mpc) {
3477 + tp->tcp_header_len += MPTCP_SUB_LEN_DSM_ALIGN;
3478 + tp->advmss -= MPTCP_SUB_LEN_DSM_ALIGN;
3481 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
3482 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
3484 @@ -5634,6 +5752,7 @@
3486 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
3487 const struct tcphdr *th, unsigned int len)
3488 + __releases(&sk->sk_lock.slock)
3490 struct tcp_sock *tp = tcp_sk(sk);
3491 struct inet_connection_sock *icsk = inet_csk(sk);
3492 @@ -5685,6 +5804,10 @@
3494 case TCP_SYN_SENT:
3495 queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
3496 + if (is_meta_sk(sk)) {
3497 + sk = tcp_sk(sk)->mpcb->master_sk;
3498 + tp = tcp_sk(sk);
3500 if (queued >= 0)
3501 return queued;
3503 @@ -5692,6 +5815,8 @@
3504 tcp_urg(sk, skb, th);
3505 __kfree_skb(skb);
3506 tcp_data_snd_check(sk);
3507 + if (tp->mpc && is_master_tp(tp))
3508 + bh_unlock_sock(sk);
3509 return 0;
3512 @@ -5734,7 +5859,7 @@
3514 tcp_mtup_init(sk);
3515 tp->copied_seq = tp->rcv_nxt;
3516 - tcp_init_buffer_space(sk);
3517 + tp->init_buffer_space(sk);
3519 smp_mb();
3520 tcp_set_state(sk, TCP_ESTABLISHED);
3521 @@ -5754,6 +5879,8 @@
3523 if (tp->rx_opt.tstamp_ok)
3524 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
3525 + if (tp->mpc)
3526 + tp->advmss -= MPTCP_SUB_LEN_DSM_ALIGN;
3528 if (req) {
3529 /* Re-arm the timer because data may have been sent out.
3530 @@ -5775,6 +5902,12 @@
3532 tcp_initialize_rcv_mss(sk);
3533 tcp_fast_path_on(tp);
3534 + /* Send an ACK when establishing a new
3535 + * MPTCP subflow, i.e. using an MP_JOIN
3536 + * subtype.
3537 + */
3538 + if (tp->mpc && !is_master_tp(tp))
3539 + tcp_send_ack(sk);
3540 break;
3542 case TCP_FIN_WAIT1: {
3543 @@ -5826,7 +5959,8 @@
3544 tmo = tcp_fin_time(sk);
3545 if (tmo > TCP_TIMEWAIT_LEN) {
3546 inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
3547 - } else if (th->fin || sock_owned_by_user(sk)) {
3548 + } else if (th->fin || mptcp_is_data_fin(skb) ||
3549 + sock_owned_by_user(sk)) {
3550 /* Bad case. We could lose such FIN otherwise.
3551 * It is not a big problem, but it looks confusing
3552 * and not so rare event. We still can lose it now,
3553 @@ -5855,6 +5989,9 @@
3554 goto discard;
3556 break;
3557 + case TCP_CLOSE:
3558 + if (tp->mp_killed)
3559 + goto discard;
3562 /* step 6: check the URG bit */
3563 @@ -5875,7 +6012,11 @@
3565 if (sk->sk_shutdown & RCV_SHUTDOWN) {
3566 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
3567 - after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
3568 + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt) &&
3569 + !tp->mpc) {
3570 + /* In case of mptcp, the reset is handled by
3571 + * mptcp_rcv_state_process
3572 + */
3573 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
3574 tcp_reset(sk);
3575 return 1;
3576 diff -Nur linux-3.14.45.orig/net/ipv4/tcp_ipv4.c linux-3.14.45/net/ipv4/tcp_ipv4.c
3577 --- linux-3.14.45.orig/net/ipv4/tcp_ipv4.c 2015-06-23 02:01:36.000000000 +0200
3578 +++ linux-3.14.45/net/ipv4/tcp_ipv4.c 2015-06-24 14:15:48.883862476 +0200
3579 @@ -67,6 +67,8 @@
3580 #include <net/icmp.h>
3581 #include <net/inet_hashtables.h>
3582 #include <net/tcp.h>
3583 +#include <net/mptcp.h>
3584 +#include <net/mptcp_v4.h>
3585 #include <net/transp_v6.h>
3586 #include <net/ipv6.h>
3587 #include <net/inet_common.h>
3588 @@ -99,7 +101,7 @@
3589 struct inet_hashinfo tcp_hashinfo;
3590 EXPORT_SYMBOL(tcp_hashinfo);
3592 -static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
3593 +__u32 tcp_v4_init_sequence(const struct sk_buff *skb)
3595 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
3596 ip_hdr(skb)->saddr,
3597 @@ -335,7 +337,7 @@
3598 struct inet_sock *inet;
3599 const int type = icmp_hdr(icmp_skb)->type;
3600 const int code = icmp_hdr(icmp_skb)->code;
3601 - struct sock *sk;
3602 + struct sock *sk, *meta_sk;
3603 struct sk_buff *skb;
3604 struct request_sock *req;
3605 __u32 seq;
3606 @@ -359,13 +361,19 @@
3607 return;
3610 - bh_lock_sock(sk);
3611 + tp = tcp_sk(sk);
3612 + if (tp->mpc)
3613 + meta_sk = mptcp_meta_sk(sk);
3614 + else
3615 + meta_sk = sk;
3617 + bh_lock_sock(meta_sk);
3618 /* If too many ICMPs get dropped on busy
3619 * servers this needs to be solved differently.
3620 * We do take care of PMTU discovery (RFC1191) special case :
3621 * we can receive locally generated ICMP messages while socket is held.
3623 - if (sock_owned_by_user(sk)) {
3624 + if (sock_owned_by_user(meta_sk)) {
3625 if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
3626 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
3628 @@ -378,7 +386,6 @@
3631 icsk = inet_csk(sk);
3632 - tp = tcp_sk(sk);
3633 req = tp->fastopen_rsk;
3634 seq = ntohl(th->seq);
3635 if (sk->sk_state != TCP_LISTEN &&
3636 @@ -412,11 +419,13 @@
3637 goto out;
3639 tp->mtu_info = info;
3640 - if (!sock_owned_by_user(sk)) {
3641 + if (!sock_owned_by_user(meta_sk)) {
3642 tcp_v4_mtu_reduced(sk);
3643 } else {
3644 if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
3645 sock_hold(sk);
3646 + if (tp->mpc)
3647 + mptcp_tsq_flags(sk);
3649 goto out;
3651 @@ -432,7 +441,7 @@
3653 /* XXX (TFO) - revisit the following logic for TFO */
3655 - if (sock_owned_by_user(sk))
3656 + if (sock_owned_by_user(meta_sk))
3657 break;
3659 icsk->icsk_backoff--;
3660 @@ -474,7 +483,7 @@
3661 switch (sk->sk_state) {
3662 struct request_sock *req, **prev;
3663 case TCP_LISTEN:
3664 - if (sock_owned_by_user(sk))
3665 + if (sock_owned_by_user(meta_sk))
3666 goto out;
3668 req = inet_csk_search_req(sk, &prev, th->dest,
3669 @@ -507,7 +516,7 @@
3670 It can f.e. if SYNs crossed,
3671 or Fast Open.
3673 - if (!sock_owned_by_user(sk)) {
3674 + if (!sock_owned_by_user(meta_sk)) {
3675 sk->sk_err = err;
3677 sk->sk_error_report(sk);
3678 @@ -536,7 +545,7 @@
3681 inet = inet_sk(sk);
3682 - if (!sock_owned_by_user(sk) && inet->recverr) {
3683 + if (!sock_owned_by_user(meta_sk) && inet->recverr) {
3684 sk->sk_err = err;
3685 sk->sk_error_report(sk);
3686 } else { /* Only an error on timeout */
3687 @@ -544,7 +553,7 @@
3690 out:
3691 - bh_unlock_sock(sk);
3692 + bh_unlock_sock(meta_sk);
3693 sock_put(sk);
3696 @@ -586,7 +595,7 @@
3697 * Exception: precedence violation. We do not implement it in any case.
3700 -static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
3701 +void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
3703 const struct tcphdr *th = tcp_hdr(skb);
3704 struct {
3705 @@ -711,10 +720,10 @@
3706 outside socket context is ugly, certainly. What can I do?
3709 -static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
3710 +static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 data_ack,
3711 u32 win, u32 tsval, u32 tsecr, int oif,
3712 struct tcp_md5sig_key *key,
3713 - int reply_flags, u8 tos)
3714 + int reply_flags, u8 tos, int mptcp)
3716 const struct tcphdr *th = tcp_hdr(skb);
3717 struct {
3718 @@ -723,6 +732,10 @@
3719 #ifdef CONFIG_TCP_MD5SIG
3720 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
3721 #endif
3722 +#ifdef CONFIG_MPTCP
3723 + + ((MPTCP_SUB_LEN_DSS >> 2) +
3724 + (MPTCP_SUB_LEN_ACK >> 2))
3725 +#endif
3727 } rep;
3728 struct ip_reply_arg arg;
3729 @@ -767,6 +780,21 @@
3730 ip_hdr(skb)->daddr, &rep.th);
3732 #endif
3733 +#ifdef CONFIG_MPTCP
3734 + if (mptcp) {
3735 + int offset = (tsecr) ? 3 : 0;
3736 + /* Construction of 32-bit data_ack */
3737 + rep.opt[offset++] = htonl((TCPOPT_MPTCP << 24) |
3738 + ((MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK) << 16) |
3739 + (0x20 << 8) |
3740 + (0x01));
3741 + rep.opt[offset] = htonl(data_ack);
3743 + arg.iov[0].iov_len += MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK;
3744 + rep.th.doff = arg.iov[0].iov_len / 4;
3746 +#endif /* CONFIG_MPTCP */
3748 arg.flags = reply_flags;
3749 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
3750 ip_hdr(skb)->saddr, /* XXX */
3751 @@ -786,36 +814,44 @@
3753 struct inet_timewait_sock *tw = inet_twsk(sk);
3754 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
3755 + u32 data_ack = 0;
3756 + int mptcp = 0;
3758 + if (tcptw->mptcp_tw && tcptw->mptcp_tw->meta_tw) {
3759 + data_ack = (u32)tcptw->mptcp_tw->rcv_nxt;
3760 + mptcp = 1;
3763 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
3764 + data_ack,
3765 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
3766 tcp_time_stamp + tcptw->tw_ts_offset,
3767 tcptw->tw_ts_recent,
3768 tw->tw_bound_dev_if,
3769 tcp_twsk_md5_key(tcptw),
3770 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
3771 - tw->tw_tos
3772 + tw->tw_tos, mptcp
3775 inet_twsk_put(tw);
3778 -static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
3779 - struct request_sock *req)
3780 +void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
3781 + struct request_sock *req)
3783 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
3784 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
3786 tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
3787 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
3788 - tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
3789 + tcp_rsk(req)->rcv_nxt, 0, req->rcv_wnd,
3790 tcp_time_stamp,
3791 req->ts_recent,
3793 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
3794 AF_INET),
3795 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
3796 - ip_hdr(skb)->tos);
3797 + ip_hdr(skb)->tos, 0);
3801 @@ -823,9 +859,9 @@
3802 * This still operates on a request_sock only, not on a big
3803 * socket.
3805 -static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
3806 - struct request_sock *req,
3807 - u16 queue_mapping)
3808 +int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
3809 + struct request_sock *req,
3810 + u16 queue_mapping)
3812 const struct inet_request_sock *ireq = inet_rsk(req);
3813 struct flowi4 fl4;
3814 @@ -853,7 +889,7 @@
3815 return err;
3818 -static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
3819 +int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
3821 int res = tcp_v4_send_synack(sk, NULL, req, 0);
3823 @@ -865,7 +901,7 @@
3825 * IPv4 request_sock destructor.
3827 -static void tcp_v4_reqsk_destructor(struct request_sock *req)
3828 +void tcp_v4_reqsk_destructor(struct request_sock *req)
3830 kfree(inet_rsk(req)->opt);
3832 @@ -905,7 +941,7 @@
3834 * Save and compile IPv4 options into the request_sock if needed.
3836 -static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
3837 +struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
3839 const struct ip_options *opt = &(IPCB(skb)->opt);
3840 struct ip_options_rcu *dopt = NULL;
3841 @@ -1257,7 +1293,7 @@
3844 #ifdef CONFIG_TCP_MD5SIG
3845 -static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
3846 +const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
3847 .md5_lookup = tcp_v4_reqsk_md5_lookup,
3848 .calc_md5_hash = tcp_v4_md5_hash_skb,
3850 @@ -1415,7 +1451,7 @@
3851 tcp_init_congestion_control(child);
3852 tcp_mtup_init(child);
3853 tcp_init_metrics(child);
3854 - tcp_init_buffer_space(child);
3855 + tp->init_buffer_space(child);
3857 /* Queue the data carried in the SYN packet. We need to first
3858 * bump skb's refcnt because the caller will attempt to free it.
3859 @@ -1447,6 +1483,7 @@
3860 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
3862 struct tcp_options_received tmp_opt;
3863 + struct mptcp_options_received mopt;
3864 struct request_sock *req;
3865 struct inet_request_sock *ireq;
3866 struct tcp_sock *tp = tcp_sk(sk);
3867 @@ -1461,6 +1498,22 @@
3868 struct sk_buff *skb_synack;
3869 int do_fastopen;
3871 + tcp_clear_options(&tmp_opt);
3872 + tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
3873 + tmp_opt.user_mss = tp->rx_opt.user_mss;
3874 + mptcp_init_mp_opt(&mopt);
3875 + tcp_parse_options(skb, &tmp_opt, &mopt, 0, want_cookie ? NULL : &foc);
3877 +#ifdef CONFIG_MPTCP
3878 + /* MPTCP structures not initialized, so clear MPTCP fields */
3879 + if (mptcp_init_failed)
3880 + mptcp_init_mp_opt(&mopt);
3882 + if (mopt.is_mp_join)
3883 + return mptcp_do_join_short(skb, &mopt, &tmp_opt, sock_net(sk));
3884 + if (mopt.drop_me)
3885 + goto drop;
3886 +#endif
3887 /* Never answer to SYNs send to broadcast or multicast */
3888 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
3889 goto drop;
3890 @@ -1486,7 +1539,22 @@
3891 goto drop;
3894 - req = inet_reqsk_alloc(&tcp_request_sock_ops);
3895 +#ifdef CONFIG_MPTCP
3896 + if (sysctl_mptcp_enabled == MPTCP_APP && !tp->mptcp_enabled)
3897 + mopt.saw_mpc = 0;
3898 + if (mopt.saw_mpc && !want_cookie) {
3899 + req = inet_reqsk_alloc(&mptcp_request_sock_ops);
3901 + if (!req)
3902 + goto drop;
3904 + mptcp_rsk(req)->mpcb = NULL;
3905 + mptcp_rsk(req)->dss_csum = mopt.dss_csum;
3906 + mptcp_rsk(req)->collide_tk.pprev = NULL;
3907 + } else
3908 +#endif
3909 + req = inet_reqsk_alloc(&tcp_request_sock_ops);
3911 if (!req)
3912 goto drop;
3914 @@ -1494,17 +1562,15 @@
3915 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
3916 #endif
3918 - tcp_clear_options(&tmp_opt);
3919 - tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
3920 - tmp_opt.user_mss = tp->rx_opt.user_mss;
3921 - tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
3923 if (want_cookie && !tmp_opt.saw_tstamp)
3924 tcp_clear_options(&tmp_opt);
3926 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
3927 tcp_openreq_init(req, &tmp_opt, skb);
3929 + if (mopt.saw_mpc && !want_cookie)
3930 + mptcp_reqsk_new_mptcp(req, &tmp_opt, &mopt, skb);
3932 ireq = inet_rsk(req);
3933 ireq->ir_loc_addr = daddr;
3934 ireq->ir_rmt_addr = saddr;
3935 @@ -1716,7 +1782,7 @@
3937 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
3939 -static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
3940 +struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
3942 struct tcphdr *th = tcp_hdr(skb);
3943 const struct iphdr *iph = ip_hdr(skb);
3944 @@ -1733,8 +1799,15 @@
3946 if (nsk) {
3947 if (nsk->sk_state != TCP_TIME_WAIT) {
3948 + /* Don't lock the meta-sk again. It has been locked
3949 + * before mptcp_v4_do_rcv.
3950 + */
3951 + if (tcp_sk(nsk)->mpc && !is_meta_sk(sk))
3952 + bh_lock_sock(mptcp_meta_sk(nsk));
3953 bh_lock_sock(nsk);
3955 return nsk;
3958 inet_twsk_put(inet_twsk(nsk));
3959 return NULL;
3960 @@ -1791,6 +1864,9 @@
3961 goto discard;
3962 #endif
3964 + if (is_meta_sk(sk))
3965 + return mptcp_v4_do_rcv(sk, skb);
3967 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
3968 struct dst_entry *dst = sk->sk_rx_dst;
3970 @@ -1922,7 +1998,7 @@
3971 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
3972 wake_up_interruptible_sync_poll(sk_sleep(sk),
3973 POLLIN | POLLRDNORM | POLLRDBAND);
3974 - if (!inet_csk_ack_scheduled(sk))
3975 + if (!inet_csk_ack_scheduled(sk) && !tp->mpc)
3976 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3977 (3 * tcp_rto_min(sk)) / 4,
3978 TCP_RTO_MAX);
3979 @@ -1939,7 +2015,7 @@
3981 const struct iphdr *iph;
3982 const struct tcphdr *th;
3983 - struct sock *sk;
3984 + struct sock *sk, *meta_sk = NULL;
3985 int ret;
3986 struct net *net = dev_net(skb->dev);
3988 @@ -1972,18 +2048,42 @@
3989 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
3990 skb->len - th->doff * 4);
3991 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
3992 +#ifdef CONFIG_MPTCP
3993 + TCP_SKB_CB(skb)->mptcp_flags = 0;
3994 + TCP_SKB_CB(skb)->dss_off = 0;
3995 +#endif
3996 TCP_SKB_CB(skb)->when = 0;
3997 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
3998 TCP_SKB_CB(skb)->sacked = 0;
4000 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
4001 - if (!sk)
4002 - goto no_tcp_socket;
4004 process:
4005 - if (sk->sk_state == TCP_TIME_WAIT)
4006 + if (sk && sk->sk_state == TCP_TIME_WAIT)
4007 goto do_time_wait;
4009 +#ifdef CONFIG_MPTCP
4010 + if (!sk && th->syn && !th->ack) {
4011 + int ret = mptcp_lookup_join(skb, NULL);
4013 + if (ret < 0) {
4014 + tcp_v4_send_reset(NULL, skb);
4015 + goto discard_it;
4016 + } else if (ret > 0) {
4017 + return 0;
4021 + /* Is there a pending request sock for this segment ? */
4022 + if ((!sk || sk->sk_state == TCP_LISTEN) && mptcp_check_req(skb, net)) {
4023 + if (sk)
4024 + sock_put(sk);
4025 + return 0;
4027 +#endif
4028 + if (!sk)
4029 + goto no_tcp_socket;
4031 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
4032 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
4033 goto discard_and_relse;
4034 @@ -1999,11 +2099,21 @@
4035 sk_mark_napi_id(sk, skb);
4036 skb->dev = NULL;
4038 - bh_lock_sock_nested(sk);
4039 + if (tcp_sk(sk)->mpc) {
4040 + meta_sk = mptcp_meta_sk(sk);
4042 + bh_lock_sock_nested(meta_sk);
4043 + if (sock_owned_by_user(meta_sk))
4044 + skb->sk = sk;
4045 + } else {
4046 + meta_sk = sk;
4047 + bh_lock_sock_nested(sk);
4050 ret = 0;
4051 - if (!sock_owned_by_user(sk)) {
4052 + if (!sock_owned_by_user(meta_sk)) {
4053 #ifdef CONFIG_NET_DMA
4054 - struct tcp_sock *tp = tcp_sk(sk);
4055 + struct tcp_sock *tp = tcp_sk(meta_sk);
4056 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
4057 tp->ucopy.dma_chan = net_dma_find_channel();
4058 if (tp->ucopy.dma_chan)
4059 @@ -2011,16 +2121,16 @@
4060 else
4061 #endif
4063 - if (!tcp_prequeue(sk, skb))
4064 + if (!tcp_prequeue(meta_sk, skb))
4065 ret = tcp_v4_do_rcv(sk, skb);
4067 - } else if (unlikely(sk_add_backlog(sk, skb,
4068 - sk->sk_rcvbuf + sk->sk_sndbuf))) {
4069 - bh_unlock_sock(sk);
4070 + } else if (unlikely(sk_add_backlog(meta_sk, skb,
4071 + meta_sk->sk_rcvbuf + meta_sk->sk_sndbuf))) {
4072 + bh_unlock_sock(meta_sk);
4073 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
4074 goto discard_and_relse;
4076 - bh_unlock_sock(sk);
4077 + bh_unlock_sock(meta_sk);
4079 sock_put(sk);
4081 @@ -2075,6 +2185,18 @@
4082 sk = sk2;
4083 goto process;
4085 +#ifdef CONFIG_MPTCP
4086 + if (th->syn && !th->ack) {
4087 + int ret = mptcp_lookup_join(skb, inet_twsk(sk));
4089 + if (ret < 0) {
4090 + tcp_v4_send_reset(NULL, skb);
4091 + goto discard_it;
4092 + } else if (ret > 0) {
4093 + return 0;
4096 +#endif
4097 /* Fall through to ACK */
4099 case TCP_TW_ACK:
4100 @@ -2158,6 +2280,11 @@
4102 tcp_cleanup_congestion_control(sk);
4104 + if (tp->mpc)
4105 + mptcp_destroy_sock(sk);
4106 + if (tp->inside_tk_table)
4107 + mptcp_hash_remove(tp);
4109 /* Cleanup up the write buffer. */
4110 tcp_write_queue_purge(sk);
4112 diff -Nur linux-3.14.45.orig/net/ipv4/tcp_minisocks.c linux-3.14.45/net/ipv4/tcp_minisocks.c
4113 --- linux-3.14.45.orig/net/ipv4/tcp_minisocks.c 2015-06-23 02:01:36.000000000 +0200
4114 +++ linux-3.14.45/net/ipv4/tcp_minisocks.c 2015-06-24 14:15:48.887862480 +0200
4115 @@ -18,11 +18,13 @@
4116 * Jorge Cwik, <jorge@laser.satlink.net>
4119 +#include <linux/kconfig.h>
4120 #include <linux/mm.h>
4121 #include <linux/module.h>
4122 #include <linux/slab.h>
4123 #include <linux/sysctl.h>
4124 #include <linux/workqueue.h>
4125 +#include <net/mptcp.h>
4126 #include <net/tcp.h>
4127 #include <net/inet_common.h>
4128 #include <net/xfrm.h>
4129 @@ -95,10 +97,13 @@
4130 struct tcp_options_received tmp_opt;
4131 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
4132 bool paws_reject = false;
4133 + struct mptcp_options_received mopt;
4135 tmp_opt.saw_tstamp = 0;
4136 if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
4137 - tcp_parse_options(skb, &tmp_opt, 0, NULL);
4138 + mptcp_init_mp_opt(&mopt);
4140 + tcp_parse_options(skb, &tmp_opt, &mopt, 0, NULL);
4142 if (tmp_opt.saw_tstamp) {
4143 tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset;
4144 @@ -106,6 +111,11 @@
4145 tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
4146 paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
4149 + if (unlikely(mopt.mp_fclose) && tcptw->mptcp_tw) {
4150 + if (mopt.mptcp_key == tcptw->mptcp_tw->loc_key)
4151 + goto kill_with_rst;
4155 if (tw->tw_substate == TCP_FIN_WAIT2) {
4156 @@ -128,6 +138,16 @@
4157 if (!th->ack ||
4158 !after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||
4159 TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
4160 + /* If mptcp_is_data_fin() returns true, we are sure that
4161 + * mopt has been initialized - otherwise it would not
4162 + * be a DATA_FIN.
4163 + */
4164 + if (tcptw->mptcp_tw && tcptw->mptcp_tw->meta_tw &&
4165 + mptcp_is_data_fin(skb) &&
4166 + TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt &&
4167 + mopt.data_seq + 1 == (u32)tcptw->mptcp_tw->rcv_nxt)
4168 + return TCP_TW_ACK;
4170 inet_twsk_put(tw);
4171 return TCP_TW_SUCCESS;
4173 @@ -270,6 +290,11 @@
4174 const struct tcp_sock *tp = tcp_sk(sk);
4175 bool recycle_ok = false;
4177 + if (is_meta_sk(sk)) {
4178 + mptcp_update_tw_socks(tp, state);
4179 + goto tcp_done;
4182 if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
4183 recycle_ok = tcp_remember_stamp(sk);
4185 @@ -290,6 +315,15 @@
4186 tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
4187 tcptw->tw_ts_offset = tp->tsoffset;
4189 + if (tp->mpc) {
4190 + if (mptcp_time_wait(sk, tcptw)) {
4191 + inet_twsk_free(tw);
4192 + goto exit;
4194 + } else {
4195 + tcptw->mptcp_tw = NULL;
4198 #if IS_ENABLED(CONFIG_IPV6)
4199 if (tw->tw_family == PF_INET6) {
4200 struct ipv6_pinfo *np = inet6_sk(sk);
4201 @@ -347,15 +381,19 @@
4202 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW);
4205 +exit:
4206 tcp_update_metrics(sk);
4207 +tcp_done:
4208 tcp_done(sk);
4211 void tcp_twsk_destructor(struct sock *sk)
4213 -#ifdef CONFIG_TCP_MD5SIG
4214 struct tcp_timewait_sock *twsk = tcp_twsk(sk);
4216 + if (twsk->mptcp_tw)
4217 + mptcp_twsk_destructor(twsk);
4218 +#ifdef CONFIG_TCP_MD5SIG
4219 if (twsk->tw_md5_key)
4220 kfree_rcu(twsk->tw_md5_key, rcu);
4221 #endif
4222 @@ -392,6 +430,9 @@
4224 newtp->snd_sml = newtp->snd_una =
4225 newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
4226 +#ifdef CONFIG_MPTCP
4227 + memset(&newtp->rcvq_space, 0, sizeof(newtp->rcvq_space));
4228 +#endif
4230 tcp_prequeue_init(newtp);
4231 INIT_LIST_HEAD(&newtp->tsq_node);
4232 @@ -436,7 +477,11 @@
4234 newtp->urg_data = 0;
4236 - if (sock_flag(newsk, SOCK_KEEPOPEN))
4237 + /* MPTCP: If we are creating a subflow, KEEPOPEN might have been
4238 + * set on the meta. But, keepalive is entirely handled at the
4239 + * meta-socket, so let's keep it there.
4240 + */
4241 + if (sock_flag(newsk, SOCK_KEEPOPEN) && is_meta_sk(sk))
4242 inet_csk_reset_keepalive_timer(newsk,
4243 keepalive_time_when(newtp));
4245 @@ -468,6 +513,8 @@
4246 newtp->rx_opt.ts_recent_stamp = 0;
4247 newtp->tcp_header_len = sizeof(struct tcphdr);
4249 + if (treq->saw_mpc)
4250 + newtp->tcp_header_len += MPTCP_SUB_LEN_DSM_ALIGN;
4251 newtp->tsoffset = 0;
4252 #ifdef CONFIG_TCP_MD5SIG
4253 newtp->md5sig_info = NULL; /*XXX*/
4254 @@ -504,16 +551,20 @@
4255 bool fastopen)
4257 struct tcp_options_received tmp_opt;
4258 + struct mptcp_options_received mopt;
4259 struct sock *child;
4260 const struct tcphdr *th = tcp_hdr(skb);
4261 __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
4262 bool paws_reject = false;
4264 - BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN));
4265 + BUG_ON(!tcp_sk(sk)->mpc && fastopen == (sk->sk_state == TCP_LISTEN));
4267 tmp_opt.saw_tstamp = 0;
4269 + mptcp_init_mp_opt(&mopt);
4271 if (th->doff > (sizeof(struct tcphdr)>>2)) {
4272 - tcp_parse_options(skb, &tmp_opt, 0, NULL);
4273 + tcp_parse_options(skb, &tmp_opt, &mopt, 0, NULL);
4275 if (tmp_opt.saw_tstamp) {
4276 tmp_opt.ts_recent = req->ts_recent;
4277 @@ -552,7 +603,14 @@
4279 * Reset timer after retransmitting SYNACK, similar to
4280 * the idea of fast retransmit in recovery.
4282 + * Fall back to TCP if MP_CAPABLE is not set.
4285 + if (tcp_rsk(req)->saw_mpc && !mopt.saw_mpc)
4286 + tcp_rsk(req)->saw_mpc = false;
4289 if (!inet_rtx_syn_ack(sk, req))
4290 req->expires = min(TCP_TIMEOUT_INIT << req->num_timeout,
4291 TCP_RTO_MAX) + jiffies;
4292 @@ -674,7 +732,20 @@
4294 /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
4295 if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
4296 - TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
4297 + TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1 &&
4298 + /* TODO MPTCP:
4299 + * We do this here, because otherwise options sent in the third ack,
4300 + * or duplicate fourth ack will get lost. Options like MP_PRIO, ADD_ADDR,...
4302 + * We could store them in request_sock, but this would mean that we
4303 + * have to put tcp_options_received and mptcp_options_received in there,
4304 + * increasing considerably the size of the request-sock.
4306 + * As soon as we have reworked the request-sock MPTCP-fields and
4307 + * created a mptcp_request_sock structure, we can handle options
4308 + * correctly there without increasing request_sock.
4309 + */
4310 + !tcp_rsk(req)->saw_mpc) {
4311 inet_rsk(req)->acked = 1;
4312 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
4313 return NULL;
4314 @@ -686,10 +757,29 @@
4315 * ESTABLISHED STATE. If it will be dropped after
4316 * socket is created, wait for troubles.
4318 - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
4319 +#ifdef CONFIG_MPTCP
4320 + if (tcp_sk(sk)->mpc)
4321 + /* MPTCP: We call the mptcp-specific syn_recv_sock */
4322 + child = tcp_sk(sk)->mpcb->syn_recv_sock(sk, skb, req, NULL);
4323 + else
4324 +#endif
4325 + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb,
4326 + req, NULL);
4328 if (child == NULL)
4329 goto listen_overflow;
4331 + if (!is_meta_sk(sk)) {
4332 + int ret = mptcp_check_req_master(sk, child, req, prev, &mopt);
4333 + if (ret < 0)
4334 + goto listen_overflow;
4336 + /* MPTCP-supported */
4337 + if (!ret)
4338 + return tcp_sk(child)->mpcb->master_sk;
4339 + } else {
4340 + return mptcp_check_req_child(sk, child, req, prev, &mopt);
4342 inet_csk_reqsk_queue_unlink(sk, req, prev);
4343 inet_csk_reqsk_queue_removed(sk, req);
4345 @@ -739,8 +829,9 @@
4347 int ret = 0;
4348 int state = child->sk_state;
4349 + struct sock *meta_sk = tcp_sk(child)->mpc ? mptcp_meta_sk(child) : child;
4351 - if (!sock_owned_by_user(child)) {
4352 + if (!sock_owned_by_user(meta_sk)) {
4353 ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb),
4354 skb->len);
4355 /* Wakeup parent, send SIGIO */
4356 @@ -751,10 +842,14 @@
4357 * in main socket hash table and lock on listening
4358 * socket does not protect us more.
4360 - __sk_add_backlog(child, skb);
4361 + if (tcp_sk(child)->mpc)
4362 + skb->sk = child;
4363 + __sk_add_backlog(meta_sk, skb);
4366 - bh_unlock_sock(child);
4367 + if (tcp_sk(child)->mpc)
4368 + bh_unlock_sock(child);
4369 + bh_unlock_sock(meta_sk);
4370 sock_put(child);
4371 return ret;
4373 diff -Nur linux-3.14.45.orig/net/ipv4/tcp_output.c linux-3.14.45/net/ipv4/tcp_output.c
4374 --- linux-3.14.45.orig/net/ipv4/tcp_output.c 2015-06-23 02:01:36.000000000 +0200
4375 +++ linux-3.14.45/net/ipv4/tcp_output.c 2015-06-24 14:15:48.887862480 +0200
4376 @@ -36,6 +36,12 @@
4378 #define pr_fmt(fmt) "TCP: " fmt
4380 +#include <net/mptcp.h>
4381 +#include <net/mptcp_v4.h>
4382 +#if IS_ENABLED(CONFIG_IPV6)
4383 +#include <net/mptcp_v6.h>
4384 +#endif
4385 +#include <net/ipv6.h>
4386 #include <net/tcp.h>
4388 #include <linux/compiler.h>
4389 @@ -72,7 +78,7 @@
4390 int push_one, gfp_t gfp);
4392 /* Account for new data that has been sent to the network. */
4393 -static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
4394 +void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
4396 struct inet_connection_sock *icsk = inet_csk(sk);
4397 struct tcp_sock *tp = tcp_sk(sk);
4398 @@ -211,7 +217,7 @@
4399 void tcp_select_initial_window(int __space, __u32 mss,
4400 __u32 *rcv_wnd, __u32 *window_clamp,
4401 int wscale_ok, __u8 *rcv_wscale,
4402 - __u32 init_rcv_wnd)
4403 + __u32 init_rcv_wnd, const struct sock *sk)
4405 unsigned int space = (__space < 0 ? 0 : __space);
4407 @@ -266,11 +272,15 @@
4408 * value can be stuffed directly into th->window for an outgoing
4409 * frame.
4411 -static u16 tcp_select_window(struct sock *sk)
4412 +u16 tcp_select_window(struct sock *sk)
4414 struct tcp_sock *tp = tcp_sk(sk);
4415 - u32 cur_win = tcp_receive_window(tp);
4416 - u32 new_win = __tcp_select_window(sk);
4417 + /* The window must never shrink at the meta-level. At the subflow we
4418 + * have to allow this. Otherwise we may announce a window too large
4419 + * for the current meta-level sk_rcvbuf.
4420 + */
4421 + u32 cur_win = tcp_receive_window(tp->mpc ? tcp_sk(mptcp_meta_sk(sk)) : tp);
4422 + u32 new_win = tp->__select_window(sk);
4424 /* Never shrink the offered window */
4425 if (new_win < cur_win) {
4426 @@ -283,6 +293,7 @@
4428 new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
4431 tp->rcv_wnd = new_win;
4432 tp->rcv_wup = tp->rcv_nxt;
4434 @@ -361,7 +372,7 @@
4435 /* Constructs common control bits of non-data skb. If SYN/FIN is present,
4436 * auto increment end seqno.
4438 -static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
4439 +void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
4441 struct skb_shared_info *shinfo = skb_shinfo(skb);
4443 @@ -381,7 +392,7 @@
4444 TCP_SKB_CB(skb)->end_seq = seq;
4447 -static inline bool tcp_urg_mode(const struct tcp_sock *tp)
4448 +bool tcp_urg_mode(const struct tcp_sock *tp)
4450 return tp->snd_una != tp->snd_up;
4452 @@ -391,17 +402,7 @@
4453 #define OPTION_MD5 (1 << 2)
4454 #define OPTION_WSCALE (1 << 3)
4455 #define OPTION_FAST_OPEN_COOKIE (1 << 8)
4457 -struct tcp_out_options {
4458 - u16 options; /* bit field of OPTION_* */
4459 - u16 mss; /* 0 to disable */
4460 - u8 ws; /* window scale, 0 to disable */
4461 - u8 num_sack_blocks; /* number of SACK blocks to include */
4462 - u8 hash_size; /* bytes in hash_location */
4463 - __u8 *hash_location; /* temporary pointer, overloaded */
4464 - __u32 tsval, tsecr; /* need to include OPTION_TS */
4465 - struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
4467 +/* Before adding here - take a look at OPTION_MPTCP in include/net/mptcp.h */
4469 /* Write previously computed TCP options to the packet.
4471 @@ -417,7 +418,7 @@
4472 * (but it may well be that other scenarios fail similarly).
4474 static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
4475 - struct tcp_out_options *opts)
4476 + struct tcp_out_options *opts, struct sk_buff *skb)
4478 u16 options = opts->options; /* mungable copy */
4480 @@ -500,6 +501,9 @@
4482 ptr += (foc->len + 3) >> 2;
4485 + if (unlikely(OPTION_MPTCP & opts->options))
4486 + mptcp_options_write(ptr, tp, opts, skb);
4489 /* Compute TCP options for SYN packets. This is not the final
4490 @@ -551,6 +555,8 @@
4491 if (unlikely(!(OPTION_TS & opts->options)))
4492 remaining -= TCPOLEN_SACKPERM_ALIGNED;
4494 + if (tp->request_mptcp || tp->mpc)
4495 + mptcp_syn_options(sk, opts, &remaining);
4497 if (fastopen && fastopen->cookie.len >= 0) {
4498 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len;
4499 @@ -624,6 +630,9 @@
4503 + if (tcp_rsk(req)->saw_mpc)
4504 + mptcp_synack_options(req, opts, &remaining);
4506 return MAX_TCP_OPTION_SPACE - remaining;
4509 @@ -657,16 +666,22 @@
4510 opts->tsecr = tp->rx_opt.ts_recent;
4511 size += TCPOLEN_TSTAMP_ALIGNED;
4513 + if (tp->mpc)
4514 + mptcp_established_options(sk, skb, opts, &size);
4516 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
4517 if (unlikely(eff_sacks)) {
4518 - const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
4519 - opts->num_sack_blocks =
4520 - min_t(unsigned int, eff_sacks,
4521 - (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
4522 - TCPOLEN_SACK_PERBLOCK);
4523 - size += TCPOLEN_SACK_BASE_ALIGNED +
4524 - opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
4525 + const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
4526 + if (remaining < TCPOLEN_SACK_BASE_ALIGNED)
4527 + opts->num_sack_blocks = 0;
4528 + else
4529 + opts->num_sack_blocks =
4530 + min_t(unsigned int, eff_sacks,
4531 + (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
4532 + TCPOLEN_SACK_PERBLOCK);
4533 + if (opts->num_sack_blocks)
4534 + size += TCPOLEN_SACK_BASE_ALIGNED +
4535 + opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
4538 return size;
4539 @@ -714,7 +729,7 @@
4540 unsigned long flags;
4541 struct list_head *q, *n;
4542 struct tcp_sock *tp;
4543 - struct sock *sk;
4544 + struct sock *sk, *meta_sk;
4546 local_irq_save(flags);
4547 list_splice_init(&tsq->head, &list);
4548 @@ -725,15 +740,27 @@
4549 list_del(&tp->tsq_node);
4551 sk = (struct sock *)tp;
4552 - bh_lock_sock(sk);
4553 + meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk;
4554 + bh_lock_sock(meta_sk);
4556 - if (!sock_owned_by_user(sk)) {
4557 + if (!sock_owned_by_user(meta_sk)) {
4558 tcp_tsq_handler(sk);
4559 + if (tp->mpc)
4560 + tcp_tsq_handler(meta_sk);
4561 } else {
4562 /* defer the work to tcp_release_cb() */
4563 set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
4565 + /* For MPTCP, we set the tsq-bit on the meta, and the
4566 + * subflow as we don't know if the limitation happened
4567 + * while inside mptcp_write_xmit or during tcp_write_xmit.
4568 + */
4569 + if (tp->mpc) {
4570 + set_bit(TCP_TSQ_DEFERRED, &tcp_sk(meta_sk)->tsq_flags);
4571 + mptcp_tsq_flags(sk);
4574 - bh_unlock_sock(sk);
4575 + bh_unlock_sock(meta_sk);
4577 clear_bit(TSQ_QUEUED, &tp->tsq_flags);
4578 sk_free(sk);
4579 @@ -743,7 +770,10 @@
4580 #define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \
4581 (1UL << TCP_WRITE_TIMER_DEFERRED) | \
4582 (1UL << TCP_DELACK_TIMER_DEFERRED) | \
4583 - (1UL << TCP_MTU_REDUCED_DEFERRED))
4584 + (1UL << TCP_MTU_REDUCED_DEFERRED) | \
4585 + (1UL << MPTCP_PATH_MANAGER) | \
4586 + (1UL << MPTCP_SUB_DEFERRED))
4589 * tcp_release_cb - tcp release_sock() callback
4590 * @sk: socket
4591 @@ -790,6 +820,13 @@
4592 inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
4593 __sock_put(sk);
4595 + if (flags & (1UL << MPTCP_PATH_MANAGER)) {
4596 + if (tcp_sk(sk)->mpcb->pm_ops->release_sock)
4597 + tcp_sk(sk)->mpcb->pm_ops->release_sock(sk);
4598 + __sock_put(sk);
4600 + if (flags & (1UL << MPTCP_SUB_DEFERRED))
4601 + mptcp_tsq_sub_deferred(sk);
4603 EXPORT_SYMBOL(tcp_release_cb);
4605 @@ -849,8 +886,8 @@
4606 * We are working here with either a clone of the original
4607 * SKB, or a fresh unique copy made by the retransmit engine.
4609 -static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
4610 - gfp_t gfp_mask)
4611 +int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
4612 + gfp_t gfp_mask)
4614 const struct inet_connection_sock *icsk = inet_csk(sk);
4615 struct inet_sock *inet;
4616 @@ -878,10 +915,28 @@
4617 NET_INC_STATS(sock_net(sk),
4618 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
4620 - if (unlikely(skb_cloned(skb)))
4621 - skb = pskb_copy(skb, gfp_mask);
4622 - else
4623 + if (unlikely(skb_cloned(skb))) {
4624 + struct sk_buff *newskb;
4625 + if (mptcp_is_data_seq(skb))
4626 + skb_push(skb, MPTCP_SUB_LEN_DSS_ALIGN +
4627 + MPTCP_SUB_LEN_ACK_ALIGN +
4628 + MPTCP_SUB_LEN_SEQ_ALIGN);
4630 + newskb = pskb_copy(skb, gfp_mask);
4632 + if (mptcp_is_data_seq(skb)) {
4633 + skb_pull(skb, MPTCP_SUB_LEN_DSS_ALIGN +
4634 + MPTCP_SUB_LEN_ACK_ALIGN +
4635 + MPTCP_SUB_LEN_SEQ_ALIGN);
4636 + if (newskb)
4637 + skb_pull(newskb, MPTCP_SUB_LEN_DSS_ALIGN +
4638 + MPTCP_SUB_LEN_ACK_ALIGN +
4639 + MPTCP_SUB_LEN_SEQ_ALIGN);
4641 + skb = newskb;
4642 + } else {
4643 skb = skb_clone(skb, gfp_mask);
4645 if (unlikely(!skb))
4646 return -ENOBUFS;
4648 @@ -929,7 +984,7 @@
4650 th->window = htons(min(tp->rcv_wnd, 65535U));
4651 } else {
4652 - th->window = htons(tcp_select_window(sk));
4653 + th->window = htons(tp->select_window(sk));
4655 th->check = 0;
4656 th->urg_ptr = 0;
4657 @@ -945,7 +1000,7 @@
4661 - tcp_options_write((__be32 *)(th + 1), tp, &opts);
4662 + tcp_options_write((__be32 *)(th + 1), tp, &opts, skb);
4663 if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
4664 TCP_ECN_send(sk, skb, tcp_header_size);
4666 @@ -984,7 +1039,7 @@
4667 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
4668 * otherwise socket can stall.
4670 -static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
4671 +void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
4673 struct tcp_sock *tp = tcp_sk(sk);
4675 @@ -997,15 +1052,16 @@
4678 /* Initialize TSO segments for a packet. */
4679 -static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
4680 - unsigned int mss_now)
4681 +void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
4682 + unsigned int mss_now)
4684 struct skb_shared_info *shinfo = skb_shinfo(skb);
4686 /* Make sure we own this skb before messing gso_size/gso_segs */
4687 WARN_ON_ONCE(skb_cloned(skb));
4689 - if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
4690 + if (skb->len <= mss_now || (is_meta_sk(sk) && !mptcp_sk_can_gso(sk)) ||
4691 + (!is_meta_sk(sk) && !sk_can_gso(sk)) || skb->ip_summed == CHECKSUM_NONE) {
4692 /* Avoid the costly divide in the normal
4693 * non-TSO case.
4695 @@ -1037,7 +1093,7 @@
4696 /* Pcount in the middle of the write queue got changed, we need to do various
4697 * tweaks to fix counters
4699 -static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
4700 +void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
4702 struct tcp_sock *tp = tcp_sk(sk);
4704 @@ -1078,6 +1134,9 @@
4705 int nlen;
4706 u8 flags;
4708 + if (tcp_sk(sk)->mpc && mptcp_is_data_seq(skb))
4709 + mptcp_fragment(sk, skb, len, mss_now, 0);
4711 if (WARN_ON(len > skb->len))
4712 return -EINVAL;
4714 @@ -1160,7 +1219,7 @@
4715 * eventually). The difference is that pulled data not copied, but
4716 * immediately discarded.
4718 -static void __pskb_trim_head(struct sk_buff *skb, int len)
4719 +void __pskb_trim_head(struct sk_buff *skb, int len)
4721 struct skb_shared_info *shinfo;
4722 int i, k, eat;
4723 @@ -1201,6 +1260,9 @@
4724 /* Remove acked data from a packet in the transmit queue. */
4725 int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
4727 + if (tcp_sk(sk)->mpc && !is_meta_sk(sk) && mptcp_is_data_seq(skb))
4728 + return mptcp_trim_head(sk, skb, len);
4730 if (skb_unclone(skb, GFP_ATOMIC))
4731 return -ENOMEM;
4733 @@ -1218,6 +1280,15 @@
4734 if (tcp_skb_pcount(skb) > 1)
4735 tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
4737 +#ifdef CONFIG_MPTCP
4738 + /* Some data got acked - we assume that the seq-number reached the dest.
4739 + * Anyway, our MPTCP-option has been trimmed above - we lost it here.
4740 + * Only remove the SEQ if the call does not come from a meta retransmit.
4741 + */
4742 + if (tcp_sk(sk)->mpc && !is_meta_sk(sk))
4743 + TCP_SKB_CB(skb)->mptcp_flags &= ~MPTCPHDR_SEQ;
4744 +#endif
4746 return 0;
4749 @@ -1377,7 +1448,7 @@
4752 /* Congestion window validation. (RFC2861) */
4753 -static void tcp_cwnd_validate(struct sock *sk)
4754 +void tcp_cwnd_validate(struct sock *sk)
4756 struct tcp_sock *tp = tcp_sk(sk);
4758 @@ -1411,8 +1482,8 @@
4759 * But we can avoid doing the divide again given we already have
4760 * skb_pcount = skb->len / mss_now
4762 -static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
4763 - const struct sk_buff *skb)
4764 +void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
4765 + const struct sk_buff *skb)
4767 if (skb->len < tcp_skb_pcount(skb) * mss_now)
4768 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
4769 @@ -1433,19 +1504,28 @@
4770 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
4772 /* Returns the portion of skb which can be sent right away */
4773 -static unsigned int tcp_mss_split_point(const struct sock *sk,
4774 - const struct sk_buff *skb,
4775 - unsigned int mss_now,
4776 - unsigned int max_segs,
4777 - int nonagle)
4778 +unsigned int tcp_mss_split_point(const struct sock *sk,
4779 + const struct sk_buff *skb,
4780 + unsigned int mss_now,
4781 + unsigned int max_segs,
4782 + int nonagle)
4784 const struct tcp_sock *tp = tcp_sk(sk);
4785 + const struct sock *meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk;
4786 u32 partial, needed, window, max_len;
4788 - window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
4789 + if (!tp->mpc)
4790 + window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
4791 + else
4792 + /* We need to evaluate the available space in the sending window
4793 + * at the subflow level. However, the subflow seq has not yet
4794 + * been set. Nevertheless we know that the caller will set it to
4795 + * write_seq.
4796 + */
4797 + window = tcp_wnd_end(tp) - tp->write_seq;
4798 max_len = mss_now * max_segs;
4800 - if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
4801 + if (likely(max_len <= window && skb != tcp_write_queue_tail(meta_sk)))
4802 return max_len;
4804 needed = min(skb->len, window);
4805 @@ -1467,13 +1547,14 @@
4806 /* Can at least one segment of SKB be sent right now, according to the
4807 * congestion window rules? If so, return how many segments are allowed.
4809 -static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
4810 - const struct sk_buff *skb)
4811 +unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
4812 + const struct sk_buff *skb)
4814 u32 in_flight, cwnd;
4816 /* Don't be strict about the congestion window for the final FIN. */
4817 - if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
4818 + if (skb &&
4819 + ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) || mptcp_is_data_fin(skb)) &&
4820 tcp_skb_pcount(skb) == 1)
4821 return 1;
4823 @@ -1489,8 +1570,8 @@
4824 * This must be invoked the first time we consider transmitting
4825 * SKB onto the wire.
4827 -static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
4828 - unsigned int mss_now)
4829 +int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
4830 + unsigned int mss_now)
4832 int tso_segs = tcp_skb_pcount(skb);
4834 @@ -1505,8 +1586,8 @@
4835 /* Return true if the Nagle test allows this packet to be
4836 * sent now.
4838 -static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
4839 - unsigned int cur_mss, int nonagle)
4840 +bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
4841 + unsigned int cur_mss, int nonagle)
4843 /* Nagle rule does not apply to frames, which sit in the middle of the
4844 * write_queue (they have no chances to get new data).
4845 @@ -1518,7 +1599,8 @@
4846 return true;
4848 /* Don't use the nagle rule for urgent data (or for the final FIN). */
4849 - if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
4850 + if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) ||
4851 + mptcp_is_data_fin(skb))
4852 return true;
4854 if (!tcp_nagle_check(skb->len < cur_mss, tp, cur_mss, nonagle))
4855 @@ -1528,9 +1610,8 @@
4858 /* Does at least the first segment of SKB fit into the send window? */
4859 -static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
4860 - const struct sk_buff *skb,
4861 - unsigned int cur_mss)
4862 +bool tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb,
4863 + unsigned int cur_mss)
4865 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4867 @@ -1549,14 +1630,16 @@
4869 const struct tcp_sock *tp = tcp_sk(sk);
4870 unsigned int cwnd_quota;
4871 + const struct sock *meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk;
4872 + const struct tcp_sock *meta_tp = tcp_sk(meta_sk);
4874 - tcp_init_tso_segs(sk, skb, cur_mss);
4875 + tcp_init_tso_segs(meta_sk, skb, cur_mss);
4877 - if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
4878 + if (!tcp_nagle_test(meta_tp, skb, cur_mss, nonagle))
4879 return 0;
4881 cwnd_quota = tcp_cwnd_test(tp, skb);
4882 - if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
4883 + if (cwnd_quota && !tcp_snd_wnd_test(meta_tp, skb, cur_mss))
4884 cwnd_quota = 0;
4886 return cwnd_quota;
4887 @@ -1566,12 +1649,16 @@
4888 bool tcp_may_send_now(struct sock *sk)
4890 const struct tcp_sock *tp = tcp_sk(sk);
4891 - struct sk_buff *skb = tcp_send_head(sk);
4892 + struct sk_buff *skb;
4893 + const struct sock *meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk;
4894 + const struct tcp_sock *meta_tp = tcp_sk(meta_sk);
4896 + skb = tcp_send_head(meta_sk);
4898 return skb &&
4899 tcp_snd_test(sk, skb, tcp_current_mss(sk),
4900 - (tcp_skb_is_last(sk, skb) ?
4901 - tp->nonagle : TCP_NAGLE_PUSH));
4902 + (tcp_skb_is_last(meta_sk, skb) ?
4903 + meta_tp->nonagle : TCP_NAGLE_PUSH));
4906 /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
4907 @@ -1588,6 +1675,9 @@
4908 int nlen = skb->len - len;
4909 u8 flags;
4911 + if (tcp_sk(sk)->mpc && mptcp_is_data_seq(skb))
4912 + mptso_fragment(sk, skb, len, mss_now, gfp, 0);
4914 /* All of a TSO frame must be composed of paged data. */
4915 if (skb->len != skb->data_len)
4916 return tcp_fragment(sk, skb, len, mss_now);
4917 @@ -1633,29 +1723,39 @@
4919 * This algorithm is from John Heffner.
4921 -static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
4922 +bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
4924 struct tcp_sock *tp = tcp_sk(sk);
4925 + struct sock *meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk;
4926 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
4927 const struct inet_connection_sock *icsk = inet_csk(sk);
4928 u32 send_win, cong_win, limit, in_flight;
4929 int win_divisor;
4931 - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
4932 + if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) || mptcp_is_data_fin(skb))
4933 goto send_now;
4935 if (icsk->icsk_ca_state != TCP_CA_Open)
4936 goto send_now;
4938 /* Defer for less than two clock ticks. */
4939 - if (tp->tso_deferred &&
4940 - (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
4941 + if (meta_tp->tso_deferred &&
4942 + (((u32)jiffies << 1) >> 1) - (meta_tp->tso_deferred >> 1) > 1)
4943 goto send_now;
4945 in_flight = tcp_packets_in_flight(tp);
4947 BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
4949 - send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
4950 + if (!tp->mpc)
4951 + send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
4952 + else
4953 + /* We need to evaluate the available space in the sending window
4954 + * at the subflow level. However, the subflow seq has not yet
4955 + * been set. Nevertheless we know that the caller will set it to
4956 + * write_seq.
4957 + */
4958 + send_win = tcp_wnd_end(tp) - tp->write_seq;
4960 /* From in_flight test above, we know that cwnd > in_flight. */
4961 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
4962 @@ -1668,7 +1768,7 @@
4963 goto send_now;
4965 /* Middle in queue won't get any more data, full sendable already? */
4966 - if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
4967 + if ((skb != tcp_write_queue_tail(meta_sk)) && (limit >= skb->len))
4968 goto send_now;
4970 win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
4971 @@ -1694,13 +1794,13 @@
4972 /* Ok, it looks like it is advisable to defer.
4973 * Do not rearm the timer if already set to not break TCP ACK clocking.
4975 - if (!tp->tso_deferred)
4976 - tp->tso_deferred = 1 | (jiffies << 1);
4977 + if (!meta_tp->tso_deferred)
4978 + meta_tp->tso_deferred = 1 | (jiffies << 1);
4980 return true;
4982 send_now:
4983 - tp->tso_deferred = 0;
4984 + meta_tp->tso_deferred = 0;
4985 return false;
4988 @@ -1713,7 +1813,7 @@
4989 * 1 if a probe was sent,
4990 * -1 otherwise
4992 -static int tcp_mtu_probe(struct sock *sk)
4993 +int tcp_mtu_probe(struct sock *sk)
4995 struct tcp_sock *tp = tcp_sk(sk);
4996 struct inet_connection_sock *icsk = inet_csk(sk);
4997 @@ -1858,6 +1958,9 @@
4998 int cwnd_quota;
4999 int result;
5001 + if (is_meta_sk(sk))
5002 + return mptcp_write_xmit(sk, mss_now, nonagle, push_one, gfp);
5004 sent_pkts = 0;
5006 if (!push_one) {
5007 @@ -2314,6 +2417,10 @@
5008 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
5009 return;
5011 + /* Currently not supported for MPTCP - but it should be possible */
5012 + if (tp->mpc)
5013 + return;
5015 tcp_for_write_queue_from_safe(skb, tmp, sk) {
5016 if (!tcp_can_collapse(sk, skb))
5017 break;
5018 @@ -2411,10 +2518,26 @@
5020 if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
5021 skb_headroom(skb) >= 0xFFFF)) {
5022 - struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
5023 - GFP_ATOMIC);
5024 + struct sk_buff *nskb;
5026 + if (mptcp_is_data_seq(skb))
5027 + skb_push(skb, MPTCP_SUB_LEN_DSS_ALIGN +
5028 + MPTCP_SUB_LEN_ACK_ALIGN +
5029 + MPTCP_SUB_LEN_SEQ_ALIGN);
5031 + nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
5033 + if (mptcp_is_data_seq(skb)) {
5034 + skb_pull(skb, MPTCP_SUB_LEN_DSS_ALIGN +
5035 + MPTCP_SUB_LEN_ACK_ALIGN +
5036 + MPTCP_SUB_LEN_SEQ_ALIGN);
5037 + if (nskb)
5038 + skb_pull(nskb, MPTCP_SUB_LEN_DSS_ALIGN +
5039 + MPTCP_SUB_LEN_ACK_ALIGN +
5040 + MPTCP_SUB_LEN_SEQ_ALIGN);
5042 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
5043 - -ENOBUFS;
5044 + -ENOBUFS;
5045 } else {
5046 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
5048 @@ -2665,6 +2788,11 @@
5050 struct sk_buff *skb;
5052 + if (is_meta_sk(sk)) {
5053 + mptcp_send_active_reset(sk, priority);
5054 + return;
5057 /* NOTE: No TCP options attached and we never retransmit this. */
5058 skb = alloc_skb(MAX_TCP_HEADER, priority);
5059 if (!skb) {
5060 @@ -2767,14 +2895,14 @@
5061 (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
5062 req->window_clamp = tcp_full_space(sk);
5064 - /* tcp_full_space because it is guaranteed to be the first packet */
5065 - tcp_select_initial_window(tcp_full_space(sk),
5066 - mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
5067 + tp->select_initial_window(tcp_full_space(sk),
5068 + mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) -
5069 + (tcp_rsk(req)->saw_mpc ? MPTCP_SUB_LEN_DSM_ALIGN : 0),
5070 &req->rcv_wnd,
5071 &req->window_clamp,
5072 ireq->wscale_ok,
5073 &rcv_wscale,
5074 - dst_metric(dst, RTAX_INITRWND));
5075 + dst_metric(dst, RTAX_INITRWND), sk);
5076 ireq->rcv_wscale = rcv_wscale;
5079 @@ -2810,7 +2938,7 @@
5081 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
5082 th->window = htons(min(req->rcv_wnd, 65535U));
5083 - tcp_options_write((__be32 *)(th + 1), tp, &opts);
5084 + tcp_options_write((__be32 *)(th + 1), tp, &opts, skb);
5085 th->doff = (tcp_header_size >> 2);
5086 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb));
5088 @@ -2866,13 +2994,13 @@
5089 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
5090 tp->window_clamp = tcp_full_space(sk);
5092 - tcp_select_initial_window(tcp_full_space(sk),
5093 + tp->select_initial_window(tcp_full_space(sk),
5094 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
5095 &tp->rcv_wnd,
5096 &tp->window_clamp,
5097 sysctl_tcp_window_scaling,
5098 &rcv_wscale,
5099 - dst_metric(dst, RTAX_INITRWND));
5100 + dst_metric(dst, RTAX_INITRWND), sk);
5102 tp->rx_opt.rcv_wscale = rcv_wscale;
5103 tp->rcv_ssthresh = tp->rcv_wnd;
5104 @@ -2896,6 +3024,38 @@
5105 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
5106 inet_csk(sk)->icsk_retransmits = 0;
5107 tcp_clear_retrans(tp);
5109 +#ifdef CONFIG_MPTCP
5110 + if (sysctl_mptcp_enabled && mptcp_doit(sk)) {
5111 + if (is_master_tp(tp)) {
5112 + tp->request_mptcp = 1;
5113 + mptcp_connect_init(sk);
5114 + } else if (tp->mptcp) {
5115 + struct inet_sock *inet = inet_sk(sk);
5117 + tp->mptcp->snt_isn = tp->write_seq;
5118 + tp->mptcp->init_rcv_wnd = tp->rcv_wnd;
5120 + /* Set nonce for new subflows */
5121 + if (sk->sk_family == AF_INET)
5122 + tp->mptcp->mptcp_loc_nonce = mptcp_v4_get_nonce(
5123 + inet->inet_saddr,
5124 + inet->inet_daddr,
5125 + inet->inet_sport,
5126 + inet->inet_dport,
5127 + tp->write_seq);
5128 +#if IS_ENABLED(CONFIG_IPV6)
5129 + else
5130 + tp->mptcp->mptcp_loc_nonce = mptcp_v6_get_nonce(
5131 + inet6_sk(sk)->saddr.s6_addr32,
5132 + sk->sk_v6_daddr.s6_addr32,
5133 + inet->inet_sport,
5134 + inet->inet_dport,
5135 + tp->write_seq);
5136 +#endif
5139 +#endif
5142 static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
5143 @@ -3131,6 +3291,7 @@
5144 TCP_SKB_CB(buff)->when = tcp_time_stamp;
5145 tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
5147 +EXPORT_SYMBOL(tcp_send_ack);
5149 /* This routine sends a packet with an out of date sequence
5150 * number. It assumes the other end will try to ack it.
5151 @@ -3143,7 +3304,7 @@
5152 * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
5153 * out-of-date with SND.UNA-1 to probe window.
5155 -static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
5156 +int tcp_xmit_probe_skb(struct sock *sk, int urgent)
5158 struct tcp_sock *tp = tcp_sk(sk);
5159 struct sk_buff *skb;
5160 @@ -3181,6 +3342,9 @@
5161 if (sk->sk_state == TCP_CLOSE)
5162 return -1;
5164 + if (is_meta_sk(sk))
5165 + return mptcp_write_wakeup(sk);
5167 if ((skb = tcp_send_head(sk)) != NULL &&
5168 before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
5169 int err;
5170 diff -Nur linux-3.14.45.orig/net/ipv4/tcp_timer.c linux-3.14.45/net/ipv4/tcp_timer.c
5171 --- linux-3.14.45.orig/net/ipv4/tcp_timer.c 2015-06-23 02:01:36.000000000 +0200
5172 +++ linux-3.14.45/net/ipv4/tcp_timer.c 2015-06-24 14:15:48.891862483 +0200
5173 @@ -20,6 +20,7 @@
5175 #include <linux/module.h>
5176 #include <linux/gfp.h>
5177 +#include <net/mptcp.h>
5178 #include <net/tcp.h>
5180 int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
5181 @@ -32,7 +33,7 @@
5182 int sysctl_tcp_orphan_retries __read_mostly;
5183 int sysctl_tcp_thin_linear_timeouts __read_mostly;
5185 -static void tcp_write_err(struct sock *sk)
5186 +void tcp_write_err(struct sock *sk)
5188 sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
5189 sk->sk_error_report(sk);
5190 @@ -124,10 +125,8 @@
5191 * retransmissions with an initial RTO of TCP_RTO_MIN or TCP_TIMEOUT_INIT if
5192 * syn_set flag is set.
5194 -static bool retransmits_timed_out(struct sock *sk,
5195 - unsigned int boundary,
5196 - unsigned int timeout,
5197 - bool syn_set)
5198 +bool retransmits_timed_out(struct sock *sk, unsigned int boundary,
5199 + unsigned int timeout, bool syn_set)
5201 unsigned int linear_backoff_thresh, start_ts;
5202 unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN;
5203 @@ -153,7 +152,7 @@
5206 /* A write timeout has occurred. Process the after effects. */
5207 -static int tcp_write_timeout(struct sock *sk)
5208 +int tcp_write_timeout(struct sock *sk)
5210 struct inet_connection_sock *icsk = inet_csk(sk);
5211 struct tcp_sock *tp = tcp_sk(sk);
5212 @@ -168,6 +167,10 @@
5214 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
5215 syn_set = true;
5216 + /* Stop retransmitting MP_CAPABLE options in SYN if timed out. */
5217 + if (tcp_sk(sk)->request_mptcp &&
5218 + icsk->icsk_retransmits >= mptcp_sysctl_syn_retries())
5219 + tcp_sk(sk)->request_mptcp = 0;
5220 } else {
5221 if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
5222 /* Black hole detection */
5223 @@ -248,18 +251,22 @@
5224 static void tcp_delack_timer(unsigned long data)
5226 struct sock *sk = (struct sock *)data;
5227 + struct tcp_sock *tp = tcp_sk(sk);
5228 + struct sock *meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk;
5230 - bh_lock_sock(sk);
5231 - if (!sock_owned_by_user(sk)) {
5232 + bh_lock_sock(meta_sk);
5233 + if (!sock_owned_by_user(meta_sk)) {
5234 tcp_delack_timer_handler(sk);
5235 } else {
5236 inet_csk(sk)->icsk_ack.blocked = 1;
5237 - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
5238 + NET_INC_STATS_BH(sock_net(meta_sk), LINUX_MIB_DELAYEDACKLOCKED);
5239 /* deleguate our work to tcp_release_cb() */
5240 if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
5241 sock_hold(sk);
5242 + if (tp->mpc)
5243 + mptcp_tsq_flags(sk);
5245 - bh_unlock_sock(sk);
5246 + bh_unlock_sock(meta_sk);
5247 sock_put(sk);
5250 @@ -421,6 +428,9 @@
5252 tcp_enter_loss(sk, 0);
5254 + if (tp->mpc)
5255 + mptcp_reinject_data(sk, 1);
5257 if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
5258 /* Retransmission failed because of local congestion,
5259 * do not backoff.
5260 @@ -471,6 +481,8 @@
5261 /* Use normal (exponential) backoff */
5262 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
5264 + if (tp->mpc)
5265 + mptcp_set_rto(sk);
5266 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
5267 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0, 0))
5268 __sk_dst_reset(sk);
5269 @@ -502,7 +514,10 @@
5270 break;
5271 case ICSK_TIME_RETRANS:
5272 icsk->icsk_pending = 0;
5273 - tcp_retransmit_timer(sk);
5274 + if (is_meta_sk(sk))
5275 + mptcp_retransmit_timer(sk);
5276 + else
5277 + tcp_retransmit_timer(sk);
5278 break;
5279 case ICSK_TIME_PROBE0:
5280 icsk->icsk_pending = 0;
5281 @@ -517,16 +532,19 @@
5282 static void tcp_write_timer(unsigned long data)
5284 struct sock *sk = (struct sock *)data;
5285 + struct sock *meta_sk = tcp_sk(sk)->mpc ? mptcp_meta_sk(sk) : sk;
5287 - bh_lock_sock(sk);
5288 - if (!sock_owned_by_user(sk)) {
5289 + bh_lock_sock(meta_sk);
5290 + if (!sock_owned_by_user(meta_sk)) {
5291 tcp_write_timer_handler(sk);
5292 } else {
5293 /* deleguate our work to tcp_release_cb() */
5294 if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
5295 sock_hold(sk);
5296 + if (tcp_sk(sk)->mpc)
5297 + mptcp_tsq_flags(sk);
5299 - bh_unlock_sock(sk);
5300 + bh_unlock_sock(meta_sk);
5301 sock_put(sk);
5304 @@ -563,11 +581,12 @@
5305 struct sock *sk = (struct sock *) data;
5306 struct inet_connection_sock *icsk = inet_csk(sk);
5307 struct tcp_sock *tp = tcp_sk(sk);
5308 + struct sock *meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk;
5309 u32 elapsed;
5311 /* Only process if socket is not in use. */
5312 - bh_lock_sock(sk);
5313 - if (sock_owned_by_user(sk)) {
5314 + bh_lock_sock(meta_sk);
5315 + if (sock_owned_by_user(meta_sk)) {
5316 /* Try again later. */
5317 inet_csk_reset_keepalive_timer (sk, HZ/20);
5318 goto out;
5319 @@ -578,6 +597,29 @@
5320 goto out;
5323 + if (tp->send_mp_fclose) {
5324 + /* MUST do this before tcp_write_timeout, because retrans_stamp
5325 + * may have been set to 0 in another part while we are
5326 + * retransmitting MP_FASTCLOSE. Then, we would crash, because
5327 + * retransmits_timed_out accesses the meta-write-queue.
5329 + * We make sure that the timestamp is != 0.
5330 + */
5331 + if (!tp->retrans_stamp)
5332 + tp->retrans_stamp = tcp_time_stamp ? : 1;
5334 + if (tcp_write_timeout(sk))
5335 + goto out;
5337 + tcp_send_ack(sk);
5338 + icsk->icsk_backoff++;
5339 + icsk->icsk_retransmits++;
5341 + icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
5342 + elapsed = icsk->icsk_rto;
5343 + goto resched;
5346 if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
5347 if (tp->linger2 >= 0) {
5348 const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
5349 @@ -639,7 +681,7 @@
5350 tcp_done(sk);
5352 out:
5353 - bh_unlock_sock(sk);
5354 + bh_unlock_sock(meta_sk);
5355 sock_put(sk);
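
The tcp_timer.c hunks above all follow one pattern: when a subflow belongs to an MPTCP connection (tp->mpc), the handler locks and tests ownership of the meta-socket instead of the subflow socket, so deferred work is serialised at the connection level. Below is a minimal user-space sketch of that pattern only; the struct and helpers are hypothetical stand-ins for the kernel's struct sock, mptcp_meta_sk(), bh_lock_sock() and sock_owned_by_user(), not the real implementation.

/* Illustration only: "lock the meta-socket" pattern from the timer hunks. */
#include <stdio.h>
#include <stdbool.h>

struct fake_sock {
	bool mpc;                 /* subflow belongs to an MPTCP connection */
	bool owned_by_user;       /* user context currently holds the lock  */
	struct fake_sock *meta;   /* meta-socket, valid only when mpc       */
};

static struct fake_sock *pick_lock_target(struct fake_sock *sk)
{
	/* Mirrors: meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk; */
	return sk->mpc ? sk->meta : sk;
}

static void timer_handler(struct fake_sock *sk)
{
	struct fake_sock *meta_sk = pick_lock_target(sk);

	if (!meta_sk->owned_by_user)
		printf("run handler directly on subflow %p\n", (void *)sk);
	else
		printf("defer work, meta-socket %p is owned by user\n",
		       (void *)meta_sk);
}

int main(void)
{
	struct fake_sock meta = { .mpc = false, .owned_by_user = true };
	struct fake_sock sub  = { .mpc = true,  .owned_by_user = false,
				  .meta = &meta };

	timer_handler(&sub);   /* defers: meta-level lock is held */
	meta.owned_by_user = false;
	timer_handler(&sub);   /* runs: meta-level lock is free   */
	return 0;
}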
5358 diff -Nur linux-3.14.45.orig/net/ipv6/addrconf.c linux-3.14.45/net/ipv6/addrconf.c
5359 --- linux-3.14.45.orig/net/ipv6/addrconf.c 2015-06-23 02:01:36.000000000 +0200
5360 +++ linux-3.14.45/net/ipv6/addrconf.c 2015-06-24 14:15:48.891862483 +0200
5361 @@ -765,6 +765,7 @@
5363 kfree_rcu(ifp, rcu);
5365 +EXPORT_SYMBOL(inet6_ifa_finish_destroy);
5367 static void
5368 ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
5369 diff -Nur linux-3.14.45.orig/net/ipv6/af_inet6.c linux-3.14.45/net/ipv6/af_inet6.c
5370 --- linux-3.14.45.orig/net/ipv6/af_inet6.c 2015-06-23 02:01:36.000000000 +0200
5371 +++ linux-3.14.45/net/ipv6/af_inet6.c 2015-06-24 14:15:48.891862483 +0200
5372 @@ -97,8 +97,7 @@
5373 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
5376 -static int inet6_create(struct net *net, struct socket *sock, int protocol,
5377 - int kern)
5378 +int inet6_create(struct net *net, struct socket *sock, int protocol, int kern)
5380 struct inet_sock *inet;
5381 struct ipv6_pinfo *np;
5382 diff -Nur linux-3.14.45.orig/net/ipv6/inet6_connection_sock.c linux-3.14.45/net/ipv6/inet6_connection_sock.c
5383 --- linux-3.14.45.orig/net/ipv6/inet6_connection_sock.c 2015-06-23 02:01:36.000000000 +0200
5384 +++ linux-3.14.45/net/ipv6/inet6_connection_sock.c 2015-06-24 14:15:48.891862483 +0200
5385 @@ -96,8 +96,8 @@
5387 * request_sock (formerly open request) hash tables.
5389 -static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
5390 - const u32 rnd, const u32 synq_hsize)
5391 +u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
5392 + const u32 rnd, const u32 synq_hsize)
5394 u32 c;
5396 diff -Nur linux-3.14.45.orig/net/ipv6/syncookies.c linux-3.14.45/net/ipv6/syncookies.c
5397 --- linux-3.14.45.orig/net/ipv6/syncookies.c 2015-06-23 02:01:36.000000000 +0200
5398 +++ linux-3.14.45/net/ipv6/syncookies.c 2015-06-24 14:15:48.891862483 +0200
5399 @@ -181,7 +181,7 @@
5401 /* check for timestamp cookie support */
5402 memset(&tcp_opt, 0, sizeof(tcp_opt));
5403 - tcp_parse_options(skb, &tcp_opt, 0, NULL);
5404 + tcp_parse_options(skb, &tcp_opt, NULL, 0, NULL);
5406 if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
5407 goto out;
5408 @@ -253,10 +253,10 @@
5411 req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
5412 - tcp_select_initial_window(tcp_full_space(sk), req->mss,
5413 + tp->select_initial_window(tcp_full_space(sk), req->mss,
5414 &req->rcv_wnd, &req->window_clamp,
5415 ireq->wscale_ok, &rcv_wscale,
5416 - dst_metric(dst, RTAX_INITRWND));
5417 + dst_metric(dst, RTAX_INITRWND), sk);
5419 ireq->rcv_wscale = rcv_wscale;
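
The syncookies.c hunk shows the two interface changes this patch threads through the stack: tcp_parse_options() gains a struct mptcp_options_received * argument (callers that do not care about MPTCP, such as the cxgb4 and syncookie paths, pass NULL), and the initial-window computation goes through tp->select_initial_window(), which now also receives the socket. The sketch below only illustrates the optional-out-parameter idea; the types and the parse function are hypothetical, not the kernel API.

/* Illustration only: growing a parser by an optional MPTCP out-parameter. */
#include <stdio.h>
#include <stddef.h>

struct tcp_opts   { int saw_tstamp; };
struct mptcp_opts { int saw_mpc; };

/* NULL for the extra argument means "skip MPTCP option parsing". */
static void parse_options(const char *wire, struct tcp_opts *opt,
			  struct mptcp_opts *mopt)
{
	opt->saw_tstamp = (wire[0] == 'T');
	if (mopt)
		mopt->saw_mpc = (wire[1] == 'M');
}

int main(void)
{
	struct tcp_opts opt;
	struct mptcp_opts mopt;

	parse_options("TM", &opt, &mopt);   /* MPTCP-aware caller            */
	parse_options("T-", &opt, NULL);    /* legacy caller, as in the hunks */
	printf("tstamp=%d mpc=%d\n", opt.saw_tstamp, mopt.saw_mpc);
	return 0;
}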
5421 diff -Nur linux-3.14.45.orig/net/ipv6/tcp_ipv6.c linux-3.14.45/net/ipv6/tcp_ipv6.c
5422 --- linux-3.14.45.orig/net/ipv6/tcp_ipv6.c 2015-06-23 02:01:36.000000000 +0200
5423 +++ linux-3.14.45/net/ipv6/tcp_ipv6.c 2015-06-24 14:44:57.517799806 +0200
5424 @@ -63,6 +63,8 @@
5425 #include <net/inet_common.h>
5426 #include <net/secure_seq.h>
5427 #include <net/tcp_memcontrol.h>
5428 +#include <net/mptcp.h>
5429 +#include <net/mptcp_v6.h>
5430 #include <net/busy_poll.h>
5432 #include <asm/uaccess.h>
5433 @@ -73,14 +75,6 @@
5434 #include <linux/crypto.h>
5435 #include <linux/scatterlist.h>
5437 -static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
5438 -static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
5439 - struct request_sock *req);
5441 -static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
5443 -static const struct inet_connection_sock_af_ops ipv6_mapped;
5444 -static const struct inet_connection_sock_af_ops ipv6_specific;
5445 #ifdef CONFIG_TCP_MD5SIG
5446 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
5447 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
5448 @@ -92,7 +86,7 @@
5450 #endif
5452 -static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
5453 +void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
5455 struct dst_entry *dst = skb_dst(skb);
5456 const struct rt6_info *rt = (const struct rt6_info *)dst;
5457 @@ -104,7 +98,7 @@
5458 inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
5461 -static void tcp_v6_hash(struct sock *sk)
5462 +void tcp_v6_hash(struct sock *sk)
5464 if (sk->sk_state != TCP_CLOSE) {
5465 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
5466 @@ -117,7 +111,7 @@
5470 -static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
5471 +__u32 tcp_v6_init_sequence(const struct sk_buff *skb)
5473 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
5474 ipv6_hdr(skb)->saddr.s6_addr32,
5475 @@ -125,7 +119,7 @@
5476 tcp_hdr(skb)->source);
5479 -static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
5480 +int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
5481 int addr_len)
5483 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
5484 @@ -339,7 +333,7 @@
5485 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
5486 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
5487 struct ipv6_pinfo *np;
5488 - struct sock *sk;
5489 + struct sock *sk, *meta_sk;
5490 int err;
5491 struct tcp_sock *tp;
5492 __u32 seq;
5493 @@ -359,8 +353,14 @@
5494 return;
5497 - bh_lock_sock(sk);
5498 - if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
5499 + tp = tcp_sk(sk);
5500 + if (tp->mpc)
5501 + meta_sk = mptcp_meta_sk(sk);
5502 + else
5503 + meta_sk = sk;
5505 + bh_lock_sock(meta_sk);
5506 + if (sock_owned_by_user(meta_sk) && type != ICMPV6_PKT_TOOBIG)
5507 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
5509 if (sk->sk_state == TCP_CLOSE)
5510 @@ -371,7 +371,6 @@
5511 goto out;
5514 - tp = tcp_sk(sk);
5515 seq = ntohl(th->seq);
5516 if (sk->sk_state != TCP_LISTEN &&
5517 !between(seq, tp->snd_una, tp->snd_nxt)) {
5518 @@ -401,11 +400,15 @@
5519 goto out;
5521 tp->mtu_info = ntohl(info);
5522 - if (!sock_owned_by_user(sk))
5523 + if (!sock_owned_by_user(meta_sk))
5524 tcp_v6_mtu_reduced(sk);
5525 - else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
5526 + else {
5527 + if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
5528 &tp->tsq_flags))
5529 - sock_hold(sk);
5530 + sock_hold(sk);
5531 + if (tp->mpc)
5532 + mptcp_tsq_flags(sk);
5534 goto out;
5537 @@ -415,7 +418,7 @@
5538 switch (sk->sk_state) {
5539 struct request_sock *req, **prev;
5540 case TCP_LISTEN:
5541 - if (sock_owned_by_user(sk))
5542 + if (sock_owned_by_user(meta_sk))
5543 goto out;
5545 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
5546 @@ -440,7 +443,7 @@
5547 case TCP_SYN_SENT:
5548 case TCP_SYN_RECV: /* Cannot happen.
5549 It can, it SYNs are crossed. --ANK */
5550 - if (!sock_owned_by_user(sk)) {
5551 + if (!sock_owned_by_user(meta_sk)) {
5552 sk->sk_err = err;
5553 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
5555 @@ -450,22 +453,22 @@
5556 goto out;
5559 - if (!sock_owned_by_user(sk) && np->recverr) {
5560 + if (!sock_owned_by_user(meta_sk) && np->recverr) {
5561 sk->sk_err = err;
5562 sk->sk_error_report(sk);
5563 } else
5564 sk->sk_err_soft = err;
5566 out:
5567 - bh_unlock_sock(sk);
5568 + bh_unlock_sock(meta_sk);
5569 sock_put(sk);
5573 -static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
5574 - struct flowi6 *fl6,
5575 - struct request_sock *req,
5576 - u16 queue_mapping)
5577 +int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
5578 + struct flowi6 *fl6,
5579 + struct request_sock *req,
5580 + u16 queue_mapping)
5582 struct inet_request_sock *ireq = inet_rsk(req);
5583 struct ipv6_pinfo *np = inet6_sk(sk);
5584 @@ -495,7 +498,7 @@
5585 return err;
5588 -static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
5589 +int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
5591 struct flowi6 fl6;
5592 int res;
5593 @@ -506,7 +509,7 @@
5594 return res;
5597 -static void tcp_v6_reqsk_destructor(struct request_sock *req)
5598 +void tcp_v6_reqsk_destructor(struct request_sock *req)
5600 kfree_skb(inet_rsk(req)->pktopts);
5602 @@ -719,16 +722,16 @@
5605 #ifdef CONFIG_TCP_MD5SIG
5606 -static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
5607 +const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
5608 .md5_lookup = tcp_v6_reqsk_md5_lookup,
5609 .calc_md5_hash = tcp_v6_md5_hash_skb,
5611 #endif
5613 -static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
5614 - u32 tsval, u32 tsecr,
5615 +static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack,
5616 + u32 data_ack, u32 win, u32 tsval, u32 tsecr,
5617 struct tcp_md5sig_key *key, int rst, u8 tclass,
5618 - u32 label)
5619 + u32 label, int mptcp)
5621 const struct tcphdr *th = tcp_hdr(skb);
5622 struct tcphdr *t1;
5623 @@ -746,7 +749,10 @@
5624 if (key)
5625 tot_len += TCPOLEN_MD5SIG_ALIGNED;
5626 #endif
5628 +#ifdef CONFIG_MPTCP
5629 + if (mptcp)
5630 + tot_len += MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK;
5631 +#endif
5632 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
5633 GFP_ATOMIC);
5634 if (buff == NULL)
5635 @@ -784,6 +790,17 @@
5636 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
5637 &ipv6_hdr(skb)->saddr,
5638 &ipv6_hdr(skb)->daddr, t1);
5639 + topt += 4;
5641 +#endif
5642 +#ifdef CONFIG_MPTCP
5643 + if (mptcp) {
5644 + /* Construction of 32-bit data_ack */
5645 + *topt++ = htonl((TCPOPT_MPTCP << 24) |
5646 + ((MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK) << 16) |
5647 + (0x20 << 8) |
5648 + (0x01));
5649 + *topt++ = htonl(data_ack);
5651 #endif
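
The two 32-bit words appended in this hunk form a minimal DSS option that carries only a data-level ACK: kind, length, subtype and flag bytes in the first word, the data_ack in the second. The check below reproduces the byte layout of that first word; the numeric values (TCPOPT_MPTCP = 30, MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK = 8) are assumptions taken from the MPTCP option format, since the macros are defined elsewhere in the patch.

/* Illustration only: wire layout of
 * htonl((TCPOPT_MPTCP << 24) | (len << 16) | (0x20 << 8) | 0x01). */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	const uint8_t kind = 30, len = 4 + 4, sub = 0x20, flags = 0x01;
	uint32_t word = htonl((uint32_t)kind << 24 | (uint32_t)len << 16 |
			      (uint32_t)sub << 8 | flags);
	const uint8_t *b = (const uint8_t *)&word;

	/* On the wire: option kind, option length, DSS subtype byte, then the
	 * "data ACK present" flag - in that order on any host endianness. */
	printf("%u %u 0x%02x 0x%02x\n", b[0], b[1], b[2], b[3]);
	return 0;
}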
5653 @@ -821,7 +838,7 @@
5654 kfree_skb(buff);
5657 -static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
5658 +void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
5660 const struct tcphdr *th = tcp_hdr(skb);
5661 u32 seq = 0, ack_seq = 0;
5662 @@ -876,7 +893,7 @@
5663 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
5664 (th->doff << 2);
5666 - tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0, 0);
5667 + tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, 0, key, 1, 0, 0, 0);
5669 #ifdef CONFIG_TCP_MD5SIG
5670 release_sk1:
5671 @@ -887,40 +904,48 @@
5672 #endif
5675 -static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
5676 +static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 data_ack,
5677 u32 win, u32 tsval, u32 tsecr,
5678 - struct tcp_md5sig_key *key, u8 tclass,
5679 - u32 label)
5680 + struct tcp_md5sig_key *key, u8 tclass, u32 label,
5681 + int mptcp)
5683 tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass,
5684 - label);
5685 + label, mptcp);
5688 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
5690 struct inet_timewait_sock *tw = inet_twsk(sk);
5691 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
5692 + u32 data_ack = 0;
5693 + int mptcp = 0;
5695 + if (tcptw->mptcp_tw && tcptw->mptcp_tw->meta_tw) {
5696 + data_ack = (u32)tcptw->mptcp_tw->rcv_nxt;
5697 + mptcp = 1;
5700 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
5701 + data_ack,
5702 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
5703 tcp_time_stamp + tcptw->tw_ts_offset,
5704 tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
5705 - tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
5706 + tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), mptcp);
5708 inet_twsk_put(tw);
5711 -static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
5712 +void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
5713 struct request_sock *req)
5715 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
5716 - req->rcv_wnd, tcp_time_stamp, req->ts_recent,
5717 + 0, req->rcv_wnd, tcp_time_stamp, req->ts_recent,
5718 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
5719 - 0, 0);
5720 + 0, 0, 0);
5724 -static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
5725 +struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
5727 struct request_sock *req, **prev;
5728 const struct tcphdr *th = tcp_hdr(skb);
5729 @@ -939,7 +964,13 @@
5731 if (nsk) {
5732 if (nsk->sk_state != TCP_TIME_WAIT) {
5733 + /* Don't lock again the meta-sk. It has been locked
5734 + * before mptcp_v6_do_rcv.
5735 + */
5736 + if (tcp_sk(nsk)->mpc && !is_meta_sk(sk))
5737 + bh_lock_sock(mptcp_meta_sk(nsk));
5738 bh_lock_sock(nsk);
5740 return nsk;
5742 inet_twsk_put(inet_twsk(nsk));
5743 @@ -959,6 +990,7 @@
5744 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
5746 struct tcp_options_received tmp_opt;
5747 + struct mptcp_options_received mopt;
5748 struct request_sock *req;
5749 struct inet_request_sock *ireq;
5750 struct ipv6_pinfo *np = inet6_sk(sk);
5751 @@ -971,6 +1003,23 @@
5752 if (skb->protocol == htons(ETH_P_IP))
5753 return tcp_v4_conn_request(sk, skb);
5755 + tcp_clear_options(&tmp_opt);
5756 + tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
5757 + tmp_opt.user_mss = tp->rx_opt.user_mss;
5758 + mptcp_init_mp_opt(&mopt);
5759 + tcp_parse_options(skb, &tmp_opt, &mopt, 0, NULL);
5761 +#ifdef CONFIG_MPTCP
5762 + /* MPTCP structures not initialized, so return error */
5763 + if (mptcp_init_failed)
5764 + mptcp_init_mp_opt(&mopt);
5766 + if (mopt.is_mp_join)
5767 + return mptcp_do_join_short(skb, &mopt, &tmp_opt, sock_net(sk));
5768 + if (mopt.drop_me)
5769 + goto drop;
5770 +#endif
5772 if (!ipv6_unicast_destination(skb))
5773 goto drop;
5775 @@ -986,7 +1035,22 @@
5776 goto drop;
5779 - req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
5780 +#ifdef CONFIG_MPTCP
5781 + if (sysctl_mptcp_enabled == MPTCP_APP && !tp->mptcp_enabled)
5782 + mopt.saw_mpc = 0;
5783 + if (mopt.saw_mpc && !want_cookie) {
5784 + req = inet6_reqsk_alloc(&mptcp6_request_sock_ops);
5786 + if (req == NULL)
5787 + goto drop;
5789 + mptcp_rsk(req)->mpcb = NULL;
5790 + mptcp_rsk(req)->dss_csum = mopt.dss_csum;
5791 + mptcp_rsk(req)->collide_tk.pprev = NULL;
5792 + } else
5793 +#endif
5794 + req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
5796 if (req == NULL)
5797 goto drop;
5799 @@ -994,17 +1058,15 @@
5800 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
5801 #endif
5803 - tcp_clear_options(&tmp_opt);
5804 - tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
5805 - tmp_opt.user_mss = tp->rx_opt.user_mss;
5806 - tcp_parse_options(skb, &tmp_opt, 0, NULL);
5808 if (want_cookie && !tmp_opt.saw_tstamp)
5809 tcp_clear_options(&tmp_opt);
5811 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
5812 tcp_openreq_init(req, &tmp_opt, skb);
5814 + if (mopt.saw_mpc && !want_cookie)
5815 + mptcp_reqsk_new_mptcp(req, &tmp_opt, &mopt, skb);
5817 ireq = inet_rsk(req);
5818 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
5819 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
5820 @@ -1094,9 +1156,9 @@
5821 return 0; /* don't send reset */
5824 -static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
5825 - struct request_sock *req,
5826 - struct dst_entry *dst)
5827 +struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
5828 + struct request_sock *req,
5829 + struct dst_entry *dst)
5831 struct inet_request_sock *ireq;
5832 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
5833 @@ -1317,7 +1379,7 @@
5834 * This is because we cannot sleep with the original spinlock
5835 * held.
5837 -static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
5838 +int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
5840 struct ipv6_pinfo *np = inet6_sk(sk);
5841 struct tcp_sock *tp;
5842 @@ -1339,6 +1401,9 @@
5843 goto discard;
5844 #endif
5846 + if (is_meta_sk(sk))
5847 + return mptcp_v6_do_rcv(sk, skb);
5849 if (sk_filter(sk, skb))
5850 goto discard;
5852 @@ -1460,7 +1525,7 @@
5854 const struct tcphdr *th;
5855 const struct ipv6hdr *hdr;
5856 - struct sock *sk;
5857 + struct sock *sk, *meta_sk = NULL;
5858 int ret;
5859 struct net *net = dev_net(skb->dev);
5861 @@ -1491,18 +1556,43 @@
5862 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
5863 skb->len - th->doff*4);
5864 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
5865 +#ifdef CONFIG_MPTCP
5866 + TCP_SKB_CB(skb)->mptcp_flags = 0;
5867 + TCP_SKB_CB(skb)->dss_off = 0;
5868 +#endif
5869 TCP_SKB_CB(skb)->when = 0;
5870 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
5871 TCP_SKB_CB(skb)->sacked = 0;
5873 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
5874 - if (!sk)
5875 - goto no_tcp_socket;
5877 process:
5878 - if (sk->sk_state == TCP_TIME_WAIT)
5879 + if (sk && sk->sk_state == TCP_TIME_WAIT)
5880 goto do_time_wait;
5882 +#ifdef CONFIG_MPTCP
5883 + if (!sk && th->syn && !th->ack) {
5884 + int ret = mptcp_lookup_join(skb, NULL);
5886 + if (ret < 0) {
5887 + tcp_v6_send_reset(NULL, skb);
5888 + goto discard_it;
5889 + } else if (ret > 0) {
5890 + return 0;
5894 + /* Is there a pending request sock for this segment ? */
5895 + if ((!sk || sk->sk_state == TCP_LISTEN) && mptcp_check_req(skb, net)) {
5896 + if (sk)
5897 + sock_put(sk);
5898 + return 0;
5900 +#endif
5902 + if (!sk)
5903 + goto no_tcp_socket;
5905 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
5906 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
5907 goto discard_and_relse;
5908 @@ -1517,11 +1607,21 @@
5909 sk_mark_napi_id(sk, skb);
5910 skb->dev = NULL;
5912 - bh_lock_sock_nested(sk);
5913 + if (tcp_sk(sk)->mpc) {
5914 + meta_sk = mptcp_meta_sk(sk);
5916 + bh_lock_sock_nested(meta_sk);
5917 + if (sock_owned_by_user(meta_sk))
5918 + skb->sk = sk;
5919 + } else {
5920 + meta_sk = sk;
5921 + bh_lock_sock_nested(sk);
5924 ret = 0;
5925 - if (!sock_owned_by_user(sk)) {
5926 + if (!sock_owned_by_user(meta_sk)) {
5927 #ifdef CONFIG_NET_DMA
5928 - struct tcp_sock *tp = tcp_sk(sk);
5929 + struct tcp_sock *tp = tcp_sk(meta_sk);
5930 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
5931 tp->ucopy.dma_chan = net_dma_find_channel();
5932 if (tp->ucopy.dma_chan)
5933 @@ -1529,16 +1629,17 @@
5934 else
5935 #endif
5937 - if (!tcp_prequeue(sk, skb))
5938 + if (!tcp_prequeue(meta_sk, skb))
5939 ret = tcp_v6_do_rcv(sk, skb);
5941 - } else if (unlikely(sk_add_backlog(sk, skb,
5942 - sk->sk_rcvbuf + sk->sk_sndbuf))) {
5943 - bh_unlock_sock(sk);
5944 + } else if (unlikely(sk_add_backlog(meta_sk, skb,
5945 + meta_sk->sk_rcvbuf + meta_sk->sk_sndbuf))) {
5946 + bh_unlock_sock(meta_sk);
5947 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
5948 goto discard_and_relse;
5950 - bh_unlock_sock(sk);
5952 + bh_unlock_sock(meta_sk);
5954 sock_put(sk);
5955 return ret ? -1 : 0;
5956 @@ -1595,6 +1696,18 @@
5957 sk = sk2;
5958 goto process;
5960 +#ifdef CONFIG_MPTCP
5961 + if (th->syn && !th->ack) {
5962 + int ret = mptcp_lookup_join(skb, inet_twsk(sk));
5964 + if (ret < 0) {
5965 + tcp_v6_send_reset(NULL, skb);
5966 + goto discard_it;
5967 + } else if (ret > 0) {
5968 + return 0;
5971 +#endif
5972 /* Fall through to ACK */
5974 case TCP_TW_ACK:
5975 @@ -1644,13 +1757,13 @@
5979 -static struct timewait_sock_ops tcp6_timewait_sock_ops = {
5980 +struct timewait_sock_ops tcp6_timewait_sock_ops = {
5981 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
5982 .twsk_unique = tcp_twsk_unique,
5983 .twsk_destructor= tcp_twsk_destructor,
5986 -static const struct inet_connection_sock_af_ops ipv6_specific = {
5987 +const struct inet_connection_sock_af_ops ipv6_specific = {
5988 .queue_xmit = inet6_csk_xmit,
5989 .send_check = tcp_v6_send_check,
5990 .rebuild_header = inet6_sk_rebuild_header,
5991 @@ -1683,7 +1796,7 @@
5992 * TCP over IPv4 via INET6 API
5995 -static const struct inet_connection_sock_af_ops ipv6_mapped = {
5996 +const struct inet_connection_sock_af_ops ipv6_mapped = {
5997 .queue_xmit = ip_queue_xmit,
5998 .send_check = tcp_v4_send_check,
5999 .rebuild_header = inet_sk_rebuild_header,
6000 @@ -1729,7 +1842,7 @@
6001 return 0;
6004 -static void tcp_v6_destroy_sock(struct sock *sk)
6005 +void tcp_v6_destroy_sock(struct sock *sk)
6007 tcp_v4_destroy_sock(sk);
6008 inet6_destroy_sock(sk);
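
The tcp_ipv6.c receive-path hunks add three MPTCP-specific steps before normal socket processing: an unmatched pure SYN is offered to mptcp_lookup_join() in case it is an MP_JOIN for an existing connection, pending MPTCP request socks are checked via mptcp_check_req(), and once a subflow is found, locking, prequeueing and backlogging happen on the meta-socket. The sketch below only models that decision order; every helper is a hypothetical stand-in for the kernel calls named above.

/* Illustration only: ordering of the MPTCP hooks added to tcp_v6_rcv(). */
#include <stdio.h>
#include <stdbool.h>

struct pkt { bool syn; bool ack; };

static const char *rcv_decide(bool sk_found, bool sk_is_mptcp_subflow,
			      struct pkt p)
{
	if (!sk_found && p.syn && !p.ack)
		return "try mptcp_lookup_join(): maybe an MP_JOIN SYN";
	if (!sk_found)
		return "check pending MPTCP request socks, else no_tcp_socket";
	if (sk_is_mptcp_subflow)
		return "lock meta-socket, process or backlog at meta level";
	return "plain TCP: lock and process the socket itself";
}

int main(void)
{
	struct pkt join_syn = { .syn = true, .ack = false };
	struct pkt data_seg = { .syn = false, .ack = true };

	printf("%s\n", rcv_decide(false, false, join_syn));
	printf("%s\n", rcv_decide(true, true, data_seg));
	printf("%s\n", rcv_decide(true, false, data_seg));
	return 0;
}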
6009 diff -Nur linux-3.14.45.orig/net/mptcp/Kconfig linux-3.14.45/net/mptcp/Kconfig
6010 --- linux-3.14.45.orig/net/mptcp/Kconfig 1970-01-01 01:00:00.000000000 +0100
6011 +++ linux-3.14.45/net/mptcp/Kconfig 2015-06-24 14:15:48.891862483 +0200
6012 @@ -0,0 +1,58 @@
6014 +# MPTCP configuration
6016 +config MPTCP
6017 + bool "MPTCP protocol"
6018 + depends on (IPV6=y || IPV6=n)
6019 + ---help---
6020 + This replaces the normal TCP stack with a Multipath TCP stack,
6021 + able to use several paths at once.
6023 +menuconfig MPTCP_PM_ADVANCED
6024 + bool "MPTCP: advanced path-manager control"
6025 + depends on MPTCP=y
6026 + ---help---
6027 + Support for selection of different path-managers. You should choose 'Y' here,
6028 + because otherwise you will not actively create new MPTCP-subflows.
6030 +if MPTCP_PM_ADVANCED
6032 +config MPTCP_FULLMESH
6033 + tristate "MPTCP Full-Mesh Path-Manager"
6034 + depends on MPTCP=y
6035 + ---help---
6036 + This path-management module will create a full-mesh among all IP-addresses.
6038 +config MPTCP_NDIFFPORTS
6039 + tristate "MPTCP ndiff-ports"
6040 + depends on MPTCP=y
6041 + ---help---
6042 + This path-management module will create multiple subflows between the same
6043 + pair of IP-addresses, modifying the source-port. You can set the number
6044 + of subflows via the mptcp_ndiffports-sysctl.
6046 +choice
6047 + prompt "Default MPTCP Path-Manager"
6048 + default DEFAULT
6049 + help
6050 + Select the Path-Manager of your choice
6052 + config DEFAULT_FULLMESH
6053 + bool "Full mesh" if MPTCP_FULLMESH=y
6055 + config DEFAULT_NDIFFPORTS
6056 + bool "ndiff-ports" if MPTCP_NDIFFPORTS=y
6058 + config DEFAULT_DUMMY
6059 + bool "Default"
6061 +endchoice
6063 +endif
6065 +config DEFAULT_MPTCP_PM
6066 + string
6067 + default "default" if DEFAULT_DUMMY
6068 + default "fullmesh" if DEFAULT_FULLMESH
6069 + default "ndiffports" if DEFAULT_NDIFFPORTS
6070 + default "default"
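
The Kconfig above maps the chosen DEFAULT_* option to the string CONFIG_DEFAULT_MPTCP_PM ("default", "fullmesh" or "ndiffports"), which the control code later exposes through the mptcp_path_manager sysctl. The sketch below only illustrates how such a default string might be resolved against registered path managers; the table and lookup are hypothetical, the real registration lives in mptcp_pm.c, which is not part of this excerpt.

/* Illustration only: resolving a default path-manager name. */
#include <stdio.h>
#include <string.h>

static const char *registered_pms[] = { "default", "fullmesh", "ndiffports" };

static const char *resolve_pm(const char *wanted)
{
	size_t i;

	for (i = 0; i < sizeof(registered_pms) / sizeof(registered_pms[0]); i++)
		if (!strcmp(registered_pms[i], wanted))
			return registered_pms[i];
	return "default";	/* the Kconfig above also falls back to "default" */
}

int main(void)
{
	printf("%s\n", resolve_pm("fullmesh"));
	printf("%s\n", resolve_pm("unknown-pm"));
	return 0;
}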
6071 diff -Nur linux-3.14.45.orig/net/mptcp/Makefile linux-3.14.45/net/mptcp/Makefile
6072 --- linux-3.14.45.orig/net/mptcp/Makefile 1970-01-01 01:00:00.000000000 +0100
6073 +++ linux-3.14.45/net/mptcp/Makefile 2015-06-24 14:15:48.891862483 +0200
6074 @@ -0,0 +1,18 @@
6076 +## Makefile for MultiPath TCP support code.
6080 +obj-$(CONFIG_MPTCP) += mptcp.o
6082 +mptcp-y := mptcp_ctrl.o mptcp_ipv4.o mptcp_ofo_queue.o mptcp_pm.o \
6083 + mptcp_output.o mptcp_input.o
6085 +obj-$(CONFIG_TCP_CONG_COUPLED) += mptcp_coupled.o
6086 +obj-$(CONFIG_TCP_CONG_OLIA) += mptcp_olia.o
6087 +obj-$(CONFIG_TCP_CONG_WVEGAS) += mptcp_wvegas.o
6088 +obj-$(CONFIG_MPTCP_FULLMESH) += mptcp_fullmesh.o
6089 +obj-$(CONFIG_MPTCP_NDIFFPORTS) += mptcp_ndiffports.o
6091 +mptcp-$(subst m,y,$(CONFIG_IPV6)) += mptcp_ipv6.o
6093 diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_coupled.c linux-3.14.45/net/mptcp/mptcp_coupled.c
6094 --- linux-3.14.45.orig/net/mptcp/mptcp_coupled.c 1970-01-01 01:00:00.000000000 +0100
6095 +++ linux-3.14.45/net/mptcp/mptcp_coupled.c 2015-06-24 14:15:48.891862483 +0200
6096 @@ -0,0 +1,273 @@
6098 + * MPTCP implementation - Coupled Congestion Control
6100 + * Initial Design & Implementation:
6101 + * Sébastien Barré <sebastien.barre@uclouvain.be>
6103 + * Current Maintainer & Author:
6104 + * Christoph Paasch <christoph.paasch@uclouvain.be>
6106 + * Additional authors:
6107 + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
6108 + * Gregory Detal <gregory.detal@uclouvain.be>
6109 + * Fabien Duchêne <fabien.duchene@uclouvain.be>
6110 + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
6111 + * Lavkesh Lahngir <lavkesh51@gmail.com>
6112 + * Andreas Ripke <ripke@neclab.eu>
6113 + * Vlad Dogaru <vlad.dogaru@intel.com>
6114 + * Octavian Purdila <octavian.purdila@intel.com>
6115 + * John Ronan <jronan@tssg.org>
6116 + * Catalin Nicutar <catalin.nicutar@gmail.com>
6117 + * Brandon Heller <brandonh@stanford.edu>
6120 + * This program is free software; you can redistribute it and/or
6121 + * modify it under the terms of the GNU General Public License
6122 + * as published by the Free Software Foundation; either version
6123 + * 2 of the License, or (at your option) any later version.
6124 + */
6125 +#include <net/tcp.h>
6126 +#include <net/mptcp.h>
6128 +#include <linux/module.h>
6130 +/* Scaling is done in the numerator with alpha_scale_num and in the denominator
6131 + * with alpha_scale_den.
6133 + * To downscale, we just need to use alpha_scale.
6135 + * We have: alpha_scale = alpha_scale_num / (alpha_scale_den ^ 2)
6136 + */
6137 +static int alpha_scale_den = 10;
6138 +static int alpha_scale_num = 32;
6139 +static int alpha_scale = 12;
6141 +struct mptcp_ccc {
6142 + u64 alpha;
6143 + bool forced_update;
6146 +static inline int mptcp_ccc_sk_can_send(const struct sock *sk)
6148 + return mptcp_sk_can_send(sk) && tcp_sk(sk)->srtt;
6151 +static inline u64 mptcp_get_alpha(struct sock *meta_sk)
6153 + struct mptcp_ccc *mptcp_ccc = inet_csk_ca(meta_sk);
6154 + return mptcp_ccc->alpha;
6157 +static inline void mptcp_set_alpha(struct sock *meta_sk, u64 alpha)
6159 + struct mptcp_ccc *mptcp_ccc = inet_csk_ca(meta_sk);
6160 + mptcp_ccc->alpha = alpha;
6163 +static inline u64 mptcp_ccc_scale(u32 val, int scale)
6165 + return (u64) val << scale;
6168 +static inline bool mptcp_get_forced(struct sock *meta_sk)
6170 + struct mptcp_ccc *mptcp_ccc = inet_csk_ca(meta_sk);
6171 + return mptcp_ccc->forced_update;
6174 +static inline void mptcp_set_forced(struct sock *meta_sk, bool force)
6176 + struct mptcp_ccc *mptcp_ccc = inet_csk_ca(meta_sk);
6177 + mptcp_ccc->forced_update = force;
6180 +static void mptcp_ccc_recalc_alpha(struct sock *sk)
6182 + struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb;
6183 + struct sock *sub_sk;
6184 + int best_cwnd = 0, best_rtt = 0, can_send = 0;
6185 + u64 max_numerator = 0, sum_denominator = 0, alpha = 1;
6187 + if (!mpcb)
6188 + return;
6190 + /* Only one subflow left - fall back to normal reno-behavior
6191 + * (set alpha to 1) */
6192 + if (mpcb->cnt_established <= 1)
6193 + goto exit;
6195 + /* Do regular alpha-calculation for multiple subflows */
6197 + /* Find the max numerator of the alpha-calculation */
6198 + mptcp_for_each_sk(mpcb, sub_sk) {
6199 + struct tcp_sock *sub_tp = tcp_sk(sub_sk);
6200 + u64 tmp;
6202 + if (!mptcp_ccc_sk_can_send(sub_sk))
6203 + continue;
6205 + can_send++;
6207 + /* We need to look for the path that provides the max-value.
6208 + * Integer-overflow is not possible here, because
6209 + * tmp will be in u64.
6210 + */
6211 + tmp = div64_u64(mptcp_ccc_scale(sub_tp->snd_cwnd,
6212 + alpha_scale_num), (u64)sub_tp->srtt * sub_tp->srtt);
6214 + if (tmp >= max_numerator) {
6215 + max_numerator = tmp;
6216 + best_cwnd = sub_tp->snd_cwnd;
6217 + best_rtt = sub_tp->srtt;
6221 + /* No subflow is able to send - we don't care anymore */
6222 + if (unlikely(!can_send))
6223 + goto exit;
6225 + /* Calculate the denominator */
6226 + mptcp_for_each_sk(mpcb, sub_sk) {
6227 + struct tcp_sock *sub_tp = tcp_sk(sub_sk);
6229 + if (!mptcp_ccc_sk_can_send(sub_sk))
6230 + continue;
6232 + sum_denominator += div_u64(
6233 + mptcp_ccc_scale(sub_tp->snd_cwnd,
6234 + alpha_scale_den) * best_rtt,
6235 + sub_tp->srtt);
6237 + sum_denominator *= sum_denominator;
6238 + if (unlikely(!sum_denominator)) {
6239 + pr_err("%s: sum_denominator == 0, cnt_established:%d\n",
6240 + __func__, mpcb->cnt_established);
6241 + mptcp_for_each_sk(mpcb, sub_sk) {
6242 + struct tcp_sock *sub_tp = tcp_sk(sub_sk);
6243 + pr_err("%s: pi:%d, state:%d, rtt:%u, cwnd: %u\n",
6244 + __func__, sub_tp->mptcp->path_index,
6245 + sub_sk->sk_state, sub_tp->srtt,
6246 + sub_tp->snd_cwnd);
6250 + alpha = div64_u64(mptcp_ccc_scale(best_cwnd, alpha_scale_num), sum_denominator);
6252 + if (unlikely(!alpha))
6253 + alpha = 1;
6255 +exit:
6256 + mptcp_set_alpha(mptcp_meta_sk(sk), alpha);
6259 +static void mptcp_ccc_init(struct sock *sk)
6261 + if (tcp_sk(sk)->mpc) {
6262 + mptcp_set_forced(mptcp_meta_sk(sk), 0);
6263 + mptcp_set_alpha(mptcp_meta_sk(sk), 1);
6265 + /* If we do not mptcp, behave like reno: return */
6268 +static void mptcp_ccc_cwnd_event(struct sock *sk, enum tcp_ca_event event)
6270 + if (event == CA_EVENT_LOSS)
6271 + mptcp_ccc_recalc_alpha(sk);
6274 +static void mptcp_ccc_set_state(struct sock *sk, u8 ca_state)
6276 + if (!tcp_sk(sk)->mpc)
6277 + return;
6279 + mptcp_set_forced(mptcp_meta_sk(sk), 1);
6282 +static void mptcp_ccc_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
6284 + struct tcp_sock *tp = tcp_sk(sk);
6285 + struct mptcp_cb *mpcb = tp->mpcb;
6286 + int snd_cwnd;
6288 + if (!tp->mpc) {
6289 + tcp_reno_cong_avoid(sk, ack, acked, in_flight);
6290 + return;
6293 + if (!tcp_is_cwnd_limited(sk, in_flight))
6294 + return;
6296 + if (tp->snd_cwnd <= tp->snd_ssthresh) {
6297 + /* In "safe" area, increase. */
6298 + tcp_slow_start(tp, acked);
6299 + mptcp_ccc_recalc_alpha(sk);
6300 + return;
6303 + if (mptcp_get_forced(mptcp_meta_sk(sk))) {
6304 + mptcp_ccc_recalc_alpha(sk);
6305 + mptcp_set_forced(mptcp_meta_sk(sk), 0);
6308 + if (mpcb->cnt_established > 1) {
6309 + u64 alpha = mptcp_get_alpha(mptcp_meta_sk(sk));
6311 + /* This may happen, if at the initialization, the mpcb
6312 + * was not yet attached to the sock, and thus
6313 + * initializing alpha failed.
6314 + */
6315 + if (unlikely(!alpha))
6316 + alpha = 1;
6318 + snd_cwnd = (int) div_u64 ((u64) mptcp_ccc_scale(1, alpha_scale),
6319 + alpha);
6321 + /* snd_cwnd_cnt >= max (scale * tot_cwnd / alpha, cwnd)
6322 + * Thus, we select here the max value. */
6323 + if (snd_cwnd < tp->snd_cwnd)
6324 + snd_cwnd = tp->snd_cwnd;
6325 + } else {
6326 + snd_cwnd = tp->snd_cwnd;
6329 + if (tp->snd_cwnd_cnt >= snd_cwnd) {
6330 + if (tp->snd_cwnd < tp->snd_cwnd_clamp) {
6331 + tp->snd_cwnd++;
6332 + mptcp_ccc_recalc_alpha(sk);
6335 + tp->snd_cwnd_cnt = 0;
6336 + } else {
6337 + tp->snd_cwnd_cnt++;
6341 +static struct tcp_congestion_ops mptcp_ccc = {
6342 + .init = mptcp_ccc_init,
6343 + .ssthresh = tcp_reno_ssthresh,
6344 + .cong_avoid = mptcp_ccc_cong_avoid,
6345 + .cwnd_event = mptcp_ccc_cwnd_event,
6346 + .set_state = mptcp_ccc_set_state,
6347 + .min_cwnd = tcp_reno_min_cwnd,
6348 + .owner = THIS_MODULE,
6349 + .name = "coupled",
6352 +static int __init mptcp_ccc_register(void)
6354 + BUILD_BUG_ON(sizeof(struct mptcp_ccc) > ICSK_CA_PRIV_SIZE);
6355 + return tcp_register_congestion_control(&mptcp_ccc);
6358 +static void __exit mptcp_ccc_unregister(void)
6360 + tcp_unregister_congestion_control(&mptcp_ccc);
6363 +module_init(mptcp_ccc_register);
6364 +module_exit(mptcp_ccc_unregister);
6366 +MODULE_AUTHOR("Christoph Paasch, Sébastien Barré");
6367 +MODULE_LICENSE("GPL");
6368 +MODULE_DESCRIPTION("MPTCP COUPLED CONGESTION CONTROL");
6369 +MODULE_VERSION("0.1");
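
mptcp_ccc_recalc_alpha() above computes the coupling factor of the Linked-Increases algorithm in fixed point: the numerator takes the subflow maximising cwnd/rtt^2 (scaled by 2^alpha_scale_num), the denominator is the squared sum of the scaled cwnd/rtt terms, and mptcp_ccc_cong_avoid() then grows the window by one every max(2^alpha_scale / alpha, cwnd) acked segments instead of every cwnd. The user-space rerun below mirrors that arithmetic for two made-up subflows (cwnd/srtt values are only examples, and plain 64-bit division replaces the kernel's div64_u64/div_u64 helpers).

/* Illustration only: the fixed-point alpha computation for two subflows. */
#include <stdio.h>
#include <stdint.h>

#define ALPHA_SCALE_NUM 32   /* same constants as in mptcp_coupled.c */
#define ALPHA_SCALE_DEN 10
#define ALPHA_SCALE     12

struct subflow { uint64_t cwnd, srtt; };

int main(void)
{
	struct subflow sf[2] = { { 10, 100 }, { 20, 200 } };
	uint64_t max_num = 0, best_cwnd = 0, best_rtt = 0, sum_den = 0;
	uint64_t alpha, thresh;
	int i;

	/* Numerator: subflow maximising cwnd / rtt^2, scaled by 2^32. */
	for (i = 0; i < 2; i++) {
		uint64_t tmp = (sf[i].cwnd << ALPHA_SCALE_NUM) /
			       (sf[i].srtt * sf[i].srtt);
		if (tmp >= max_num) {
			max_num = tmp;
			best_cwnd = sf[i].cwnd;
			best_rtt = sf[i].srtt;
		}
	}

	/* Denominator: (sum of cwnd/rtt terms, scaled by 2^10 and best_rtt)^2. */
	for (i = 0; i < 2; i++)
		sum_den += (sf[i].cwnd << ALPHA_SCALE_DEN) * best_rtt /
			   sf[i].srtt;
	sum_den *= sum_den;

	alpha = (best_cwnd << ALPHA_SCALE_NUM) / sum_den;

	/* Per-subflow growth period, as in mptcp_ccc_cong_avoid(): the first
	 * subflow (cwnd = 10) now needs max(2^12/alpha, cwnd) acks per
	 * increment, rather than Reno's cwnd acks. */
	thresh = (1ULL << ALPHA_SCALE) / alpha;
	if (thresh < sf[0].cwnd)
		thresh = sf[0].cwnd;
	printf("alpha = %llu, grow cwnd every %llu acks (Reno: every %llu)\n",
	       (unsigned long long)alpha,
	       (unsigned long long)thresh,
	       (unsigned long long)sf[0].cwnd);
	return 0;
}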
6370 diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_ctrl.c linux-3.14.45/net/mptcp/mptcp_ctrl.c
6371 --- linux-3.14.45.orig/net/mptcp/mptcp_ctrl.c 1970-01-01 01:00:00.000000000 +0100
6372 +++ linux-3.14.45/net/mptcp/mptcp_ctrl.c 2015-06-24 14:15:48.891862483 +0200
6373 @@ -0,0 +1,2270 @@
6375 + * MPTCP implementation - MPTCP-control
6377 + * Initial Design & Implementation:
6378 + * Sébastien Barré <sebastien.barre@uclouvain.be>
6380 + * Current Maintainer & Author:
6381 + * Christoph Paasch <christoph.paasch@uclouvain.be>
6383 + * Additional authors:
6384 + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
6385 + * Gregory Detal <gregory.detal@uclouvain.be>
6386 + * Fabien Duchêne <fabien.duchene@uclouvain.be>
6387 + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
6388 + * Lavkesh Lahngir <lavkesh51@gmail.com>
6389 + * Andreas Ripke <ripke@neclab.eu>
6390 + * Vlad Dogaru <vlad.dogaru@intel.com>
6391 + * Octavian Purdila <octavian.purdila@intel.com>
6392 + * John Ronan <jronan@tssg.org>
6393 + * Catalin Nicutar <catalin.nicutar@gmail.com>
6394 + * Brandon Heller <brandonh@stanford.edu>
6397 + * This program is free software; you can redistribute it and/or
6398 + * modify it under the terms of the GNU General Public License
6399 + * as published by the Free Software Foundation; either version
6400 + * 2 of the License, or (at your option) any later version.
6401 + */
6403 +#include <net/inet_common.h>
6404 +#include <net/inet6_hashtables.h>
6405 +#include <net/ipv6.h>
6406 +#include <net/ip6_checksum.h>
6407 +#include <net/mptcp.h>
6408 +#include <net/mptcp_v4.h>
6409 +#if IS_ENABLED(CONFIG_IPV6)
6410 +#include <net/mptcp_v6.h>
6411 +#endif
6412 +#include <net/sock.h>
6413 +#include <net/tcp.h>
6414 +#include <net/tcp_states.h>
6415 +#include <net/transp_v6.h>
6416 +#include <net/xfrm.h>
6418 +#include <linux/cryptohash.h>
6419 +#include <linux/kconfig.h>
6420 +#include <linux/module.h>
6421 +#include <linux/netpoll.h>
6422 +#include <linux/list.h>
6423 +#include <linux/jhash.h>
6424 +#include <linux/tcp.h>
6425 +#include <linux/net.h>
6426 +#include <linux/in.h>
6427 +#include <linux/random.h>
6428 +#include <linux/inetdevice.h>
6429 +#include <linux/workqueue.h>
6430 +#include <linux/atomic.h>
6431 +#include <linux/sysctl.h>
6433 +static struct kmem_cache *mptcp_sock_cache __read_mostly;
6434 +static struct kmem_cache *mptcp_cb_cache __read_mostly;
6435 +static struct kmem_cache *mptcp_tw_cache __read_mostly;
6437 +int sysctl_mptcp_enabled __read_mostly = 1;
6438 +int sysctl_mptcp_checksum __read_mostly = 1;
6439 +int sysctl_mptcp_debug __read_mostly;
6440 +EXPORT_SYMBOL(sysctl_mptcp_debug);
6441 +int sysctl_mptcp_syn_retries __read_mostly = 3;
6443 +bool mptcp_init_failed __read_mostly;
6445 +static int proc_mptcp_path_manager(ctl_table *ctl, int write,
6446 + void __user *buffer, size_t *lenp,
6447 + loff_t *ppos)
6449 + char val[MPTCP_PM_NAME_MAX];
6450 + ctl_table tbl = {
6451 + .data = val,
6452 + .maxlen = MPTCP_PM_NAME_MAX,
6453 + };
6454 + int ret;
6456 + mptcp_get_default_path_manager(val);
6458 + ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
6459 + if (write && ret == 0)
6460 + ret = mptcp_set_default_path_manager(val);
6461 + return ret;
6464 +static struct ctl_table mptcp_table[] = {
6466 + .procname = "mptcp_enabled",
6467 + .data = &sysctl_mptcp_enabled,
6468 + .maxlen = sizeof(int),
6469 + .mode = 0644,
6470 + .proc_handler = &proc_dointvec
6471 + },
6473 + .procname = "mptcp_checksum",
6474 + .data = &sysctl_mptcp_checksum,
6475 + .maxlen = sizeof(int),
6476 + .mode = 0644,
6477 + .proc_handler = &proc_dointvec
6478 + },
6480 + .procname = "mptcp_debug",
6481 + .data = &sysctl_mptcp_debug,
6482 + .maxlen = sizeof(int),
6483 + .mode = 0644,
6484 + .proc_handler = &proc_dointvec
6485 + },
6487 + .procname = "mptcp_syn_retries",
6488 + .data = &sysctl_mptcp_syn_retries,
6489 + .maxlen = sizeof(int),
6490 + .mode = 0644,
6491 + .proc_handler = &proc_dointvec
6492 + },
6494 + .procname = "mptcp_path_manager",
6495 + .mode = 0644,
6496 + .maxlen = MPTCP_PM_NAME_MAX,
6497 + .proc_handler = proc_mptcp_path_manager,
6498 + },
6499 + { }
6502 +static inline u32 mptcp_hash_tk(u32 token)
6504 + return token % MPTCP_HASH_SIZE;
6507 +struct hlist_nulls_head tk_hashtable[MPTCP_HASH_SIZE];
6508 +EXPORT_SYMBOL(tk_hashtable);
6510 +/* This second hashtable is needed to retrieve request socks
6511 + * created as a result of a join request. While the SYN contains
6512 + * the token, the final ack does not, so we need a separate hashtable
6513 + * to retrieve the mpcb.
6514 + */
6515 +struct list_head mptcp_reqsk_htb[MPTCP_HASH_SIZE];
6516 +spinlock_t mptcp_reqsk_hlock; /* hashtable protection */
6518 +/* The following hash table is used to avoid collision of token */
6519 +static struct hlist_nulls_head mptcp_reqsk_tk_htb[MPTCP_HASH_SIZE];
6520 +spinlock_t mptcp_tk_hashlock; /* hashtable protection */
6522 +static int mptcp_reqsk_find_tk(u32 token)
6524 + u32 hash = mptcp_hash_tk(token);
6525 + struct mptcp_request_sock *mtreqsk;
6526 + const struct hlist_nulls_node *node;
6528 + hlist_nulls_for_each_entry_rcu(mtreqsk, node,
6529 + &mptcp_reqsk_tk_htb[hash], collide_tk) {
6530 + if (token == mtreqsk->mptcp_loc_token)
6531 + return 1;
6533 + return 0;
6536 +static void mptcp_reqsk_insert_tk(struct request_sock *reqsk, u32 token)
6538 + u32 hash = mptcp_hash_tk(token);
6540 + hlist_nulls_add_head_rcu(&mptcp_rsk(reqsk)->collide_tk,
6541 + &mptcp_reqsk_tk_htb[hash]);
6544 +static void mptcp_reqsk_remove_tk(struct request_sock *reqsk)
6546 + rcu_read_lock();
6547 + spin_lock(&mptcp_tk_hashlock);
6548 + hlist_nulls_del_init_rcu(&mptcp_rsk(reqsk)->collide_tk);
6549 + spin_unlock(&mptcp_tk_hashlock);
6550 + rcu_read_unlock();
6553 +void mptcp_reqsk_destructor(struct request_sock *req)
6555 + if (!mptcp_rsk(req)->mpcb) {
6556 + if (in_softirq()) {
6557 + mptcp_reqsk_remove_tk(req);
6558 + } else {
6559 + rcu_read_lock_bh();
6560 + spin_lock(&mptcp_tk_hashlock);
6561 + hlist_nulls_del_init_rcu(&mptcp_rsk(req)->collide_tk);
6562 + spin_unlock(&mptcp_tk_hashlock);
6563 + rcu_read_unlock_bh();
6565 + } else {
6566 + mptcp_hash_request_remove(req);
6570 +static void __mptcp_hash_insert(struct tcp_sock *meta_tp, u32 token)
6572 + u32 hash = mptcp_hash_tk(token);
6573 + hlist_nulls_add_head_rcu(&meta_tp->tk_table, &tk_hashtable[hash]);
6574 + meta_tp->inside_tk_table = 1;
6577 +static int mptcp_find_token(u32 token)
6579 + u32 hash = mptcp_hash_tk(token);
6580 + struct tcp_sock *meta_tp;
6581 + const struct hlist_nulls_node *node;
6583 + hlist_nulls_for_each_entry_rcu(meta_tp, node, &tk_hashtable[hash], tk_table) {
6584 + if (token == meta_tp->mptcp_loc_token)
6585 + return 1;
6587 + return 0;
6590 +static void mptcp_set_key_reqsk(struct request_sock *req,
6591 + const struct sk_buff *skb)
6593 + struct inet_request_sock *ireq = inet_rsk(req);
6594 + struct mptcp_request_sock *mtreq = mptcp_rsk(req);
6596 + if (skb->protocol == htons(ETH_P_IP)) {
6597 + mtreq->mptcp_loc_key = mptcp_v4_get_key(ip_hdr(skb)->saddr,
6598 + ip_hdr(skb)->daddr,
6599 + htons(ireq->ir_num),
6600 + ireq->ir_rmt_port);
6601 +#if IS_ENABLED(CONFIG_IPV6)
6602 + } else {
6603 + mtreq->mptcp_loc_key = mptcp_v6_get_key(ipv6_hdr(skb)->saddr.s6_addr32,
6604 + ipv6_hdr(skb)->daddr.s6_addr32,
6605 + htons(ireq->ir_num),
6606 + ireq->ir_rmt_port);
6607 +#endif
6610 + mptcp_key_sha1(mtreq->mptcp_loc_key, &mtreq->mptcp_loc_token, NULL);
6613 +/* New MPTCP-connection request, prepare a new token for the meta-socket that
6614 + * will be created in mptcp_check_req_master(), and store the received token.
6615 + */
6616 +void mptcp_reqsk_new_mptcp(struct request_sock *req,
6617 + const struct tcp_options_received *rx_opt,
6618 + const struct mptcp_options_received *mopt,
6619 + const struct sk_buff *skb)
6621 + struct mptcp_request_sock *mtreq = mptcp_rsk(req);
6623 + tcp_rsk(req)->saw_mpc = 1;
6625 + rcu_read_lock();
6626 + spin_lock(&mptcp_tk_hashlock);
6627 + do {
6628 + mptcp_set_key_reqsk(req, skb);
6629 + } while (mptcp_reqsk_find_tk(mtreq->mptcp_loc_token) ||
6630 + mptcp_find_token(mtreq->mptcp_loc_token));
6632 + mptcp_reqsk_insert_tk(req, mtreq->mptcp_loc_token);
6633 + spin_unlock(&mptcp_tk_hashlock);
6634 + rcu_read_unlock();
6635 + mtreq->mptcp_rem_key = mopt->mptcp_key;
6638 +static void mptcp_set_key_sk(struct sock *sk)
6640 + struct tcp_sock *tp = tcp_sk(sk);
6641 + struct inet_sock *isk = inet_sk(sk);
6643 + if (sk->sk_family == AF_INET)
6644 + tp->mptcp_loc_key = mptcp_v4_get_key(isk->inet_saddr,
6645 + isk->inet_daddr,
6646 + isk->inet_sport,
6647 + isk->inet_dport);
6648 +#if IS_ENABLED(CONFIG_IPV6)
6649 + else
6650 + tp->mptcp_loc_key = mptcp_v6_get_key(inet6_sk(sk)->saddr.s6_addr32,
6651 + sk->sk_v6_daddr.s6_addr32,
6652 + isk->inet_sport,
6653 + isk->inet_dport);
6654 +#endif
6656 + mptcp_key_sha1(tp->mptcp_loc_key,
6657 + &tp->mptcp_loc_token, NULL);
6660 +void mptcp_connect_init(struct sock *sk)
6662 + struct tcp_sock *tp = tcp_sk(sk);
6664 + rcu_read_lock_bh();
6665 + spin_lock(&mptcp_tk_hashlock);
6666 + do {
6667 + mptcp_set_key_sk(sk);
6668 + } while (mptcp_reqsk_find_tk(tp->mptcp_loc_token) ||
6669 + mptcp_find_token(tp->mptcp_loc_token));
6671 + __mptcp_hash_insert(tp, tp->mptcp_loc_token);
6672 + spin_unlock(&mptcp_tk_hashlock);
6673 + rcu_read_unlock_bh();
6676 +/**
6677 + * This function increments the refcount of the mpcb struct.
6678 + * It is the responsibility of the caller to decrement when releasing
6679 + * the structure.
6680 + */
6681 +struct sock *mptcp_hash_find(struct net *net, u32 token)
6683 + u32 hash = mptcp_hash_tk(token);
6684 + struct tcp_sock *meta_tp;
6685 + struct sock *meta_sk = NULL;
6686 + struct hlist_nulls_node *node;
6688 + rcu_read_lock();
6689 + hlist_nulls_for_each_entry_rcu(meta_tp, node, &tk_hashtable[hash],
6690 + tk_table) {
6691 + meta_sk = (struct sock *)meta_tp;
6692 + if (token == meta_tp->mptcp_loc_token &&
6693 + net_eq(net, sock_net(meta_sk)) &&
6694 + atomic_inc_not_zero(&meta_sk->sk_refcnt))
6695 + break;
6696 + meta_sk = NULL;
6698 + rcu_read_unlock();
6699 + return meta_sk;
6702 +void mptcp_hash_remove_bh(struct tcp_sock *meta_tp)
6704 + /* remove from the token hashtable */
6705 + rcu_read_lock_bh();
6706 + spin_lock(&mptcp_tk_hashlock);
6707 + hlist_nulls_del_init_rcu(&meta_tp->tk_table);
6708 + meta_tp->inside_tk_table = 0;
6709 + spin_unlock(&mptcp_tk_hashlock);
6710 + rcu_read_unlock_bh();
6713 +void mptcp_hash_remove(struct tcp_sock *meta_tp)
6715 + rcu_read_lock();
6716 + spin_lock(&mptcp_tk_hashlock);
6717 + hlist_nulls_del_init_rcu(&meta_tp->tk_table);
6718 + meta_tp->inside_tk_table = 0;
6719 + spin_unlock(&mptcp_tk_hashlock);
6720 + rcu_read_unlock();
6723 +static struct sock *mptcp_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
6724 + struct request_sock *req,
6725 + struct dst_entry *dst)
6727 +#if IS_ENABLED(CONFIG_IPV6)
6728 + if (sk->sk_family == AF_INET6)
6729 + return tcp_v6_syn_recv_sock(sk, skb, req, dst);
6731 + /* sk->sk_family == AF_INET */
6732 + if (req->rsk_ops->family == AF_INET6)
6733 + return mptcp_v6v4_syn_recv_sock(sk, skb, req, dst);
6734 +#endif
6736 + /* sk->sk_family == AF_INET && req->rsk_ops->family == AF_INET */
6737 + return tcp_v4_syn_recv_sock(sk, skb, req, dst);
6740 +struct sock *mptcp_select_ack_sock(const struct sock *meta_sk, int copied)
6742 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
6743 + struct sock *sk, *subsk = NULL;
6744 + u32 max_data_seq = 0;
6745 + /* max_data_seq initialized to correct compiler-warning.
6746 + * But the initialization is handled by max_data_seq_set
6747 + */
6748 + short max_data_seq_set = 0;
6749 + u32 min_time = 0xffffffff;
6751 + /* How do we select the subflow to send the window-update on?
6753 + * 1. He has to be in a state where he can send an ack and is
6754 + * operational (pf = 0).
6755 + * 2. He has to be one of those subflow who recently
6756 + * contributed to the received stream
6757 + * (this guarantees a working subflow)
6758 + * a) its latest data_seq received is after the original
6759 + * copied_seq.
6760 + * We select the one with the lowest rtt, so that the
6761 + * window-update reaches our peer the fastest.
6762 + * b) if no subflow has this kind of data_seq (e.g., very
6763 + * strange meta-level retransmissions going on), we take
6764 + * the subflow who last sent the highest data_seq.
6765 + */
6766 + mptcp_for_each_sk(meta_tp->mpcb, sk) {
6767 + struct tcp_sock *tp = tcp_sk(sk);
6769 + if (!mptcp_sk_can_send_ack(sk) || tp->pf)
6770 + continue;
6772 + /* Select among those who contributed to the
6773 + * current receive-queue.
6774 + */
6775 + if (copied && after(tp->mptcp->last_data_seq, meta_tp->copied_seq - copied)) {
6776 + if (tp->srtt < min_time) {
6777 + min_time = tp->srtt;
6778 + subsk = sk;
6779 + max_data_seq_set = 0;
6781 + continue;
6784 + if (!subsk && !max_data_seq_set) {
6785 + max_data_seq = tp->mptcp->last_data_seq;
6786 + max_data_seq_set = 1;
6787 + subsk = sk;
6790 + /* Otherwise, take the one with the highest data_seq */
6791 + if ((!subsk || max_data_seq_set) &&
6792 + after(tp->mptcp->last_data_seq, max_data_seq)) {
6793 + max_data_seq = tp->mptcp->last_data_seq;
6794 + subsk = sk;
6798 + if (!subsk) {
6799 + mptcp_debug("%s subsk is null, copied %d, cseq %u\n", __func__,
6800 + copied, meta_tp->copied_seq);
6801 + mptcp_for_each_sk(meta_tp->mpcb, sk) {
6802 + struct tcp_sock *tp = tcp_sk(sk);
6803 + mptcp_debug("%s pi %d state %u last_dseq %u\n",
6804 + __func__, tp->mptcp->path_index, sk->sk_state,
6805 + tp->mptcp->last_data_seq);
6809 + return subsk;
6811 +EXPORT_SYMBOL(mptcp_select_ack_sock);
6813 +static void mptcp_sock_def_error_report(struct sock *sk)
6815 + struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb;
6817 + if (!sock_flag(sk, SOCK_DEAD))
6818 + mptcp_sub_close(sk, 0);
6820 + if (mpcb->infinite_mapping_rcv || mpcb->infinite_mapping_snd ||
6821 + mpcb->send_infinite_mapping) {
6822 + struct sock *meta_sk = mptcp_meta_sk(sk);
6824 + meta_sk->sk_err = sk->sk_err;
6825 + meta_sk->sk_err_soft = sk->sk_err_soft;
6827 + if (!sock_flag(meta_sk, SOCK_DEAD))
6828 + meta_sk->sk_error_report(meta_sk);
6830 + tcp_done(meta_sk);
6833 + sk->sk_err = 0;
6834 + return;
6837 +static void mptcp_mpcb_put(struct mptcp_cb *mpcb)
6839 + if (atomic_dec_and_test(&mpcb->mpcb_refcnt)) {
6840 + mptcp_cleanup_path_manager(mpcb);
6841 + kmem_cache_free(mptcp_cb_cache, mpcb);
6845 +static void mptcp_sock_destruct(struct sock *sk)
6847 + struct tcp_sock *tp = tcp_sk(sk);
6849 + inet_sock_destruct(sk);
6851 + BUG_ON(!list_empty(&tp->mptcp->cb_list));
6853 + kmem_cache_free(mptcp_sock_cache, tp->mptcp);
6854 + tp->mptcp = NULL;
6856 + if (!is_meta_sk(sk) && !tp->was_meta_sk) {
6857 + /* Taken when mpcb pointer was set */
6858 + sock_put(mptcp_meta_sk(sk));
6859 + mptcp_mpcb_put(tp->mpcb);
6860 + } else {
6861 + struct mptcp_cb *mpcb = tp->mpcb;
6862 + struct mptcp_tw *mptw;
6864 + /* The mpcb is disappearing - we can make the final
6865 + * update to the rcv_nxt of the time-wait-sock and remove
6866 + * its reference to the mpcb.
6867 + */
6868 + spin_lock_bh(&mpcb->tw_lock);
6869 + list_for_each_entry_rcu(mptw, &mpcb->tw_list, list) {
6870 + list_del_rcu(&mptw->list);
6871 + mptw->in_list = 0;
6872 + mptcp_mpcb_put(mpcb);
6873 + rcu_assign_pointer(mptw->mpcb, NULL);
6875 + spin_unlock_bh(&mpcb->tw_lock);
6877 + mptcp_mpcb_put(mpcb);
6879 + mptcp_debug("%s destroying meta-sk\n", __func__);
6883 +void mptcp_destroy_sock(struct sock *sk)
6885 + if (is_meta_sk(sk)) {
6886 + struct sock *sk_it, *tmpsk;
6888 + __skb_queue_purge(&tcp_sk(sk)->mpcb->reinject_queue);
6889 + mptcp_purge_ofo_queue(tcp_sk(sk));
6891 + /* We have to close all remaining subflows. Normally, they
6892 + * should all be about to get closed. But, if the kernel is
6893 + * forcing a closure (e.g., tcp_write_err), the subflows might
6894 + * not have been closed properly (as we are waiting for the
6895 + * DATA_ACK of the DATA_FIN).
6896 + */
6897 + mptcp_for_each_sk_safe(tcp_sk(sk)->mpcb, sk_it, tmpsk) {
6898 + /* Already did call tcp_close - waiting for graceful
6899 + * closure, or if we are retransmitting fast-close on
6900 + * the subflow. The reset (or timeout) will kill the
6901 + * subflow..
6902 + */
6903 + if (tcp_sk(sk_it)->closing ||
6904 + tcp_sk(sk_it)->send_mp_fclose)
6905 + continue;
6907 + /* Allow the delayed work first to prevent time-wait state */
6908 + if (delayed_work_pending(&tcp_sk(sk_it)->mptcp->work))
6909 + continue;
6911 + mptcp_sub_close(sk_it, 0);
6913 + } else {
6914 + mptcp_del_sock(sk);
6918 +static void mptcp_set_state(struct sock *sk)
6920 + struct sock *meta_sk = mptcp_meta_sk(sk);
6922 + /* Meta is not yet established - wake up the application */
6923 + if ((1 << meta_sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV) &&
6924 + sk->sk_state == TCP_ESTABLISHED) {
6925 + tcp_set_state(meta_sk, TCP_ESTABLISHED);
6927 + if (!sock_flag(meta_sk, SOCK_DEAD)) {
6928 + meta_sk->sk_state_change(meta_sk);
6929 + sk_wake_async(meta_sk, SOCK_WAKE_IO, POLL_OUT);
6933 + if (sk->sk_state == TCP_ESTABLISHED) {
6934 + tcp_sk(sk)->mptcp->establish_increased = 1;
6935 + tcp_sk(sk)->mpcb->cnt_established++;
6939 +u32 mptcp_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
6940 +u32 mptcp_key_seed = 0;
6942 +void mptcp_key_sha1(u64 key, u32 *token, u64 *idsn)
6944 + u32 workspace[SHA_WORKSPACE_WORDS];
6945 + u32 mptcp_hashed_key[SHA_DIGEST_WORDS];
6946 + u8 input[64];
6947 + int i;
6949 + memset(workspace, 0, sizeof(workspace));
6951 + /* Initialize input with appropriate padding */
6952 + memset(&input[9], 0, sizeof(input) - 10); /* -10, because the last byte
6953 + * is explicitly set too */
6954 + memcpy(input, &key, sizeof(key)); /* Copy key to the msg beginning */
6955 + input[8] = 0x80; /* Padding: First bit after message = 1 */
6956 + input[63] = 0x40; /* Padding: Length of the message = 64 bits */
6958 + sha_init(mptcp_hashed_key);
6959 + sha_transform(mptcp_hashed_key, input, workspace);
6961 + for (i = 0; i < 5; i++)
6962 + mptcp_hashed_key[i] = cpu_to_be32(mptcp_hashed_key[i]);
6964 + if (token)
6965 + *token = mptcp_hashed_key[0];
6966 + if (idsn)
6967 + *idsn = *((u64 *)&mptcp_hashed_key[3]);
6970 +void mptcp_hmac_sha1(u8 *key_1, u8 *key_2, u8 *rand_1, u8 *rand_2,
6971 + u32 *hash_out)
6973 + u32 workspace[SHA_WORKSPACE_WORDS];
6974 + u8 input[128]; /* 2 512-bit blocks */
6975 + int i;
6977 + memset(workspace, 0, sizeof(workspace));
6979 + /* Generate key xored with ipad */
6980 + memset(input, 0x36, 64);
6981 + for (i = 0; i < 8; i++)
6982 + input[i] ^= key_1[i];
6983 + for (i = 0; i < 8; i++)
6984 + input[i + 8] ^= key_2[i];
6986 + memcpy(&input[64], rand_1, 4);
6987 + memcpy(&input[68], rand_2, 4);
6988 + input[72] = 0x80; /* Padding: First bit after message = 1 */
6989 + memset(&input[73], 0, 53);
6991 + /* Padding: Length of the message = 512 + 64 bits */
6992 + input[126] = 0x02;
6993 + input[127] = 0x40;
6995 + sha_init(hash_out);
6996 + sha_transform(hash_out, input, workspace);
6997 + memset(workspace, 0, sizeof(workspace));
6999 + sha_transform(hash_out, &input[64], workspace);
7000 + memset(workspace, 0, sizeof(workspace));
7002 + for (i = 0; i < 5; i++)
7003 + hash_out[i] = cpu_to_be32(hash_out[i]);
7005 + /* Prepare second part of hmac */
7006 + memset(input, 0x5C, 64);
7007 + for (i = 0; i < 8; i++)
7008 + input[i] ^= key_1[i];
7009 + for (i = 0; i < 8; i++)
7010 + input[i + 8] ^= key_2[i];
7012 + memcpy(&input[64], hash_out, 20);
7013 + input[84] = 0x80;
7014 + memset(&input[85], 0, 41);
7016 + /* Padding: Length of the message = 512 + 160 bits */
7017 + input[126] = 0x02;
7018 + input[127] = 0xA0;
7020 + sha_init(hash_out);
7021 + sha_transform(hash_out, input, workspace);
7022 + memset(workspace, 0, sizeof(workspace));
7024 + sha_transform(hash_out, &input[64], workspace);
7026 + for (i = 0; i < 5; i++)
7027 + hash_out[i] = cpu_to_be32(hash_out[i]);
7030 +static void mptcp_mpcb_inherit_sockopts(struct sock *meta_sk, struct sock *master_sk)
7032 + /* Socket-options handled by mptcp_inherit_sk while creating the meta-sk.
7033 + * ======
7034 + * SO_SNDBUF, SO_SNDBUFFORCE, SO_RCVBUF, SO_RCVBUFFORCE, SO_RCVLOWAT,
7035 + * SO_RCVTIMEO, SO_SNDTIMEO, SO_ATTACH_FILTER, SO_DETACH_FILTER,
7036 + * TCP_NODELAY, TCP_CORK
7038 + * Socket-options handled in this function here
7039 + * ======
7040 + * TCP_DEFER_ACCEPT
7042 + * Socket-options on the todo-list
7043 + * ======
7044 + * SO_BINDTODEVICE - should probably prevent creation of new subsocks
7045 + * across other devices. - what about the api-draft?
7046 + * SO_DEBUG
7047 + * SO_REUSEADDR - probably we don't care about this
7048 + * SO_DONTROUTE, SO_BROADCAST
7049 + * SO_OOBINLINE
7050 + * SO_LINGER
7051 + * SO_TIMESTAMP* - I don't think this is of concern for a SOCK_STREAM
7052 + * SO_PASSSEC - I don't think this is of concern for a SOCK_STREAM
7053 + * SO_RXQ_OVFL
7054 + * TCP_COOKIE_TRANSACTIONS
7055 + * TCP_MAXSEG
7056 + * TCP_THIN_* - Handled by mptcp_inherit_sk, but we need to support this
7057 + * in mptcp_retransmit_timer. AND we need to check what is
7058 + * about the subsockets.
7059 + * TCP_LINGER2
7060 + * TCP_WINDOW_CLAMP
7061 + * TCP_USER_TIMEOUT
7062 + * TCP_MD5SIG
7064 + * Socket-options of no concern for the meta-socket (but for the subsocket)
7065 + * ======
7066 + * SO_PRIORITY
7067 + * SO_MARK
7068 + * TCP_CONGESTION
7069 + * TCP_SYNCNT
7070 + * TCP_QUICKACK
7071 + * SO_KEEPALIVE
7072 + */
7074 + /****** DEFER_ACCEPT-handler ******/
7076 + /* DEFER_ACCEPT is not of concern for new subflows - we always accept
7077 + * them
7078 + */
7079 + inet_csk(meta_sk)->icsk_accept_queue.rskq_defer_accept = 0;
7082 +static void mptcp_sub_inherit_sockopts(struct sock *meta_sk, struct sock *sub_sk)
7084 + /* IP_TOS also goes to the subflow. */
7085 + if (inet_sk(sub_sk)->tos != inet_sk(meta_sk)->tos) {
7086 + inet_sk(sub_sk)->tos = inet_sk(meta_sk)->tos;
7087 + sub_sk->sk_priority = meta_sk->sk_priority;
7088 + sk_dst_reset(sub_sk);
7091 + /* Inherit SO_REUSEADDR */
7092 + sub_sk->sk_reuse = meta_sk->sk_reuse;
7094 + /* Inherit snd/rcv-buffer locks */
7095 + sub_sk->sk_userlocks = meta_sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
7098 +int mptcp_backlog_rcv(struct sock *meta_sk, struct sk_buff *skb)
7100 + /* skb-sk may be NULL if we receive a packet immediately after the
7101 + * SYN/ACK + MP_CAPABLE.
7102 + */
7103 + struct sock *sk = skb->sk ? skb->sk : meta_sk;
7104 + int ret = 0;
7106 + skb->sk = NULL;
7108 + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
7109 + kfree_skb(skb);
7110 + return 0;
7113 + if (sk->sk_family == AF_INET)
7114 + ret = tcp_v4_do_rcv(sk, skb);
7115 +#if IS_ENABLED(CONFIG_IPV6)
7116 + else
7117 + ret = tcp_v6_do_rcv(sk, skb);
7118 +#endif
7120 + sock_put(sk);
7121 + return ret;
7124 +struct lock_class_key meta_key;
7125 +struct lock_class_key meta_slock_key;
7127 +/* Code heavily inspired from sk_clone() */
7128 +static int mptcp_inherit_sk(const struct sock *sk, struct sock *newsk,
7129 + int family, const gfp_t flags)
7131 + struct sk_filter *filter;
7132 + struct proto *prot = newsk->sk_prot;
7133 + const struct inet_connection_sock_af_ops *af_ops = inet_csk(newsk)->icsk_af_ops;
7134 +#ifdef CONFIG_SECURITY_NETWORK
7135 + void *sptr = newsk->sk_security;
7136 +#endif
7138 + if (sk->sk_family == AF_INET) {
7139 + memcpy(newsk, sk, offsetof(struct sock, sk_dontcopy_begin));
7140 + memcpy(&newsk->sk_dontcopy_end, &sk->sk_dontcopy_end,
7141 + sizeof(struct tcp_sock) - offsetof(struct sock, sk_dontcopy_end));
7142 + } else {
7143 + memcpy(newsk, sk, offsetof(struct sock, sk_dontcopy_begin));
7144 + memcpy(&newsk->sk_dontcopy_end, &sk->sk_dontcopy_end,
7145 + sizeof(struct tcp6_sock) - offsetof(struct sock, sk_dontcopy_end));
7148 +#ifdef CONFIG_SECURITY_NETWORK
7149 + newsk->sk_security = sptr;
7150 + security_sk_clone(sk, newsk);
7151 +#endif
7153 + /* Has been changed by sock_copy above - we may need an IPv6-socket */
7154 + newsk->sk_family = family;
7155 + newsk->sk_prot = prot;
7156 + newsk->sk_prot_creator = prot;
7157 + inet_csk(newsk)->icsk_af_ops = af_ops;
7159 + /* We don't yet have the mptcp-point. Thus we still need inet_sock_destruct */
7160 + newsk->sk_destruct = inet_sock_destruct;
7162 + /* SANITY */
7163 + get_net(sock_net(newsk));
7164 + sk_node_init(&newsk->sk_node);
7165 + sock_lock_init_class_and_name(newsk, "slock-AF_INET-MPTCP",
7166 + &meta_slock_key, "sk_lock-AF_INET-MPTCP",
7167 + &meta_key);
7169 + /* Unlocks are in:
7171 + * 1. If we are creating the master-sk
7172 + * * on client-side in tcp_rcv_state_process, "case TCP_SYN_SENT"
7173 + * * on server-side in tcp_child_process
7174 + * 2. If we are creating another subsock
7175 + * * Also in tcp_child_process
7176 + */
7177 + bh_lock_sock(newsk);
7178 + newsk->sk_backlog.head = NULL;
7179 + newsk->sk_backlog.tail = NULL;
7180 + newsk->sk_backlog.len = 0;
7182 + atomic_set(&newsk->sk_rmem_alloc, 0);
7183 + atomic_set(&newsk->sk_wmem_alloc, 1);
7184 + atomic_set(&newsk->sk_omem_alloc, 0);
7186 + skb_queue_head_init(&newsk->sk_receive_queue);
7187 + skb_queue_head_init(&newsk->sk_write_queue);
7188 +#ifdef CONFIG_NET_DMA
7189 + skb_queue_head_init(&newsk->sk_async_wait_queue);
7190 +#endif
7192 + spin_lock_init(&newsk->sk_dst_lock);
7193 + rwlock_init(&newsk->sk_callback_lock);
7194 + lockdep_set_class_and_name(&newsk->sk_callback_lock,
7195 + af_callback_keys + newsk->sk_family,
7196 + af_family_clock_key_strings[newsk->sk_family]);
7197 + newsk->sk_dst_cache = NULL;
7198 + newsk->sk_rx_dst = NULL;
7199 + newsk->sk_wmem_queued = 0;
7200 + newsk->sk_forward_alloc = 0;
7201 + newsk->sk_send_head = NULL;
7202 + newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
7204 + tcp_sk(newsk)->mptcp = NULL;
7206 + sock_reset_flag(newsk, SOCK_DONE);
7207 + skb_queue_head_init(&newsk->sk_error_queue);
7209 + filter = rcu_dereference_protected(newsk->sk_filter, 1);
7210 + if (filter != NULL)
7211 + sk_filter_charge(newsk, filter);
7213 + if (unlikely(xfrm_sk_clone_policy(newsk))) {
7214 + /* It is still raw copy of parent, so invalidate
7215 + * destructor and make plain sk_free()
7216 + */
7217 + newsk->sk_destruct = NULL;
7218 + bh_unlock_sock(newsk);
7219 + sk_free(newsk);
7220 + newsk = NULL;
7221 + return -ENOMEM;
7224 + newsk->sk_err = 0;
7225 + newsk->sk_priority = 0;
7226 + /* Before updating sk_refcnt, we must commit prior changes to memory
7227 + * (Documentation/RCU/rculist_nulls.txt for details)
7228 + */
7229 + smp_wmb();
7230 + atomic_set(&newsk->sk_refcnt, 2);
7232 + /* Increment the counter in the same struct proto as the master
7233 + * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
7234 + * is the same as sk->sk_prot->socks, as this field was copied
7235 + * with memcpy).
7237 + * This _changes_ the previous behaviour, where
7238 + * tcp_create_openreq_child always was incrementing the
7239 + * equivalent to tcp_prot->socks (inet_sock_nr), so this has
7240 + * to be taken into account in all callers. -acme
7241 + */
7242 + sk_refcnt_debug_inc(newsk);
7243 + sk_set_socket(newsk, NULL);
7244 + newsk->sk_wq = NULL;
7246 + if (newsk->sk_prot->sockets_allocated)
7247 + percpu_counter_inc(newsk->sk_prot->sockets_allocated);
7249 + if (sock_flag(newsk, SOCK_TIMESTAMP) ||
7250 + sock_flag(newsk, SOCK_TIMESTAMPING_RX_SOFTWARE))
7251 + net_enable_timestamp();
7253 + return 0;
7256 +int mptcp_alloc_mpcb(struct sock *meta_sk, __u64 remote_key, u32 window)
7258 + struct mptcp_cb *mpcb;
7259 + struct sock *master_sk;
7260 + struct inet_connection_sock *master_icsk, *meta_icsk = inet_csk(meta_sk);
7261 + struct tcp_sock *master_tp, *meta_tp = tcp_sk(meta_sk);
7262 + struct sk_buff *skb, *tmp;
7263 + u64 idsn;
7265 + master_sk = sk_prot_alloc(meta_sk->sk_prot, GFP_ATOMIC | __GFP_ZERO,
7266 + meta_sk->sk_family);
7267 + if (!master_sk)
7268 + return -ENOBUFS;
7270 + master_tp = tcp_sk(master_sk);
7271 + master_icsk = inet_csk(master_sk);
7273 + /* Need to set this here - it is needed by mptcp_inherit_sk */
7274 + master_sk->sk_prot = meta_sk->sk_prot;
7275 + master_sk->sk_prot_creator = meta_sk->sk_prot;
7276 + master_icsk->icsk_af_ops = meta_icsk->icsk_af_ops;
7278 + mpcb = kmem_cache_zalloc(mptcp_cb_cache, GFP_ATOMIC);
7279 + if (!mpcb) {
7280 + sk_free(master_sk);
7281 + return -ENOBUFS;
7284 + /* master_sk inherits from meta_sk */
7285 + if (mptcp_inherit_sk(meta_sk, master_sk, meta_sk->sk_family, GFP_ATOMIC)) {
7286 + kmem_cache_free(mptcp_cb_cache, mpcb);
7287 + return -ENOBUFS;
7290 +#if IS_ENABLED(CONFIG_IPV6)
7291 + if (meta_icsk->icsk_af_ops == &ipv6_mapped) {
7292 + struct ipv6_pinfo *newnp, *np = inet6_sk(meta_sk);
7294 + inet_sk(master_sk)->pinet6 = &((struct tcp6_sock *)master_sk)->inet6;
7296 + newnp = inet6_sk(master_sk);
7297 + memcpy(newnp, np, sizeof(struct ipv6_pinfo));
7299 + newnp->ipv6_mc_list = NULL;
7300 + newnp->ipv6_ac_list = NULL;
7301 + newnp->ipv6_fl_list = NULL;
7302 + newnp->opt = NULL;
7303 + newnp->pktoptions = NULL;
7304 + (void)xchg(&newnp->rxpmtu, NULL);
7305 + } else if (meta_sk->sk_family == AF_INET6) {
7306 + struct ipv6_pinfo *newnp, *np = inet6_sk(meta_sk);
7308 + inet_sk(master_sk)->pinet6 = &((struct tcp6_sock *)master_sk)->inet6;
7310 + newnp = inet6_sk(master_sk);
7311 + memcpy(newnp, np, sizeof(struct ipv6_pinfo));
7313 + newnp->hop_limit = -1;
7314 + newnp->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
7315 + newnp->mc_loop = 1;
7316 + newnp->pmtudisc = IPV6_PMTUDISC_WANT;
7317 + newnp->ipv6only = sock_net(master_sk)->ipv6.sysctl.bindv6only;
7319 +#endif
7321 + meta_tp->mptcp = kmem_cache_zalloc(mptcp_sock_cache, GFP_ATOMIC);
7322 + if (!meta_tp->mptcp) {
7323 + kmem_cache_free(mptcp_cb_cache, mpcb);
7324 + sk_free(master_sk);
7325 + return -ENOBUFS;
7328 + INIT_LIST_HEAD(&meta_tp->mptcp->cb_list);
7330 + /* Store the keys and generate the peer's token */
7331 + mpcb->mptcp_loc_key = meta_tp->mptcp_loc_key;
7332 + mpcb->mptcp_loc_token = meta_tp->mptcp_loc_token;
7334 + /* Generate Initial data-sequence-numbers */
7335 + mptcp_key_sha1(mpcb->mptcp_loc_key, NULL, &idsn);
7336 + idsn = ntohll(idsn) + 1;
7337 + mpcb->snd_high_order[0] = idsn >> 32;
7338 + mpcb->snd_high_order[1] = mpcb->snd_high_order[0] - 1;
7340 + meta_tp->write_seq = (u32)idsn;
7341 + meta_tp->snd_sml = meta_tp->write_seq;
7342 + meta_tp->snd_una = meta_tp->write_seq;
7343 + meta_tp->snd_nxt = meta_tp->write_seq;
7344 + meta_tp->pushed_seq = meta_tp->write_seq;
7345 + meta_tp->snd_up = meta_tp->write_seq;
7347 + mpcb->mptcp_rem_key = remote_key;
7348 + mptcp_key_sha1(mpcb->mptcp_rem_key, &mpcb->mptcp_rem_token, &idsn);
7349 + idsn = ntohll(idsn) + 1;
7350 + mpcb->rcv_high_order[0] = idsn >> 32;
7351 + mpcb->rcv_high_order[1] = mpcb->rcv_high_order[0] + 1;
7352 + meta_tp->copied_seq = (u32) idsn;
7353 + meta_tp->rcv_nxt = (u32) idsn;
7354 + meta_tp->rcv_wup = (u32) idsn;
7356 + meta_tp->snd_wl1 = meta_tp->rcv_nxt - 1;
7357 + meta_tp->snd_wnd = window;
7358 + meta_tp->retrans_stamp = 0; /* Set in tcp_connect() */
7360 + meta_tp->packets_out = 0;
7361 + meta_tp->mptcp->snt_isn = meta_tp->write_seq; /* Initial data-sequence-number */
7362 + meta_icsk->icsk_probes_out = 0;
7364 + /* Set mptcp-pointers */
7365 + master_tp->mpcb = mpcb;
7366 + master_tp->meta_sk = meta_sk;
7367 + meta_tp->mpcb = mpcb;
7368 + meta_tp->meta_sk = meta_sk;
7369 + mpcb->meta_sk = meta_sk;
7370 + mpcb->master_sk = master_sk;
7372 + set_mpc(meta_tp);
7373 + meta_tp->mptcp->attached = 0;
7374 + meta_tp->was_meta_sk = 0;
7376 + /* Initialize the queues */
7377 + skb_queue_head_init(&mpcb->reinject_queue);
7378 + skb_queue_head_init(&master_tp->out_of_order_queue);
7379 + tcp_prequeue_init(master_tp);
7380 + INIT_LIST_HEAD(&master_tp->tsq_node);
7382 + master_tp->tsq_flags = 0;
7384 + /* Copy the write-queue from the meta down to the master.
7385 + * This is necessary to get the SYN to the master-write-queue.
7386 + * No other data can be queued, before tcp_sendmsg waits for the
7387 + * connection to finish.
7388 + */
7389 + skb_queue_walk_safe(&meta_sk->sk_write_queue, skb, tmp) {
7390 + skb_unlink(skb, &meta_sk->sk_write_queue);
7391 + skb_queue_tail(&master_sk->sk_write_queue, skb);
7393 + master_sk->sk_wmem_queued += skb->truesize;
7394 + sk_mem_charge(master_sk, skb->truesize);
7397 + meta_sk->sk_wmem_queued = 0;
7398 + meta_sk->sk_forward_alloc = 0;
7400 + mutex_init(&mpcb->mpcb_mutex);
7402 + /* Init the accept_queue structure. We support a queue of 32 pending
7403 + * connections; it does not need to be huge, since we only store
7404 + * pending subflow creations here.
7405 + */
7406 + if (reqsk_queue_alloc(&meta_icsk->icsk_accept_queue, 32, GFP_ATOMIC)) {
7407 + inet_put_port(master_sk);
7408 + kmem_cache_free(mptcp_sock_cache, meta_tp->mptcp);
7409 + kmem_cache_free(mptcp_cb_cache, mpcb);
7410 + sk_free(master_sk);
7411 + reset_mpc(meta_tp);
7412 + return -ENOMEM;
7415 + /* Redefine function-pointers as the meta-sk is now fully ready */
7416 + meta_sk->sk_backlog_rcv = mptcp_backlog_rcv;
7417 + meta_sk->sk_destruct = mptcp_sock_destruct;
7418 + mpcb->syn_recv_sock = mptcp_syn_recv_sock;
7420 + /* Meta-level retransmit timer */
7421 + meta_icsk->icsk_rto *= 2; /* Double the initial RTO */
7423 + tcp_init_xmit_timers(master_sk);
7424 + /* Has been set for sending out the SYN */
7425 + inet_csk_clear_xmit_timer(meta_sk, ICSK_TIME_RETRANS);
7427 + if (!meta_tp->inside_tk_table) {
7428 + /* Adding the meta_tp in the token hashtable - coming from server-side */
7429 + rcu_read_lock();
7430 + spin_lock(&mptcp_tk_hashlock);
7432 + __mptcp_hash_insert(meta_tp, mpcb->mptcp_loc_token);
7434 + spin_unlock(&mptcp_tk_hashlock);
7435 + rcu_read_unlock();
7437 + master_tp->inside_tk_table = 0;
7439 + /* Init time-wait stuff */
7440 + INIT_LIST_HEAD(&mpcb->tw_list);
7441 + spin_lock_init(&mpcb->tw_lock);
7443 + INIT_LIST_HEAD(&mpcb->callback_list);
7445 + mptcp_mpcb_inherit_sockopts(meta_sk, master_sk);
7447 + mpcb->orig_sk_rcvbuf = meta_sk->sk_rcvbuf;
7448 + mpcb->orig_sk_sndbuf = meta_sk->sk_sndbuf;
7449 + mpcb->orig_window_clamp = meta_tp->window_clamp;
7451 + /* The meta is directly linked - set refcnt to 1 */
7452 + atomic_set(&mpcb->mpcb_refcnt, 1);
7454 + mptcp_init_path_manager(mpcb);
7456 + mptcp_debug("%s: created mpcb with token %#x\n",
7457 + __func__, mpcb->mptcp_loc_token);
7459 + return 0;
7462 +struct sock *mptcp_sk_clone(const struct sock *sk, int family,
7463 + const gfp_t priority)
7465 + struct sock *newsk = NULL;
7467 + if (family == AF_INET && sk->sk_family == AF_INET) {
7468 + newsk = sk_prot_alloc(&tcp_prot, priority, family);
7469 + if (!newsk)
7470 + return NULL;
7472 + /* Set these pointers - they are needed by mptcp_inherit_sk */
7473 + newsk->sk_prot = &tcp_prot;
7474 + newsk->sk_prot_creator = &tcp_prot;
7475 + inet_csk(newsk)->icsk_af_ops = &ipv4_specific;
7476 + newsk->sk_family = AF_INET;
7478 +#if IS_ENABLED(CONFIG_IPV6)
7479 + else {
7480 + newsk = sk_prot_alloc(&tcpv6_prot, priority, family);
7481 + if (!newsk)
7482 + return NULL;
7484 + newsk->sk_prot = &tcpv6_prot;
7485 + newsk->sk_prot_creator = &tcpv6_prot;
7486 + if (family == AF_INET)
7487 + inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
7488 + else
7489 + inet_csk(newsk)->icsk_af_ops = &ipv6_specific;
7490 + newsk->sk_family = AF_INET6;
7492 +#endif
7494 + if (mptcp_inherit_sk(sk, newsk, family, priority))
7495 + return NULL;
7497 + return newsk;
7500 +void mptcp_fallback_meta_sk(struct sock *meta_sk)
7502 + kfree(inet_csk(meta_sk)->icsk_accept_queue.listen_opt);
7503 + kmem_cache_free(mptcp_sock_cache, tcp_sk(meta_sk)->mptcp);
7504 + kmem_cache_free(mptcp_cb_cache, tcp_sk(meta_sk)->mpcb);
7507 +int mptcp_add_sock(struct sock *meta_sk, struct sock *sk, u8 loc_id, u8 rem_id,
7508 + gfp_t flags)
7510 + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
7511 + struct tcp_sock *tp = tcp_sk(sk);
7513 + tp->mptcp = kmem_cache_zalloc(mptcp_sock_cache, flags);
7514 + if (!tp->mptcp)
7515 + return -ENOMEM;
7517 + tp->mptcp->path_index = mptcp_set_new_pathindex(mpcb);
7518 + /* No more space for more subflows? */
7519 + if (!tp->mptcp->path_index) {
7520 + kmem_cache_free(mptcp_sock_cache, tp->mptcp);
7521 + return -EPERM;
7524 + INIT_LIST_HEAD(&tp->mptcp->cb_list);
7526 + tp->mptcp->tp = tp;
7527 + tp->mpcb = mpcb;
7528 + tp->meta_sk = meta_sk;
7529 + set_mpc(tp);
7530 + tp->mptcp->loc_id = loc_id;
7531 + tp->mptcp->rem_id = rem_id;
7532 + tp->mptcp->last_rbuf_opti = tcp_time_stamp;
7534 + /* The corresponding sock_put is in mptcp_sock_destruct(). It cannot be
7535 + * included in mptcp_del_sock(), because the mpcb must remain alive
7536 + * until the last subsocket is completely destroyed.
7537 + */
7538 + sock_hold(meta_sk);
7539 + atomic_inc(&mpcb->mpcb_refcnt);
7541 + tp->mptcp->next = mpcb->connection_list;
7542 + mpcb->connection_list = tp;
7543 + tp->mptcp->attached = 1;
7545 + mpcb->cnt_subflows++;
7546 + atomic_add(atomic_read(&((struct sock *)tp)->sk_rmem_alloc),
7547 + &meta_sk->sk_rmem_alloc);
7549 + mptcp_sub_inherit_sockopts(meta_sk, sk);
7550 + INIT_DELAYED_WORK(&tp->mptcp->work, mptcp_sub_close_wq);
7552 + /* As we successfully allocated the mptcp_tcp_sock, we have to
7553 + * change the function-pointers here (for sk_destruct to work correctly)
7554 + */
7555 + sk->sk_error_report = mptcp_sock_def_error_report;
7556 + sk->sk_data_ready = mptcp_data_ready;
7557 + sk->sk_write_space = mptcp_write_space;
7558 + sk->sk_state_change = mptcp_set_state;
7559 + sk->sk_destruct = mptcp_sock_destruct;
7561 + if (sk->sk_family == AF_INET)
7562 + mptcp_debug("%s: token %#x pi %d, src_addr:%pI4:%d dst_addr:%pI4:%d, cnt_subflows now %d\n",
7563 + __func__ , mpcb->mptcp_loc_token,
7564 + tp->mptcp->path_index,
7565 + &((struct inet_sock *)tp)->inet_saddr,
7566 + ntohs(((struct inet_sock *)tp)->inet_sport),
7567 + &((struct inet_sock *)tp)->inet_daddr,
7568 + ntohs(((struct inet_sock *)tp)->inet_dport),
7569 + mpcb->cnt_subflows);
7570 +#if IS_ENABLED(CONFIG_IPV6)
7571 + else
7572 + mptcp_debug("%s: token %#x pi %d, src_addr:%pI6:%d dst_addr:%pI6:%d, cnt_subflows now %d\n",
7573 + __func__ , mpcb->mptcp_loc_token,
7574 + tp->mptcp->path_index, &inet6_sk(sk)->saddr,
7575 + ntohs(((struct inet_sock *)tp)->inet_sport),
7576 + &sk->sk_v6_daddr,
7577 + ntohs(((struct inet_sock *)tp)->inet_dport),
7578 + mpcb->cnt_subflows);
7579 +#endif
7581 + return 0;
7584 +void mptcp_del_sock(struct sock *sk)
7586 + struct tcp_sock *tp = tcp_sk(sk), *tp_prev;
7587 + struct mptcp_cb *mpcb;
7589 + if (!tp->mptcp || !tp->mptcp->attached)
7590 + return;
7592 + mpcb = tp->mpcb;
7593 + tp_prev = mpcb->connection_list;
7595 + mptcp_debug("%s: Removing subsock tok %#x pi:%d state %d is_meta? %d\n",
7596 + __func__, mpcb->mptcp_loc_token, tp->mptcp->path_index,
7597 + sk->sk_state, is_meta_sk(sk));
7599 + if (tp_prev == tp) {
7600 + mpcb->connection_list = tp->mptcp->next;
7601 + } else {
7602 + for (; tp_prev && tp_prev->mptcp->next; tp_prev = tp_prev->mptcp->next) {
7603 + if (tp_prev->mptcp->next == tp) {
7604 + tp_prev->mptcp->next = tp->mptcp->next;
7605 + break;
7609 + mpcb->cnt_subflows--;
7610 + if (tp->mptcp->establish_increased)
7611 + mpcb->cnt_established--;
7613 + tp->mptcp->next = NULL;
7614 + tp->mptcp->attached = 0;
7615 + mpcb->path_index_bits &= ~(1 << tp->mptcp->path_index);
7617 + if (!skb_queue_empty(&sk->sk_write_queue))
7618 + mptcp_reinject_data(sk, 0);
7620 + if (is_master_tp(tp))
7621 + mpcb->master_sk = NULL;
7622 + else if (tp->mptcp->pre_established)
7623 + sk_stop_timer(sk, &tp->mptcp->mptcp_ack_timer);
7625 + rcu_assign_pointer(inet_sk(sk)->inet_opt, NULL);
7628 +/* Updates the metasocket ULID/port data, based on the given sock.
7629 + * The argument sock must be the sock accessible to the application.
7630 + * In this function, we update the meta socket info, based on the changes
7631 + * in the application socket (bind, address allocation, ...)
7632 + */
7633 +void mptcp_update_metasocket(struct sock *sk, struct sock *meta_sk)
7635 + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
7636 + union inet_addr addr;
7637 + int index;
7639 + /* Get the index of the local address */
7640 + if (sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(sk)) {
7641 + addr.ip = inet_sk(sk)->inet_saddr;
7642 + index = mpcb->pm_ops->get_local_index(AF_INET, &addr, sock_net(meta_sk));
7643 + } else {
7644 + addr.in6 = inet6_sk(sk)->saddr;
7645 + index = mpcb->pm_ops->get_local_index(AF_INET6, &addr, sock_net(meta_sk));
7648 + if (sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(sk)) {
7649 + mptcp_v4_add_raddress(mpcb,
7650 + (struct in_addr *)&inet_sk(sk)->inet_daddr,
7651 + 0, 0);
7652 + if (index >= 0)
7653 + mptcp_v4_set_init_addr_bit(mpcb, inet_sk(sk)->inet_daddr, index);
7654 + } else {
7655 +#if IS_ENABLED(CONFIG_IPV6)
7656 + mptcp_v6_add_raddress(mpcb, &sk->sk_v6_daddr, 0, 0);
7657 + if (index >= 0)
7658 + mptcp_v6_set_init_addr_bit(mpcb, &sk->sk_v6_daddr, index);
7659 +#endif
7662 + if (mpcb->pm_ops->new_session)
7663 + mpcb->pm_ops->new_session(meta_sk, index);
7665 + tcp_sk(sk)->mptcp->send_mp_prio = tcp_sk(sk)->mptcp->low_prio;
7668 +/* Clean up the receive buffer for full frames taken by the user,
7669 + * then send an ACK if necessary. COPIED is the number of bytes
7670 + * tcp_recvmsg has given to the user so far, it speeds up the
7671 + * calculation of whether or not we must ACK for the sake of
7672 + * a window update.
7673 + */
7674 +void mptcp_cleanup_rbuf(struct sock *meta_sk, int copied)
7676 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
7677 + struct sock *sk;
7678 + __u32 rcv_window_now = 0;
7680 + if (copied > 0 && !(meta_sk->sk_shutdown & RCV_SHUTDOWN)) {
7681 + rcv_window_now = tcp_receive_window(meta_tp);
7683 + if (2 * rcv_window_now > meta_tp->window_clamp)
7684 + rcv_window_now = 0;
7687 + mptcp_for_each_sk(meta_tp->mpcb, sk) {
7688 + struct tcp_sock *tp = tcp_sk(sk);
7689 + const struct inet_connection_sock *icsk = inet_csk(sk);
7691 + if (!mptcp_sk_can_send_ack(sk))
7692 + continue;
7694 + if (!inet_csk_ack_scheduled(sk))
7695 + goto second_part;
7696 + /* Delayed ACKs frequently hit locked sockets during bulk
7697 + * receive.
7698 + */
7699 + if (icsk->icsk_ack.blocked ||
7700 + /* Once-per-two-segments ACK was not sent by tcp_input.c */
7701 + tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
7702 + /* If this read emptied read buffer, we send ACK, if
7703 + * connection is not bidirectional, user drained
7704 + * receive buffer and there was a small segment
7705 + * in queue.
7706 + */
7707 + (copied > 0 &&
7708 + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
7709 + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
7710 + !icsk->icsk_ack.pingpong)) &&
7711 + !atomic_read(&meta_sk->sk_rmem_alloc))) {
7712 + tcp_send_ack(sk);
7713 + continue;
7716 +second_part:
7717 + /* This is the second part of tcp_cleanup_rbuf */
7718 + if (rcv_window_now) {
7719 + __u32 new_window = tp->__select_window(sk);
7721 + /* Send ACK now, if this read freed lots of space
7722 + * in our buffer. Certainly, new_window is new window.
7723 + * We can advertise it now, if it is not less than
7724 + * current one.
7725 + * "Lots" means "at least twice" here.
7726 + */
7727 + if (new_window && new_window >= 2 * rcv_window_now)
7728 + tcp_send_ack(sk);
7733 +static int mptcp_sub_send_fin(struct sock *sk)
7735 + struct tcp_sock *tp = tcp_sk(sk);
7736 + struct sk_buff *skb = tcp_write_queue_tail(sk);
7737 + int mss_now;
7739 + /* Optimization, tack on the FIN if we have a queue of
7740 + * unsent frames. But be careful about outgoing SACKS
7741 + * and IP options.
7742 + */
7743 + mss_now = tcp_current_mss(sk);
7745 + if (tcp_send_head(sk) != NULL) {
7746 + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
7747 + TCP_SKB_CB(skb)->end_seq++;
7748 + tp->write_seq++;
7749 + } else {
7750 + skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_ATOMIC);
7751 + if (!skb)
7752 + return 1;
7754 + /* Reserve space for headers and prepare control bits. */
7755 + skb_reserve(skb, MAX_TCP_HEADER);
7756 + /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
7757 + tcp_init_nondata_skb(skb, tp->write_seq,
7758 + TCPHDR_ACK | TCPHDR_FIN);
7759 + tcp_queue_skb(sk, skb);
7761 + __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
7763 + return 0;
7766 +void mptcp_sub_close_wq(struct work_struct *work)
7768 + struct mptcp_tcp_sock *mptcp = container_of(work, struct mptcp_tcp_sock, work.work);
7769 + struct tcp_sock *tp = mptcp->tp;
7770 + struct sock *sk = (struct sock *)tp;
7771 + struct sock *meta_sk = mptcp_meta_sk(sk);
7773 + mutex_lock(&tp->mpcb->mpcb_mutex);
7774 + lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);
7776 + if (sock_flag(sk, SOCK_DEAD))
7777 + goto exit;
7779 + /* We come from tcp_disconnect. We are sure that meta_sk is set */
7780 + if (!tp->mpc) {
7781 + tp->closing = 1;
7782 + sock_rps_reset_flow(sk);
7783 + tcp_close(sk, 0);
7784 + goto exit;
7787 + if (meta_sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE) {
7788 + tp->closing = 1;
7789 + sock_rps_reset_flow(sk);
7790 + tcp_close(sk, 0);
7791 + } else if (tcp_close_state(sk)) {
7792 + sk->sk_shutdown |= SEND_SHUTDOWN;
7793 + tcp_send_fin(sk);
7796 +exit:
7797 + release_sock(meta_sk);
7798 + mutex_unlock(&tp->mpcb->mpcb_mutex);
7799 + sock_put(sk);
7802 +void mptcp_sub_close(struct sock *sk, unsigned long delay)
7804 + struct tcp_sock *tp = tcp_sk(sk);
7805 + struct delayed_work *work = &tcp_sk(sk)->mptcp->work;
7807 + /* We are already closing - e.g., call from sock_def_error_report upon
7808 + * tcp_disconnect in tcp_close.
7809 + */
7810 + if (tp->closing)
7811 + return;
7813 + /* Work already scheduled ? */
7814 + if (work_pending(&work->work)) {
7815 + /* Work present - who will be first ? */
7816 + if (jiffies + delay > work->timer.expires)
7817 + return;
7819 + /* Try canceling - if it fails, work will be executed soon */
7820 + if (!cancel_delayed_work(work))
7821 + return;
7822 + sock_put(sk);
7825 + if (!delay) {
7826 + unsigned char old_state = sk->sk_state;
7828 + /* If we are in user-context we can directly do the closing
7829 + * procedure. No need to schedule a work-queue.
7830 + */
7831 + if (!in_softirq()) {
7832 + if (sock_flag(sk, SOCK_DEAD))
7833 + return;
7835 + if (!tp->mpc) {
7836 + tp->closing = 1;
7837 + sock_rps_reset_flow(sk);
7838 + tcp_close(sk, 0);
7839 + return;
7842 + if (mptcp_meta_sk(sk)->sk_shutdown == SHUTDOWN_MASK ||
7843 + sk->sk_state == TCP_CLOSE) {
7844 + tp->closing = 1;
7845 + sock_rps_reset_flow(sk);
7846 + tcp_close(sk, 0);
7847 + } else if (tcp_close_state(sk)) {
7848 + sk->sk_shutdown |= SEND_SHUTDOWN;
7849 + tcp_send_fin(sk);
7852 + return;
7855 + /* We directly send the FIN, because it may take a long time
7856 + * until the work-queue gets scheduled...
7858 + * If mptcp_sub_send_fin returns 1, it failed and thus we reset
7859 + * the old state so that tcp_close will finally send the fin
7860 + * in user-context.
7861 + */
7862 + if (!sk->sk_err && old_state != TCP_CLOSE &&
7863 + tcp_close_state(sk) && mptcp_sub_send_fin(sk)) {
7864 + if (old_state == TCP_ESTABLISHED)
7865 + TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
7866 + sk->sk_state = old_state;
7870 + sock_hold(sk);
7871 + queue_delayed_work(mptcp_wq, work, delay);
7874 +void mptcp_sub_force_close(struct sock *sk)
7876 + /* The below tcp_done may have freed the socket, if it is already dead.
7877 + * Thus, we are not allowed to access it afterwards. That's why
7878 + * we have to store the dead-state in this local variable.
7879 + */
7880 + int sock_is_dead = sock_flag(sk, SOCK_DEAD);
7882 + tcp_sk(sk)->mp_killed = 1;
7884 + if (sk->sk_state != TCP_CLOSE)
7885 + tcp_done(sk);
7887 + if (!sock_is_dead)
7888 + mptcp_sub_close(sk, 0);
7890 +EXPORT_SYMBOL(mptcp_sub_force_close);
7892 +/* Update the mpcb send buffer, based on the contributions
7893 + * of each subflow
7894 + */
7895 +void mptcp_update_sndbuf(struct mptcp_cb *mpcb)
7897 + struct sock *meta_sk = mpcb->meta_sk, *sk;
7898 + int new_sndbuf = 0, old_sndbuf = meta_sk->sk_sndbuf;
7899 + mptcp_for_each_sk(mpcb, sk) {
7900 + if (!mptcp_sk_can_send(sk))
7901 + continue;
7903 + new_sndbuf += sk->sk_sndbuf;
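+ /* Cap the sum at the global tcp_wmem limit; the < 0 test below also
+ * guards against signed overflow while summing the subflow buffers.
+ */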
7905 + if (new_sndbuf > sysctl_tcp_wmem[2] || new_sndbuf < 0) {
7906 + new_sndbuf = sysctl_tcp_wmem[2];
7907 + break;
7910 + meta_sk->sk_sndbuf = max(min(new_sndbuf, sysctl_tcp_wmem[2]), meta_sk->sk_sndbuf);
7912 + /* The subflow's call to sk_write_space in tcp_new_space ends up in
7913 + * mptcp_write_space.
7914 + * It has nothing to do with waking up the application.
7915 + * So, we do it here.
7916 + */
7917 + if (old_sndbuf != meta_sk->sk_sndbuf)
7918 + meta_sk->sk_write_space(meta_sk);
7921 +void mptcp_close(struct sock *meta_sk, long timeout)
7923 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
7924 + struct sock *sk_it, *tmpsk;
7925 + struct mptcp_cb *mpcb = meta_tp->mpcb;
7926 + struct sk_buff *skb;
7927 + int data_was_unread = 0;
7928 + int state;
7930 + mptcp_debug("%s: Close of meta_sk with tok %#x\n",
7931 + __func__, mpcb->mptcp_loc_token);
7933 + mutex_lock(&mpcb->mpcb_mutex);
7934 + lock_sock(meta_sk);
7936 + if (meta_tp->inside_tk_table) {
7937 + /* Detach the mpcb from the token hashtable */
7938 + mptcp_hash_remove_bh(meta_tp);
7939 + reqsk_queue_destroy(&inet_csk(meta_sk)->icsk_accept_queue);
7942 + meta_sk->sk_shutdown = SHUTDOWN_MASK;
7943 + /* We need to flush the recv. buffs. We do this only on the
7944 + * descriptor close, not protocol-sourced closes, because the
7945 + * reader process may not have drained the data yet!
7946 + */
7947 + while ((skb = __skb_dequeue(&meta_sk->sk_receive_queue)) != NULL) {
7948 + u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
7949 + tcp_hdr(skb)->fin;
7950 + data_was_unread += len;
7951 + __kfree_skb(skb);
7954 + sk_mem_reclaim(meta_sk);
7956 + /* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
7957 + if (meta_sk->sk_state == TCP_CLOSE) {
7958 + mptcp_for_each_sk_safe(mpcb, sk_it, tmpsk) {
7959 + if (tcp_sk(sk_it)->send_mp_fclose)
7960 + continue;
7961 + mptcp_sub_close(sk_it, 0);
7963 + goto adjudge_to_death;
7966 + if (data_was_unread) {
7967 + /* Unread data was tossed, zap the connection. */
7968 + NET_INC_STATS_USER(sock_net(meta_sk), LINUX_MIB_TCPABORTONCLOSE);
7969 + tcp_set_state(meta_sk, TCP_CLOSE);
7970 + tcp_send_active_reset(meta_sk, meta_sk->sk_allocation);
7971 + } else if (sock_flag(meta_sk, SOCK_LINGER) && !meta_sk->sk_lingertime) {
7972 + /* Check zero linger _after_ checking for unread data. */
7973 + meta_sk->sk_prot->disconnect(meta_sk, 0);
7974 + NET_INC_STATS_USER(sock_net(meta_sk), LINUX_MIB_TCPABORTONDATA);
7975 + } else if (tcp_close_state(meta_sk)) {
7976 + mptcp_send_fin(meta_sk);
7977 + } else if (meta_tp->snd_una == meta_tp->write_seq) {
7978 + /* The DATA_FIN has been sent and acknowledged
7979 + * (e.g., by sk_shutdown). Close all the other subflows
7980 + */
7981 + mptcp_for_each_sk_safe(mpcb, sk_it, tmpsk) {
7982 + unsigned long delay = 0;
7983 + /* If we are the passive closer, don't trigger
7984 + * the subflow-FIN until the peer has sent its FIN
7985 + * on the subflow - thus we add a delay
7986 + */
7987 + if (mpcb->passive_close &&
7988 + sk_it->sk_state == TCP_ESTABLISHED)
7989 + delay = inet_csk(sk_it)->icsk_rto << 3;
7991 + mptcp_sub_close(sk_it, delay);
7995 + sk_stream_wait_close(meta_sk, timeout);
7997 +adjudge_to_death:
7998 + state = meta_sk->sk_state;
7999 + sock_hold(meta_sk);
8000 + sock_orphan(meta_sk);
8002 + /* socket will be freed after mptcp_close - we have to prevent
8003 + * access from the subflows.
8004 + */
8005 + mptcp_for_each_sk(mpcb, sk_it) {
8006 + /* Similar to sock_orphan, but we don't set it DEAD, because
8007 + * the callbacks are still set and must be called.
8008 + */
8009 + write_lock_bh(&sk_it->sk_callback_lock);
8010 + sk_set_socket(sk_it, NULL);
8011 + sk_it->sk_wq = NULL;
8012 + write_unlock_bh(&sk_it->sk_callback_lock);
8015 + /* It is the last release_sock in its life. It will remove backlog. */
8016 + release_sock(meta_sk);
8018 + /* Now socket is owned by kernel and we acquire BH lock
8019 + * to finish close. No need to check for user refs.
8020 + */
8021 + local_bh_disable();
8022 + bh_lock_sock(meta_sk);
8023 + WARN_ON(sock_owned_by_user(meta_sk));
8025 + percpu_counter_inc(meta_sk->sk_prot->orphan_count);
8027 + /* Have we already been destroyed by a softirq or backlog? */
8028 + if (state != TCP_CLOSE && meta_sk->sk_state == TCP_CLOSE)
8029 + goto out;
8031 + /* This is a (useful) BSD violating of the RFC. There is a
8032 + * problem with TCP as specified in that the other end could
8033 + * keep a socket open forever with no application left this end.
8034 + * We use a 3 minute timeout (about the same as BSD) then kill
8035 + * our end. If they send after that then tough - BUT: long enough
8036 + * that we won't make the old 4*rto = almost no time - whoops
8037 + * reset mistake.
8039 + * Nope, it was not mistake. It is really desired behaviour
8040 + * f.e. on http servers, when such sockets are useless, but
8041 + * consume significant resources. Let's do it with special
8042 + * linger2 option. --ANK
8043 + */
8045 + if (meta_sk->sk_state == TCP_FIN_WAIT2) {
8046 + if (meta_tp->linger2 < 0) {
8047 + tcp_set_state(meta_sk, TCP_CLOSE);
8048 + tcp_send_active_reset(meta_sk, GFP_ATOMIC);
8049 + NET_INC_STATS_BH(sock_net(meta_sk),
8050 + LINUX_MIB_TCPABORTONLINGER);
8051 + } else {
8052 + const int tmo = tcp_fin_time(meta_sk);
8054 + if (tmo > TCP_TIMEWAIT_LEN) {
8055 + inet_csk_reset_keepalive_timer(meta_sk,
8056 + tmo - TCP_TIMEWAIT_LEN);
8057 + } else {
8058 + tcp_time_wait(meta_sk, TCP_FIN_WAIT2, tmo);
8059 + goto out;
8063 + if (meta_sk->sk_state != TCP_CLOSE) {
8064 + sk_mem_reclaim(meta_sk);
8065 + if (tcp_too_many_orphans(meta_sk, 0)) {
8066 + if (net_ratelimit())
8067 + pr_info("MPTCP: too many of orphaned sockets\n");
8068 + tcp_set_state(meta_sk, TCP_CLOSE);
8069 + tcp_send_active_reset(meta_sk, GFP_ATOMIC);
8070 + NET_INC_STATS_BH(sock_net(meta_sk),
8071 + LINUX_MIB_TCPABORTONMEMORY);
8076 + if (meta_sk->sk_state == TCP_CLOSE)
8077 + inet_csk_destroy_sock(meta_sk);
8078 + /* Otherwise, socket is reprieved until protocol close. */
8080 +out:
8081 + bh_unlock_sock(meta_sk);
8082 + local_bh_enable();
8083 + mutex_unlock(&mpcb->mpcb_mutex);
8084 + sock_put(meta_sk); /* Taken by sock_hold */
8087 +void mptcp_disconnect(struct sock *sk)
8089 + struct sock *subsk, *tmpsk;
8090 + struct tcp_sock *tp = tcp_sk(sk);
8092 + __skb_queue_purge(&tp->mpcb->reinject_queue);
8094 + if (tp->inside_tk_table) {
8095 + mptcp_hash_remove_bh(tp);
8096 + reqsk_queue_destroy(&inet_csk(tp->meta_sk)->icsk_accept_queue);
8099 + local_bh_disable();
8100 + mptcp_for_each_sk_safe(tp->mpcb, subsk, tmpsk) {
8101 + /* The socket will get removed from the subsocket-list
8102 + * and made non-mptcp by setting mpc to 0.
8104 + * This is necessary, because tcp_disconnect assumes
8105 + * that the connection is completely dead afterwards.
8106 + * Thus we need to do a mptcp_del_sock. Due to this call
8107 + * we have to make it non-mptcp.
8109 + * We have to lock the socket, because we set mpc to 0.
8110 + * An incoming packet would take the subsocket's lock
8111 + * and go on into the receive-path.
8112 + * This would be a race.
8113 + */
8115 + bh_lock_sock(subsk);
8116 + mptcp_del_sock(subsk);
8117 + reset_mpc(tcp_sk(subsk));
8118 + mptcp_sub_force_close(subsk);
8119 + bh_unlock_sock(subsk);
8121 + local_bh_enable();
8123 + tp->was_meta_sk = 1;
8124 + reset_mpc(tp);
8128 +/* Returns 1 if we should enable MPTCP for that socket. */
8129 +int mptcp_doit(struct sock *sk)
8131 + /* Do not allow MPTCP enabling if the MPTCP initialization failed */
8132 + if (mptcp_init_failed)
8133 + return 0;
8135 + if (sysctl_mptcp_enabled == MPTCP_APP && !tcp_sk(sk)->mptcp_enabled)
8136 + return 0;
8138 + /* Socket may already be established (e.g., called from tcp_recvmsg) */
8139 + if (tcp_sk(sk)->mpc || tcp_sk(sk)->request_mptcp)
8140 + return 1;
8142 + /* Don't do mptcp over loopback */
8143 + if (sk->sk_family == AF_INET &&
8144 + (ipv4_is_loopback(inet_sk(sk)->inet_daddr) ||
8145 + ipv4_is_loopback(inet_sk(sk)->inet_saddr)))
8146 + return 0;
8147 +#if IS_ENABLED(CONFIG_IPV6)
8148 + if (sk->sk_family == AF_INET6 &&
8149 + (ipv6_addr_loopback(&sk->sk_v6_daddr) ||
8150 + ipv6_addr_loopback(&inet6_sk(sk)->saddr)))
8151 + return 0;
8152 +#endif
8153 + if (mptcp_v6_is_v4_mapped(sk) &&
8154 + ipv4_is_loopback(inet_sk(sk)->inet_saddr))
8155 + return 0;
8157 +#ifdef CONFIG_TCP_MD5SIG
8158 + /* If TCP_MD5SIG is enabled, do not do MPTCP - there is no Option-Space */
8159 + if (tcp_sk(sk)->af_specific->md5_lookup(sk, sk))
8160 + return 0;
8161 +#endif
8163 + return 1;
8166 +int mptcp_create_master_sk(struct sock *meta_sk, __u64 remote_key, u32 window)
8168 + struct tcp_sock *master_tp;
8169 + struct sock *master_sk;
8171 + if (mptcp_alloc_mpcb(meta_sk, remote_key, window))
8172 + goto err_alloc_mpcb;
8174 + master_sk = tcp_sk(meta_sk)->mpcb->master_sk;
8175 + master_tp = tcp_sk(master_sk);
8177 + if (mptcp_add_sock(meta_sk, master_sk, 0, 0, GFP_ATOMIC))
8178 + goto err_add_sock;
8180 + if (__inet_inherit_port(meta_sk, master_sk) < 0)
8181 + goto err_add_sock;
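+ /* The meta-sk leaves the established hash and the master subflow is
+ * hashed in its place, so incoming segments for this connection are
+ * looked up on the subflow.
+ */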
8183 + meta_sk->sk_prot->unhash(meta_sk);
8185 + if (master_sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(master_sk))
8186 + __inet_hash_nolisten(master_sk, NULL);
8187 +#if IS_ENABLED(CONFIG_IPV6)
8188 + else
8189 + __inet6_hash(master_sk, NULL);
8190 +#endif
8192 + master_tp->mptcp->init_rcv_wnd = master_tp->rcv_wnd;
8194 + return 0;
8196 +err_add_sock:
8197 + mptcp_fallback_meta_sk(meta_sk);
8199 + inet_csk_prepare_forced_close(master_sk);
8200 + tcp_done(master_sk);
8201 + inet_csk_prepare_forced_close(meta_sk);
8202 + tcp_done(meta_sk);
8204 +err_alloc_mpcb:
8205 + return -ENOBUFS;
8208 +int mptcp_check_req_master(struct sock *sk, struct sock *child,
8209 + struct request_sock *req,
8210 + struct request_sock **prev,
8211 + struct mptcp_options_received *mopt)
8213 + struct tcp_sock *child_tp = tcp_sk(child);
8214 + struct sock *meta_sk = child;
8215 + struct mptcp_cb *mpcb;
8216 + struct mptcp_request_sock *mtreq;
8218 + if (!tcp_rsk(req)->saw_mpc)
8219 + return 1;
8221 + /* Just set these values to pass them to mptcp_alloc_mpcb */
8222 + mtreq = mptcp_rsk(req);
8223 + child_tp->mptcp_loc_key = mtreq->mptcp_loc_key;
8224 + child_tp->mptcp_loc_token = mtreq->mptcp_loc_token;
8226 + if (mptcp_create_master_sk(meta_sk, mtreq->mptcp_rem_key,
8227 + child_tp->snd_wnd))
8228 + return -ENOBUFS;
8230 + child = tcp_sk(child)->mpcb->master_sk;
8231 + child_tp = tcp_sk(child);
8232 + mpcb = child_tp->mpcb;
8234 + child_tp->mptcp->snt_isn = tcp_rsk(req)->snt_isn;
8235 + child_tp->mptcp->rcv_isn = tcp_rsk(req)->rcv_isn;
8237 + mpcb->dss_csum = mtreq->dss_csum;
8238 + mpcb->server_side = 1;
8240 + /* Will be moved to ESTABLISHED by tcp_rcv_state_process() */
8241 + mptcp_update_metasocket(child, meta_sk);
8243 + /* Needs to be done here additionally, because when accepting a
8244 + * new connection we pass by __reqsk_free and not reqsk_free.
8245 + */
8246 + mptcp_reqsk_remove_tk(req);
8248 + /* Hold when creating the meta-sk in tcp_vX_syn_recv_sock. */
8249 + sock_put(meta_sk);
8251 + inet_csk_reqsk_queue_unlink(sk, req, prev);
8252 + inet_csk_reqsk_queue_removed(sk, req);
8253 + inet_csk_reqsk_queue_add(sk, req, meta_sk);
8255 + return 0;
8258 +struct sock *mptcp_check_req_child(struct sock *meta_sk, struct sock *child,
8259 + struct request_sock *req,
8260 + struct request_sock **prev,
8261 + struct mptcp_options_received *mopt)
8263 + struct tcp_sock *child_tp = tcp_sk(child);
8264 + struct mptcp_request_sock *mtreq = mptcp_rsk(req);
8265 + struct mptcp_cb *mpcb = mtreq->mpcb;
8266 + u8 hash_mac_check[20];
8268 + child_tp->inside_tk_table = 0;
8270 + if (!mopt->join_ack)
8271 + goto teardown;
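+ /* Recompute the peer's MP_JOIN HMAC (keyed with the remote and local
+ * keys, over the remote and local nonces); the subflow is torn down
+ * below if it does not match the MAC received in the third ACK.
+ */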
8273 + mptcp_hmac_sha1((u8 *)&mpcb->mptcp_rem_key,
8274 + (u8 *)&mpcb->mptcp_loc_key,
8275 + (u8 *)&mtreq->mptcp_rem_nonce,
8276 + (u8 *)&mtreq->mptcp_loc_nonce,
8277 + (u32 *)hash_mac_check);
8279 + if (memcmp(hash_mac_check, (char *)&mopt->mptcp_recv_mac, 20))
8280 + goto teardown;
8282 + /* Point it to the same struct socket and wq as the meta_sk */
8283 + sk_set_socket(child, meta_sk->sk_socket);
8284 + child->sk_wq = meta_sk->sk_wq;
8286 + if (mptcp_add_sock(meta_sk, child, mtreq->loc_id, mtreq->rem_id, GFP_ATOMIC)) {
8287 + reset_mpc(child_tp); /* Has been inherited, but now
8288 + * child_tp->mptcp is NULL
8289 + */
8290 + /* TODO when we support acking the third ack for new subflows,
8291 + * we should silently discard this third ack, by returning NULL.
8293 + * Maybe, at the retransmission we will have enough memory to
8294 + * fully add the socket to the meta-sk.
8295 + */
8296 + goto teardown;
8299 + /* The child is a clone of the meta socket, we must now reset
8300 + * some of the fields
8301 + */
8302 + child_tp->mptcp->rcv_low_prio = mtreq->low_prio;
8304 + /* We should allow proper increase of the snd/rcv-buffers. Thus, we
8305 + * use the original values instead of the bloated up ones from the
8306 + * clone.
8307 + */
8308 + child->sk_sndbuf = mpcb->orig_sk_sndbuf;
8309 + child->sk_rcvbuf = mpcb->orig_sk_rcvbuf;
8311 + child_tp->mptcp->slave_sk = 1;
8312 + child_tp->mptcp->snt_isn = tcp_rsk(req)->snt_isn;
8313 + child_tp->mptcp->rcv_isn = tcp_rsk(req)->rcv_isn;
8314 + child_tp->mptcp->init_rcv_wnd = req->rcv_wnd;
8316 + child_tp->tsq_flags = 0;
8318 + /* Subflows do not use the accept queue, as they
8319 + * are attached immediately to the mpcb.
8320 + */
8321 + inet_csk_reqsk_queue_drop(meta_sk, req, prev);
8322 + return child;
8324 +teardown:
8325 + /* Drop this request - sock creation failed. */
8326 + inet_csk_reqsk_queue_drop(meta_sk, req, prev);
8327 + inet_csk_prepare_forced_close(child);
8328 + tcp_done(child);
8329 + return meta_sk;
8332 +int mptcp_time_wait(struct sock *sk, struct tcp_timewait_sock *tw)
8334 + struct mptcp_tw *mptw;
8335 + struct tcp_sock *tp = tcp_sk(sk);
8336 + struct mptcp_cb *mpcb = tp->mpcb;
8338 + /* Alloc MPTCP-tw-sock */
8339 + mptw = kmem_cache_alloc(mptcp_tw_cache, GFP_ATOMIC);
8340 + if (!mptw)
8341 + return -ENOBUFS;
8343 + atomic_inc(&mpcb->mpcb_refcnt);
8345 + tw->mptcp_tw = mptw;
8346 + mptw->loc_key = mpcb->mptcp_loc_key;
8347 + mptw->meta_tw = mpcb->in_time_wait;
8348 + if (mptw->meta_tw) {
8349 + mptw->rcv_nxt = mptcp_get_rcv_nxt_64(mptcp_meta_tp(tp));
8350 + if (mpcb->mptw_state != TCP_TIME_WAIT)
8351 + mptw->rcv_nxt++;
8353 + rcu_assign_pointer(mptw->mpcb, mpcb);
8355 + spin_lock(&mpcb->tw_lock);
8356 + list_add_rcu(&mptw->list, &tp->mpcb->tw_list);
8357 + mptw->in_list = 1;
8358 + spin_unlock(&mpcb->tw_lock);
8360 + return 0;
8363 +void mptcp_twsk_destructor(struct tcp_timewait_sock *tw)
8365 + struct mptcp_cb *mpcb;
8367 + rcu_read_lock();
8368 + mpcb = rcu_dereference(tw->mptcp_tw->mpcb);
8370 + /* If we are still holding a ref to the mpcb, we have to remove ourselves
8371 + * from the list and drop the ref properly.
8372 + */
8373 + if (mpcb && atomic_inc_not_zero(&mpcb->mpcb_refcnt)) {
8374 + spin_lock(&mpcb->tw_lock);
8375 + if (tw->mptcp_tw->in_list) {
8376 + list_del_rcu(&tw->mptcp_tw->list);
8377 + tw->mptcp_tw->in_list = 0;
8379 + spin_unlock(&mpcb->tw_lock);
8381 + /* Twice, because we increased it above */
8382 + mptcp_mpcb_put(mpcb);
8383 + mptcp_mpcb_put(mpcb);
8386 + rcu_read_unlock();
8388 + kmem_cache_free(mptcp_tw_cache, tw->mptcp_tw);
8391 +/* Updates the rcv_nxt of the time-wait-socks and allows them to ack a
8392 + * data-fin.
8393 + */
8394 +void mptcp_update_tw_socks(const struct tcp_sock *tp, int state)
8396 + struct mptcp_tw *mptw;
8398 + /* Used for sockets that go into tw after the meta
8399 + * (see mptcp_time_wait())
8400 + */
8401 + tp->mpcb->in_time_wait = 1;
8402 + tp->mpcb->mptw_state = state;
8404 + /* Update the time-wait-sock's information */
8405 + rcu_read_lock_bh();
8406 + list_for_each_entry_rcu(mptw, &tp->mpcb->tw_list, list) {
8407 + mptw->meta_tw = 1;
8408 + mptw->rcv_nxt = mptcp_get_rcv_nxt_64(tp);
8410 + /* We want to ack a DATA_FIN, but are still in FIN_WAIT_2 -
8411 + * pretend as if the DATA_FIN has already reached us, so that
8412 + * the checks in tcp_timewait_state_process will pass when the
8413 + * DATA_FIN comes in.
8414 + */
8415 + if (state != TCP_TIME_WAIT)
8416 + mptw->rcv_nxt++;
8418 + rcu_read_unlock_bh();
8421 +void mptcp_tsq_flags(struct sock *sk)
8423 + struct tcp_sock *tp = tcp_sk(sk);
8424 + struct sock *meta_sk = mptcp_meta_sk(sk);
8426 + /* It will be handled as a regular deferred-call */
8427 + if (is_meta_sk(sk))
8428 + return;
8430 + if (list_empty(&tp->mptcp->cb_list)) {
8431 + list_add(&tp->mptcp->cb_list, &tp->mpcb->callback_list);
8432 + /* We need to hold it here, as the sock_hold is not assured
8433 + * by the release_sock as it is done in regular TCP.
8435 + * The subsocket may get inet_csk_destroy'd while it is inside
8436 + * the callback_list.
8437 + */
8438 + sock_hold(sk);
8441 + if (!test_and_set_bit(MPTCP_SUB_DEFERRED, &tcp_sk(meta_sk)->tsq_flags))
8442 + sock_hold(meta_sk);
8445 +void mptcp_tsq_sub_deferred(struct sock *meta_sk)
8447 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
8448 + struct mptcp_tcp_sock *mptcp, *tmp;
8450 + BUG_ON(!is_meta_sk(meta_sk) && !meta_tp->was_meta_sk);
8452 + __sock_put(meta_sk);
8453 + list_for_each_entry_safe(mptcp, tmp, &meta_tp->mpcb->callback_list, cb_list) {
8454 + struct tcp_sock *tp = mptcp->tp;
8455 + struct sock *sk = (struct sock *)tp;
8457 + list_del_init(&mptcp->cb_list);
8458 + sk->sk_prot->release_cb(sk);
8459 + /* Final sock_put (cf. mptcp_tsq_flags()) */
8460 + sock_put(sk);
8464 +struct workqueue_struct *mptcp_wq;
8465 +EXPORT_SYMBOL(mptcp_wq);
8467 +/* Output /proc/net/mptcp */
8468 +static int mptcp_pm_seq_show(struct seq_file *seq, void *v)
8470 + struct tcp_sock *meta_tp;
8471 + struct net *net = seq->private;
8472 + int i, n = 0;
8474 + seq_printf(seq, " sl loc_tok rem_tok v6 "
8475 + "local_address "
8476 + "remote_address "
8477 + "st ns tx_queue rx_queue inode");
8478 + seq_putc(seq, '\n');
8480 + for (i = 0; i < MPTCP_HASH_SIZE; i++) {
8481 + struct hlist_nulls_node *node;
8482 + rcu_read_lock_bh();
8483 + hlist_nulls_for_each_entry_rcu(meta_tp, node,
8484 + &tk_hashtable[i], tk_table) {
8485 + struct mptcp_cb *mpcb = meta_tp->mpcb;
8486 + struct sock *meta_sk = (struct sock *)meta_tp;
8487 + struct inet_sock *isk = inet_sk(meta_sk);
8489 + if (!meta_tp->mpc || !net_eq(net, sock_net(meta_sk)))
8490 + continue;
8492 + seq_printf(seq, "%4d: %04X %04X ", n++,
8493 + mpcb->mptcp_loc_token,
8494 + mpcb->mptcp_rem_token);
8495 + if (meta_sk->sk_family == AF_INET ||
8496 + mptcp_v6_is_v4_mapped(meta_sk)) {
8497 + seq_printf(seq, " 0 %08X:%04X %08X:%04X ",
8498 + isk->inet_saddr,
8499 + ntohs(isk->inet_sport),
8500 + isk->inet_daddr,
8501 + ntohs(isk->inet_dport));
8502 +#if IS_ENABLED(CONFIG_IPV6)
8503 + } else if (meta_sk->sk_family == AF_INET6) {
8504 + struct in6_addr *src = &isk->pinet6->saddr;
8505 + struct in6_addr *dst = &meta_sk->sk_v6_daddr;
8506 + seq_printf(seq, " 1 %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X",
8507 + src->s6_addr32[0], src->s6_addr32[1],
8508 + src->s6_addr32[2], src->s6_addr32[3],
8509 + ntohs(isk->inet_sport),
8510 + dst->s6_addr32[0], dst->s6_addr32[1],
8511 + dst->s6_addr32[2], dst->s6_addr32[3],
8512 + ntohs(isk->inet_dport));
8513 +#endif
8515 + seq_printf(seq, " %02X %02X %08X:%08X %lu",
8516 + meta_sk->sk_state, mpcb->cnt_subflows,
8517 + meta_tp->write_seq - meta_tp->snd_una,
8518 + max_t(int, meta_tp->rcv_nxt -
8519 + meta_tp->copied_seq, 0),
8520 + sock_i_ino(meta_sk));
8521 + seq_putc(seq, '\n');
8523 + rcu_read_unlock_bh();
8526 + return 0;
8529 +static int mptcp_pm_seq_open(struct inode *inode, struct file *file)
8531 + return single_open_net(inode, file, mptcp_pm_seq_show);
8534 +static const struct file_operations mptcp_pm_seq_fops = {
8535 + .owner = THIS_MODULE,
8536 + .open = mptcp_pm_seq_open,
8537 + .read = seq_read,
8538 + .llseek = seq_lseek,
8539 + .release = single_release_net,
8542 +static int mptcp_pm_init_net(struct net *net)
8544 + if (!proc_create("mptcp", S_IRUGO, net->proc_net, &mptcp_pm_seq_fops))
8545 + return -ENOMEM;
8547 + return 0;
8550 +static void mptcp_pm_exit_net(struct net *net)
8552 + remove_proc_entry("mptcp", net->proc_net);
8555 +static struct pernet_operations mptcp_pm_proc_ops = {
8556 + .init = mptcp_pm_init_net,
8557 + .exit = mptcp_pm_exit_net,
8560 +/* General initialization of mptcp */
8561 +void __init mptcp_init(void)
8563 + int i;
8564 + struct ctl_table_header *mptcp_sysctl;
8566 + mptcp_sock_cache = kmem_cache_create("mptcp_sock",
8567 + sizeof(struct mptcp_tcp_sock),
8568 + 0, SLAB_HWCACHE_ALIGN,
8569 + NULL);
8570 + if (!mptcp_sock_cache)
8571 + goto mptcp_sock_cache_failed;
8573 + mptcp_cb_cache = kmem_cache_create("mptcp_cb", sizeof(struct mptcp_cb),
8574 + 0, SLAB_DESTROY_BY_RCU|SLAB_HWCACHE_ALIGN,
8575 + NULL);
8576 + if (!mptcp_cb_cache)
8577 + goto mptcp_cb_cache_failed;
8579 + mptcp_tw_cache = kmem_cache_create("mptcp_tw", sizeof(struct mptcp_tw),
8580 + 0, SLAB_DESTROY_BY_RCU|SLAB_HWCACHE_ALIGN,
8581 + NULL);
8582 + if (!mptcp_tw_cache)
8583 + goto mptcp_tw_cache_failed;
8585 + get_random_bytes(mptcp_secret, sizeof(mptcp_secret));
8587 + mptcp_wq = alloc_workqueue("mptcp_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 8);
8588 + if (!mptcp_wq)
8589 + goto alloc_workqueue_failed;
8591 + for (i = 0; i < MPTCP_HASH_SIZE; i++) {
8592 + INIT_HLIST_NULLS_HEAD(&tk_hashtable[i], i);
8593 + INIT_LIST_HEAD(&mptcp_reqsk_htb[i]);
8594 + INIT_HLIST_NULLS_HEAD(&mptcp_reqsk_tk_htb[i], i);
8597 + spin_lock_init(&mptcp_reqsk_hlock);
8598 + spin_lock_init(&mptcp_tk_hashlock);
8600 + if (register_pernet_subsys(&mptcp_pm_proc_ops))
8601 + goto pernet_failed;
8603 +#if IS_ENABLED(CONFIG_IPV6)
8604 + if (mptcp_pm_v6_init())
8605 + goto mptcp_pm_v6_failed;
8606 +#endif
8607 + if (mptcp_pm_v4_init())
8608 + goto mptcp_pm_v4_failed;
8610 + mptcp_sysctl = register_net_sysctl(&init_net, "net/mptcp", mptcp_table);
8611 + if (!mptcp_sysctl)
8612 + goto register_sysctl_failed;
8614 + if (mptcp_register_path_manager(&mptcp_pm_default))
8615 + goto register_pm_failed;
8617 + pr_info("MPTCP: Stable release v0.89.0-rc");
8619 + mptcp_init_failed = false;
8621 + return;
8623 +register_pm_failed:
8624 + unregister_net_sysctl_table(mptcp_sysctl);
8625 +register_sysctl_failed:
8626 + mptcp_pm_v4_undo();
8627 +mptcp_pm_v4_failed:
8628 +#if IS_ENABLED(CONFIG_IPV6)
8629 + mptcp_pm_v6_undo();
8630 +mptcp_pm_v6_failed:
8631 +#endif
8632 + unregister_pernet_subsys(&mptcp_pm_proc_ops);
8633 +pernet_failed:
8634 + destroy_workqueue(mptcp_wq);
8635 +alloc_workqueue_failed:
8636 + kmem_cache_destroy(mptcp_tw_cache);
8637 +mptcp_tw_cache_failed:
8638 + kmem_cache_destroy(mptcp_cb_cache);
8639 +mptcp_cb_cache_failed:
8640 + kmem_cache_destroy(mptcp_sock_cache);
8641 +mptcp_sock_cache_failed:
8642 + mptcp_init_failed = true;
8644 diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_fullmesh.c linux-3.14.45/net/mptcp/mptcp_fullmesh.c
8645 --- linux-3.14.45.orig/net/mptcp/mptcp_fullmesh.c 1970-01-01 01:00:00.000000000 +0100
8646 +++ linux-3.14.45/net/mptcp/mptcp_fullmesh.c 2015-06-24 14:15:48.891862483 +0200
8647 @@ -0,0 +1,1313 @@
8648 +#include <linux/module.h>
8650 +#include <net/mptcp.h>
8651 +#include <net/mptcp_v4.h>
8653 +#if IS_ENABLED(CONFIG_IPV6)
8654 +#include <net/mptcp_v6.h>
8655 +#include <net/addrconf.h>
8656 +#endif
8658 +enum {
8659 + MPTCP_EVENT_ADD = 1,
8660 + MPTCP_EVENT_DEL,
8661 + MPTCP_EVENT_MOD,
8664 +struct mptcp_loc_addr {
8665 + struct mptcp_loc4 locaddr4[MPTCP_MAX_ADDR];
8666 + u8 loc4_bits;
8667 + u8 next_v4_index;
8669 + struct mptcp_loc6 locaddr6[MPTCP_MAX_ADDR];
8670 + u8 loc6_bits;
8671 + u8 next_v6_index;
8674 +struct mptcp_addr_event {
8675 + struct list_head list;
8676 + unsigned short family;
8677 + u8 code:7,
8678 + low_prio:1;
8679 + union inet_addr addr;
8682 +struct fullmesh_priv {
8683 + /* Worker struct for subflow establishment */
8684 + struct work_struct subflow_work;
8685 + /* Delayed worker, when the routing-tables are not yet ready. */
8686 + struct delayed_work subflow_retry_work;
8688 + struct mptcp_cb *mpcb;
8690 + u16 remove_addrs; /* Addresses to remove */
8691 + u8 announced_addrs_v4; /* IPv4 Addresses we did announce */
8692 + u8 announced_addrs_v6; /* IPv6 Addresses we did announce */
8694 + u8 add_addr; /* Are we sending an add_addr? */
8697 +struct mptcp_fm_ns {
8698 + struct mptcp_loc_addr __rcu *local;
8699 + spinlock_t local_lock; /* Protecting the above pointer */
8700 + struct list_head events;
8701 + struct delayed_work address_worker;
8703 + struct net *net;
8706 +static struct mptcp_pm_ops full_mesh __read_mostly;
8708 +static struct mptcp_fm_ns *fm_get_ns(struct net *net)
8710 + return (struct mptcp_fm_ns *)net->mptcp.path_managers[MPTCP_PM_FULLMESH];
8713 +static void full_mesh_create_subflows(struct sock *meta_sk);
8715 +static void retry_subflow_worker(struct work_struct *work)
8717 + struct delayed_work *delayed_work = container_of(work,
8718 + struct delayed_work,
8719 + work);
8720 + struct fullmesh_priv *pm_priv = container_of(delayed_work,
8721 + struct fullmesh_priv,
8722 + subflow_retry_work);
8723 + struct mptcp_cb *mpcb = pm_priv->mpcb;
8724 + struct sock *meta_sk = mpcb->meta_sk;
8725 + struct mptcp_loc_addr *mptcp_local;
8726 + struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk));
8727 + int iter = 0, i;
8729 + /* We need a local (stable) copy of the address-list. Really, it is not
8730 + * such a big deal, if the address-list is not 100% up-to-date.
8731 + */
8732 + rcu_read_lock_bh();
8733 + mptcp_local = rcu_dereference_bh(fm_ns->local);
8734 + mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local), GFP_ATOMIC);
8735 + rcu_read_unlock_bh();
8737 + if (!mptcp_local)
8738 + return;
8740 +next_subflow:
8741 + if (iter) {
8742 + release_sock(meta_sk);
8743 + mutex_unlock(&mpcb->mpcb_mutex);
8745 + yield();
8747 + mutex_lock(&mpcb->mpcb_mutex);
8748 + lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);
8750 + iter++;
8752 + if (sock_flag(meta_sk, SOCK_DEAD))
8753 + goto exit;
8755 + mptcp_for_each_bit_set(mpcb->rem4_bits, i) {
8756 + struct mptcp_rem4 *rem = &mpcb->remaddr4[i];
8757 + /* Do we need to retry establishing a subflow ? */
8758 + if (rem->retry_bitfield) {
8759 + int i = mptcp_find_free_index(~rem->retry_bitfield);
8761 + rem->bitfield |= (1 << i);
8762 + rem->retry_bitfield &= ~(1 << i);
8764 + mptcp_init4_subsockets(meta_sk, &mptcp_local->locaddr4[i], rem);
8765 + goto next_subflow;
8769 +#if IS_ENABLED(CONFIG_IPV6)
8770 + mptcp_for_each_bit_set(mpcb->rem6_bits, i) {
8771 + struct mptcp_rem6 *rem = &mpcb->remaddr6[i];
8773 + /* Do we need to retry establishing a subflow ? */
8774 + if (rem->retry_bitfield) {
8775 + int i = mptcp_find_free_index(~rem->retry_bitfield);
8777 + rem->bitfield |= (1 << i);
8778 + rem->retry_bitfield &= ~(1 << i);
8780 + mptcp_init6_subsockets(meta_sk, &mptcp_local->locaddr6[i], rem);
8781 + goto next_subflow;
8784 +#endif
8786 +exit:
8787 + kfree(mptcp_local);
8788 + release_sock(meta_sk);
8789 + mutex_unlock(&mpcb->mpcb_mutex);
8790 + sock_put(meta_sk);
8793 +/**
8794 + * Create all new subflows, by doing calls to mptcp_initX_subsockets
8796 + * This function uses a goto next_subflow, to allow releasing the lock between
8797 + * new subflows and giving other processes a chance to do some work on the
8798 + * socket and potentially finishing the communication.
8799 + **/
8800 +static void create_subflow_worker(struct work_struct *work)
8802 + struct fullmesh_priv *pm_priv = container_of(work,
8803 + struct fullmesh_priv,
8804 + subflow_work);
8805 + struct mptcp_cb *mpcb = pm_priv->mpcb;
8806 + struct sock *meta_sk = mpcb->meta_sk;
8807 + struct mptcp_loc_addr *mptcp_local;
8808 + struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk));
8809 + int iter = 0, retry = 0;
8810 + int i;
8812 + /* We need a local (stable) copy of the address-list. Really, it is not
8813 + * such a big deal, if the address-list is not 100% up-to-date.
8814 + */
8815 + rcu_read_lock_bh();
8816 + mptcp_local = rcu_dereference_bh(fm_ns->local);
8817 + mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local), GFP_ATOMIC);
8818 + rcu_read_unlock_bh();
8820 + if (!mptcp_local)
8821 + return;
8823 +next_subflow:
8824 + if (iter) {
8825 + release_sock(meta_sk);
8826 + mutex_unlock(&mpcb->mpcb_mutex);
8828 + yield();
8830 + mutex_lock(&mpcb->mpcb_mutex);
8831 + lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);
8833 + iter++;
8835 + if (sock_flag(meta_sk, SOCK_DEAD))
8836 + goto exit;
8838 + if (mpcb->master_sk &&
8839 + !tcp_sk(mpcb->master_sk)->mptcp->fully_established)
8840 + goto exit;
8842 + mptcp_for_each_bit_set(mpcb->rem4_bits, i) {
8843 + struct mptcp_rem4 *rem;
8844 + u8 remaining_bits;
8846 + rem = &mpcb->remaddr4[i];
8847 + remaining_bits = ~(rem->bitfield) & mptcp_local->loc4_bits;
8849 + /* Are there still combinations to handle? */
8850 + if (remaining_bits) {
8851 + int i = mptcp_find_free_index(~remaining_bits);
8853 + rem->bitfield |= (1 << i);
8855 + /* If a route is not yet available then retry once */
8856 + if (mptcp_init4_subsockets(meta_sk, &mptcp_local->locaddr4[i],
8857 + rem) == -ENETUNREACH)
8858 + retry = rem->retry_bitfield |= (1 << i);
8859 + goto next_subflow;
8863 +#if IS_ENABLED(CONFIG_IPV6)
8864 + mptcp_for_each_bit_set(mpcb->rem6_bits, i) {
8865 + struct mptcp_rem6 *rem;
8866 + u8 remaining_bits;
8868 + rem = &mpcb->remaddr6[i];
8869 + remaining_bits = ~(rem->bitfield) & mptcp_local->loc6_bits;
8871 + /* Are there still combinations to handle? */
8872 + if (remaining_bits) {
8873 + int i = mptcp_find_free_index(~remaining_bits);
8875 + rem->bitfield |= (1 << i);
8877 + /* If a route is not yet available then retry once */
8878 + if (mptcp_init6_subsockets(meta_sk, &mptcp_local->locaddr6[i],
8879 + rem) == -ENETUNREACH)
8880 + retry = rem->retry_bitfield |= (1 << i);
8881 + goto next_subflow;
8884 +#endif
8886 + if (retry && !delayed_work_pending(&pm_priv->subflow_retry_work)) {
8887 + sock_hold(meta_sk);
8888 + queue_delayed_work(mptcp_wq, &pm_priv->subflow_retry_work,
8889 + msecs_to_jiffies(MPTCP_SUBFLOW_RETRY_DELAY));
8892 +exit:
8893 + kfree(mptcp_local);
8894 + release_sock(meta_sk);
8895 + mutex_unlock(&mpcb->mpcb_mutex);
8896 + sock_put(meta_sk);
8899 +static void update_remove_addrs(u8 addr_id, struct sock *meta_sk,
8900 + struct mptcp_loc_addr *mptcp_local)
8902 + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
8903 + struct fullmesh_priv *fmp = (struct fullmesh_priv *)&mpcb->mptcp_pm[0];
8904 + struct sock *sk;
8905 + int i;
8907 + fmp->remove_addrs |= (1 << addr_id);
8908 + /* v4 goes from 0 to MPTCP_MAX_ADDR, v6 beyond */
8909 + if (addr_id < MPTCP_MAX_ADDR) {
8910 + fmp->announced_addrs_v4 &= ~(1 << addr_id);
8912 + mptcp_for_each_bit_set(mpcb->rem4_bits, i) {
8913 + mpcb->remaddr4[i].bitfield &= mptcp_local->loc4_bits;
8914 + mpcb->remaddr4[i].retry_bitfield &= mptcp_local->loc4_bits;
8916 + } else {
8917 + fmp->announced_addrs_v6 &= ~(1 << (addr_id - MPTCP_MAX_ADDR));
8919 + mptcp_for_each_bit_set(mpcb->rem6_bits, i) {
8920 + mpcb->remaddr6[i].bitfield &= mptcp_local->loc6_bits;
8921 + mpcb->remaddr6[i].retry_bitfield &= mptcp_local->loc6_bits;
8925 + sk = mptcp_select_ack_sock(meta_sk, 0);
8926 + if (sk)
8927 + tcp_send_ack(sk);
8930 +static int mptcp_find_address(struct mptcp_loc_addr *mptcp_local,
8931 + sa_family_t family, union inet_addr *addr)
8933 + int i;
8934 + u8 loc_bits;
8935 + bool found = false;
8937 + if (family == AF_INET)
8938 + loc_bits = mptcp_local->loc4_bits;
8939 + else
8940 + loc_bits = mptcp_local->loc6_bits;
8942 + mptcp_for_each_bit_set(loc_bits, i) {
8943 + if (family == AF_INET &&
8944 + mptcp_local->locaddr4[i].addr.s_addr == addr->in.s_addr) {
8945 + found = true;
8946 + break;
8948 + if (family == AF_INET6 &&
8949 + ipv6_addr_equal(&mptcp_local->locaddr6[i].addr,
8950 + &addr->in6)) {
8951 + found = true;
8952 + break;
8956 + if (!found)
8957 + return -1;
8959 + return i;
8962 +static void mptcp_address_worker(struct work_struct *work)
8964 + struct delayed_work *delayed_work = container_of(work,
8965 + struct delayed_work,
8966 + work);
8967 + struct mptcp_fm_ns *fm_ns = container_of(delayed_work,
8968 + struct mptcp_fm_ns,
8969 + address_worker);
8970 + struct net *net = fm_ns->net;
8971 + struct mptcp_addr_event *event = NULL;
8972 + struct mptcp_loc_addr *mptcp_local, *old;
8973 + int i, id = -1; /* id is used in the socket-code on a delete-event */
8974 + bool success; /* Used to indicate if we succeeded handling the event */
8976 +next_event:
8977 + success = false;
8978 + kfree(event);
8980 + /* First, let's dequeue an event from our event-list */
8981 + rcu_read_lock_bh();
8982 + spin_lock(&fm_ns->local_lock);
8984 + event = list_first_entry_or_null(&fm_ns->events,
8985 + struct mptcp_addr_event, list);
8986 + if (!event) {
8987 + spin_unlock(&fm_ns->local_lock);
8988 + rcu_read_unlock_bh();
8989 + return;
8992 + list_del(&event->list);
8994 + mptcp_local = rcu_dereference_bh(fm_ns->local);
8996 + if (event->code == MPTCP_EVENT_DEL) {
8997 + id = mptcp_find_address(mptcp_local, event->family, &event->addr);
8999 + /* Not in the list - so we don't care */
9000 + if (id < 0)
9001 + goto duno;
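+ /* Work on a private copy of the address-list and publish it with
+ * rcu_assign_pointer(), so readers never see a half-updated list.
+ */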
9003 + old = mptcp_local;
9004 + mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local),
9005 + GFP_ATOMIC);
9006 + if (!mptcp_local)
9007 + goto duno;
9009 + if (event->family == AF_INET)
9010 + mptcp_local->loc4_bits &= ~(1 << id);
9011 + else
9012 + mptcp_local->loc6_bits &= ~(1 << id);
9014 + rcu_assign_pointer(fm_ns->local, mptcp_local);
9015 + kfree(old);
9016 + } else {
9017 + int i = mptcp_find_address(mptcp_local, event->family, &event->addr);
9018 + int j = i;
9020 + if (j < 0) {
9021 + /* Not in the list, so we have to find an empty slot */
9022 + if (event->family == AF_INET)
9023 + i = __mptcp_find_free_index(mptcp_local->loc4_bits, -1,
9024 + mptcp_local->next_v4_index);
9025 + if (event->family == AF_INET6)
9026 + i = __mptcp_find_free_index(mptcp_local->loc6_bits, -1,
9027 + mptcp_local->next_v6_index);
9029 + if (i < 0) {
9030 + mptcp_debug("%s no more space\n", __func__);
9031 + goto duno;
9034 + /* It might have been a MOD-event. */
9035 + event->code = MPTCP_EVENT_ADD;
9036 + } else {
9037 + /* Let's check if anything changes */
9038 + if (event->family == AF_INET &&
9039 + event->low_prio == mptcp_local->locaddr4[i].low_prio)
9040 + goto duno;
9042 + if (event->family == AF_INET6 &&
9043 + event->low_prio == mptcp_local->locaddr6[i].low_prio)
9044 + goto duno;
9047 + old = mptcp_local;
9048 + mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local),
9049 + GFP_ATOMIC);
9050 + if (!mptcp_local)
9051 + goto duno;
9053 + if (event->family == AF_INET) {
9054 + mptcp_local->locaddr4[i].addr.s_addr = event->addr.in.s_addr;
9055 + mptcp_local->locaddr4[i].loc4_id = i + 1;
9056 + mptcp_local->locaddr4[i].low_prio = event->low_prio;
9057 + } else {
9058 + mptcp_local->locaddr6[i].addr = event->addr.in6;
9059 + mptcp_local->locaddr6[i].loc6_id = i + MPTCP_MAX_ADDR;
9060 + mptcp_local->locaddr6[i].low_prio = event->low_prio;
9063 + if (j < 0) {
9064 + if (event->family == AF_INET) {
9065 + mptcp_local->loc4_bits |= (1 << i);
9066 + mptcp_local->next_v4_index = i + 1;
9067 + } else {
9068 + mptcp_local->loc6_bits |= (1 << i);
9069 + mptcp_local->next_v6_index = i + 1;
9073 + rcu_assign_pointer(fm_ns->local, mptcp_local);
9074 + kfree(old);
9076 + success = true;
9078 +duno:
9079 + spin_unlock(&fm_ns->local_lock);
9080 + rcu_read_unlock_bh();
9082 + if (!success)
9083 + goto next_event;
9085 + /* Now we iterate over the MPTCP-sockets and apply the event. */
9086 + for (i = 0; i < MPTCP_HASH_SIZE; i++) {
9087 + struct hlist_nulls_node *node;
9088 + struct tcp_sock *meta_tp;
9090 + rcu_read_lock_bh();
9091 + hlist_nulls_for_each_entry_rcu(meta_tp, node, &tk_hashtable[i],
9092 + tk_table) {
9093 + struct mptcp_cb *mpcb = meta_tp->mpcb;
9094 + struct sock *meta_sk = (struct sock *)meta_tp, *sk;
9095 + struct fullmesh_priv *fmp = (struct fullmesh_priv *)&mpcb->mptcp_pm[0];
9097 + if (sock_net(meta_sk) != net)
9098 + continue;
9100 + if (unlikely(!atomic_inc_not_zero(&meta_sk->sk_refcnt)))
9101 + continue;
9103 + bh_lock_sock(meta_sk);
9105 + if (!meta_tp->mpc || !is_meta_sk(meta_sk) ||
9106 + mpcb->infinite_mapping_snd ||
9107 + mpcb->infinite_mapping_rcv ||
9108 + mpcb->send_infinite_mapping)
9109 + goto next;
9111 + /* The pm may have changed in the meantime */
9112 + if (mpcb->pm_ops != &full_mesh)
9113 + goto next;
9115 + if (sock_owned_by_user(meta_sk)) {
9116 + if (!test_and_set_bit(MPTCP_PATH_MANAGER,
9117 + &meta_tp->tsq_flags))
9118 + sock_hold(meta_sk);
9120 + goto next;
9123 + if (event->code == MPTCP_EVENT_ADD) {
9124 + if (event->family == AF_INET)
9125 + fmp->add_addr++;
9126 +#if IS_ENABLED(CONFIG_IPV6)
9127 + if (event->family == AF_INET6)
9128 + fmp->add_addr++;
9129 +#endif
9131 + sk = mptcp_select_ack_sock(meta_sk, 0);
9132 + if (sk)
9133 + tcp_send_ack(sk);
9135 + full_mesh_create_subflows(meta_sk);
9138 + if (event->code == MPTCP_EVENT_DEL) {
9139 + struct sock *sk, *tmpsk;
9140 + struct mptcp_loc_addr *mptcp_local;
9141 + bool found = false;
9143 + mptcp_local = rcu_dereference_bh(fm_ns->local);
9145 + /* Look for the socket and remove him */
9146 + mptcp_for_each_sk_safe(mpcb, sk, tmpsk) {
9147 + if ((event->family == AF_INET6 &&
9148 + (sk->sk_family == AF_INET ||
9149 + mptcp_v6_is_v4_mapped(sk))) ||
9150 + (event->family == AF_INET &&
9151 + (sk->sk_family == AF_INET6 &&
9152 + !mptcp_v6_is_v4_mapped(sk))))
9153 + continue;
9155 + if (event->family == AF_INET &&
9156 + (sk->sk_family == AF_INET ||
9157 + mptcp_v6_is_v4_mapped(sk)) &&
9158 + inet_sk(sk)->inet_saddr != event->addr.in.s_addr)
9159 + continue;
9161 + if (event->family == AF_INET6 &&
9162 + sk->sk_family == AF_INET6 &&
9163 + !ipv6_addr_equal(&inet6_sk(sk)->saddr, &event->addr.in6))
9164 + continue;
9166 + /* Reinject, so that pf = 1 and so we
9167 + * won't select this one as the
9168 + * ack-sock.
9169 + */
9170 + mptcp_reinject_data(sk, 0);
9172 + /* A master is special, it has
9173 + * address-id 0
9174 + */
9175 + if (!tcp_sk(sk)->mptcp->loc_id)
9176 + update_remove_addrs(0, meta_sk, mptcp_local);
9177 + else if (tcp_sk(sk)->mptcp->loc_id != id)
9178 + update_remove_addrs(tcp_sk(sk)->mptcp->loc_id, meta_sk, mptcp_local);
9180 + mptcp_sub_force_close(sk);
9181 + found = true;
9184 + if (!found)
9185 + goto next;
9187 + /* The id may have been given by the event,
9188 + * matching on a local address. And it may not
9189 + * have matched on one of the above sockets,
9190 + * because the client never created a subflow.
9191 + * So, we have to finally remove it here.
9192 + */
9193 + if (id > 0)
9194 + update_remove_addrs(id, meta_sk, mptcp_local);
9197 + if (event->code == MPTCP_EVENT_MOD) {
9198 + struct sock *sk;
9200 + mptcp_for_each_sk(mpcb, sk) {
9201 + struct tcp_sock *tp = tcp_sk(sk);
9202 + if (event->family == AF_INET &&
9203 + (sk->sk_family == AF_INET ||
9204 + mptcp_v6_is_v4_mapped(sk)) &&
9205 + inet_sk(sk)->inet_saddr == event->addr.in.s_addr) {
9206 + if (event->low_prio != tp->mptcp->low_prio) {
9207 + tp->mptcp->send_mp_prio = 1;
9208 + tp->mptcp->low_prio = event->low_prio;
9210 + tcp_send_ack(sk);
9214 + if (event->family == AF_INET6 &&
9215 + sk->sk_family == AF_INET6 &&
9216 + !ipv6_addr_equal(&inet6_sk(sk)->saddr, &event->addr.in6)) {
9217 + if (event->low_prio != tp->mptcp->low_prio) {
9218 + tp->mptcp->send_mp_prio = 1;
9219 + tp->mptcp->low_prio = event->low_prio;
9221 + tcp_send_ack(sk);
9226 +next:
9227 + bh_unlock_sock(meta_sk);
9228 + sock_put(meta_sk);
9230 + rcu_read_unlock_bh();
9232 + goto next_event;
9235 +static struct mptcp_addr_event *lookup_similar_event(struct net *net,
9236 + struct mptcp_addr_event *event)
9238 + struct mptcp_addr_event *eventq;
9239 + struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
9241 + list_for_each_entry(eventq, &fm_ns->events, list) {
9242 + if (eventq->family != event->family)
9243 + continue;
9244 + if (event->family == AF_INET) {
9245 + if (eventq->addr.in.s_addr == event->addr.in.s_addr)
9246 + return eventq;
9247 + } else {
9248 + if (ipv6_addr_equal(&eventq->addr.in6, &event->addr.in6))
9249 + return eventq;
9252 + return NULL;
9255 +/* We already hold the net-namespace MPTCP-lock */
9256 +static void add_pm_event(struct net *net, struct mptcp_addr_event *event)
9258 + struct mptcp_addr_event *eventq = lookup_similar_event(net, event);
9259 + struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
9261 + if (eventq) {
9262 + switch (event->code) {
9263 + case MPTCP_EVENT_DEL:
9264 + list_del(&eventq->list);
9265 + kfree(eventq);
9266 + break;
9267 + case MPTCP_EVENT_ADD:
9268 + eventq->low_prio = event->low_prio;
9269 + eventq->code = MPTCP_EVENT_ADD;
9270 + return;
9271 + case MPTCP_EVENT_MOD:
9272 + eventq->low_prio = event->low_prio;
9273 + return;
9277 + /* OK, we have to add the new address to the wait queue */
9278 + eventq = kmemdup(event, sizeof(struct mptcp_addr_event), GFP_ATOMIC);
9279 + if (!eventq)
9280 + return;
9282 + list_add_tail(&eventq->list, &fm_ns->events);
9284 + /* Create work-queue */
9285 + if (!delayed_work_pending(&fm_ns->address_worker))
9286 + queue_delayed_work(mptcp_wq, &fm_ns->address_worker,
9287 + msecs_to_jiffies(500));
9290 +static void addr4_event_handler(struct in_ifaddr *ifa, unsigned long event,
9291 + struct net *net)
9293 + struct net_device *netdev = ifa->ifa_dev->dev;
9294 + struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
9295 + struct mptcp_addr_event mpevent;
9297 + if (ifa->ifa_scope > RT_SCOPE_LINK ||
9298 + ipv4_is_loopback(ifa->ifa_local))
9299 + return;
9301 + spin_lock_bh(&fm_ns->local_lock);
9303 + mpevent.family = AF_INET;
9304 + mpevent.addr.in.s_addr = ifa->ifa_local;
9305 + mpevent.low_prio = (netdev->flags & IFF_MPBACKUP) ? 1 : 0;
9307 + if (event == NETDEV_DOWN || !netif_running(netdev) ||
9308 + (netdev->flags & IFF_NOMULTIPATH))
9309 + mpevent.code = MPTCP_EVENT_DEL;
9310 + else if (event == NETDEV_UP)
9311 + mpevent.code = MPTCP_EVENT_ADD;
9312 + else if (event == NETDEV_CHANGE)
9313 + mpevent.code = MPTCP_EVENT_MOD;
9315 + add_pm_event(net, &mpevent);
9317 + spin_unlock_bh(&fm_ns->local_lock);
9318 + return;
9321 +/* React to IPv4-addr add/rem-events */
9322 +static int mptcp_pm_inetaddr_event(struct notifier_block *this,
9323 + unsigned long event, void *ptr)
9325 + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
9326 + struct net *net = dev_net(ifa->ifa_dev->dev);
9328 + addr4_event_handler(ifa, event, net);
9330 + return NOTIFY_DONE;
9333 +static struct notifier_block mptcp_pm_inetaddr_notifier = {
9334 + .notifier_call = mptcp_pm_inetaddr_event,
9337 +#if IS_ENABLED(CONFIG_IPV6)
9339 +/* IPV6-related address/interface watchers */
9340 +struct mptcp_dad_data {
9341 + struct timer_list timer;
9342 + struct inet6_ifaddr *ifa;
9345 +static void dad_callback(unsigned long arg);
9346 +static int inet6_addr_event(struct notifier_block *this,
9347 + unsigned long event, void *ptr);
9349 +static int ipv6_is_in_dad_state(struct inet6_ifaddr *ifa)
9351 + return ((ifa->flags & IFA_F_TENTATIVE) &&
9352 + ifa->state == INET6_IFADDR_STATE_DAD);
9355 +static void dad_init_timer(struct mptcp_dad_data *data,
9356 + struct inet6_ifaddr *ifa)
9358 + data->ifa = ifa;
9359 + data->timer.data = (unsigned long)data;
9360 + data->timer.function = dad_callback;
9361 + if (ifa->idev->cnf.rtr_solicit_delay)
9362 + data->timer.expires = jiffies + ifa->idev->cnf.rtr_solicit_delay;
9363 + else
9364 + data->timer.expires = jiffies + (HZ/10);
9367 +static void dad_callback(unsigned long arg)
9369 + struct mptcp_dad_data *data = (struct mptcp_dad_data *)arg;
9371 + if (ipv6_is_in_dad_state(data->ifa)) {
9372 + dad_init_timer(data, data->ifa);
9373 + add_timer(&data->timer);
9374 + } else {
9375 + inet6_addr_event(NULL, NETDEV_UP, data->ifa);
9376 + in6_ifa_put(data->ifa);
9377 + kfree(data);
9381 +static inline void dad_setup_timer(struct inet6_ifaddr *ifa)
9383 + struct mptcp_dad_data *data;
9385 + data = kmalloc(sizeof(*data), GFP_ATOMIC);
9387 + if (!data)
9388 + return;
9390 + init_timer(&data->timer);
9391 + dad_init_timer(data, ifa);
9392 + add_timer(&data->timer);
9393 + in6_ifa_hold(ifa);
9396 +static void addr6_event_handler(struct inet6_ifaddr *ifa, unsigned long event,
9397 + struct net *net)
9399 + struct net_device *netdev = ifa->idev->dev;
9400 + int addr_type = ipv6_addr_type(&ifa->addr);
9401 + struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
9402 + struct mptcp_addr_event mpevent;
9404 + if (ifa->scope > RT_SCOPE_LINK ||
9405 + addr_type == IPV6_ADDR_ANY ||
9406 + (addr_type & IPV6_ADDR_LOOPBACK) ||
9407 + (addr_type & IPV6_ADDR_LINKLOCAL))
9408 + return;
9410 + spin_lock_bh(&fm_ns->local_lock);
9412 + mpevent.family = AF_INET6;
9413 + mpevent.addr.in6 = ifa->addr;
9414 + mpevent.low_prio = (netdev->flags & IFF_MPBACKUP) ? 1 : 0;
9416 + if (event == NETDEV_DOWN || !netif_running(netdev) ||
9417 + (netdev->flags & IFF_NOMULTIPATH))
9418 + mpevent.code = MPTCP_EVENT_DEL;
9419 + else if (event == NETDEV_UP)
9420 + mpevent.code = MPTCP_EVENT_ADD;
9421 + else if (event == NETDEV_CHANGE)
9422 + mpevent.code = MPTCP_EVENT_MOD;
9424 + add_pm_event(net, &mpevent);
9426 + spin_unlock_bh(&fm_ns->local_lock);
9427 + return;
9430 +/* React to IPv6-addr add/rem-events */
9431 +static int inet6_addr_event(struct notifier_block *this, unsigned long event,
9432 + void *ptr)
9434 + struct inet6_ifaddr *ifa6 = (struct inet6_ifaddr *)ptr;
9435 + struct net *net = dev_net(ifa6->idev->dev);
9437 + if (ipv6_is_in_dad_state(ifa6))
9438 + dad_setup_timer(ifa6);
9439 + else
9440 + addr6_event_handler(ifa6, event, net);
9442 + return NOTIFY_DONE;
9445 +static struct notifier_block inet6_addr_notifier = {
9446 + .notifier_call = inet6_addr_event,
9449 +#endif
9451 +/* React to ifup/down-events */
9452 +static int netdev_event(struct notifier_block *this, unsigned long event,
9453 + void *ptr)
9455 + struct net_device *dev = netdev_notifier_info_to_dev(ptr);
9456 + struct in_device *in_dev;
9457 +#if IS_ENABLED(CONFIG_IPV6)
9458 + struct inet6_dev *in6_dev;
9459 +#endif
9461 + if (!(event == NETDEV_UP || event == NETDEV_DOWN ||
9462 + event == NETDEV_CHANGE))
9463 + return NOTIFY_DONE;
9465 + rcu_read_lock();
9466 + in_dev = __in_dev_get_rtnl(dev);
9468 + if (in_dev) {
9469 + for_ifa(in_dev) {
9470 + mptcp_pm_inetaddr_event(NULL, event, ifa);
9471 + } endfor_ifa(in_dev);
9474 +#if IS_ENABLED(CONFIG_IPV6)
9475 + in6_dev = __in6_dev_get(dev);
9477 + if (in6_dev) {
9478 + struct inet6_ifaddr *ifa6;
9479 + list_for_each_entry(ifa6, &in6_dev->addr_list, if_list)
9480 + inet6_addr_event(NULL, event, ifa6);
9482 +#endif
9484 + rcu_read_unlock();
9485 + return NOTIFY_DONE;
9488 +static struct notifier_block mptcp_pm_netdev_notifier = {
9489 + .notifier_call = netdev_event,
9492 +static void full_mesh_new_session(struct sock *meta_sk, int index)
9494 + struct mptcp_loc_addr *mptcp_local;
9495 + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
9496 + struct fullmesh_priv *fmp = (struct fullmesh_priv *)&mpcb->mptcp_pm[0];
9497 + struct net *net = sock_net(meta_sk);
9498 + struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
9499 + struct sock *sk;
9500 + int i;
9502 + if (index == -1) {
9503 + mptcp_fallback_default(mpcb);
9504 + return;
9507 + /* Initialize workqueue-struct */
9508 + INIT_WORK(&fmp->subflow_work, create_subflow_worker);
9509 + INIT_DELAYED_WORK(&fmp->subflow_retry_work, retry_subflow_worker);
9510 + fmp->mpcb = mpcb;
9512 + sk = mptcp_select_ack_sock(meta_sk, 0);
9514 + rcu_read_lock();
9515 + mptcp_local = rcu_dereference(fm_ns->local);
9517 + /* Look for the address among the local addresses */
9518 + mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) {
9519 + __be32 ifa_address = mptcp_local->locaddr4[i].addr.s_addr;
9521 + /* We do not need to announce the initial subflow's address again */
9522 + if ((meta_sk->sk_family == AF_INET ||
9523 + mptcp_v6_is_v4_mapped(meta_sk)) &&
9524 + inet_sk(meta_sk)->inet_saddr == ifa_address)
9525 + continue;
9527 + fmp->add_addr++;
9529 + if (sk)
9530 + tcp_send_ack(sk);
9533 +#if IS_ENABLED(CONFIG_IPV6)
9534 + mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) {
9535 + struct in6_addr *ifa6 = &mptcp_local->locaddr6[i].addr;
9537 + /* We do not need to announce the initial subflow's address again */
9538 + if (meta_sk->sk_family == AF_INET6 &&
9539 + ipv6_addr_equal(&inet6_sk(meta_sk)->saddr, ifa6))
9540 + continue;
9542 + fmp->add_addr++;
9544 + if (sk)
9545 + tcp_send_ack(sk);
9547 +#endif
9549 + rcu_read_unlock();
9551 + if (meta_sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(meta_sk))
9552 + fmp->announced_addrs_v4 |= (1 << index);
9553 + else
9554 + fmp->announced_addrs_v6 |= (1 << index);
9557 +static void full_mesh_create_subflows(struct sock *meta_sk)
9559 + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
9560 + struct fullmesh_priv *pm_priv = (struct fullmesh_priv *)&mpcb->mptcp_pm[0];
9562 + if (mpcb->infinite_mapping_snd || mpcb->infinite_mapping_rcv ||
9563 + mpcb->send_infinite_mapping ||
9564 + mpcb->server_side || sock_flag(meta_sk, SOCK_DEAD))
9565 + return;
9567 + /* The master may not yet be fully established (address added through
9568 + * mptcp_update_metasocket). Then, we should not attempt to create new
9569 + * subflows.
9570 + */
9571 + if (mpcb->master_sk &&
9572 + !tcp_sk(mpcb->master_sk)->mptcp->fully_established)
9573 + return;
9575 + if (!work_pending(&pm_priv->subflow_work)) {
9576 + sock_hold(meta_sk);
9577 + queue_work(mptcp_wq, &pm_priv->subflow_work);
9581 +/* Called upon release_sock, if the socket was owned by the user during
9582 + * a path-management event.
9583 + */
9584 +static void full_mesh_release_sock(struct sock *meta_sk)
9586 + struct mptcp_loc_addr *mptcp_local;
9587 + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
9588 + struct fullmesh_priv *fmp = (struct fullmesh_priv *)&mpcb->mptcp_pm[0];
9589 + struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk));
9590 + struct sock *sk, *tmpsk;
9591 + int i;
9593 + rcu_read_lock();
9594 + mptcp_local = rcu_dereference(fm_ns->local);
9596 + /* First, detect modifications or additions */
9597 + mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) {
9598 + struct in_addr ifa = mptcp_local->locaddr4[i].addr;
9599 + bool found = false;
9601 + mptcp_for_each_sk(mpcb, sk) {
9602 + struct tcp_sock *tp = tcp_sk(sk);
9604 + if (sk->sk_family == AF_INET6 &&
9605 + !mptcp_v6_is_v4_mapped(sk))
9606 + continue;
9608 + if (inet_sk(sk)->inet_saddr != ifa.s_addr)
9609 + continue;
9611 + found = true;
9613 + if (mptcp_local->locaddr4[i].low_prio != tp->mptcp->low_prio) {
9614 + tp->mptcp->send_mp_prio = 1;
9615 + tp->mptcp->low_prio = mptcp_local->locaddr4[i].low_prio;
9617 + tcp_send_ack(sk);
9621 + if (!found) {
9622 + fmp->add_addr++;
9624 + sk = mptcp_select_ack_sock(meta_sk, 0);
9625 + if (sk)
9626 + tcp_send_ack(sk);
9627 + full_mesh_create_subflows(meta_sk);
9631 +#if IS_ENABLED(CONFIG_IPV6)
9632 + mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) {
9633 + struct in6_addr ifa = mptcp_local->locaddr6[i].addr;
9634 + bool found = false;
9636 + mptcp_for_each_sk(mpcb, sk) {
9637 + struct tcp_sock *tp = tcp_sk(sk);
9639 + if (sk->sk_family == AF_INET ||
9640 + mptcp_v6_is_v4_mapped(sk))
9641 + continue;
9643 + if (!ipv6_addr_equal(&inet6_sk(sk)->saddr, &ifa))
9644 + continue;
9646 + found = true;
9648 + if (mptcp_local->locaddr6[i].low_prio != tp->mptcp->low_prio) {
9649 + tp->mptcp->send_mp_prio = 1;
9650 + tp->mptcp->low_prio = mptcp_local->locaddr6[i].low_prio;
9652 + tcp_send_ack(sk);
9656 + if (!found) {
9657 + fmp->add_addr++;
9659 + sk = mptcp_select_ack_sock(meta_sk, 0);
9660 + if (sk)
9661 + tcp_send_ack(sk);
9662 + full_mesh_create_subflows(meta_sk);
9665 +#endif
9667 + /* Now, detect address-removals */
9668 + mptcp_for_each_sk_safe(mpcb, sk, tmpsk) {
9669 + bool shall_remove = true;
9671 + if (sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(sk)) {
9672 + mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) {
9673 + if (inet_sk(sk)->inet_saddr == mptcp_local->locaddr4[i].addr.s_addr) {
9674 + shall_remove = false;
9675 + break;
9678 + } else {
9679 + mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) {
9680 + if (ipv6_addr_equal(&inet6_sk(sk)->saddr, &mptcp_local->locaddr6[i].addr)) {
9681 + shall_remove = false;
9682 + break;
9687 + if (shall_remove) {
9688 + /* Reinject, so that pf = 1 and so we
9689 + * won't select this one as the
9690 + * ack-sock.
9691 + */
9692 + mptcp_reinject_data(sk, 0);
9694 + update_remove_addrs(tcp_sk(sk)->mptcp->loc_id, meta_sk,
9695 + mptcp_local);
9697 + if (mpcb->master_sk == sk)
9698 + update_remove_addrs(0, meta_sk, mptcp_local);
9700 + mptcp_sub_force_close(sk);
9703 + rcu_read_unlock();
9706 +static int full_mesh_get_local_index(sa_family_t family, union inet_addr *addr,
9707 + struct net *net)
9709 + struct mptcp_loc_addr *mptcp_local;
9710 + struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
9711 + int index;
9713 + /* Handle the backup-flows */
9714 + rcu_read_lock();
9715 + mptcp_local = rcu_dereference(fm_ns->local);
9717 + index = mptcp_find_address(mptcp_local, family, addr);
9719 + rcu_read_unlock();
9721 + return index;
9724 +static int full_mesh_get_local_id(sa_family_t family, union inet_addr *addr,
9725 + struct net *net)
9727 + struct mptcp_loc_addr *mptcp_local;
9728 + struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
9729 + int index, id = -1;
9731 + /* Handle the backup-flows */
9732 + rcu_read_lock();
9733 + mptcp_local = rcu_dereference(fm_ns->local);
9735 + index = mptcp_find_address(mptcp_local, family, addr);
9737 + if (index != -1) {
9738 + if (family == AF_INET)
9739 + id = mptcp_local->locaddr4[index].loc4_id;
9740 + else
9741 + id = mptcp_local->locaddr6[index].loc6_id;
9745 + rcu_read_unlock();
9747 + return id;
9750 +static void full_mesh_addr_signal(struct sock *sk, unsigned *size,
9751 + struct tcp_out_options *opts,
9752 + struct sk_buff *skb)
9754 + struct tcp_sock *tp = tcp_sk(sk);
9755 + struct mptcp_cb *mpcb = tp->mpcb;
9756 + struct fullmesh_priv *fmp = (struct fullmesh_priv *)&mpcb->mptcp_pm[0];
9757 + struct mptcp_loc_addr *mptcp_local;
9758 + struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(sk));
9759 + int remove_addr_len;
9760 + u8 unannouncedv4, unannouncedv6;
9762 + if (likely(!fmp->add_addr))
9763 + goto remove_addr;
9765 + rcu_read_lock();
9766 + mptcp_local = rcu_dereference(fm_ns->local);
9768 + /* IPv4 */
9769 + unannouncedv4 = (~fmp->announced_addrs_v4) & mptcp_local->loc4_bits;
9770 + if (unannouncedv4 &&
9771 + MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR4_ALIGN) {
9772 + int ind = mptcp_find_free_index(~unannouncedv4);
9774 + opts->options |= OPTION_MPTCP;
9775 + opts->mptcp_options |= OPTION_ADD_ADDR;
9776 + opts->add_addr4.addr_id = mptcp_local->locaddr4[ind].loc4_id;
9777 + opts->add_addr4.addr = mptcp_local->locaddr4[ind].addr;
9778 + opts->add_addr_v4 = 1;
9780 + if (skb) {
9781 + fmp->announced_addrs_v4 |= (1 << ind);
9782 + fmp->add_addr--;
9784 + *size += MPTCP_SUB_LEN_ADD_ADDR4_ALIGN;
9787 + /* IPv6 */
9788 + unannouncedv6 = (~fmp->announced_addrs_v6) & mptcp_local->loc6_bits;
9789 + if (unannouncedv6 &&
9790 + MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR6_ALIGN) {
9791 + int ind = mptcp_find_free_index(~unannouncedv6);
9793 + opts->options |= OPTION_MPTCP;
9794 + opts->mptcp_options |= OPTION_ADD_ADDR;
9795 + opts->add_addr6.addr_id = mptcp_local->locaddr6[ind].loc6_id;
9796 + opts->add_addr6.addr = mptcp_local->locaddr6[ind].addr;
9797 + opts->add_addr_v6 = 1;
9799 + if (skb) {
9800 + fmp->announced_addrs_v6 |= (1 << ind);
9801 + fmp->add_addr--;
9803 + *size += MPTCP_SUB_LEN_ADD_ADDR6_ALIGN;
9806 + rcu_read_unlock();
9808 + if (!unannouncedv4 && !unannouncedv6 && skb) {
9809 + fmp->add_addr--;
9812 +remove_addr:
9813 + if (likely(!fmp->remove_addrs))
9814 + return;
9816 + remove_addr_len = mptcp_sub_len_remove_addr_align(fmp->remove_addrs);
9817 + if (MAX_TCP_OPTION_SPACE - *size < remove_addr_len)
9818 + return;
9820 + opts->options |= OPTION_MPTCP;
9821 + opts->mptcp_options |= OPTION_REMOVE_ADDR;
9822 + opts->remove_addrs = fmp->remove_addrs;
9823 + *size += remove_addr_len;
9824 + if (skb)
9825 + fmp->remove_addrs = 0;
9828 +static int mptcp_fm_init_net(struct net *net)
9830 + struct mptcp_loc_addr *mptcp_local;
9831 + struct mptcp_fm_ns *fm_ns;
9833 + fm_ns = kzalloc(sizeof(*fm_ns), GFP_KERNEL);
9834 + if (!fm_ns)
9835 + return -ENOBUFS;
9837 + mptcp_local = kzalloc(sizeof(*mptcp_local), GFP_KERNEL);
9838 + if (!mptcp_local) {
9839 + kfree(fm_ns);
9840 + return -ENOBUFS;
9843 + mptcp_local->next_v4_index = 1;
9845 + rcu_assign_pointer(fm_ns->local, mptcp_local);
9846 + INIT_DELAYED_WORK(&fm_ns->address_worker, mptcp_address_worker);
9847 + INIT_LIST_HEAD(&fm_ns->events);
9848 + spin_lock_init(&fm_ns->local_lock);
9849 + fm_ns->net = net;
9850 + net->mptcp.path_managers[MPTCP_PM_FULLMESH] = fm_ns;
9852 + return 0;
9855 +static void mptcp_fm_exit_net(struct net *net)
9857 + struct mptcp_addr_event *eventq, *tmp;
9858 + struct mptcp_fm_ns *fm_ns;
9859 + struct mptcp_loc_addr *mptcp_local;
9861 + fm_ns = fm_get_ns(net);
9862 + cancel_delayed_work_sync(&fm_ns->address_worker);
9864 + rcu_read_lock_bh();
9866 + mptcp_local = rcu_dereference_bh(fm_ns->local);
9867 + kfree(mptcp_local);
9869 + spin_lock(&fm_ns->local_lock);
9870 + list_for_each_entry_safe(eventq, tmp, &fm_ns->events, list) {
9871 + list_del(&eventq->list);
9872 + kfree(eventq);
9874 + spin_unlock(&fm_ns->local_lock);
9876 + rcu_read_unlock_bh();
9878 + kfree(fm_ns);
9881 +static struct pernet_operations full_mesh_net_ops = {
9882 + .init = mptcp_fm_init_net,
9883 + .exit = mptcp_fm_exit_net,
9886 +static struct mptcp_pm_ops full_mesh __read_mostly = {
9887 + .new_session = full_mesh_new_session,
9888 + .release_sock = full_mesh_release_sock,
9889 + .fully_established = full_mesh_create_subflows,
9890 + .new_remote_address = full_mesh_create_subflows,
9891 + .get_local_index = full_mesh_get_local_index,
9892 + .get_local_id = full_mesh_get_local_id,
9893 + .addr_signal = full_mesh_addr_signal,
9894 + .name = "fullmesh",
9895 + .owner = THIS_MODULE,
9898 +/* General initialization of MPTCP_PM */
9899 +static int __init full_mesh_register(void)
9901 + int ret;
9903 + BUILD_BUG_ON(sizeof(struct fullmesh_priv) > MPTCP_PM_SIZE);
9905 + ret = register_pernet_subsys(&full_mesh_net_ops);
9906 + if (ret)
9907 + goto out;
9909 + ret = register_inetaddr_notifier(&mptcp_pm_inetaddr_notifier);
9910 + if (ret)
9911 + goto err_reg_inetaddr;
9912 + ret = register_netdevice_notifier(&mptcp_pm_netdev_notifier);
9913 + if (ret)
9914 + goto err_reg_netdev;
9916 +#if IS_ENABLED(CONFIG_IPV6)
9917 + ret = register_inet6addr_notifier(&inet6_addr_notifier);
9918 + if (ret)
9919 + goto err_reg_inet6addr;
9920 +#endif
9922 + ret = mptcp_register_path_manager(&full_mesh);
9923 + if (ret)
9924 + goto err_reg_pm;
9926 +out:
9927 + return ret;
9930 +err_reg_pm:
9931 +#if IS_ENABLED(CONFIG_IPV6)
9932 + unregister_inet6addr_notifier(&inet6_addr_notifier);
9933 +err_reg_inet6addr:
9934 +#endif
9935 + unregister_netdevice_notifier(&mptcp_pm_netdev_notifier);
9936 +err_reg_netdev:
9937 + unregister_inetaddr_notifier(&mptcp_pm_inetaddr_notifier);
9938 +err_reg_inetaddr:
9939 + unregister_pernet_subsys(&full_mesh_net_ops);
9940 + goto out;
9943 +static void full_mesh_unregister(void)
9945 +#if IS_ENABLED(CONFIG_IPV6)
9946 + unregister_inet6addr_notifier(&inet6_addr_notifier);
9947 +#endif
9948 + unregister_netdevice_notifier(&mptcp_pm_netdev_notifier);
9949 + unregister_inetaddr_notifier(&mptcp_pm_inetaddr_notifier);
9950 + unregister_pernet_subsys(&full_mesh_net_ops);
9951 + mptcp_unregister_path_manager(&full_mesh);
9954 +module_init(full_mesh_register);
9955 +module_exit(full_mesh_unregister);
9957 +MODULE_AUTHOR("Christoph Paasch");
9958 +MODULE_LICENSE("GPL");
9959 +MODULE_DESCRIPTION("Full-Mesh MPTCP");
9960 +MODULE_VERSION("0.88");
9961 diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_input.c linux-3.14.45/net/mptcp/mptcp_input.c
9962 --- linux-3.14.45.orig/net/mptcp/mptcp_input.c 1970-01-01 01:00:00.000000000 +0100
9963 +++ linux-3.14.45/net/mptcp/mptcp_input.c 2015-06-24 14:15:48.895862487 +0200
9964 @@ -0,0 +1,2254 @@
9966 + * MPTCP implementation - Receiving side
9968 + * Initial Design & Implementation:
9969 + * Sébastien Barré <sebastien.barre@uclouvain.be>
9971 + * Current Maintainer & Author:
9972 + * Christoph Paasch <christoph.paasch@uclouvain.be>
9974 + * Additional authors:
9975 + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
9976 + * Gregory Detal <gregory.detal@uclouvain.be>
9977 + * Fabien Duchêne <fabien.duchene@uclouvain.be>
9978 + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
9979 + * Lavkesh Lahngir <lavkesh51@gmail.com>
9980 + * Andreas Ripke <ripke@neclab.eu>
9981 + * Vlad Dogaru <vlad.dogaru@intel.com>
9982 + * Octavian Purdila <octavian.purdila@intel.com>
9983 + * John Ronan <jronan@tssg.org>
9984 + * Catalin Nicutar <catalin.nicutar@gmail.com>
9985 + * Brandon Heller <brandonh@stanford.edu>
9988 + * This program is free software; you can redistribute it and/or
9989 + * modify it under the terms of the GNU General Public License
9990 + * as published by the Free Software Foundation; either version
9991 + * 2 of the License, or (at your option) any later version.
9992 + */
9994 +#include <asm/unaligned.h>
9996 +#include <net/mptcp.h>
9997 +#include <net/mptcp_v4.h>
9998 +#include <net/mptcp_v6.h>
10000 +#include <linux/kconfig.h>
10002 +/* is seq1 < seq2 ? */
10003 +static inline int before64(const u64 seq1, const u64 seq2)
10005 + return (s64)(seq1 - seq2) < 0;
10008 +/* is seq1 > seq2 ? */
10009 +#define after64(seq1, seq2) before64(seq2, seq1)
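
For reference, before64()/after64() are the 64-bit counterparts of TCP's before()/after(): the subtraction is evaluated as a signed 64-bit value, so the comparison stays correct across wrap-around of the data-sequence space. A standalone sketch of the same trick in plain userspace C (not kernel code):

#include <assert.h>
#include <stdint.h>

/* is seq1 < seq2, modulo 2^64 ? (same idea as before64() above) */
static int before64_demo(uint64_t seq1, uint64_t seq2)
{
        return (int64_t)(seq1 - seq2) < 0;
}

int main(void)
{
        /* Ordinary case: 100 comes before 200. */
        assert(before64_demo(100, 200));
        /* Wrap-around case: 0xFFFFFFFFFFFFFFF0 is "just before" 5,
         * even though it is numerically larger. */
        assert(before64_demo(0xFFFFFFFFFFFFFFF0ULL, 5));
        assert(!before64_demo(5, 0xFFFFFFFFFFFFFFF0ULL));
        return 0;
}
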
10011 +static inline void mptcp_become_fully_estab(struct sock *sk)
10013 + tcp_sk(sk)->mptcp->fully_established = 1;
10015 + if (is_master_tp(tcp_sk(sk)) &&
10016 + tcp_sk(sk)->mpcb->pm_ops->fully_established)
10017 + tcp_sk(sk)->mpcb->pm_ops->fully_established(mptcp_meta_sk(sk));
10020 +/* Similar to tcp_tso_acked without any memory accounting */
10021 +static inline int mptcp_tso_acked_reinject(struct sock *sk, struct sk_buff *skb)
10023 + struct tcp_sock *tp = tcp_sk(sk);
10024 + u32 packets_acked, len;
10026 + BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
10028 + packets_acked = tcp_skb_pcount(skb);
10030 + if (skb_unclone(skb, GFP_ATOMIC))
10031 + return 0;
10033 + len = tp->snd_una - TCP_SKB_CB(skb)->seq;
10034 + __pskb_trim_head(skb, len);
10036 + TCP_SKB_CB(skb)->seq += len;
10037 + skb->ip_summed = CHECKSUM_PARTIAL;
10038 + skb->truesize -= len;
10040 + /* Any change of skb->len requires recalculation of tso factor. */
10041 + if (tcp_skb_pcount(skb) > 1)
10042 + tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
10043 + packets_acked -= tcp_skb_pcount(skb);
10045 + if (packets_acked) {
10046 + BUG_ON(tcp_skb_pcount(skb) == 0);
10047 + BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
10050 + return packets_acked;
10053 +/**
10054 + * Cleans the meta-socket retransmission queue and the reinject-queue.
10055 + * @sk must be the metasocket.
10056 + */
10057 +static void mptcp_clean_rtx_queue(struct sock *meta_sk, u32 prior_snd_una)
10059 + struct sk_buff *skb, *tmp;
10060 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
10061 + struct mptcp_cb *mpcb = meta_tp->mpcb;
10062 + bool acked = false;
10063 + u32 acked_pcount;
10065 + while ((skb = tcp_write_queue_head(meta_sk)) &&
10066 + skb != tcp_send_head(meta_sk)) {
10067 + bool fully_acked = true;
10069 + if (before(meta_tp->snd_una, TCP_SKB_CB(skb)->end_seq)) {
10070 + if (tcp_skb_pcount(skb) == 1 ||
10071 + !after(meta_tp->snd_una, TCP_SKB_CB(skb)->seq))
10072 + break;
10074 + acked_pcount = tcp_tso_acked(meta_sk, skb);
10075 + if (!acked_pcount)
10076 + break;
10078 + fully_acked = false;
10079 + } else {
10080 + acked_pcount = tcp_skb_pcount(skb);
10083 + acked = true;
10084 + meta_tp->packets_out -= acked_pcount;
10085 + meta_tp->retrans_stamp = 0;
10087 + if (!fully_acked)
10088 + break;
10090 + tcp_unlink_write_queue(skb, meta_sk);
10092 + if (mptcp_is_data_fin(skb)) {
10093 + struct sock *sk_it;
10095 + /* DATA_FIN has been acknowledged - now we can close
10096 + * the subflows
10097 + */
10098 + mptcp_for_each_sk(mpcb, sk_it) {
10099 + unsigned long delay = 0;
10101 + /* If we are the passive closer, don't trigger
10102 + * subflow-fin until the subflow has been finned
10103 + * by the peer - thus we add a delay.
10104 + */
10105 + if (mpcb->passive_close &&
10106 + sk_it->sk_state == TCP_ESTABLISHED)
10107 + delay = inet_csk(sk_it)->icsk_rto << 3;
10109 + mptcp_sub_close(sk_it, delay);
10112 + sk_wmem_free_skb(meta_sk, skb);
10114 + /* Remove acknowledged data from the reinject queue */
10115 + skb_queue_walk_safe(&mpcb->reinject_queue, skb, tmp) {
10116 + if (before(meta_tp->snd_una, TCP_SKB_CB(skb)->end_seq)) {
10117 + if (tcp_skb_pcount(skb) == 1 ||
10118 + !after(meta_tp->snd_una, TCP_SKB_CB(skb)->seq))
10119 + break;
10121 + mptcp_tso_acked_reinject(meta_sk, skb);
10122 + break;
10125 + __skb_unlink(skb, &mpcb->reinject_queue);
10126 + __kfree_skb(skb);
10129 + if (likely(between(meta_tp->snd_up, prior_snd_una, meta_tp->snd_una)))
10130 + meta_tp->snd_up = meta_tp->snd_una;
10132 + if (acked) {
10133 + tcp_rearm_rto(meta_sk);
10134 + /* Normally this is done in tcp_try_undo_loss - but MPTCP
10135 + * does not call this function.
10136 + */
10137 + inet_csk(meta_sk)->icsk_retransmits = 0;
10141 +/* Inspired by tcp_rcv_state_process */
10142 +static int mptcp_rcv_state_process(struct sock *meta_sk, struct sock *sk,
10143 + const struct sk_buff *skb, u32 data_seq,
10144 + u16 data_len)
10146 + struct tcp_sock *meta_tp = tcp_sk(meta_sk), *tp = tcp_sk(sk);
10147 + struct tcphdr *th = tcp_hdr(skb);
10149 + /* State-machine handling if FIN has been enqueued and it has
10150 + * been acked (snd_una == write_seq) - it's important that this
10151 + * here is after sk_wmem_free_skb because otherwise
10152 + * sk_forward_alloc is wrong upon inet_csk_destroy_sock()
10153 + */
10154 + switch (meta_sk->sk_state) {
10155 + case TCP_FIN_WAIT1:
10156 + if (meta_tp->snd_una == meta_tp->write_seq) {
10157 + struct dst_entry *dst = __sk_dst_get(meta_sk);
10159 + tcp_set_state(meta_sk, TCP_FIN_WAIT2);
10160 + meta_sk->sk_shutdown |= SEND_SHUTDOWN;
10162 + dst = __sk_dst_get(sk);
10163 + if (dst)
10164 + dst_confirm(dst);
10166 + if (!sock_flag(meta_sk, SOCK_DEAD)) {
10167 + /* Wake up lingering close() */
10168 + meta_sk->sk_state_change(meta_sk);
10169 + } else {
10170 + int tmo;
10172 + if (meta_tp->linger2 < 0 ||
10173 + (data_len &&
10174 + after(data_seq + data_len - (mptcp_is_data_fin2(skb, tp) ? 1 : 0),
10175 + meta_tp->rcv_nxt))) {
10176 + mptcp_send_active_reset(meta_sk, GFP_ATOMIC);
10177 + tcp_done(meta_sk);
10178 + NET_INC_STATS_BH(sock_net(meta_sk), LINUX_MIB_TCPABORTONDATA);
10179 + return 1;
10182 + tmo = tcp_fin_time(meta_sk);
10183 + if (tmo > TCP_TIMEWAIT_LEN) {
10184 + inet_csk_reset_keepalive_timer(meta_sk, tmo - TCP_TIMEWAIT_LEN);
10185 + } else if (mptcp_is_data_fin2(skb, tp) ||
10186 + sock_owned_by_user(meta_sk)) {
10187 + /* Bad case. We could lose such FIN otherwise.
10188 + * It is not a big problem, but it looks confusing
10189 + * and not so rare event. We still can lose it now,
10190 + * if it spins in bh_lock_sock(), but it is really
10191 + * marginal case.
10192 + */
10193 + inet_csk_reset_keepalive_timer(meta_sk, tmo);
10194 + } else {
10195 + tcp_time_wait(meta_sk, TCP_FIN_WAIT2, tmo);
10199 + break;
10200 + case TCP_CLOSING:
10201 + case TCP_LAST_ACK:
10202 + if (meta_tp->snd_una == meta_tp->write_seq) {
10203 + tcp_done(meta_sk);
10204 + return 1;
10206 + break;
10209 + /* step 7: process the segment text */
10210 + switch (meta_sk->sk_state) {
10211 + case TCP_FIN_WAIT1:
10212 + case TCP_FIN_WAIT2:
10213 + /* RFC 793 says to queue data in these states,
10214 + * RFC 1122 says we MUST send a reset.
10215 + * BSD 4.4 also does reset.
10216 + */
10217 + if (meta_sk->sk_shutdown & RCV_SHUTDOWN) {
10218 + if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
10219 + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt) &&
10220 + !mptcp_is_data_fin2(skb, tp)) {
10221 + NET_INC_STATS_BH(sock_net(meta_sk), LINUX_MIB_TCPABORTONDATA);
10222 + mptcp_send_active_reset(meta_sk, GFP_ATOMIC);
10223 + tcp_reset(meta_sk);
10224 + return 1;
10227 + break;
10230 + return 0;
10233 +/**
10234 + * @return:
10235 + * i) 1: Everything's fine.
10236 + * ii) -1: A reset has been sent on the subflow - csum-failure
10237 + * iii) 0: csum-failure but no reset sent, because it's the last subflow.
10238 + * Last packet should not be destroyed by the caller because it has
10239 + * been done here.
10240 + */
10241 +static int mptcp_verif_dss_csum(struct sock *sk)
10243 + struct tcp_sock *tp = tcp_sk(sk);
10244 + struct sk_buff *tmp, *tmp1, *last = NULL;
10245 + __wsum csum_tcp = 0; /* cumulative checksum of pld + mptcp-header */
10246 + int ans = 1, overflowed = 0, offset = 0, dss_csum_added = 0;
10247 + int iter = 0;
10249 + skb_queue_walk_safe(&sk->sk_receive_queue, tmp, tmp1) {
10250 + unsigned int csum_len;
10252 + if (before(tp->mptcp->map_subseq + tp->mptcp->map_data_len, TCP_SKB_CB(tmp)->end_seq))
10253 + /* Mapping ends in the middle of the packet -
10254 + * csum only these bytes
10255 + */
10256 + csum_len = tp->mptcp->map_subseq + tp->mptcp->map_data_len - TCP_SKB_CB(tmp)->seq;
10257 + else
10258 + csum_len = tmp->len;
10260 + offset = 0;
10261 + if (overflowed) {
10262 + char first_word[4];
10263 + first_word[0] = 0;
10264 + first_word[1] = 0;
10265 + first_word[2] = 0;
10266 + first_word[3] = *(tmp->data);
10267 + csum_tcp = csum_partial(first_word, 4, csum_tcp);
10268 + offset = 1;
10269 + csum_len--;
10270 + overflowed = 0;
10273 + csum_tcp = skb_checksum(tmp, offset, csum_len, csum_tcp);
10275 + /* Was the length odd? Then we have to merge the next byte
10276 + * correctly (see above)
10277 + */
10278 + if (csum_len != (csum_len & (~1)))
10279 + overflowed = 1;
10281 + if (mptcp_is_data_seq(tmp) && !dss_csum_added) {
10282 + __be32 data_seq = htonl((u32)(tp->mptcp->map_data_seq >> 32));
10284 + /* If a 64-bit dss is present, we increase the offset
10285 + * by 4 bytes, as the high-order 32 bits will be added
10286 + * in the final csum_partial-call.
10287 + */
10288 + u32 offset = skb_transport_offset(tmp) +
10289 + TCP_SKB_CB(tmp)->dss_off;
10290 + if (TCP_SKB_CB(tmp)->mptcp_flags & MPTCPHDR_SEQ64_SET)
10291 + offset += 4;
10293 + csum_tcp = skb_checksum(tmp, offset,
10294 + MPTCP_SUB_LEN_SEQ_CSUM,
10295 + csum_tcp);
10297 + csum_tcp = csum_partial(&data_seq,
10298 + sizeof(data_seq), csum_tcp);
10300 + dss_csum_added = 1; /* Just do it once */
10302 + last = tmp;
10303 + iter++;
10305 + if (!skb_queue_is_last(&sk->sk_receive_queue, tmp) &&
10306 + !before(TCP_SKB_CB(tmp1)->seq,
10307 + tp->mptcp->map_subseq + tp->mptcp->map_data_len))
10308 + break;
10311 + /* Now, checksum must be 0 */
10312 + if (unlikely(csum_fold(csum_tcp))) {
10313 + pr_err("%s csum is wrong: %#x data_seq %u dss_csum_added %d overflowed %d iterations %d\n",
10314 + __func__, csum_fold(csum_tcp),
10315 + TCP_SKB_CB(last)->seq, dss_csum_added, overflowed,
10316 + iter);
10318 + tp->mptcp->send_mp_fail = 1;
10320 + /* map_data_seq is the data-seq number of the
10321 + * mapping we are currently checking
10322 + */
10323 + tp->mpcb->csum_cutoff_seq = tp->mptcp->map_data_seq;
10325 + if (tp->mpcb->cnt_subflows > 1) {
10326 + mptcp_send_reset(sk);
10327 + ans = -1;
10328 + } else {
10329 + tp->mpcb->send_infinite_mapping = 1;
10331 + /* Need to purge the rcv-queue as it's no longer valid */
10332 + while ((tmp = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
10333 + tp->copied_seq = TCP_SKB_CB(tmp)->end_seq;
10334 + kfree_skb(tmp);
10337 + ans = 0;
10341 + return ans;
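
mptcp_verif_dss_csum() feeds every segment of the current mapping into one ones'-complement sum; when a segment ends on an odd byte, that byte is carried over and merged with the first byte of the next segment (the "overflowed" handling above). The kernel does this with skb_checksum()/csum_partial(); the sketch below only illustrates the general carry-the-odd-byte technique in plain userspace C and is not the kernel implementation.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct csum_state {
        uint32_t sum;           /* running 32-bit accumulator */
        int      have_odd;      /* did the previous chunk end on an odd byte? */
        uint8_t  odd;           /* the carried byte */
};

/* Feed one chunk of payload into the running ones'-complement sum. */
static void csum_feed(struct csum_state *s, const uint8_t *p, size_t len)
{
        size_t i = 0;

        if (s->have_odd && len) {
                /* Pair the carried byte with the first byte of this chunk. */
                s->sum += (uint32_t)((s->odd << 8) | p[0]);
                s->have_odd = 0;
                i = 1;
        }
        for (; i + 1 < len; i += 2)
                s->sum += (uint32_t)((p[i] << 8) | p[i + 1]);
        if (i < len) {
                s->odd = p[i];
                s->have_odd = 1;
        }
}

/* Fold the accumulator and return the final 16-bit checksum. */
static uint16_t csum_finish(struct csum_state *s)
{
        uint32_t sum = s->sum;

        if (s->have_odd)
                sum += (uint32_t)(s->odd << 8); /* pad the trailing odd byte */
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
}

int main(void)
{
        const char *msg = "hello world";
        struct csum_state a = { 0 }, b = { 0 };
        uint8_t buf[32];

        memcpy(buf, msg, strlen(msg));
        /* Summing in one chunk or in odd-sized pieces gives the same result. */
        csum_feed(&a, buf, strlen(msg));
        csum_feed(&b, buf, 3);
        csum_feed(&b, buf + 3, strlen(msg) - 3);
        printf("%#x %#x\n", csum_finish(&a), csum_finish(&b));
        return 0;
}
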
10344 +static inline void mptcp_prepare_skb(struct sk_buff *skb, struct sk_buff *next,
10345 + struct sock *sk)
10347 + struct tcp_sock *tp = tcp_sk(sk);
10348 + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
10349 + /* Adapt data-seq's to the packet itself. We essentially transform the
10350 + * dss-mapping to a per-packet granularity. This is necessary to
10351 + * correctly handle overlapping mappings coming from different
10352 + * subflows. Otherwise it would be a complete mess.
10353 + */
10354 + tcb->seq = ((u32)tp->mptcp->map_data_seq) + tcb->seq - tp->mptcp->map_subseq;
10355 + tcb->end_seq = tcb->seq + skb->len;
10357 + /* If cur is the last one in the rcv-queue (or the last one for this
10358 + * mapping), and data_fin is enqueued, the end_data_seq is +1.
10359 + */
10360 + if (skb_queue_is_last(&sk->sk_receive_queue, skb) ||
10361 + after(TCP_SKB_CB(next)->end_seq, tp->mptcp->map_subseq + tp->mptcp->map_data_len)) {
10362 + tcb->end_seq += tp->mptcp->map_data_fin;
10364 + /* We manually set the fin-flag if it is a data-fin. For easy
10365 + * processing in tcp_recvmsg.
10366 + */
10367 + if (mptcp_is_data_fin2(skb, tp))
10368 + tcp_hdr(skb)->fin = 1;
10369 + else
10370 + tcp_hdr(skb)->fin = 0;
10371 + } else {
10372 + /* We may have a subflow-fin with data but without data-fin */
10373 + tcp_hdr(skb)->fin = 0;
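
mptcp_prepare_skb() rewrites each skb's sequence numbers from the subflow space into the data space: the skb's offset inside the mapping (seq - map_subseq) is added to the mapping's data-sequence number. A minimal sketch of that translation in plain C, with hypothetical field names:

#include <stdint.h>
#include <stdio.h>

/* Translate a subflow-level sequence number into the data-level space,
 * given the current DSS mapping (map_data_seq, map_subseq). */
static uint32_t subseq_to_dataseq(uint64_t map_data_seq, uint32_t map_subseq,
                                  uint32_t sub_seq)
{
        uint32_t offset = sub_seq - map_subseq; /* offset into the mapping */

        return (uint32_t)map_data_seq + offset;
}

int main(void)
{
        /* A mapping starting at subflow seq 1000 covering data seq 500000:
         * the skb at subflow seq 1100 carries data seq 500100. */
        printf("%u\n", subseq_to_dataseq(500000, 1000, 1100));
        return 0;
}
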
10377 +/**
10378 + * @return: 1 if the segment has been eaten and can be suppressed,
10379 + * otherwise 0.
10380 + */
10381 +static inline int mptcp_direct_copy(struct sk_buff *skb, struct sock *meta_sk)
10383 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
10384 + int chunk = min_t(unsigned int, skb->len, meta_tp->ucopy.len);
10385 + int eaten = 0;
10387 + __set_current_state(TASK_RUNNING);
10389 + local_bh_enable();
10390 + if (!skb_copy_datagram_iovec(skb, 0, meta_tp->ucopy.iov, chunk)) {
10391 + meta_tp->ucopy.len -= chunk;
10392 + meta_tp->copied_seq += chunk;
10393 + eaten = (chunk == skb->len);
10394 + tcp_rcv_space_adjust(meta_sk);
10396 + local_bh_disable();
10397 + return eaten;
10400 +static inline void mptcp_reset_mapping(struct tcp_sock *tp)
10402 + tp->mptcp->map_data_len = 0;
10403 + tp->mptcp->map_data_seq = 0;
10404 + tp->mptcp->map_subseq = 0;
10405 + tp->mptcp->map_data_fin = 0;
10406 + tp->mptcp->mapping_present = 0;
10409 +/* The DSS-mapping received on the sk only covers the second half of the skb
10410 + * (cut at seq). We trim the head from the skb.
10411 + * Data will be freed upon kfree().
10413 + * Inspired by tcp_trim_head().
10414 + */
10415 +static void mptcp_skb_trim_head(struct sk_buff *skb, struct sock *sk, u32 seq)
10417 + int len = seq - TCP_SKB_CB(skb)->seq;
10418 + u32 new_seq = TCP_SKB_CB(skb)->seq + len;
10420 + if (len < skb_headlen(skb))
10421 + __skb_pull(skb, len);
10422 + else
10423 + __pskb_trim_head(skb, len - skb_headlen(skb));
10425 + TCP_SKB_CB(skb)->seq = new_seq;
10427 + skb->truesize -= len;
10428 + atomic_sub(len, &sk->sk_rmem_alloc);
10429 + sk_mem_uncharge(sk, len);
10432 +/* The DSS-mapping received on the sk only covers the first half of the skb
10433 + * (cut at seq). We create a second skb (@return), and queue it in the rcv-queue
10434 + * as further packets may resolve the mapping of the second half of data.
10436 + * Inspired by tcp_fragment().
10437 + */
10438 +static int mptcp_skb_split_tail(struct sk_buff *skb, struct sock *sk, u32 seq)
10440 + struct sk_buff *buff;
10441 + int nsize;
10442 + int nlen, len;
10444 + len = seq - TCP_SKB_CB(skb)->seq;
10445 + nsize = skb_headlen(skb) - len + tcp_sk(sk)->tcp_header_len;
10446 + if (nsize < 0)
10447 + nsize = 0;
10449 + /* Get a new skb... force flag on. */
10450 + buff = alloc_skb(nsize, GFP_ATOMIC);
10451 + if (buff == NULL)
10452 + return -ENOMEM;
10454 + skb_reserve(buff, tcp_sk(sk)->tcp_header_len);
10455 + skb_reset_transport_header(buff);
10457 + tcp_hdr(buff)->fin = tcp_hdr(skb)->fin;
10458 + tcp_hdr(skb)->fin = 0;
10460 + /* We absolutely need to call skb_set_owner_r before refreshing the
10461 + * truesize of buff, otherwise the moved data will account twice.
10462 + */
10463 + skb_set_owner_r(buff, sk);
10464 + nlen = skb->len - len - nsize;
10465 + buff->truesize += nlen;
10466 + skb->truesize -= nlen;
10468 + /* Correct the sequence numbers. */
10469 + TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
10470 + TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
10471 + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
10473 + skb_split(skb, buff, len);
10475 + __skb_queue_after(&sk->sk_receive_queue, skb, buff);
10477 + return 0;
10480 +/* @return: 0 everything is fine. Just continue processing
10481 + * 1 subflow is broken stop everything
10482 + * -1 this packet was broken - continue with the next one.
10483 + */
10484 +static int mptcp_prevalidate_skb(struct sock *sk, struct sk_buff *skb)
10486 + struct tcp_sock *tp = tcp_sk(sk);
10488 + /* If we are in infinite mode, the subflow-fin is in fact a data-fin. */
10489 + if (!skb->len && tcp_hdr(skb)->fin && !mptcp_is_data_fin(skb) &&
10490 + !tp->mpcb->infinite_mapping_rcv) {
10491 + /* Remove a pure subflow-fin from the queue and increase
10492 + * copied_seq.
10493 + */
10494 + tp->copied_seq = TCP_SKB_CB(skb)->end_seq;
10495 + __skb_unlink(skb, &sk->sk_receive_queue);
10496 + __kfree_skb(skb);
10497 + return -1;
10500 + /* If we are not yet fully established and do not know the mapping for
10501 + * this segment, this path has to fall back to infinite or be torn down.
10502 + */
10503 + if (!tp->mptcp->fully_established && !mptcp_is_data_seq(skb) &&
10504 + !tp->mptcp->mapping_present && !tp->mpcb->infinite_mapping_rcv) {
10505 + pr_err("%s %#x will fallback - pi %d from %pS, seq %u\n",
10506 + __func__, tp->mpcb->mptcp_loc_token,
10507 + tp->mptcp->path_index, __builtin_return_address(0),
10508 + TCP_SKB_CB(skb)->seq);
10510 + if (!is_master_tp(tp)) {
10511 + mptcp_send_reset(sk);
10512 + return 1;
10515 + tp->mpcb->infinite_mapping_snd = 1;
10516 + tp->mpcb->infinite_mapping_rcv = 1;
10517 + tp->mptcp->fully_established = 1;
10520 + /* Receiver-side becomes fully established when a whole rcv-window has
10521 + * been received without the need to fall back due to the previous
10522 + * condition. */
10523 + if (!tp->mptcp->fully_established) {
10524 + tp->mptcp->init_rcv_wnd -= skb->len;
10525 + if (tp->mptcp->init_rcv_wnd < 0)
10526 + mptcp_become_fully_estab(sk);
10529 + return 0;
10532 +/* @return: 0 everything is fine. Just continue processing
10533 + * 1 subflow is broken stop everything
10534 + * -1 this packet was broken - continue with the next one.
10535 + */
10536 +static int mptcp_detect_mapping(struct sock *sk, struct sk_buff *skb)
10538 + struct tcp_sock *tp = tcp_sk(sk), *meta_tp = mptcp_meta_tp(tp);
10539 + struct mptcp_cb *mpcb = tp->mpcb;
10540 + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
10541 + u32 *ptr;
10542 + u32 data_seq, sub_seq, data_len, tcp_end_seq;
10544 + /* If we are in infinite-mapping-mode, the subflow is guaranteed to be
10545 + * in-order at the data-level. Thus data-seq-numbers can be inferred
10546 + * from what is expected at the data-level.
10547 + */
10548 + if (mpcb->infinite_mapping_rcv) {
10549 + tp->mptcp->map_data_seq = mptcp_get_rcv_nxt_64(meta_tp);
10550 + tp->mptcp->map_subseq = tcb->seq;
10551 + tp->mptcp->map_data_len = skb->len;
10552 + tp->mptcp->map_data_fin = tcp_hdr(skb)->fin;
10553 + tp->mptcp->mapping_present = 1;
10554 + return 0;
10557 + /* No mapping here? Exit - it is either already set or still on its way */
10558 + if (!mptcp_is_data_seq(skb)) {
10559 + /* Too many packets without a mapping - this subflow is broken */
10560 + if (!tp->mptcp->mapping_present &&
10561 + tp->rcv_nxt - tp->copied_seq > 65536) {
10562 + mptcp_send_reset(sk);
10563 + return 1;
10566 + return 0;
10569 + ptr = mptcp_skb_set_data_seq(skb, &data_seq, mpcb);
10570 + ptr++;
10571 + sub_seq = get_unaligned_be32(ptr) + tp->mptcp->rcv_isn;
10572 + ptr++;
10573 + data_len = get_unaligned_be16(ptr);
10575 + /* If it's an empty skb with DATA_FIN, sub_seq must get fixed.
10576 + * The draft sets it to 0, but we really would like to have the
10577 + * real value, to have an easy handling afterwards here in this
10578 + * function.
10579 + */
10580 + if (mptcp_is_data_fin(skb) && skb->len == 0)
10581 + sub_seq = TCP_SKB_CB(skb)->seq;
10583 + /* If there is already a mapping - we check if it maps with the current
10584 + * one. If not - we reset.
10585 + */
10586 + if (tp->mptcp->mapping_present &&
10587 + (data_seq != (u32)tp->mptcp->map_data_seq ||
10588 + sub_seq != tp->mptcp->map_subseq ||
10589 + data_len != tp->mptcp->map_data_len + tp->mptcp->map_data_fin ||
10590 + mptcp_is_data_fin(skb) != tp->mptcp->map_data_fin)) {
10591 + /* Mapping in packet is different from what we want */
10592 + pr_err("%s Mappings do not match!\n", __func__);
10593 + pr_err("%s dseq %u mdseq %u, sseq %u msseq %u dlen %u mdlen %u dfin %d mdfin %d\n",
10594 + __func__, data_seq, (u32)tp->mptcp->map_data_seq,
10595 + sub_seq, tp->mptcp->map_subseq, data_len,
10596 + tp->mptcp->map_data_len, mptcp_is_data_fin(skb),
10597 + tp->mptcp->map_data_fin);
10598 + mptcp_send_reset(sk);
10599 + return 1;
10602 + /* If the previous check was good, the current mapping is valid and we exit. */
10603 + if (tp->mptcp->mapping_present)
10604 + return 0;
10606 + /* Mapping not yet set on this subflow - we set it here! */
10608 + if (!data_len) {
10609 + mpcb->infinite_mapping_rcv = 1;
10610 + tp->mptcp->fully_established = 1;
10611 + /* We need to repeat mp_fail's until the sender fell
10612 + * back to infinite-mapping - here we stop repeating it.
10613 + */
10614 + tp->mptcp->send_mp_fail = 0;
10616 + /* We have to fixup data_len - it must be the same as skb->len */
10617 + data_len = skb->len + (mptcp_is_data_fin(skb) ? 1 : 0);
10618 + sub_seq = tcb->seq;
10620 + /* TODO kill all other subflows than this one */
10621 + /* data_seq and so on are set correctly */
10623 + /* At this point, the meta-ofo-queue has to be emptied,
10624 + * as the following data is guaranteed to be in-order at
10625 + * the data and subflow-level
10626 + */
10627 + mptcp_purge_ofo_queue(meta_tp);
10630 + /* We are sending mp-fail's and thus are in fallback mode.
10631 + * Ignore packets which do not announce the fallback and still
10632 + * want to provide a mapping.
10633 + */
10634 + if (tp->mptcp->send_mp_fail) {
10635 + tp->copied_seq = TCP_SKB_CB(skb)->end_seq;
10636 + __skb_unlink(skb, &sk->sk_receive_queue);
10637 + __kfree_skb(skb);
10638 + return -1;
10641 + /* FIN increased the mapping-length by 1 */
10642 + if (mptcp_is_data_fin(skb))
10643 + data_len--;
10645 + /* Subflow-sequences of packet must be
10646 + * (at least partially) be part of the DSS-mapping's
10647 + * subflow-sequence-space.
10649 + * Basically the mapping is not valid, if either of the
10650 + * following conditions is true:
10652 + * 1. It's not a data_fin and
10653 + * MPTCP-sub_seq >= TCP-end_seq
10655 + * 2. It's a data_fin and TCP-end_seq > TCP-seq and
10656 + * MPTCP-sub_seq >= TCP-end_seq
10658 + * The previous two can be merged into:
10659 + * TCP-end_seq > TCP-seq and MPTCP-sub_seq >= TCP-end_seq
10660 + * Because if it's not a data-fin, TCP-end_seq > TCP-seq
10662 + * 3. It's a data_fin and skb->len == 0 and
10663 + * MPTCP-sub_seq > TCP-end_seq
10665 + * 4. It's not a data_fin and TCP-end_seq > TCP-seq and
10666 + * MPTCP-sub_seq + MPTCP-data_len <= TCP-seq
10668 + * 5. MPTCP-sub_seq is prior to what we already copied (copied_seq)
10669 + */
10671 + /* subflow-fin is not part of the mapping - ignore it here! */
10672 + tcp_end_seq = tcb->end_seq - tcp_hdr(skb)->fin;
10673 + if ((!before(sub_seq, tcb->end_seq) && after(tcp_end_seq, tcb->seq)) ||
10674 + (mptcp_is_data_fin(skb) && skb->len == 0 && after(sub_seq, tcb->end_seq)) ||
10675 + (!after(sub_seq + data_len, tcb->seq) && after(tcp_end_seq, tcb->seq)) ||
10676 + before(sub_seq, tp->copied_seq)) {
10677 + /* Subflow-sequences of packet is different from what is in the
10678 + * packet's dss-mapping. The peer is misbehaving - reset
10679 + */
10680 + pr_err("%s Packet's mapping does not map to the DSS sub_seq %u "
10681 + "end_seq %u, tcp_end_seq %u seq %u dfin %u len %u data_len %u "
10682 + "copied_seq %u\n", __func__, sub_seq, tcb->end_seq, tcp_end_seq, tcb->seq, mptcp_is_data_fin(skb),
10683 + skb->len, data_len, tp->copied_seq);
10684 + mptcp_send_reset(sk);
10685 + return 1;
10688 + /* Does the DSS had 64-bit seqnum's ? */
10689 + if (!(tcb->mptcp_flags & MPTCPHDR_SEQ64_SET)) {
10690 + /* Wrapped around? */
10691 + if (unlikely(after(data_seq, meta_tp->rcv_nxt) && data_seq < meta_tp->rcv_nxt)) {
10692 + tp->mptcp->map_data_seq = mptcp_get_data_seq_64(mpcb, !mpcb->rcv_hiseq_index, data_seq);
10693 + } else {
10694 + /* Else, access the default high-order bits */
10695 + tp->mptcp->map_data_seq = mptcp_get_data_seq_64(mpcb, mpcb->rcv_hiseq_index, data_seq);
10697 + } else {
10698 + tp->mptcp->map_data_seq = mptcp_get_data_seq_64(mpcb, (tcb->mptcp_flags & MPTCPHDR_SEQ64_INDEX) ? 1 : 0, data_seq);
10700 + if (unlikely(tcb->mptcp_flags & MPTCPHDR_SEQ64_OFO)) {
10701 + /* We make sure that the data_seq is invalid.
10702 + * It will be dropped later.
10703 + */
10704 + tp->mptcp->map_data_seq += 0xFFFFFFFF;
10705 + tp->mptcp->map_data_seq += 0xFFFFFFFF;
10709 + tp->mptcp->map_data_len = data_len;
10710 + tp->mptcp->map_subseq = sub_seq;
10711 + tp->mptcp->map_data_fin = mptcp_is_data_fin(skb) ? 1 : 0;
10712 + tp->mptcp->mapping_present = 1;
10714 + return 0;
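
The DSS option usually carries only the low 32 bits of the data-sequence number; mptcp_get_data_seq_64(), defined elsewhere in this patch, rebuilds the full 64-bit value from a per-connection pair of high-order words, switching to the other word when the 32-bit space has wrapped. The code below is a rough, simplified paraphrase of that expansion in plain C; the state layout and names are hypothetical, not the kernel helper.

#include <stdint.h>

/* Rebuild a 64-bit data-sequence number from its 32-bit wire form.
 * high_order[] holds the two candidate upper words tracked per connection;
 * cur_index selects the one matching the current rcv_nxt (hypothetical state). */
static uint64_t expand_data_seq(const uint32_t high_order[2], int cur_index,
                                uint32_t rcv_nxt32, uint32_t data_seq)
{
        int idx = cur_index;

        /* data_seq logically after rcv_nxt but numerically below it:
         * the 32-bit space wrapped, so use the other high-order word. */
        if ((int32_t)(data_seq - rcv_nxt32) > 0 && data_seq < rcv_nxt32)
                idx = !cur_index;

        return ((uint64_t)high_order[idx] << 32) | data_seq;
}

int main(void)
{
        const uint32_t hi[2] = { 0, 1 };

        /* Near the wrap point, a small data_seq maps into the next epoch. */
        return expand_data_seq(hi, 0, 0xFFFFFF00u, 0x00000010u) ==
               0x100000010ULL ? 0 : 1;
}
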
10717 +/* Similar to tcp_sequence(...) */
10718 +static inline int mptcp_sequence(const struct tcp_sock *meta_tp,
10719 + u64 data_seq, u64 end_data_seq)
10721 + struct mptcp_cb *mpcb = meta_tp->mpcb;
10722 + u64 rcv_wup64;
10724 + /* Wrap-around? */
10725 + if (meta_tp->rcv_wup > meta_tp->rcv_nxt) {
10726 + rcv_wup64 = ((u64)(mpcb->rcv_high_order[mpcb->rcv_hiseq_index] - 1) << 32) |
10727 + meta_tp->rcv_wup;
10728 + } else {
10729 + rcv_wup64 = mptcp_get_data_seq_64(mpcb, mpcb->rcv_hiseq_index,
10730 + meta_tp->rcv_wup);
10733 + return !before64(end_data_seq, rcv_wup64) &&
10734 + !after64(data_seq, mptcp_get_rcv_nxt_64(meta_tp) + tcp_receive_window(meta_tp));
10737 +/* @return: 0 everything is fine. Just continue processing
10738 + * -1 this packet was broken - continue with the next one.
10739 + */
10740 +static int mptcp_validate_mapping(struct sock *sk, struct sk_buff *skb)
10742 + struct tcp_sock *tp = tcp_sk(sk);
10743 + struct sk_buff *tmp, *tmp1;
10744 + u32 tcp_end_seq;
10746 + if (!tp->mptcp->mapping_present)
10747 + return 0;
10749 + /* either, the new skb gave us the mapping and the first segment
10750 + * in the sub-rcv-queue has to be trimmed ...
10751 + */
10752 + tmp = skb_peek(&sk->sk_receive_queue);
10753 + if (before(TCP_SKB_CB(tmp)->seq, tp->mptcp->map_subseq) &&
10754 + after(TCP_SKB_CB(tmp)->end_seq, tp->mptcp->map_subseq))
10755 + mptcp_skb_trim_head(tmp, sk, tp->mptcp->map_subseq);
10757 + /* ... or the new skb (tail) has to be split at the end. */
10758 + tcp_end_seq = TCP_SKB_CB(skb)->end_seq - (tcp_hdr(skb)->fin ? 1 : 0);
10759 + if (after(tcp_end_seq, tp->mptcp->map_subseq + tp->mptcp->map_data_len)) {
10760 + u32 seq = tp->mptcp->map_subseq + tp->mptcp->map_data_len;
10761 + if (mptcp_skb_split_tail(skb, sk, seq)) { /* Allocation failed */
10762 + /* TODO : maybe handle this here better.
10763 + * We now just force meta-retransmission.
10764 + */
10765 + tp->copied_seq = TCP_SKB_CB(skb)->end_seq;
10766 + __skb_unlink(skb, &sk->sk_receive_queue);
10767 + __kfree_skb(skb);
10768 + return -1;
10772 + /* Now, remove old sk_buff's from the receive-queue.
10773 + * This may happen if the mapping has been lost for these segments and
10774 + * the next mapping has already been received.
10775 + */
10776 + if (tp->mptcp->mapping_present &&
10777 + before(TCP_SKB_CB(skb_peek(&sk->sk_receive_queue))->seq, tp->mptcp->map_subseq)) {
10778 + skb_queue_walk_safe(&sk->sk_receive_queue, tmp1, tmp) {
10779 + if (!before(TCP_SKB_CB(tmp1)->seq, tp->mptcp->map_subseq))
10780 + break;
10782 + tp->copied_seq = TCP_SKB_CB(tmp1)->end_seq;
10783 + __skb_unlink(tmp1, &sk->sk_receive_queue);
10785 + /* Impossible that we could free skb here, because its
10786 + * mapping is known to be valid from previous checks
10787 + */
10788 + __kfree_skb(tmp1);
10792 + return 0;
10795 +/* @return: 0 everything is fine. Just continue processing
10796 + * 1 subflow is broken stop everything
10797 + * -1 this mapping has been put in the meta-receive-queue
10798 + * -2 this mapping has been eaten by the application
10799 + */
10800 +static int mptcp_queue_skb(struct sock *sk)
10802 + struct tcp_sock *tp = tcp_sk(sk), *meta_tp = mptcp_meta_tp(tp);
10803 + struct sock *meta_sk = mptcp_meta_sk(sk);
10804 + struct mptcp_cb *mpcb = tp->mpcb;
10805 + struct sk_buff *tmp, *tmp1;
10806 + u64 rcv_nxt64 = mptcp_get_rcv_nxt_64(meta_tp);
10807 + bool data_queued = false;
10809 + /* Have we not yet received the full mapping? */
10810 + if (!tp->mptcp->mapping_present ||
10811 + before(tp->rcv_nxt, tp->mptcp->map_subseq + tp->mptcp->map_data_len))
10812 + return 0;
10814 + /* Is this an overlapping mapping? rcv_nxt >= end_data_seq
10815 + * OR
10816 + * This mapping is out of window
10817 + */
10818 + if (!before64(rcv_nxt64, tp->mptcp->map_data_seq + tp->mptcp->map_data_len + tp->mptcp->map_data_fin) ||
10819 + !mptcp_sequence(meta_tp, tp->mptcp->map_data_seq,
10820 + tp->mptcp->map_data_seq + tp->mptcp->map_data_len + tp->mptcp->map_data_fin)) {
10821 + skb_queue_walk_safe(&sk->sk_receive_queue, tmp1, tmp) {
10822 + __skb_unlink(tmp1, &sk->sk_receive_queue);
10823 + tp->copied_seq = TCP_SKB_CB(tmp1)->end_seq;
10824 + __kfree_skb(tmp1);
10826 + if (!skb_queue_empty(&sk->sk_receive_queue) &&
10827 + !before(TCP_SKB_CB(tmp)->seq,
10828 + tp->mptcp->map_subseq + tp->mptcp->map_data_len))
10829 + break;
10832 + mptcp_reset_mapping(tp);
10834 + return -1;
10837 + /* Record it, because we want to send our data_fin on the same path */
10838 + if (tp->mptcp->map_data_fin) {
10839 + mpcb->dfin_path_index = tp->mptcp->path_index;
10840 + mpcb->dfin_combined = !!(sk->sk_shutdown & RCV_SHUTDOWN);
10843 + /* Verify the checksum */
10844 + if (mpcb->dss_csum && !mpcb->infinite_mapping_rcv) {
10845 + int ret = mptcp_verif_dss_csum(sk);
10847 + if (ret <= 0) {
10848 + mptcp_reset_mapping(tp);
10849 + return 1;
10853 + if (before64(rcv_nxt64, tp->mptcp->map_data_seq)) {
10854 + /* Segments have to go to the meta-ofo-queue */
10855 + skb_queue_walk_safe(&sk->sk_receive_queue, tmp1, tmp) {
10856 + tp->copied_seq = TCP_SKB_CB(tmp1)->end_seq;
10857 + mptcp_prepare_skb(tmp1, tmp, sk);
10858 + __skb_unlink(tmp1, &sk->sk_receive_queue);
10859 + /* MUST be done here, because fragstolen may be true later.
10860 + * Then, kfree_skb_partial will not account the memory.
10861 + */
10862 + skb_orphan(tmp1);
10864 + if (!mpcb->in_time_wait) /* In time-wait, do not receive data */
10865 + mptcp_add_meta_ofo_queue(meta_sk, tmp1, sk);
10866 + else
10867 + __kfree_skb(tmp1);
10869 + if (!skb_queue_empty(&sk->sk_receive_queue) &&
10870 + !before(TCP_SKB_CB(tmp)->seq,
10871 + tp->mptcp->map_subseq + tp->mptcp->map_data_len))
10872 + break;
10875 + } else {
10876 + /* Ready for the meta-rcv-queue */
10877 + skb_queue_walk_safe(&sk->sk_receive_queue, tmp1, tmp) {
10878 + int eaten = 0;
10879 + int copied_early = 0;
10880 + bool fragstolen = false;
10881 + u32 old_rcv_nxt = meta_tp->rcv_nxt;
10883 + tp->copied_seq = TCP_SKB_CB(tmp1)->end_seq;
10884 + mptcp_prepare_skb(tmp1, tmp, sk);
10885 + __skb_unlink(tmp1, &sk->sk_receive_queue);
10886 + /* MUST be done here, because fragstolen may be true.
10887 + * Then, kfree_skb_partial will not account the memory.
10888 + */
10889 + skb_orphan(tmp1);
10891 + /* This segment has already been received */
10892 + if (!after(TCP_SKB_CB(tmp1)->end_seq, meta_tp->rcv_nxt)) {
10893 + __kfree_skb(tmp1);
10894 + goto next;
10897 +#ifdef CONFIG_NET_DMA
10898 + if (TCP_SKB_CB(tmp1)->seq == meta_tp->rcv_nxt &&
10899 + meta_tp->ucopy.task == current &&
10900 + meta_tp->copied_seq == meta_tp->rcv_nxt &&
10901 + tmp1->len <= meta_tp->ucopy.len &&
10902 + sock_owned_by_user(meta_sk) &&
10903 + tcp_dma_try_early_copy(meta_sk, tmp1, 0)) {
10904 + copied_early = 1;
10905 + eaten = 1;
10907 +#endif
10909 + /* Is direct copy possible ? */
10910 + if (TCP_SKB_CB(tmp1)->seq == meta_tp->rcv_nxt &&
10911 + meta_tp->ucopy.task == current &&
10912 + meta_tp->copied_seq == meta_tp->rcv_nxt &&
10913 + meta_tp->ucopy.len && sock_owned_by_user(meta_sk) &&
10914 + !copied_early)
10915 + eaten = mptcp_direct_copy(tmp1, meta_sk);
10917 + if (mpcb->in_time_wait) /* In time-wait, do not receive data */
10918 + eaten = 1;
10920 + if (!eaten)
10921 + eaten = tcp_queue_rcv(meta_sk, tmp1, 0, &fragstolen);
10923 + meta_tp->rcv_nxt = TCP_SKB_CB(tmp1)->end_seq;
10924 + mptcp_check_rcvseq_wrap(meta_tp, old_rcv_nxt);
10926 + if (copied_early)
10927 + tcp_cleanup_rbuf(meta_sk, tmp1->len);
10929 + if (tcp_hdr(tmp1)->fin && !mpcb->in_time_wait)
10930 + mptcp_fin(meta_sk);
10932 + /* Check if this fills a gap in the ofo queue */
10933 + if (!skb_queue_empty(&meta_tp->out_of_order_queue))
10934 + mptcp_ofo_queue(meta_sk);
10936 +#ifdef CONFIG_NET_DMA
10937 + if (copied_early)
10938 + __skb_queue_tail(&meta_sk->sk_async_wait_queue,
10939 + tmp1);
10940 + else
10941 +#endif
10942 + if (eaten)
10943 + kfree_skb_partial(tmp1, fragstolen);
10945 + data_queued = true;
10946 +next:
10947 + if (!skb_queue_empty(&sk->sk_receive_queue) &&
10948 + !before(TCP_SKB_CB(tmp)->seq,
10949 + tp->mptcp->map_subseq + tp->mptcp->map_data_len))
10950 + break;
10954 + inet_csk(meta_sk)->icsk_ack.lrcvtime = tcp_time_stamp;
10955 + tp->mptcp->last_data_seq = tp->mptcp->map_data_seq;
10956 + mptcp_reset_mapping(tp);
10958 + return data_queued ? -1 : -2;
10961 +void mptcp_data_ready(struct sock *sk, int bytes)
10963 + struct sock *meta_sk = mptcp_meta_sk(sk);
10964 + struct sk_buff *skb, *tmp;
10965 + int queued = 0;
10967 + /* If the meta is already closed, there is no point in pushing data */
10968 + if (meta_sk->sk_state == TCP_CLOSE && !tcp_sk(sk)->mpcb->in_time_wait) {
10969 + skb_queue_purge(&sk->sk_receive_queue);
10970 + tcp_sk(sk)->copied_seq = tcp_sk(sk)->rcv_nxt;
10971 + goto exit;
10974 +restart:
10975 + /* Iterate over all segments, detect their mapping (if we don't have
10976 + * one yet), validate them and push everything one level higher.
10977 + */
10978 + skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
10979 + int ret;
10980 + /* Pre-validation - e.g., early fallback */
10981 + ret = mptcp_prevalidate_skb(sk, skb);
10982 + if (ret < 0)
10983 + goto restart;
10984 + else if (ret > 0)
10985 + break;
10987 + /* Set the current mapping */
10988 + ret = mptcp_detect_mapping(sk, skb);
10989 + if (ret < 0)
10990 + goto restart;
10991 + else if (ret > 0)
10992 + break;
10994 + /* Validation */
10995 + if (mptcp_validate_mapping(sk, skb) < 0)
10996 + goto restart;
10998 + /* Push a level higher */
10999 + ret = mptcp_queue_skb(sk);
11000 + if (ret < 0) {
11001 + if (ret == -1)
11002 + queued = ret;
11003 + goto restart;
11004 + } else if (ret == 0) {
11005 + continue;
11006 + } else { /* ret == 1 */
11007 + break;
11011 +exit:
11012 + if (tcp_sk(sk)->close_it) {
11013 + tcp_send_ack(sk);
11014 + tcp_time_wait(sk, TCP_TIME_WAIT, 0);
11017 + if (queued == -1 && !sock_flag(meta_sk, SOCK_DEAD))
11018 + meta_sk->sk_data_ready(meta_sk, 0);
11022 +int mptcp_check_req(struct sk_buff *skb, struct net *net)
11024 + struct tcphdr *th = tcp_hdr(skb);
11025 + struct sock *meta_sk = NULL;
11027 + /* MPTCP structures not initialized */
11028 + if (mptcp_init_failed)
11029 + return 0;
11031 + if (skb->protocol == htons(ETH_P_IP))
11032 + meta_sk = mptcp_v4_search_req(th->source, ip_hdr(skb)->saddr,
11033 + ip_hdr(skb)->daddr, net);
11034 +#if IS_ENABLED(CONFIG_IPV6)
11035 + else /* IPv6 */
11036 + meta_sk = mptcp_v6_search_req(th->source, &ipv6_hdr(skb)->saddr,
11037 + &ipv6_hdr(skb)->daddr, net);
11038 +#endif /* CONFIG_IPV6 */
11040 + if (!meta_sk)
11041 + return 0;
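+	/* Tag the skb as an MP_JOIN so that the meta-socket's receive path
+	 * (mptcp_v4_do_rcv / mptcp_v6_do_rcv) treats it as a join request
+	 * instead of regular data.
+	 */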
11043 + TCP_SKB_CB(skb)->mptcp_flags = MPTCPHDR_JOIN;
11045 + bh_lock_sock_nested(meta_sk);
11046 + if (sock_owned_by_user(meta_sk)) {
11047 + skb->sk = meta_sk;
11048 + if (unlikely(sk_add_backlog(meta_sk, skb,
11049 + meta_sk->sk_rcvbuf + meta_sk->sk_sndbuf))) {
11050 + bh_unlock_sock(meta_sk);
11051 + NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
11052 + sock_put(meta_sk); /* Taken by mptcp_search_req */
11053 + kfree_skb(skb);
11054 + return 1;
11056 + } else if (skb->protocol == htons(ETH_P_IP)) {
11057 + tcp_v4_do_rcv(meta_sk, skb);
11058 +#if IS_ENABLED(CONFIG_IPV6)
11059 + } else { /* IPv6 */
11060 + tcp_v6_do_rcv(meta_sk, skb);
11061 +#endif /* CONFIG_IPV6 */
11063 + bh_unlock_sock(meta_sk);
11064 + sock_put(meta_sk); /* Taken by mptcp_vX_search_req */
11065 + return 1;
11068 +struct mp_join *mptcp_find_join(struct sk_buff *skb)
11070 + struct tcphdr *th = tcp_hdr(skb);
11071 + unsigned char *ptr;
11072 + int length = (th->doff * 4) - sizeof(struct tcphdr);
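+	/* doff counts 32-bit words of the entire TCP header, so this is the
+	 * number of option bytes that follow the fixed header.
+	 */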
11074 + /* Jump through the options to check whether JOIN is there */
11075 + ptr = (unsigned char *)(th + 1);
11076 + while (length > 0) {
11077 + int opcode = *ptr++;
11078 + int opsize;
11080 + switch (opcode) {
11081 + case TCPOPT_EOL:
11082 + return NULL;
11083 + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
11084 + length--;
11085 + continue;
11086 + default:
11087 + opsize = *ptr++;
11088 + if (opsize < 2) /* "silly options" */
11089 + return NULL;
11090 + if (opsize > length)
11091 + return NULL; /* don't parse partial options */
11092 + if (opcode == TCPOPT_MPTCP &&
11093 + ((struct mptcp_option *)(ptr - 2))->sub == MPTCP_SUB_JOIN) {
11094 + return (struct mp_join *)(ptr - 2);
11096 + ptr += opsize - 2;
11097 + length -= opsize;
11100 + return NULL;
11103 +int mptcp_lookup_join(struct sk_buff *skb, struct inet_timewait_sock *tw)
11105 + struct mptcp_cb *mpcb;
11106 + struct sock *meta_sk;
11107 + u32 token;
11108 + struct mp_join *join_opt = mptcp_find_join(skb);
11109 + if (!join_opt)
11110 + return 0;
11112 + /* MPTCP structures were not initialized, so return error */
11113 + if (mptcp_init_failed)
11114 + return -1;
11116 + token = join_opt->u.syn.token;
11117 + meta_sk = mptcp_hash_find(dev_net(skb_dst(skb)->dev), token);
11118 + if (!meta_sk) {
11119 + mptcp_debug("%s:mpcb not found:%x\n", __func__, token);
11120 + return -1;
11123 + mpcb = tcp_sk(meta_sk)->mpcb;
11124 + if (mpcb->infinite_mapping_rcv || mpcb->send_infinite_mapping) {
11125 + /* We are in fallback-mode on the reception-side -
11126 + * no new subflows!
11127 + */
11128 + sock_put(meta_sk); /* Taken by mptcp_hash_find */
11129 + return -1;
11132 + /* Coming from time-wait-sock processing in tcp_v4_rcv.
11133 + * We have to deschedule it before continuing, because otherwise
11134 + * mptcp_v4_do_rcv will hit again on it inside tcp_v4_hnd_req.
11135 + */
11136 + if (tw) {
11137 + inet_twsk_deschedule(tw, &tcp_death_row);
11138 + inet_twsk_put(tw);
11141 + TCP_SKB_CB(skb)->mptcp_flags = MPTCPHDR_JOIN;
11142 + /* OK, this is a new syn/join, let's create a new open request and
11143 + * send syn+ack
11144 + */
11145 + bh_lock_sock_nested(meta_sk);
11146 + if (sock_owned_by_user(meta_sk)) {
11147 + skb->sk = meta_sk;
11148 + if (unlikely(sk_add_backlog(meta_sk, skb,
11149 + meta_sk->sk_rcvbuf + meta_sk->sk_sndbuf))) {
11150 + bh_unlock_sock(meta_sk);
11151 + NET_INC_STATS_BH(sock_net(meta_sk),
11152 + LINUX_MIB_TCPBACKLOGDROP);
11153 + sock_put(meta_sk); /* Taken by mptcp_hash_find */
11154 + kfree_skb(skb);
11155 + return 1;
11157 + } else if (skb->protocol == htons(ETH_P_IP)) {
11158 + tcp_v4_do_rcv(meta_sk, skb);
11159 +#if IS_ENABLED(CONFIG_IPV6)
11160 + } else {
11161 + tcp_v6_do_rcv(meta_sk, skb);
11162 +#endif /* CONFIG_IPV6 */
11164 + bh_unlock_sock(meta_sk);
11165 + sock_put(meta_sk); /* Taken by mptcp_hash_find */
11166 + return 1;
11169 +int mptcp_do_join_short(struct sk_buff *skb, struct mptcp_options_received *mopt,
11170 + struct tcp_options_received *tmp_opt, struct net *net)
11172 + struct sock *meta_sk;
11173 + u32 token;
11175 + token = mopt->mptcp_rem_token;
11176 + meta_sk = mptcp_hash_find(net, token);
11177 + if (!meta_sk) {
11178 + mptcp_debug("%s:mpcb not found:%x\n", __func__, token);
11179 + return -1;
11182 + TCP_SKB_CB(skb)->mptcp_flags = MPTCPHDR_JOIN;
11184 + /* OK, this is a new syn/join, let's create a new open request and
11185 + * send syn+ack
11186 + */
11187 + bh_lock_sock(meta_sk);
11189 + /* This check is also done in mptcp_vX_do_rcv. But, there we cannot
11190 + * call tcp_vX_send_reset, because we already hold two socket-locks.
11191 + * (the listener and the meta from above)
11193 + * And the send-reset will try to take yet another one (ip_send_reply).
11194 + * Thus, we propagate the reset up to tcp_rcv_state_process.
11195 + */
11196 + if (tcp_sk(meta_sk)->mpcb->infinite_mapping_rcv ||
11197 + tcp_sk(meta_sk)->mpcb->send_infinite_mapping ||
11198 + meta_sk->sk_state == TCP_CLOSE || !tcp_sk(meta_sk)->inside_tk_table) {
11199 + bh_unlock_sock(meta_sk);
11200 + sock_put(meta_sk); /* Taken by mptcp_hash_find */
11201 + return -1;
11204 + if (sock_owned_by_user(meta_sk)) {
11205 + skb->sk = meta_sk;
11206 + if (unlikely(sk_add_backlog(meta_sk, skb,
11207 + meta_sk->sk_rcvbuf + meta_sk->sk_sndbuf)))
11208 + NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
11209 + else
11210 + /* Must make sure that upper layers won't free the
11211 + * skb if it is added to the backlog-queue.
11212 + */
11213 + skb_get(skb);
11214 + } else {
11215 + /* mptcp_v4_do_rcv tries to free the skb - we prevent this, as
11216 + * the skb will finally be freed by tcp_v4_do_rcv (where we are
11217 + * coming from)
11218 + */
11219 + skb_get(skb);
11220 + if (skb->protocol == htons(ETH_P_IP)) {
11221 + tcp_v4_do_rcv(meta_sk, skb);
11222 +#if IS_ENABLED(CONFIG_IPV6)
11223 + } else { /* IPv6 */
11224 + tcp_v6_do_rcv(meta_sk, skb);
11225 +#endif /* CONFIG_IPV6 */
11229 + bh_unlock_sock(meta_sk);
11230 + sock_put(meta_sk); /* Taken by mptcp_hash_find */
11231 + return 0;
11234 +/**
11235 + * Equivalent of tcp_fin() for MPTCP
11236 + * Can be called only when the FIN is validly part
11237 + * of the data seqnum space - not earlier, while there may still be holes.
11238 + */
11239 +void mptcp_fin(struct sock *meta_sk)
11241 + struct sock *sk = NULL, *sk_it;
11242 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
11243 + struct mptcp_cb *mpcb = meta_tp->mpcb;
11245 + mptcp_for_each_sk(mpcb, sk_it) {
11246 + if (tcp_sk(sk_it)->mptcp->path_index == mpcb->dfin_path_index) {
11247 + sk = sk_it;
11248 + break;
11252 + if (!sk || sk->sk_state == TCP_CLOSE)
11253 + sk = mptcp_select_ack_sock(meta_sk, 0);
11255 + inet_csk_schedule_ack(sk);
11257 + meta_sk->sk_shutdown |= RCV_SHUTDOWN;
11258 + sock_set_flag(meta_sk, SOCK_DONE);
11260 + switch (meta_sk->sk_state) {
11261 + case TCP_SYN_RECV:
11262 + case TCP_ESTABLISHED:
11263 + /* Move to CLOSE_WAIT */
11264 + tcp_set_state(meta_sk, TCP_CLOSE_WAIT);
11265 + inet_csk(sk)->icsk_ack.pingpong = 1;
11266 + break;
11268 + case TCP_CLOSE_WAIT:
11269 + case TCP_CLOSING:
11270 + /* Received a retransmission of the FIN, do
11271 + * nothing.
11272 + */
11273 + break;
11274 + case TCP_LAST_ACK:
11275 + /* RFC793: Remain in the LAST-ACK state. */
11276 + break;
11278 + case TCP_FIN_WAIT1:
11279 + /* This case occurs when a simultaneous close
11280 + * happens, we must ack the received FIN and
11281 + * enter the CLOSING state.
11282 + */
11283 + tcp_send_ack(sk);
11284 + tcp_set_state(meta_sk, TCP_CLOSING);
11285 + break;
11286 + case TCP_FIN_WAIT2:
11287 + /* Received a FIN -- send ACK and enter TIME_WAIT. */
11288 + tcp_send_ack(sk);
11289 + tcp_time_wait(meta_sk, TCP_TIME_WAIT, 0);
11290 + break;
11291 + default:
11292 + /* Only TCP_LISTEN and TCP_CLOSE are left, in these
11293 + * cases we should never reach this piece of code.
11294 + */
11295 + pr_err("%s: Impossible, meta_sk->sk_state=%d\n", __func__,
11296 + meta_sk->sk_state);
11297 + break;
11300 + /* It _is_ possible, that we have something out-of-order _after_ FIN.
11301 + * Probably, we should reset in this case. For now drop them.
11302 + */
11303 + mptcp_purge_ofo_queue(meta_tp);
11304 + sk_mem_reclaim(meta_sk);
11306 + if (!sock_flag(meta_sk, SOCK_DEAD)) {
11307 + meta_sk->sk_state_change(meta_sk);
11309 + /* Do not send POLL_HUP for half duplex close. */
11310 + if (meta_sk->sk_shutdown == SHUTDOWN_MASK ||
11311 + meta_sk->sk_state == TCP_CLOSE)
11312 + sk_wake_async(meta_sk, SOCK_WAKE_WAITD, POLL_HUP);
11313 + else
11314 + sk_wake_async(meta_sk, SOCK_WAKE_WAITD, POLL_IN);
11317 + return;
11320 +static void mptcp_xmit_retransmit_queue(struct sock *meta_sk)
11322 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
11323 + struct sk_buff *skb;
11325 + if (!meta_tp->packets_out)
11326 + return;
11328 + tcp_for_write_queue(skb, meta_sk) {
11329 + if (skb == tcp_send_head(meta_sk))
11330 + break;
11332 + if (mptcp_retransmit_skb(meta_sk, skb))
11333 + return;
11335 + if (skb == tcp_write_queue_head(meta_sk))
11336 + inet_csk_reset_xmit_timer(meta_sk, ICSK_TIME_RETRANS,
11337 + inet_csk(meta_sk)->icsk_rto,
11338 + TCP_RTO_MAX);
11342 +/* Handle the DATA_ACK */
11343 +static void mptcp_data_ack(struct sock *sk, const struct sk_buff *skb)
11345 + struct sock *meta_sk = mptcp_meta_sk(sk);
11346 + struct tcp_sock *meta_tp = tcp_sk(meta_sk), *tp = tcp_sk(sk);
11347 + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
11348 + u32 prior_snd_una = meta_tp->snd_una;
11349 + int prior_packets;
11350 + u32 nwin, data_ack, data_seq;
11351 + u16 data_len = 0;
11353 + /* A valid packet came in - subflow is operational again */
11354 + tp->pf = 0;
11356 + /* Even if there is no data-ack, we stop retransmitting.
11357 + * Except if this is a SYN/ACK. Then it is just a retransmission
11358 + */
11359 + if (tp->mptcp->pre_established && !tcp_hdr(skb)->syn) {
11360 + tp->mptcp->pre_established = 0;
11361 + sk_stop_timer(sk, &tp->mptcp->mptcp_ack_timer);
11364 + /* If we are in infinite mapping mode, rx_opt.data_ack has been
11365 + * set by mptcp_clean_rtx_infinite.
11366 + */
11367 + if (!(tcb->mptcp_flags & MPTCPHDR_ACK) && !tp->mpcb->infinite_mapping_snd)
11368 + goto exit;
11370 + data_ack = tp->mptcp->rx_opt.data_ack;
11372 + if (unlikely(!tp->mptcp->fully_established) &&
11373 + (data_ack != meta_tp->mptcp->snt_isn ||
11374 + tp->mptcp->snt_isn + 1 != TCP_SKB_CB(skb)->ack_seq))
11375 + /* As soon as data has been data-acked,
11376 + * or a subflow-data-ack (not acking syn - thus snt_isn + 1)
11377 + * includes a data-ack, we are fully established
11378 + */
11379 + mptcp_become_fully_estab(sk);
11381 + /* Get the data_seq */
11382 + if (mptcp_is_data_seq(skb)) {
11383 + data_seq = tp->mptcp->rx_opt.data_seq;
11384 + data_len = tp->mptcp->rx_opt.data_len;
11385 + } else {
11386 + data_seq = meta_tp->snd_wl1;
11389 + /* If the ack is older than previous acks
11390 + * then we can probably ignore it.
11391 + */
11392 + if (before(data_ack, prior_snd_una))
11393 + goto exit;
11395 + /* If the ack includes data we haven't sent yet, discard
11396 + * this segment (RFC793 Section 3.9).
11397 + */
11398 + if (after(data_ack, meta_tp->snd_nxt))
11399 + goto exit;
11401 + /*** Now, update the window - inspired by tcp_ack_update_window ***/
11402 + nwin = ntohs(tcp_hdr(skb)->window);
11404 + if (likely(!tcp_hdr(skb)->syn))
11405 + nwin <<= tp->rx_opt.snd_wscale;
11407 + if (tcp_may_update_window(meta_tp, data_ack, data_seq, nwin)) {
11408 + tcp_update_wl(meta_tp, data_seq);
11410 + /* Draft v09, Section 3.3.5:
11411 + * [...] It should only update its local receive window values
11412 + * when the largest sequence number allowed (i.e. DATA_ACK +
11413 + * receive window) increases. [...]
11414 + */
11415 + if (meta_tp->snd_wnd != nwin &&
11416 + !before(data_ack + nwin, tcp_wnd_end(meta_tp))) {
11417 + meta_tp->snd_wnd = nwin;
11419 + if (nwin > meta_tp->max_window)
11420 + meta_tp->max_window = nwin;
11423 + /*** Done, update the window ***/
11425 + /* We passed data and got it acked, remove any soft error
11426 + * log. Something worked...
11427 + */
11428 + sk->sk_err_soft = 0;
11429 + inet_csk(meta_sk)->icsk_probes_out = 0;
11430 + meta_tp->rcv_tstamp = tcp_time_stamp;
11431 + prior_packets = meta_tp->packets_out;
11432 + if (!prior_packets)
11433 + goto no_queue;
11435 + meta_tp->snd_una = data_ack;
11437 + mptcp_clean_rtx_queue(meta_sk, prior_snd_una);
11439 + /* We are in loss-state, and something got acked, retransmit the whole
11440 + * queue now!
11441 + */
11442 + if (inet_csk(meta_sk)->icsk_ca_state == TCP_CA_Loss &&
11443 + after(data_ack, prior_snd_una)) {
11444 + mptcp_xmit_retransmit_queue(meta_sk);
11445 + inet_csk(meta_sk)->icsk_ca_state = TCP_CA_Open;
11448 + /* Simplified version of tcp_new_space, because the snd-buffer
11449 + * is handled by all the subflows.
11450 + */
11451 + if (sock_flag(meta_sk, SOCK_QUEUE_SHRUNK)) {
11452 + sock_reset_flag(meta_sk, SOCK_QUEUE_SHRUNK);
11453 + if (meta_sk->sk_socket &&
11454 + test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags))
11455 + meta_sk->sk_write_space(meta_sk);
11458 + if (meta_sk->sk_state != TCP_ESTABLISHED &&
11459 + mptcp_rcv_state_process(meta_sk, sk, skb, data_seq, data_len))
11460 + return;
11462 +exit:
11463 + mptcp_push_pending_frames(meta_sk);
11465 + return;
11467 +no_queue:
11468 + if (tcp_send_head(meta_sk))
11469 + tcp_ack_probe(meta_sk);
11471 + mptcp_push_pending_frames(meta_sk);
11473 + return;
11476 +void mptcp_clean_rtx_infinite(struct sk_buff *skb, struct sock *sk)
11478 + struct tcp_sock *tp = tcp_sk(sk), *meta_tp = tcp_sk(mptcp_meta_sk(sk));
11480 + if (!tp->mpcb->infinite_mapping_snd)
11481 + return;
11483 + /* The difference between both write_seq's represents the offset between
11484 + * data-sequence and subflow-sequence. As we are infinite, this must
11485 + * match.
11487 + * Thus, from this difference we can infer the meta snd_una.
11488 + */
11489 + tp->mptcp->rx_opt.data_ack = meta_tp->snd_nxt - tp->snd_nxt +
11490 + tp->snd_una;
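+	/* Worked example with hypothetical numbers: if the meta snd_nxt is 5000,
+	 * the subflow snd_nxt is 3000 and the subflow snd_una is 2800, the
+	 * constant offset between the two sequence spaces is 2000, so the
+	 * inferred meta-level data_ack is 2800 + 2000 = 4800.
+	 */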
11492 + mptcp_data_ack(sk, skb);
11495 +/**** static functions used by mptcp_parse_options */
11497 +static inline int mptcp_rem_raddress(struct mptcp_cb *mpcb, u8 rem_id)
11499 + if (mptcp_v4_rem_raddress(mpcb, rem_id) < 0) {
11500 +#if IS_ENABLED(CONFIG_IPV6)
11501 + if (mptcp_v6_rem_raddress(mpcb, rem_id) < 0)
11502 + return -1;
11503 +#else
11504 + return -1;
11505 +#endif /* CONFIG_IPV6 */
11507 + return 0;
11510 +static void mptcp_send_reset_rem_id(const struct mptcp_cb *mpcb, u8 rem_id)
11512 + struct sock *sk_it, *tmpsk;
11514 + mptcp_for_each_sk_safe(mpcb, sk_it, tmpsk) {
11515 + if (tcp_sk(sk_it)->mptcp->rem_id == rem_id) {
11516 + mptcp_reinject_data(sk_it, 0);
11517 + sk_it->sk_err = ECONNRESET;
11518 + if (tcp_need_reset(sk_it->sk_state))
11519 + tcp_send_active_reset(sk_it, GFP_ATOMIC);
11520 + mptcp_sub_force_close(sk_it);
11525 +void mptcp_parse_options(const uint8_t *ptr, int opsize,
11526 + struct tcp_options_received *opt_rx,
11527 + struct mptcp_options_received *mopt,
11528 + const struct sk_buff *skb)
11530 + struct mptcp_option *mp_opt = (struct mptcp_option *)ptr;
11532 + /* If the socket is mp-capable we would have a mopt. */
11533 + if (!mopt)
11534 + return;
11536 + switch (mp_opt->sub) {
11537 + case MPTCP_SUB_CAPABLE:
11539 + struct mp_capable *mpcapable = (struct mp_capable *)ptr;
11541 + if (opsize != MPTCP_SUB_LEN_CAPABLE_SYN &&
11542 + opsize != MPTCP_SUB_LEN_CAPABLE_ACK) {
11543 + mptcp_debug("%s: mp_capable: bad option size %d\n",
11544 + __func__, opsize);
11545 + break;
11548 + if (!sysctl_mptcp_enabled)
11549 + break;
11551 + /* We only support MPTCP version 0 */
11552 + if (mpcapable->ver != 0)
11553 + break;
11555 + /* MPTCP-RFC 6824:
11556 + * "If receiving a message with the 'B' flag set to 1, and this
11557 + * is not understood, then this SYN MUST be silently ignored;
11558 + */
11559 + if (mpcapable->b) {
11560 + mopt->drop_me = 1;
11561 + break;
11564 + /* MPTCP-RFC 6824:
11565 + * "An implementation that only supports this method MUST set
11566 + * bit "H" to 1, and bits "C" through "G" to 0."
11567 + */
11568 + if (!mpcapable->h)
11569 + break;
11571 + mopt->saw_mpc = 1;
11572 + mopt->dss_csum = sysctl_mptcp_checksum || mpcapable->a;
11574 + if (opsize >= MPTCP_SUB_LEN_CAPABLE_SYN)
11575 + mopt->mptcp_key = mpcapable->sender_key;
11577 + break;
11579 + case MPTCP_SUB_JOIN:
11581 + struct mp_join *mpjoin = (struct mp_join *)ptr;
11583 + if (opsize != MPTCP_SUB_LEN_JOIN_SYN &&
11584 + opsize != MPTCP_SUB_LEN_JOIN_SYNACK &&
11585 + opsize != MPTCP_SUB_LEN_JOIN_ACK) {
11586 + mptcp_debug("%s: mp_join: bad option size %d\n",
11587 + __func__, opsize);
11588 + break;
11591 + /* saw_mpc must be set, because in tcp_check_req we assume that
11592 + * it is set to support falling back to reg. TCP if a rexmitted
11593 + * SYN has no MP_CAPABLE or MP_JOIN
11594 + */
11595 + switch (opsize) {
11596 + case MPTCP_SUB_LEN_JOIN_SYN:
11597 + mopt->is_mp_join = 1;
11598 + mopt->saw_mpc = 1;
11599 + mopt->low_prio = mpjoin->b;
11600 + mopt->rem_id = mpjoin->addr_id;
11601 + mopt->mptcp_rem_token = mpjoin->u.syn.token;
11602 + mopt->mptcp_recv_nonce = mpjoin->u.syn.nonce;
11603 + break;
11604 + case MPTCP_SUB_LEN_JOIN_SYNACK:
11605 + mopt->saw_mpc = 1;
11606 + mopt->low_prio = mpjoin->b;
11607 + mopt->rem_id = mpjoin->addr_id;
11608 + mopt->mptcp_recv_tmac = mpjoin->u.synack.mac;
11609 + mopt->mptcp_recv_nonce = mpjoin->u.synack.nonce;
11610 + break;
11611 + case MPTCP_SUB_LEN_JOIN_ACK:
11612 + mopt->saw_mpc = 1;
11613 + mopt->join_ack = 1;
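+			/* Unlike the truncated MAC in the SYN/ACK, the third
+			 * ACK carries the full 160-bit (20-byte) HMAC.
+			 */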
11614 + memcpy(mopt->mptcp_recv_mac, mpjoin->u.ack.mac, 20);
11615 + break;
11617 + break;
11619 + case MPTCP_SUB_DSS:
11621 + struct mp_dss *mdss = (struct mp_dss *)ptr;
11622 + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
11624 + /* We check opsize for the csum and non-csum case. We do this,
11625 + * because the draft says that the csum SHOULD be ignored if
11626 + * it has not been negotiated in the MP_CAPABLE but still is
11627 + * present in the data.
11629 + * It will get ignored later in mptcp_queue_skb.
11630 + */
11631 + if (opsize != mptcp_sub_len_dss(mdss, 0) &&
11632 + opsize != mptcp_sub_len_dss(mdss, 1)) {
11633 + mptcp_debug("%s: mp_dss: bad option size %d\n",
11634 + __func__, opsize);
11635 + break;
11638 + ptr += 4;
11640 + if (mdss->A) {
11641 + tcb->mptcp_flags |= MPTCPHDR_ACK;
11643 + if (mdss->a) {
11644 + mopt->data_ack = (u32) get_unaligned_be64(ptr);
11645 + ptr += MPTCP_SUB_LEN_ACK_64;
11646 + } else {
11647 + mopt->data_ack = get_unaligned_be32(ptr);
11648 + ptr += MPTCP_SUB_LEN_ACK;
11652 + tcb->dss_off = (ptr - skb_transport_header(skb));
11654 + if (mdss->M) {
11655 + if (mdss->m) {
11656 + u64 data_seq64 = get_unaligned_be64(ptr);
11658 + tcb->mptcp_flags |= MPTCPHDR_SEQ64_SET;
11659 + mopt->data_seq = (u32) data_seq64;
11661 + ptr += 12; /* 64-bit dseq + subseq */
11662 + } else {
11663 + mopt->data_seq = get_unaligned_be32(ptr);
11664 + ptr += 8; /* 32-bit dseq + subseq */
11666 + mopt->data_len = get_unaligned_be16(ptr);
11668 + tcb->mptcp_flags |= MPTCPHDR_SEQ;
11670 + /* Is a check-sum present? */
11671 + if (opsize == mptcp_sub_len_dss(mdss, 1))
11672 + tcb->mptcp_flags |= MPTCPHDR_DSS_CSUM;
11674 + /* DATA_FIN only possible with DSS-mapping */
11675 + if (mdss->F)
11676 + tcb->mptcp_flags |= MPTCPHDR_FIN;
11679 + break;
11681 + case MPTCP_SUB_ADD_ADDR:
11683 +#if IS_ENABLED(CONFIG_IPV6)
11684 + struct mp_add_addr *mpadd = (struct mp_add_addr *)ptr;
11686 + if ((mpadd->ipver == 4 && opsize != MPTCP_SUB_LEN_ADD_ADDR4 &&
11687 + opsize != MPTCP_SUB_LEN_ADD_ADDR4 + 2) ||
11688 + (mpadd->ipver == 6 && opsize != MPTCP_SUB_LEN_ADD_ADDR6 &&
11689 + opsize != MPTCP_SUB_LEN_ADD_ADDR6 + 2)) {
11690 +#else
11691 + if (opsize != MPTCP_SUB_LEN_ADD_ADDR4 &&
11692 + opsize != MPTCP_SUB_LEN_ADD_ADDR4 + 2) {
11693 +#endif /* CONFIG_IPV6 */
11694 + mptcp_debug("%s: mp_add_addr: bad option size %d\n",
11695 + __func__, opsize);
11696 + break;
11699 + /* We have to manually parse the options if we got two of them. */
11700 + if (mopt->saw_add_addr) {
11701 + mopt->more_add_addr = 1;
11702 + break;
11704 + mopt->saw_add_addr = 1;
11705 + mopt->add_addr_ptr = ptr;
11706 + break;
11708 + case MPTCP_SUB_REMOVE_ADDR:
11709 + if ((opsize - MPTCP_SUB_LEN_REMOVE_ADDR) < 0) {
11710 + mptcp_debug("%s: mp_remove_addr: bad option size %d\n",
11711 + __func__, opsize);
11712 + break;
11715 + if (mopt->saw_rem_addr) {
11716 + mopt->more_rem_addr = 1;
11717 + break;
11719 + mopt->saw_rem_addr = 1;
11720 + mopt->rem_addr_ptr = ptr;
11721 + break;
11722 + case MPTCP_SUB_PRIO:
11724 + struct mp_prio *mpprio = (struct mp_prio *)ptr;
11726 + if (opsize != MPTCP_SUB_LEN_PRIO &&
11727 + opsize != MPTCP_SUB_LEN_PRIO_ADDR) {
11728 + mptcp_debug("%s: mp_prio: bad option size %d\n",
11729 + __func__, opsize);
11730 + break;
11733 + mopt->saw_low_prio = 1;
11734 + mopt->low_prio = mpprio->b;
11736 + if (opsize == MPTCP_SUB_LEN_PRIO_ADDR) {
11737 + mopt->saw_low_prio = 2;
11738 + mopt->prio_addr_id = mpprio->addr_id;
11740 + break;
11742 + case MPTCP_SUB_FAIL:
11743 + if (opsize != MPTCP_SUB_LEN_FAIL) {
11744 + mptcp_debug("%s: mp_fail: bad option size %d\n",
11745 + __func__, opsize);
11746 + break;
11748 + mopt->mp_fail = 1;
11749 + break;
11750 + case MPTCP_SUB_FCLOSE:
11751 + if (opsize != MPTCP_SUB_LEN_FCLOSE) {
11752 + mptcp_debug("%s: mp_fclose: bad option size %d\n",
11753 + __func__, opsize);
11754 + break;
11757 + mopt->mp_fclose = 1;
11758 + mopt->mptcp_key = ((struct mp_fclose *)ptr)->key;
11760 + break;
11761 + default:
11762 + mptcp_debug("%s: Received unknown subtype: %d\n",
11763 + __func__, mp_opt->sub);
11764 + break;
11768 +int mptcp_check_rtt(const struct tcp_sock *tp, int time)
11770 + struct mptcp_cb *mpcb = tp->mpcb;
11771 + struct sock *sk;
11772 + u32 rtt_max = 0;
11774 + /* In MPTCP, we take the max delay across all flows,
11775 + * in order to take into account meta-reordering buffers.
11776 + */
11777 + mptcp_for_each_sk(mpcb, sk) {
11778 + if (!mptcp_sk_can_recv(sk))
11779 + continue;
11781 + if (rtt_max < tcp_sk(sk)->rcv_rtt_est.rtt)
11782 + rtt_max = tcp_sk(sk)->rcv_rtt_est.rtt;
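+	/* rcv_rtt_est.rtt is kept left-shifted by 3 (see tcp_rcv_rtt_update()),
+	 * so shift it back before comparing it against the elapsed time.
+	 */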
11784 + if (time < (rtt_max >> 3) || !rtt_max)
11785 + return 1;
11787 + return 0;
11790 +static void mptcp_handle_add_addr(const unsigned char *ptr, struct sock *sk)
11792 + struct mp_add_addr *mpadd = (struct mp_add_addr *)ptr;
11794 + if (mpadd->ipver == 4) {
11795 + __be16 port = 0;
11796 + if (mpadd->len == MPTCP_SUB_LEN_ADD_ADDR4 + 2)
11797 + port = mpadd->u.v4.port;
11799 + mptcp_v4_add_raddress(tcp_sk(sk)->mpcb, &mpadd->u.v4.addr, port,
11800 + mpadd->addr_id);
11801 +#if IS_ENABLED(CONFIG_IPV6)
11802 + } else if (mpadd->ipver == 6) {
11803 + __be16 port = 0;
11804 + if (mpadd->len == MPTCP_SUB_LEN_ADD_ADDR6 + 2)
11805 + port = mpadd->u.v6.port;
11807 + mptcp_v6_add_raddress(tcp_sk(sk)->mpcb, &mpadd->u.v6.addr, port,
11808 + mpadd->addr_id);
11809 +#endif /* CONFIG_IPV6 */
11813 +static void mptcp_handle_rem_addr(const unsigned char *ptr, struct sock *sk)
11815 + struct mp_remove_addr *mprem = (struct mp_remove_addr *)ptr;
11816 + int i;
11817 + u8 rem_id;
11819 + for (i = 0; i <= mprem->len - MPTCP_SUB_LEN_REMOVE_ADDR; i++) {
11820 + rem_id = (&mprem->addrs_id)[i];
11821 + if (!mptcp_rem_raddress(tcp_sk(sk)->mpcb, rem_id))
11822 + mptcp_send_reset_rem_id(tcp_sk(sk)->mpcb, rem_id);
11826 +static void mptcp_parse_addropt(const struct sk_buff *skb, struct sock *sk)
11828 + struct tcphdr *th = tcp_hdr(skb);
11829 + unsigned char *ptr;
11830 + int length = (th->doff * 4) - sizeof(struct tcphdr);
11832 + /* Jump through the options to check whether ADD_ADDR is there */
11833 + ptr = (unsigned char *)(th + 1);
11834 + while (length > 0) {
11835 + int opcode = *ptr++;
11836 + int opsize;
11838 + switch (opcode) {
11839 + case TCPOPT_EOL:
11840 + return;
11841 + case TCPOPT_NOP:
11842 + length--;
11843 + continue;
11844 + default:
11845 + opsize = *ptr++;
11846 + if (opsize < 2)
11847 + return;
11848 + if (opsize > length)
11849 + return; /* don't parse partial options */
11850 + if (opcode == TCPOPT_MPTCP &&
11851 + ((struct mptcp_option *)ptr)->sub == MPTCP_SUB_ADD_ADDR) {
11852 +#if IS_ENABLED(CONFIG_IPV6)
11853 + struct mp_add_addr *mpadd = (struct mp_add_addr *)ptr;
11854 + if ((mpadd->ipver == 4 && opsize != MPTCP_SUB_LEN_ADD_ADDR4 &&
11855 + opsize != MPTCP_SUB_LEN_ADD_ADDR4 + 2) ||
11856 + (mpadd->ipver == 6 && opsize != MPTCP_SUB_LEN_ADD_ADDR6 &&
11857 + opsize != MPTCP_SUB_LEN_ADD_ADDR6 + 2))
11858 +#else
11859 + if (opsize != MPTCP_SUB_LEN_ADD_ADDR4 &&
11860 + opsize != MPTCP_SUB_LEN_ADD_ADDR4 + 2)
11861 +#endif /* CONFIG_IPV6 */
11862 + goto cont;
11864 + mptcp_handle_add_addr(ptr, sk);
11866 + if (opcode == TCPOPT_MPTCP &&
11867 + ((struct mptcp_option *)ptr)->sub == MPTCP_SUB_REMOVE_ADDR) {
11868 + if ((opsize - MPTCP_SUB_LEN_REMOVE_ADDR) < 0)
11869 + goto cont;
11871 + mptcp_handle_rem_addr(ptr, sk);
11873 +cont:
11874 + ptr += opsize - 2;
11875 + length -= opsize;
11878 + return;
11881 +static inline int mptcp_mp_fail_rcvd(struct sock *sk, const struct tcphdr *th)
11883 + struct mptcp_tcp_sock *mptcp = tcp_sk(sk)->mptcp;
11884 + struct sock *meta_sk = mptcp_meta_sk(sk);
11885 + struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb;
11887 + if (unlikely(mptcp->rx_opt.mp_fail)) {
11888 + mptcp->rx_opt.mp_fail = 0;
11890 + if (!th->rst && !mpcb->infinite_mapping_snd) {
11891 + struct sock *sk_it;
11893 + mpcb->send_infinite_mapping = 1;
11894 + /* We resend everything that has not been acknowledged */
11895 + meta_sk->sk_send_head = tcp_write_queue_head(meta_sk);
11897 + /* We artificially restart the whole send-queue. Thus,
11898 + * it is as if no packets are in flight
11899 + */
11900 + tcp_sk(meta_sk)->packets_out = 0;
11902 + /* If the snd_nxt already wrapped around, we have to
11903 + * undo the wrapping, as we are restarting from snd_una
11904 + * on.
11905 + */
11906 + if (tcp_sk(meta_sk)->snd_nxt < tcp_sk(meta_sk)->snd_una) {
11907 + mpcb->snd_high_order[mpcb->snd_hiseq_index] -= 2;
11908 + mpcb->snd_hiseq_index = mpcb->snd_hiseq_index ? 0 : 1;
11910 + tcp_sk(meta_sk)->snd_nxt = tcp_sk(meta_sk)->snd_una;
11912 + /* Trigger a sending on the meta. */
11913 + mptcp_push_pending_frames(meta_sk);
11915 + mptcp_for_each_sk(mpcb, sk_it) {
11916 + if (sk != sk_it)
11917 + mptcp_sub_force_close(sk_it);
11921 + return 0;
11924 + if (unlikely(mptcp->rx_opt.mp_fclose)) {
11925 + struct sock *sk_it, *tmpsk;
11927 + mptcp->rx_opt.mp_fclose = 0;
11928 + if (mptcp->rx_opt.mptcp_key != mpcb->mptcp_loc_key)
11929 + return 0;
11931 + if (tcp_need_reset(sk->sk_state))
11932 + tcp_send_active_reset(sk, GFP_ATOMIC);
11934 + mptcp_for_each_sk_safe(mpcb, sk_it, tmpsk)
11935 + mptcp_sub_force_close(sk_it);
11937 + tcp_reset(meta_sk);
11939 + return 1;
11942 + return 0;
11945 +static inline void mptcp_path_array_check(struct sock *meta_sk)
11947 + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
11949 + if (unlikely(mpcb->list_rcvd)) {
11950 + mpcb->list_rcvd = 0;
11951 + if (mpcb->pm_ops->new_remote_address)
11952 + mpcb->pm_ops->new_remote_address(meta_sk);
11956 +int mptcp_handle_options(struct sock *sk, const struct tcphdr *th, struct sk_buff *skb)
11958 + struct tcp_sock *tp = tcp_sk(sk);
11959 + struct mptcp_options_received *mopt = &tp->mptcp->rx_opt;
11961 + if (tp->mpcb->infinite_mapping_rcv || tp->mpcb->infinite_mapping_snd)
11962 + return 0;
11964 + if (mptcp_mp_fail_rcvd(sk, th))
11965 + return 1;
11967 + /* RFC 6824, Section 3.3:
11968 + * If a checksum is not present when its use has been negotiated, the
11969 + * receiver MUST close the subflow with a RST as it is considered broken.
11970 + */
11971 + if (mptcp_is_data_seq(skb) && tp->mpcb->dss_csum &&
11972 + !(TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_DSS_CSUM)) {
11973 + if (tcp_need_reset(sk->sk_state))
11974 + tcp_send_active_reset(sk, GFP_ATOMIC);
11976 + mptcp_sub_force_close(sk);
11977 + return 1;
11980 + /* We have to acknowledge retransmissions of the third
11981 + * ack.
11982 + */
11983 + if (mopt->join_ack) {
11984 + tcp_send_delayed_ack(sk);
11985 + mopt->join_ack = 0;
11988 + if (mopt->saw_add_addr || mopt->saw_rem_addr) {
11989 + if (mopt->more_add_addr || mopt->more_rem_addr) {
11990 + mptcp_parse_addropt(skb, sk);
11991 + } else {
11992 + if (mopt->saw_add_addr)
11993 + mptcp_handle_add_addr(mopt->add_addr_ptr, sk);
11994 + if (mopt->saw_rem_addr)
11995 + mptcp_handle_rem_addr(mopt->rem_addr_ptr, sk);
11998 + mopt->more_add_addr = 0;
11999 + mopt->saw_add_addr = 0;
12000 + mopt->more_rem_addr = 0;
12001 + mopt->saw_rem_addr = 0;
12003 + if (mopt->saw_low_prio) {
12004 + if (mopt->saw_low_prio == 1) {
12005 + tp->mptcp->rcv_low_prio = mopt->low_prio;
12006 + } else {
12007 + struct sock *sk_it;
12008 + mptcp_for_each_sk(tp->mpcb, sk_it) {
12009 + struct mptcp_tcp_sock *mptcp = tcp_sk(sk_it)->mptcp;
12010 + if (mptcp->rem_id == mopt->prio_addr_id)
12011 + mptcp->rcv_low_prio = mopt->low_prio;
12014 + mopt->saw_low_prio = 0;
12017 + mptcp_data_ack(sk, skb);
12019 + mptcp_path_array_check(mptcp_meta_sk(sk));
12020 + /* Socket may have been mp_killed by a REMOVE_ADDR */
12021 + if (tp->mp_killed)
12022 + return 1;
12024 + return 0;
12027 +/* The skptr is needed, because if we become MPTCP-capable, we have to switch
12028 + * from meta-socket to master-socket.
12030 + * @return: 1 - we want to reset this connection
12031 + * 2 - we want to discard the received syn/ack
12032 + * 0 - everything is fine - continue
12033 + */
12034 +int mptcp_rcv_synsent_state_process(struct sock *sk, struct sock **skptr,
12035 + struct sk_buff *skb,
12036 + struct mptcp_options_received *mopt)
12038 + struct tcp_sock *tp = tcp_sk(sk);
12040 + if (tp->mpc) {
12041 + u8 hash_mac_check[20];
12042 + struct mptcp_cb *mpcb = tp->mpcb;
12044 + mptcp_hmac_sha1((u8 *)&mpcb->mptcp_rem_key,
12045 + (u8 *)&mpcb->mptcp_loc_key,
12046 + (u8 *)&tp->mptcp->rx_opt.mptcp_recv_nonce,
12047 + (u8 *)&tp->mptcp->mptcp_loc_nonce,
12048 + (u32 *)hash_mac_check);
12049 + if (memcmp(hash_mac_check,
12050 + (char *)&tp->mptcp->rx_opt.mptcp_recv_tmac, 8)) {
12051 + mptcp_sub_force_close(sk);
12052 + return 1;
12055 + /* Set this flag in order to postpone data sending
12056 + * until the 4th ack arrives.
12057 + */
12058 + tp->mptcp->pre_established = 1;
12059 + tp->mptcp->rcv_low_prio = tp->mptcp->rx_opt.low_prio;
12061 + mptcp_hmac_sha1((u8 *)&mpcb->mptcp_loc_key,
12062 + (u8 *)&mpcb->mptcp_rem_key,
12063 + (u8 *)&tp->mptcp->mptcp_loc_nonce,
12064 + (u8 *)&tp->mptcp->rx_opt.mptcp_recv_nonce,
12065 + (u32 *)&tp->mptcp->sender_mac[0]);
12067 + } else if (mopt->saw_mpc) {
12068 + if (mptcp_create_master_sk(sk, mopt->mptcp_key,
12069 + ntohs(tcp_hdr(skb)->window)))
12070 + return 2;
12072 + sk = tcp_sk(sk)->mpcb->master_sk;
12073 + *skptr = sk;
12074 + tp = tcp_sk(sk);
12076 + /* snd_nxt - 1, because it has been incremented
12077 + * by tcp_connect for the SYN
12078 + */
12079 + tp->mptcp->snt_isn = tp->snd_nxt - 1;
12080 + tp->mpcb->dss_csum = mopt->dss_csum;
12081 + tp->mptcp->include_mpc = 1;
12083 + sk_set_socket(sk, mptcp_meta_sk(sk)->sk_socket);
12084 + sk->sk_wq = mptcp_meta_sk(sk)->sk_wq;
12086 + mptcp_update_metasocket(sk, mptcp_meta_sk(sk));
12088 + /* hold in mptcp_inherit_sk due to initialization to 2 */
12089 + sock_put(sk);
12090 + } else {
12091 + tp->request_mptcp = 0;
12093 + if (tp->inside_tk_table)
12094 + mptcp_hash_remove(tp);
12097 + if (tp->mpc)
12098 + tp->mptcp->rcv_isn = TCP_SKB_CB(skb)->seq;
12100 + return 0;
12103 +bool mptcp_should_expand_sndbuf(const struct sock *sk)
12105 + struct sock *sk_it;
12106 + struct sock *meta_sk = mptcp_meta_sk(sk);
12107 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
12108 + int cnt_backups = 0;
12109 + int backup_available = 0;
12111 + /* We circumvent this check in tcp_check_space, because we want to
12112 + * always call sk_write_space. So, we reproduce the check here.
12113 + */
12114 + if (!meta_sk->sk_socket ||
12115 + !test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags))
12116 + return false;
12118 + /* If the user specified a specific send buffer setting, do
12119 + * not modify it.
12120 + */
12121 + if (meta_sk->sk_userlocks & SOCK_SNDBUF_LOCK)
12122 + return false;
12124 + /* If we are under global TCP memory pressure, do not expand. */
12125 + if (sk_under_memory_pressure(meta_sk))
12126 + return false;
12128 + /* If we are under soft global TCP memory pressure, do not expand. */
12129 + if (sk_memory_allocated(meta_sk) >= sk_prot_mem_limits(meta_sk, 0))
12130 + return false;
12133 + /* For MPTCP we look for a subsocket that could send data.
12134 + * If we find one, we update the send-buffer.
12135 + */
12136 + mptcp_for_each_sk(meta_tp->mpcb, sk_it) {
12137 + struct tcp_sock *tp_it = tcp_sk(sk_it);
12139 + if (!mptcp_sk_can_send(sk_it))
12140 + continue;
12142 + /* Backup-flows have to be counted - if there is no other
12143 + * subflow we take the backup-flow into account. */
12144 + if (tp_it->mptcp->rcv_low_prio || tp_it->mptcp->low_prio) {
12145 + cnt_backups++;
12148 + if (tp_it->packets_out < tp_it->snd_cwnd) {
12149 + if (tp_it->mptcp->rcv_low_prio || tp_it->mptcp->low_prio) {
12150 + backup_available = 1;
12151 + continue;
12153 + return true;
12157 + /* Backup-flow is available for sending - update send-buffer */
12158 + if (meta_tp->mpcb->cnt_established == cnt_backups && backup_available)
12159 + return true;
12160 + return false;
12163 +void mptcp_init_buffer_space(struct sock *sk)
12165 + struct tcp_sock *tp = tcp_sk(sk);
12166 + struct sock *meta_sk = mptcp_meta_sk(sk);
12167 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
12168 + int space;
12170 + tcp_init_buffer_space(sk);
12172 + if (is_master_tp(tp)) {
12173 + /* If there is only one subflow, we just use regular TCP
12174 + * autotuning. User-locks are handled already by
12175 + * tcp_init_buffer_space
12176 + */
12177 + meta_tp->window_clamp = tp->window_clamp;
12178 + meta_tp->rcv_ssthresh = tp->rcv_ssthresh;
12179 + meta_sk->sk_rcvbuf = sk->sk_rcvbuf;
12180 + meta_sk->sk_sndbuf = sk->sk_sndbuf;
12182 + return;
12185 + if (meta_sk->sk_userlocks & SOCK_RCVBUF_LOCK)
12186 + goto snd_buf;
12188 + /* Adding a new subflow to the rcv-buffer space. We make a simple
12189 + * addition, to give some space to allow traffic on the new subflow.
12190 + * Autotuning will increase it further later on.
12191 + */
12192 + space = min(meta_sk->sk_rcvbuf + sk->sk_rcvbuf, sysctl_tcp_rmem[2]);
12193 + if (space > meta_sk->sk_rcvbuf) {
12194 + meta_tp->window_clamp += tp->window_clamp;
12195 + meta_tp->rcv_ssthresh += tp->rcv_ssthresh;
12196 + meta_sk->sk_rcvbuf = space;
12199 +snd_buf:
12200 + if (meta_sk->sk_userlocks & SOCK_SNDBUF_LOCK)
12201 + return;
12203 + /* Adding a new subflow to the send-buffer space. We make a simple
12204 + * addition, to give some space to allow traffic on the new subflow.
12205 + * Autotuning will increase it further later on.
12206 + */
12207 + space = min(meta_sk->sk_sndbuf + sk->sk_sndbuf, sysctl_tcp_wmem[2]);
12208 + if (space > meta_sk->sk_sndbuf) {
12209 + meta_sk->sk_sndbuf = space;
12210 + meta_sk->sk_write_space(meta_sk);
12214 +void mptcp_tcp_set_rto(struct sock *sk)
12216 + tcp_set_rto(sk);
12217 + mptcp_set_rto(sk);
12219 diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_ipv4.c linux-3.14.45/net/mptcp/mptcp_ipv4.c
12220 --- linux-3.14.45.orig/net/mptcp/mptcp_ipv4.c 1970-01-01 01:00:00.000000000 +0100
12221 +++ linux-3.14.45/net/mptcp/mptcp_ipv4.c 2015-06-24 14:15:48.895862487 +0200
12222 @@ -0,0 +1,603 @@
12224 + * MPTCP implementation - IPv4-specific functions
12226 + * Initial Design & Implementation:
12227 + * Sébastien Barré <sebastien.barre@uclouvain.be>
12229 + * Current Maintainer:
12230 + * Christoph Paasch <christoph.paasch@uclouvain.be>
12232 + * Additional authors:
12233 + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
12234 + * Gregory Detal <gregory.detal@uclouvain.be>
12235 + * Fabien Duchêne <fabien.duchene@uclouvain.be>
12236 + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
12237 + * Lavkesh Lahngir <lavkesh51@gmail.com>
12238 + * Andreas Ripke <ripke@neclab.eu>
12239 + * Vlad Dogaru <vlad.dogaru@intel.com>
12240 + * Octavian Purdila <octavian.purdila@intel.com>
12241 + * John Ronan <jronan@tssg.org>
12242 + * Catalin Nicutar <catalin.nicutar@gmail.com>
12243 + * Brandon Heller <brandonh@stanford.edu>
12246 + * This program is free software; you can redistribute it and/or
12247 + * modify it under the terms of the GNU General Public License
12248 + * as published by the Free Software Foundation; either version
12249 + * 2 of the License, or (at your option) any later version.
12250 + */
12252 +#include <linux/export.h>
12253 +#include <linux/ip.h>
12254 +#include <linux/list.h>
12255 +#include <linux/skbuff.h>
12256 +#include <linux/spinlock.h>
12257 +#include <linux/tcp.h>
12259 +#include <net/inet_common.h>
12260 +#include <net/inet_connection_sock.h>
12261 +#include <net/mptcp.h>
12262 +#include <net/mptcp_v4.h>
12263 +#include <net/request_sock.h>
12264 +#include <net/tcp.h>
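+/* Nonce/key generation: mix the connection 4-tuple (plus the TCP sequence
+ * number or a per-boot seed) with the mptcp_secret through a single
+ * md5_transform() round - essentially the same construction the stack uses
+ * for its secure initial-sequence-number generation.
+ */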
12266 +u32 mptcp_v4_get_nonce(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
12267 + u32 seq)
12269 + u32 hash[MD5_DIGEST_WORDS];
12271 + hash[0] = (__force u32)saddr;
12272 + hash[1] = (__force u32)daddr;
12273 + hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
12274 + hash[3] = seq;
12276 + md5_transform(hash, mptcp_secret);
12278 + return hash[0];
12281 +u64 mptcp_v4_get_key(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport)
12283 + u32 hash[MD5_DIGEST_WORDS];
12285 + hash[0] = (__force u32)saddr;
12286 + hash[1] = (__force u32)daddr;
12287 + hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
12288 + hash[3] = mptcp_key_seed++;
12290 + md5_transform(hash, mptcp_secret);
12292 + return *((u64 *)hash);
12296 +static void mptcp_v4_reqsk_destructor(struct request_sock *req)
12298 + mptcp_reqsk_destructor(req);
12300 + tcp_v4_reqsk_destructor(req);
12303 +/* Similar to tcp_request_sock_ops */
12304 +struct request_sock_ops mptcp_request_sock_ops __read_mostly = {
12305 + .family = PF_INET,
12306 + .obj_size = sizeof(struct mptcp_request_sock),
12307 + .rtx_syn_ack = tcp_v4_rtx_synack,
12308 + .send_ack = tcp_v4_reqsk_send_ack,
12309 + .destructor = mptcp_v4_reqsk_destructor,
12310 + .send_reset = tcp_v4_send_reset,
12311 + .syn_ack_timeout = tcp_syn_ack_timeout,
12314 +static void mptcp_v4_reqsk_queue_hash_add(struct sock *meta_sk,
12315 + struct request_sock *req,
12316 + unsigned long timeout)
12318 + const u32 h1 = inet_synq_hash(inet_rsk(req)->ir_rmt_addr,
12319 + inet_rsk(req)->ir_rmt_port,
12320 + 0, MPTCP_HASH_SIZE);
12321 + /* We cannot call inet_csk_reqsk_queue_hash_add(), because we do not
12322 + * want to reset the keepalive-timer (responsible for retransmitting
12323 + * SYN/ACKs). We do not retransmit SYN/ACKs+MP_JOINs, because we cannot
12324 + * overload the keepalive timer. Also, it's not a big deal, because the
12325 + * third ACK of the MP_JOIN-handshake is sent in a reliable manner. So,
12326 + * if the third ACK gets lost, the client will handle the retransmission
12327 + * anyways. If our SYN/ACK gets lost, the client will retransmit the
12328 + * SYN.
12329 + */
12330 + struct inet_connection_sock *meta_icsk = inet_csk(meta_sk);
12331 + struct listen_sock *lopt = meta_icsk->icsk_accept_queue.listen_opt;
12332 + const u32 h2 = inet_synq_hash(inet_rsk(req)->ir_rmt_addr,
12333 + inet_rsk(req)->ir_rmt_port,
12334 + lopt->hash_rnd, lopt->nr_table_entries);
12336 + reqsk_queue_hash_req(&meta_icsk->icsk_accept_queue, h2, req, timeout);
12337 + reqsk_queue_added(&meta_icsk->icsk_accept_queue);
12339 + spin_lock(&mptcp_reqsk_hlock);
12340 + list_add(&mptcp_rsk(req)->collide_tuple, &mptcp_reqsk_htb[h1]);
12341 + spin_unlock(&mptcp_reqsk_hlock);
12344 +/* Similar to tcp_v4_conn_request */
12345 +static void mptcp_v4_join_request(struct sock *meta_sk, struct sk_buff *skb)
12347 + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
12348 + struct tcp_options_received tmp_opt;
12349 + struct mptcp_options_received mopt;
12350 + struct request_sock *req;
12351 + struct inet_request_sock *ireq;
12352 + struct mptcp_request_sock *mtreq;
12353 + struct dst_entry *dst = NULL;
12354 + u8 mptcp_hash_mac[20];
12355 + __be32 saddr = ip_hdr(skb)->saddr;
12356 + __be32 daddr = ip_hdr(skb)->daddr;
12357 + __u32 isn = TCP_SKB_CB(skb)->when;
12358 + int want_cookie = 0;
12359 + union inet_addr addr;
12361 + tcp_clear_options(&tmp_opt);
12362 + mptcp_init_mp_opt(&mopt);
12363 + tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
12364 + tmp_opt.user_mss = tcp_sk(meta_sk)->rx_opt.user_mss;
12365 + tcp_parse_options(skb, &tmp_opt, &mopt, 0, NULL);
12367 + req = inet_reqsk_alloc(&mptcp_request_sock_ops);
12368 + if (!req)
12369 + return;
12371 +#ifdef CONFIG_TCP_MD5SIG
12372 + tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
12373 +#endif
12375 + tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
12376 + tcp_openreq_init(req, &tmp_opt, skb);
12378 + ireq = inet_rsk(req);
12379 + ireq->ir_loc_addr = daddr;
12380 + ireq->ir_rmt_addr = saddr;
12381 + ireq->no_srccheck = inet_sk(meta_sk)->transparent;
12382 + ireq->opt = tcp_v4_save_options(skb);
12384 + if (security_inet_conn_request(meta_sk, skb, req))
12385 + goto drop_and_free;
12387 + if (!want_cookie || tmp_opt.tstamp_ok)
12388 + TCP_ECN_create_request(req, skb, sock_net(meta_sk));
12390 + if (!isn) {
12391 + struct flowi4 fl4;
12393 + /* VJ's idea. We save last timestamp seen
12394 + * from the destination in peer table, when entering
12395 + * state TIME-WAIT, and check against it before
12396 + * accepting new connection request.
12398 + * If "isn" is not zero, this request hit alive
12399 + * timewait bucket, so that all the necessary checks
12400 + * are made in the function processing timewait state.
12401 + */
12402 + if (tmp_opt.saw_tstamp &&
12403 + tcp_death_row.sysctl_tw_recycle &&
12404 + (dst = inet_csk_route_req(meta_sk, &fl4, req)) != NULL &&
12405 + fl4.daddr == saddr) {
12406 + if (!tcp_peer_is_proven(req, dst, true)) {
12407 + NET_INC_STATS_BH(sock_net(meta_sk), LINUX_MIB_PAWSPASSIVEREJECTED);
12408 + goto drop_and_release;
12411 + /* Kill the following clause, if you dislike this way. */
12412 + else if (!sysctl_tcp_syncookies &&
12413 + (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(meta_sk) <
12414 + (sysctl_max_syn_backlog >> 2)) &&
12415 + !tcp_peer_is_proven(req, dst, false)) {
12416 + /* Without syncookies last quarter of
12417 + * backlog is filled with destinations,
12418 + * proven to be alive.
12419 + * It means that we continue to communicate
12420 + * to destinations, already remembered
12421 + * to the moment of synflood.
12422 + */
12423 + LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
12424 + &saddr, ntohs(tcp_hdr(skb)->source));
12425 + goto drop_and_release;
12428 + isn = tcp_v4_init_sequence(skb);
12430 + tcp_rsk(req)->snt_isn = isn;
12431 + tcp_rsk(req)->snt_synack = tcp_time_stamp;
12432 + tcp_rsk(req)->listener = NULL;
12434 + mtreq = mptcp_rsk(req);
12435 + mtreq->mpcb = mpcb;
12436 + INIT_LIST_HEAD(&mtreq->collide_tuple);
12437 + mtreq->mptcp_rem_nonce = mopt.mptcp_recv_nonce;
12438 + mtreq->mptcp_rem_key = mpcb->mptcp_rem_key;
12439 + mtreq->mptcp_loc_key = mpcb->mptcp_loc_key;
12440 + mtreq->mptcp_loc_nonce = mptcp_v4_get_nonce(saddr, daddr,
12441 + tcp_hdr(skb)->source,
12442 + tcp_hdr(skb)->dest, isn);
12443 + mptcp_hmac_sha1((u8 *)&mtreq->mptcp_loc_key,
12444 + (u8 *)&mtreq->mptcp_rem_key,
12445 + (u8 *)&mtreq->mptcp_loc_nonce,
12446 + (u8 *)&mtreq->mptcp_rem_nonce, (u32 *)mptcp_hash_mac);
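+	/* Only the leftmost 64 bits of the HMAC are sent in the SYN/ACK
+	 * (truncated MAC), so keep just those for the reply.
+	 */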
12447 + mtreq->mptcp_hash_tmac = *(u64 *)mptcp_hash_mac;
12449 + addr.ip = ireq->ir_loc_addr;
12450 + mtreq->loc_id = mpcb->pm_ops->get_local_id(AF_INET, &addr, sock_net(meta_sk));
12451 + if (mtreq->loc_id == -1) /* Address not part of the allowed ones */
12452 + goto drop_and_release;
12453 + mtreq->rem_id = mopt.rem_id;
12454 + mtreq->low_prio = mopt.low_prio;
12455 + tcp_rsk(req)->saw_mpc = 1;
12457 + if (tcp_v4_send_synack(meta_sk, dst, req, skb_get_queue_mapping(skb)))
12458 + goto drop_and_free;
12460 + /* Adding to request queue in metasocket */
12461 + mptcp_v4_reqsk_queue_hash_add(meta_sk, req, TCP_TIMEOUT_INIT);
12463 + return;
12465 +drop_and_release:
12466 + dst_release(dst);
12467 +drop_and_free:
12468 + reqsk_free(req);
12469 + return;
12472 +int mptcp_v4_rem_raddress(struct mptcp_cb *mpcb, u8 id)
12474 + int i;
12476 + for (i = 0; i < MPTCP_MAX_ADDR; i++) {
12477 + if (!((1 << i) & mpcb->rem4_bits))
12478 + continue;
12480 + if (mpcb->remaddr4[i].rem4_id == id) {
12481 + /* remove address from bitfield */
12482 + mpcb->rem4_bits &= ~(1 << i);
12484 + return 0;
12488 + return -1;
12491 +/* Based on function tcp_v4_conn_request (tcp_ipv4.c)
12492 + * Returns -1 if there is no space anymore to store an additional
12493 + * address
12494 + */
12495 +int mptcp_v4_add_raddress(struct mptcp_cb *mpcb, const struct in_addr *addr,
12496 + __be16 port, u8 id)
12498 + int i;
12499 + struct mptcp_rem4 *rem4;
12501 + mptcp_for_each_bit_set(mpcb->rem4_bits, i) {
12502 + rem4 = &mpcb->remaddr4[i];
12504 + /* Address is already in the list --- continue */
12505 + if (rem4->rem4_id == id &&
12506 + rem4->addr.s_addr == addr->s_addr && rem4->port == port)
12507 + return 0;
12509 + /* This may be the case when the peer is behind a NAT. It is
12510 + * trying to JOIN, thus sending the JOIN with a certain ID.
12511 + * However the src_addr of the IP-packet has been changed. We
12512 + * update the addr in the list, because this is the address as
12513 + * OUR BOX sees it.
12514 + */
12515 + if (rem4->rem4_id == id && rem4->addr.s_addr != addr->s_addr) {
12516 + /* update the address */
12517 + mptcp_debug("%s: updating old addr:%pI4 to addr %pI4 with id:%d\n",
12518 + __func__, &rem4->addr.s_addr,
12519 + &addr->s_addr, id);
12520 + rem4->addr.s_addr = addr->s_addr;
12521 + rem4->port = port;
12522 + mpcb->list_rcvd = 1;
12523 + return 0;
12527 + i = mptcp_find_free_index(mpcb->rem4_bits);
12528 + /* Do we already have the maximum number of local/remote addresses? */
12529 + if (i < 0) {
12530 + mptcp_debug("%s: At max num of remote addresses: %d --- not adding address: %pI4\n",
12531 + __func__, MPTCP_MAX_ADDR, &addr->s_addr);
12532 + return -1;
12535 + rem4 = &mpcb->remaddr4[i];
12537 + /* Address is not known yet, store it */
12538 + rem4->addr.s_addr = addr->s_addr;
12539 + rem4->port = port;
12540 + rem4->bitfield = 0;
12541 + rem4->retry_bitfield = 0;
12542 + rem4->rem4_id = id;
12543 + mpcb->list_rcvd = 1;
12544 + mpcb->rem4_bits |= (1 << i);
12546 + return 0;
12549 +/* Sets the bitfield of the remote-address field
12550 + * The local address is not set, as it will disappear with the global address-list.
12551 + */
12552 +void mptcp_v4_set_init_addr_bit(struct mptcp_cb *mpcb, __be32 daddr, int index)
12554 + int i;
12556 + mptcp_for_each_bit_set(mpcb->rem4_bits, i) {
12557 + if (mpcb->remaddr4[i].addr.s_addr == daddr) {
12558 + mpcb->remaddr4[i].bitfield |= (1 << index);
12559 + return;
12564 +/* We only process join requests here. (either the SYN or the final ACK) */
12565 +int mptcp_v4_do_rcv(struct sock *meta_sk, struct sk_buff *skb)
12567 + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
12568 + struct sock *child, *rsk = NULL;
12569 + int ret;
12571 + if (!(TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_JOIN)) {
12572 + struct tcphdr *th = tcp_hdr(skb);
12573 + const struct iphdr *iph = ip_hdr(skb);
12574 + struct sock *sk;
12576 + sk = inet_lookup_established(sock_net(meta_sk), &tcp_hashinfo,
12577 + iph->saddr, th->source, iph->daddr,
12578 + th->dest, inet_iif(skb));
12580 + if (!sk) {
12581 + kfree_skb(skb);
12582 + return 0;
12584 + if (is_meta_sk(sk)) {
12585 + WARN("%s Did not find a sub-sk - but found the meta!\n", __func__);
12586 + kfree_skb(skb);
12587 + sock_put(sk);
12588 + return 0;
12591 + if (sk->sk_state == TCP_TIME_WAIT) {
12592 + inet_twsk_put(inet_twsk(sk));
12593 + kfree_skb(skb);
12594 + return 0;
12597 + ret = tcp_v4_do_rcv(sk, skb);
12598 + sock_put(sk);
12600 + return ret;
12602 + TCP_SKB_CB(skb)->mptcp_flags = 0;
12604 + /* Has been removed from the tk-table. Thus, no new subflows.
12606 + * Check for close-state is necessary, because we may have been closed
12607 + * without passing by mptcp_close().
12609 + * When falling back, no new subflows are allowed either.
12610 + */
12611 + if (meta_sk->sk_state == TCP_CLOSE || !tcp_sk(meta_sk)->inside_tk_table ||
12612 + mpcb->infinite_mapping_rcv || mpcb->send_infinite_mapping)
12613 + goto reset_and_discard;
12615 + child = tcp_v4_hnd_req(meta_sk, skb);
12617 + if (!child)
12618 + goto discard;
12620 + if (child != meta_sk) {
12621 + sock_rps_save_rxhash(child, skb);
12622 + /* We don't call tcp_child_process here, because we already
12623 + * hold the meta-sk-lock and are sure that it is not owned
12624 + * by the user.
12625 + */
12626 + ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb), skb->len);
12627 + bh_unlock_sock(child);
12628 + sock_put(child);
12629 + if (ret) {
12630 + rsk = child;
12631 + goto reset_and_discard;
12633 + } else {
12634 + if (tcp_hdr(skb)->syn) {
12635 + struct mp_join *join_opt = mptcp_find_join(skb);
12636 + /* Currently we make two calls to mptcp_find_join(). This
12637 + * can probably be optimized.
12638 + */
12639 + if (mptcp_v4_add_raddress(mpcb,
12640 + (struct in_addr *)&ip_hdr(skb)->saddr,
12641 + 0,
12642 + join_opt->addr_id) < 0)
12643 + goto reset_and_discard;
12644 + mpcb->list_rcvd = 0;
12646 + mptcp_v4_join_request(meta_sk, skb);
12647 + goto discard;
12649 + goto reset_and_discard;
12651 + return 0;
12653 +reset_and_discard:
12654 + tcp_v4_send_reset(rsk, skb);
12655 +discard:
12656 + kfree_skb(skb);
12657 + return 0;
12660 +/* After this, the ref count of the meta_sk associated with the request_sock
12661 + * is incremented. Thus it is the responsibility of the caller
12662 + * to call sock_put() when the reference is not needed anymore.
12663 + */
12664 +struct sock *mptcp_v4_search_req(const __be16 rport, const __be32 raddr,
12665 + const __be32 laddr, const struct net *net)
12667 + struct mptcp_request_sock *mtreq;
12668 + struct sock *meta_sk = NULL;
12670 + spin_lock(&mptcp_reqsk_hlock);
12671 + list_for_each_entry(mtreq,
12672 + &mptcp_reqsk_htb[inet_synq_hash(raddr, rport, 0,
12673 + MPTCP_HASH_SIZE)],
12674 + collide_tuple) {
12675 + struct inet_request_sock *ireq = inet_rsk(rev_mptcp_rsk(mtreq));
12676 + meta_sk = mtreq->mpcb->meta_sk;
12678 + if (ireq->ir_rmt_port == rport &&
12679 + ireq->ir_rmt_addr == raddr &&
12680 + ireq->ir_loc_addr == laddr &&
12681 + rev_mptcp_rsk(mtreq)->rsk_ops->family == AF_INET &&
12682 + net_eq(net, sock_net(meta_sk)))
12683 + break;
12684 + meta_sk = NULL;
12687 + if (meta_sk && unlikely(!atomic_inc_not_zero(&meta_sk->sk_refcnt)))
12688 + meta_sk = NULL;
12689 + spin_unlock(&mptcp_reqsk_hlock);
12691 + return meta_sk;
12694 +/* Create a new IPv4 subflow.
12696 + * We are in user-context and the meta-sock-lock is held.
12697 + */
12698 +int mptcp_init4_subsockets(struct sock *meta_sk, const struct mptcp_loc4 *loc,
12699 + struct mptcp_rem4 *rem)
12701 + struct tcp_sock *tp;
12702 + struct sock *sk;
12703 + struct sockaddr_in loc_in, rem_in;
12704 + struct socket sock;
12705 + int ulid_size = 0, ret;
12707 + /** First, create and prepare the new socket */
12709 + sock.type = meta_sk->sk_socket->type;
12710 + sock.state = SS_UNCONNECTED;
12711 + sock.wq = meta_sk->sk_socket->wq;
12712 + sock.file = meta_sk->sk_socket->file;
12713 + sock.ops = NULL;
12715 + ret = inet_create(sock_net(meta_sk), &sock, IPPROTO_TCP, 1);
12716 + if (unlikely(ret < 0)) {
12717 + mptcp_debug("%s inet_create failed ret: %d\n", __func__, ret);
12718 + return ret;
12721 + sk = sock.sk;
12722 + tp = tcp_sk(sk);
12724 + /* All subsockets need the MPTCP-lock-class */
12725 + lockdep_set_class_and_name(&(sk)->sk_lock.slock, &meta_slock_key, "slock-AF_INET-MPTCP");
12726 + lockdep_init_map(&(sk)->sk_lock.dep_map, "sk_lock-AF_INET-MPTCP", &meta_key, 0);
12728 + if (mptcp_add_sock(meta_sk, sk, loc->loc4_id, rem->rem4_id, GFP_KERNEL))
12729 + goto error;
12731 + tp->mptcp->slave_sk = 1;
12732 + tp->mptcp->low_prio = loc->low_prio;
12734 + /* Initializing the timer for an MPTCP subflow */
12735 + setup_timer(&tp->mptcp->mptcp_ack_timer, mptcp_ack_handler, (unsigned long)sk);
12737 + /** Then, connect the socket to the peer */
12739 + ulid_size = sizeof(struct sockaddr_in);
12740 + loc_in.sin_family = AF_INET;
12741 + rem_in.sin_family = AF_INET;
12742 + loc_in.sin_port = 0;
12743 + if (rem->port)
12744 + rem_in.sin_port = rem->port;
12745 + else
12746 + rem_in.sin_port = inet_sk(meta_sk)->inet_dport;
12747 + loc_in.sin_addr = loc->addr;
12748 + rem_in.sin_addr = rem->addr;
12750 + ret = sock.ops->bind(&sock, (struct sockaddr *)&loc_in, ulid_size);
12751 + if (ret < 0) {
12752 + mptcp_debug("%s: MPTCP subsocket bind() failed, error %d\n",
12753 + __func__, ret);
12754 + goto error;
12757 + mptcp_debug("%s: token %#x pi %d src_addr:%pI4:%d dst_addr:%pI4:%d\n",
12758 + __func__, tcp_sk(meta_sk)->mpcb->mptcp_loc_token,
12759 + tp->mptcp->path_index, &loc_in.sin_addr,
12760 + ntohs(loc_in.sin_port), &rem_in.sin_addr,
12761 + ntohs(rem_in.sin_port));
12763 + ret = sock.ops->connect(&sock, (struct sockaddr *)&rem_in,
12764 + ulid_size, O_NONBLOCK);
12765 + if (ret < 0 && ret != -EINPROGRESS) {
12766 + mptcp_debug("%s: MPTCP subsocket connect() failed, error %d\n",
12767 + __func__, ret);
12768 + goto error;
12771 + sk_set_socket(sk, meta_sk->sk_socket);
12772 + sk->sk_wq = meta_sk->sk_wq;
12774 + return 0;
12776 +error:
12777 + /* May happen if mptcp_add_sock fails first */
12778 + if (!tp->mpc) {
12779 + tcp_close(sk, 0);
12780 + } else {
12781 + local_bh_disable();
12782 + mptcp_sub_force_close(sk);
12783 + local_bh_enable();
12785 + return ret;
12787 +EXPORT_SYMBOL(mptcp_init4_subsockets);
12789 +/* General initialization of IPv4 for MPTCP */
12790 +int mptcp_pm_v4_init(void)
12792 + int ret = 0;
12793 + struct request_sock_ops *ops = &mptcp_request_sock_ops;
12795 + ops->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", "MPTCP");
12796 + if (ops->slab_name == NULL) {
12797 + ret = -ENOMEM;
12798 + goto out;
12801 + ops->slab = kmem_cache_create(ops->slab_name, ops->obj_size, 0,
12802 + SLAB_DESTROY_BY_RCU|SLAB_HWCACHE_ALIGN,
12803 + NULL);
12805 + if (ops->slab == NULL) {
12806 + ret = -ENOMEM;
12807 + goto err_reqsk_create;
12810 +out:
12811 + return ret;
12813 +err_reqsk_create:
12814 + kfree(ops->slab_name);
12815 + ops->slab_name = NULL;
12816 + goto out;
12819 +void mptcp_pm_v4_undo(void)
12821 + kmem_cache_destroy(mptcp_request_sock_ops.slab);
12822 + kfree(mptcp_request_sock_ops.slab_name);
12826 diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_ipv6.c linux-3.14.45/net/mptcp/mptcp_ipv6.c
12827 --- linux-3.14.45.orig/net/mptcp/mptcp_ipv6.c 1970-01-01 01:00:00.000000000 +0100
12828 +++ linux-3.14.45/net/mptcp/mptcp_ipv6.c 2015-06-24 14:15:48.931862523 +0200
12829 @@ -0,0 +1,822 @@
12831 + * MPTCP implementation - IPv6-specific functions
12833 + * Initial Design & Implementation:
12834 + * Sébastien Barré <sebastien.barre@uclouvain.be>
12836 + * Current Maintainer:
12837 + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
12839 + * Additional authors:
12840 + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
12841 + * Gregory Detal <gregory.detal@uclouvain.be>
12842 + * Fabien Duchêne <fabien.duchene@uclouvain.be>
12843 + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
12844 + * Lavkesh Lahngir <lavkesh51@gmail.com>
12845 + * Andreas Ripke <ripke@neclab.eu>
12846 + * Vlad Dogaru <vlad.dogaru@intel.com>
12847 + * Octavian Purdila <octavian.purdila@intel.com>
12848 + * John Ronan <jronan@tssg.org>
12849 + * Catalin Nicutar <catalin.nicutar@gmail.com>
12850 + * Brandon Heller <brandonh@stanford.edu>
12853 + * This program is free software; you can redistribute it and/or
12854 + * modify it under the terms of the GNU General Public License
12855 + * as published by the Free Software Foundation; either version
12856 + * 2 of the License, or (at your option) any later version.
12857 + */
12859 +#include <linux/export.h>
12860 +#include <linux/in6.h>
12861 +#include <linux/kernel.h>
12863 +#include <net/addrconf.h>
12864 +#include <net/flow.h>
12865 +#include <net/inet6_connection_sock.h>
12866 +#include <net/inet6_hashtables.h>
12867 +#include <net/inet_common.h>
12868 +#include <net/ipv6.h>
12869 +#include <net/ip6_checksum.h>
12870 +#include <net/ip6_route.h>
12871 +#include <net/mptcp.h>
12872 +#include <net/mptcp_v6.h>
12873 +#include <net/tcp.h>
12874 +#include <net/transp_v6.h>
12876 +static int mptcp_v6v4_send_synack(struct sock *meta_sk, struct request_sock *req,
12877 + u16 queue_mapping);
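+/* Derive the local nonce used in the MP_JOIN handshake for an IPv6 subflow:
+ * the addresses, ports and ISN are mixed with the local mptcp_secret via
+ * md5_transform().
+ */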
12879 +__u32 mptcp_v6_get_nonce(const __be32 *saddr, const __be32 *daddr,
12880 + __be16 sport, __be16 dport, u32 seq)
12882 + u32 secret[MD5_MESSAGE_BYTES / 4];
12883 + u32 hash[MD5_DIGEST_WORDS];
12884 + u32 i;
12886 + memcpy(hash, saddr, 16);
12887 + for (i = 0; i < 4; i++)
12888 + secret[i] = mptcp_secret[i] + (__force u32)daddr[i];
12889 + secret[4] = mptcp_secret[4] +
12890 + (((__force u16)sport << 16) + (__force u16)dport);
12891 + secret[5] = seq;
12892 + for (i = 6; i < MD5_MESSAGE_BYTES / 4; i++)
12893 + secret[i] = mptcp_secret[i];
12895 + md5_transform(hash, secret);
12897 + return hash[0];
12900 +u64 mptcp_v6_get_key(const __be32 *saddr, const __be32 *daddr,
12901 + __be16 sport, __be16 dport)
12903 + u32 secret[MD5_MESSAGE_BYTES / 4];
12904 + u32 hash[MD5_DIGEST_WORDS];
12905 + u32 i;
12907 + memcpy(hash, saddr, 16);
12908 + for (i = 0; i < 4; i++)
12909 + secret[i] = mptcp_secret[i] + (__force u32)daddr[i];
12910 + secret[4] = mptcp_secret[4] +
12911 + (((__force u16)sport << 16) + (__force u16)dport);
12912 + secret[5] = mptcp_key_seed++;
12913 + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
12914 + secret[i] = mptcp_secret[i];
12916 + md5_transform(hash, secret);
12918 + return *((u64 *)hash);
12921 +static void mptcp_v6_reqsk_destructor(struct request_sock *req)
12923 + mptcp_reqsk_destructor(req);
12925 + tcp_v6_reqsk_destructor(req);
12928 +/* Similar to tcp_v6_rtx_synack */
12929 +static int mptcp_v6_rtx_synack(struct sock *meta_sk, struct request_sock *req)
12931 + if (meta_sk->sk_family == AF_INET6)
12932 + return tcp_v6_rtx_synack(meta_sk, req);
12934 + TCP_INC_STATS_BH(sock_net(meta_sk), TCP_MIB_RETRANSSEGS);
12935 + return mptcp_v6v4_send_synack(meta_sk, req, 0);
12938 +/* Similar to tcp6_request_sock_ops */
12939 +struct request_sock_ops mptcp6_request_sock_ops __read_mostly = {
12940 + .family = AF_INET6,
12941 + .obj_size = sizeof(struct mptcp_request_sock),
12942 + .rtx_syn_ack = mptcp_v6_rtx_synack,
12943 + .send_ack = tcp_v6_reqsk_send_ack,
12944 + .destructor = mptcp_v6_reqsk_destructor,
12945 + .send_reset = tcp_v6_send_reset,
12946 + .syn_ack_timeout = tcp_syn_ack_timeout,
12949 +static void mptcp_v6_reqsk_queue_hash_add(struct sock *meta_sk,
12950 + struct request_sock *req,
12951 + unsigned long timeout)
12953 + const u32 h1 = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr,
12954 + inet_rsk(req)->ir_rmt_port,
12955 + 0, MPTCP_HASH_SIZE);
12956 + /* We cannot call inet6_csk_reqsk_queue_hash_add(), because we do not
12957 + * want to reset the keepalive-timer (responsible for retransmitting
12958 + * SYN/ACKs). We do not retransmit SYN/ACKs+MP_JOINs, because we cannot
12959 + * overload the keepalive timer. Also, it's not a big deal, because the
12960 + * third ACK of the MP_JOIN-handshake is sent in a reliable manner. So,
12961 + * if the third ACK gets lost, the client will handle the retransmission
12962 + * anyways. If our SYN/ACK gets lost, the client will retransmit the
12963 + * SYN.
12964 + */
12965 + struct inet_connection_sock *meta_icsk = inet_csk(meta_sk);
12966 + struct listen_sock *lopt = meta_icsk->icsk_accept_queue.listen_opt;
12967 + const u32 h2 = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr,
12968 + inet_rsk(req)->ir_rmt_port,
12969 + lopt->hash_rnd, lopt->nr_table_entries);
12971 + reqsk_queue_hash_req(&meta_icsk->icsk_accept_queue, h2, req, timeout);
12972 + reqsk_queue_added(&meta_icsk->icsk_accept_queue);
12974 + spin_lock(&mptcp_reqsk_hlock);
12975 + list_add(&mptcp_rsk(req)->collide_tuple, &mptcp_reqsk_htb[h1]);
12976 + spin_unlock(&mptcp_reqsk_hlock);
12979 +/* Similar to tcp_v6_send_synack
12981 + * The meta-socket is IPv4, but a new subsocket is IPv6
12982 + */
12983 +static int mptcp_v6v4_send_synack(struct sock *meta_sk, struct request_sock *req,
12984 + u16 queue_mapping)
12986 + struct inet_request_sock *treq = inet_rsk(req);
12987 + struct sk_buff *skb;
12988 + struct flowi6 fl6;
12989 + struct dst_entry *dst;
12990 + int err = -ENOMEM;
12992 + memset(&fl6, 0, sizeof(fl6));
12993 + fl6.flowi6_proto = IPPROTO_TCP;
12994 + fl6.daddr = treq->ir_v6_rmt_addr;
12995 + fl6.saddr = treq->ir_v6_loc_addr;
12996 + fl6.flowlabel = 0;
12997 + fl6.flowi6_oif = treq->ir_iif;
12998 + fl6.flowi6_mark = meta_sk->sk_mark;
12999 + fl6.fl6_dport = inet_rsk(req)->ir_rmt_port;
13000 + fl6.fl6_sport = htons(inet_rsk(req)->ir_num);
13001 + security_req_classify_flow(req, flowi6_to_flowi(&fl6));
13003 + dst = ip6_dst_lookup_flow(meta_sk, &fl6, NULL);
13004 + if (IS_ERR(dst)) {
13005 + err = PTR_ERR(dst);
13006 + return err;
13008 + skb = tcp_make_synack(meta_sk, dst, req, NULL);
13010 + if (skb) {
13011 + __tcp_v6_send_check(skb, &treq->ir_v6_loc_addr,
13012 + &treq->ir_v6_rmt_addr);
13014 + fl6.daddr = treq->ir_v6_rmt_addr;
13015 + skb_set_queue_mapping(skb, queue_mapping);
13016 + err = ip6_xmit(meta_sk, skb, &fl6, NULL, 0);
13017 + err = net_xmit_eval(err);
13020 + return err;
13023 +/* Similar to tcp_v6_syn_recv_sock
13025 + * The meta-socket is IPv4, but a new subsocket is IPv6
13026 + */
13027 +struct sock *mptcp_v6v4_syn_recv_sock(struct sock *meta_sk, struct sk_buff *skb,
13028 + struct request_sock *req,
13029 + struct dst_entry *dst)
13031 + struct inet_request_sock *treq;
13032 + struct ipv6_pinfo *newnp;
13033 + struct tcp6_sock *newtcp6sk;
13034 + struct inet_sock *newinet;
13035 + struct tcp_sock *newtp;
13036 + struct sock *newsk;
13038 + treq = inet_rsk(req);
13040 + if (sk_acceptq_is_full(meta_sk))
13041 + goto out_overflow;
13043 + if (!dst) {
13044 + /* This code is similar to inet6_csk_route_req, but as we
13045 + * don't have a np-pointer in the meta, we have to do it
13046 + * manually.
13047 + */
13048 + struct flowi6 fl6;
13050 + memset(&fl6, 0, sizeof(fl6));
13051 + fl6.flowi6_proto = IPPROTO_TCP;
13052 + fl6.daddr = treq->ir_v6_rmt_addr;
13053 + fl6.saddr = treq->ir_v6_loc_addr;
13054 + fl6.flowi6_oif = treq->ir_iif;
13055 + fl6.flowi6_mark = meta_sk->sk_mark;
13056 + fl6.fl6_dport = inet_rsk(req)->ir_rmt_port;
13057 + fl6.fl6_sport = htons(inet_rsk(req)->ir_num);
13058 + security_req_classify_flow(req, flowi6_to_flowi(&fl6));
13060 + dst = ip6_dst_lookup_flow(meta_sk, &fl6, NULL);
13061 + if (IS_ERR(dst))
13062 + goto out;
13065 + newsk = tcp_create_openreq_child(meta_sk, req, skb);
13066 + if (newsk == NULL)
13067 + goto out_nonewsk;
13069 + /* Diff to tcp_v6_syn_recv_sock: Must do this prior to __ip6_dst_store,
13070 + * as it tries to access the pinet6-pointer.
13071 + */
13072 + newtcp6sk = (struct tcp6_sock *)newsk;
13073 + inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
13075 + /*
13076 + * No need to charge this sock to the relevant IPv6 refcnt debug socks
13077 + * count here, tcp_create_openreq_child now does this for us, see the
13078 + * comment in that function for the gory details. -acme
13079 + */
13081 + newsk->sk_gso_type = SKB_GSO_TCPV6;
13082 + __ip6_dst_store(newsk, dst, NULL, NULL);
13083 + inet6_sk_rx_dst_set(newsk, skb);
13085 + newtp = tcp_sk(newsk);
13086 + newinet = inet_sk(newsk);
13087 + newnp = inet6_sk(newsk);
13089 + newsk->sk_v6_daddr = treq->ir_v6_rmt_addr;
13090 + newnp->saddr = treq->ir_v6_loc_addr;
13091 + newsk->sk_v6_rcv_saddr = treq->ir_v6_loc_addr;
13092 + newsk->sk_bound_dev_if = treq->ir_iif;
13094 + /* Now IPv6 options...
13096 + First: no IPv4 options.
13097 + */
13098 + newinet->inet_opt = NULL;
13099 + newnp->ipv6_ac_list = NULL;
13100 + newnp->ipv6_fl_list = NULL;
13101 + newnp->rxopt.all = 0;
13103 + /* Clone pktoptions received with SYN */
13104 + newnp->pktoptions = NULL;
13105 + if (treq->pktopts != NULL) {
13106 + newnp->pktoptions = skb_clone(treq->pktopts,
13107 + sk_gfp_atomic(meta_sk, GFP_ATOMIC));
13108 + consume_skb(treq->pktopts);
13109 + treq->pktopts = NULL;
13110 + if (newnp->pktoptions)
13111 + skb_set_owner_r(newnp->pktoptions, newsk);
13113 + newnp->opt = NULL;
13114 + newnp->mcast_oif = inet6_iif(skb);
13115 + newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
13116 + newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
13118 + /* Initialization copied from inet6_create - normally this should have
13119 + * been handled by the memcpy as in tcp_v6_syn_recv_sock
13120 + */
13121 + newnp->hop_limit = -1;
13122 + newnp->mc_loop = 1;
13123 + newnp->pmtudisc = IPV6_PMTUDISC_WANT;
13124 + (void)xchg(&newnp->rxpmtu, NULL);
13126 + inet_csk(newsk)->icsk_ext_hdr_len = 0;
13128 + tcp_mtup_init(newsk);
13129 + tcp_sync_mss(newsk, dst_mtu(dst));
13130 + newtp->advmss = dst_metric_advmss(dst);
13131 + if (tcp_sk(meta_sk)->rx_opt.user_mss &&
13132 + tcp_sk(meta_sk)->rx_opt.user_mss < newtp->advmss)
13133 + newtp->advmss = tcp_sk(meta_sk)->rx_opt.user_mss;
13135 + tcp_initialize_rcv_mss(newsk);
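+ /* This subflow is IPv6-only; mark the IPv4 address fields with the
+ * LOOPBACK4_IPV6 placeholder, as tcp_v6_syn_recv_sock does.
+ */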
13137 + newinet->inet_daddr = LOOPBACK4_IPV6;
13138 + newinet->inet_saddr = LOOPBACK4_IPV6;
13139 + newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
13141 + if (__inet_inherit_port(meta_sk, newsk) < 0) {
13142 + inet_csk_prepare_forced_close(newsk);
13143 + tcp_done(newsk);
13144 + goto out;
13146 + __inet6_hash(newsk, NULL);
13148 + return newsk;
13150 +out_overflow:
13151 + NET_INC_STATS_BH(sock_net(meta_sk), LINUX_MIB_LISTENOVERFLOWS);
13152 +out_nonewsk:
13153 + dst_release(dst);
13154 +out:
13155 + NET_INC_STATS_BH(sock_net(meta_sk), LINUX_MIB_LISTENDROPS);
13156 + return NULL;
13159 +/* Similar to tcp_v6_conn_request */
13160 +static void mptcp_v6_join_request(struct sock *meta_sk, struct sk_buff *skb)
13162 + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
13163 + struct tcp_options_received tmp_opt;
13164 + struct mptcp_options_received mopt;
13165 + struct ipv6_pinfo *np = inet6_sk(meta_sk);
13166 + struct request_sock *req;
13167 + struct inet_request_sock *treq;
13168 + struct mptcp_request_sock *mtreq;
13169 + u8 mptcp_hash_mac[20];
13170 + __u32 isn = TCP_SKB_CB(skb)->when;
13171 + struct dst_entry *dst = NULL;
13172 + struct flowi6 fl6;
13173 + int want_cookie = 0;
13174 + union inet_addr addr;
13176 + tcp_clear_options(&tmp_opt);
13177 + mptcp_init_mp_opt(&mopt);
13178 + tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
13179 + tmp_opt.user_mss = tcp_sk(meta_sk)->rx_opt.user_mss;
13180 + tcp_parse_options(skb, &tmp_opt, &mopt, 0, NULL);
13182 + req = inet6_reqsk_alloc(&mptcp6_request_sock_ops);
13183 + if (!req)
13184 + return;
13186 +#ifdef CONFIG_TCP_MD5SIG
13187 + tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
13188 +#endif
13190 + tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
13191 + tcp_openreq_init(req, &tmp_opt, skb);
13193 + treq = inet_rsk(req);
13194 + treq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
13195 + treq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
13197 + if (!want_cookie || tmp_opt.tstamp_ok)
13198 + TCP_ECN_create_request(req, skb, sock_net(meta_sk));
13200 + treq->ir_iif = meta_sk->sk_bound_dev_if;
13202 + /* So that link locals have meaning */
13203 + if (!meta_sk->sk_bound_dev_if &&
13204 + ipv6_addr_type(&treq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
13205 + treq->ir_iif = inet6_iif(skb);
13207 + if (!isn) {
13208 + if (meta_sk->sk_family == AF_INET6 &&
13209 + (ipv6_opt_accepted(meta_sk, skb) ||
13210 + np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
13211 + np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)) {
13212 + atomic_inc(&skb->users);
13213 + treq->pktopts = skb;
13216 + /* VJ's idea. We save last timestamp seen
13217 + * from the destination in peer table, when entering
13218 + * state TIME-WAIT, and check against it before
13219 + * accepting new connection request.
13221 + * If "isn" is not zero, this request hit alive
13222 + * timewait bucket, so that all the necessary checks
13223 + * are made in the function processing timewait state.
13224 + */
13225 + if (tmp_opt.saw_tstamp &&
13226 + tcp_death_row.sysctl_tw_recycle &&
13227 + (dst = inet6_csk_route_req(meta_sk, &fl6, req)) != NULL) {
13228 + if (!tcp_peer_is_proven(req, dst, true)) {
13229 + NET_INC_STATS_BH(sock_net(meta_sk), LINUX_MIB_PAWSPASSIVEREJECTED);
13230 + goto drop_and_release;
13233 + /* Kill the following clause, if you dislike this way. */
13234 + else if (!sysctl_tcp_syncookies &&
13235 + (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(meta_sk) <
13236 + (sysctl_max_syn_backlog >> 2)) &&
13237 + !tcp_peer_is_proven(req, dst, false)) {
13238 + /* Without syncookies last quarter of
13239 + * backlog is filled with destinations,
13240 + * proven to be alive.
13241 + * It means that we continue to communicate
13242 + * to destinations, already remembered
13243 + * to the moment of synflood.
13244 + */
13245 + LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
13246 + &treq->ir_v6_rmt_addr,
13247 + ntohs(tcp_hdr(skb)->source));
13248 + goto drop_and_release;
13251 + isn = tcp_v6_init_sequence(skb);
13254 + tcp_rsk(req)->snt_isn = isn;
13255 + tcp_rsk(req)->snt_synack = tcp_time_stamp;
13256 + tcp_rsk(req)->listener = NULL;
13258 + mtreq = mptcp_rsk(req);
13259 + mtreq->mpcb = mpcb;
13260 + INIT_LIST_HEAD(&mtreq->collide_tuple);
13261 + mtreq->mptcp_rem_nonce = mopt.mptcp_recv_nonce;
13262 + mtreq->mptcp_rem_key = mpcb->mptcp_rem_key;
13263 + mtreq->mptcp_loc_key = mpcb->mptcp_loc_key;
13264 + mtreq->mptcp_loc_nonce = mptcp_v6_get_nonce(ipv6_hdr(skb)->daddr.s6_addr32,
13265 + ipv6_hdr(skb)->saddr.s6_addr32,
13266 + tcp_hdr(skb)->dest,
13267 + tcp_hdr(skb)->source, isn);
13268 + mptcp_hmac_sha1((u8 *)&mtreq->mptcp_loc_key,
13269 + (u8 *)&mtreq->mptcp_rem_key,
13270 + (u8 *)&mtreq->mptcp_loc_nonce,
13271 + (u8 *)&mtreq->mptcp_rem_nonce, (u32 *)mptcp_hash_mac);
13272 + mtreq->mptcp_hash_tmac = *(u64 *)mptcp_hash_mac;
13274 + addr.in6 = treq->ir_v6_loc_addr;
13275 + mtreq->loc_id = mpcb->pm_ops->get_local_id(AF_INET6, &addr, sock_net(meta_sk));
13276 + if (mtreq->loc_id == -1) /* Address not part of the allowed ones */
13277 + goto drop_and_release;
13278 + mtreq->rem_id = mopt.rem_id;
13279 + mtreq->low_prio = mopt.low_prio;
13280 + tcp_rsk(req)->saw_mpc = 1;
13282 + if (meta_sk->sk_family == AF_INET6) {
13283 + if (tcp_v6_send_synack(meta_sk, dst, &fl6, req,
13284 + skb_get_queue_mapping(skb)))
13285 + goto drop_and_free;
13286 + } else {
13287 + if (mptcp_v6v4_send_synack(meta_sk, req, skb_get_queue_mapping(skb)))
13288 + goto drop_and_free;
13291 + /* Adding to request queue in metasocket */
13292 + mptcp_v6_reqsk_queue_hash_add(meta_sk, req, TCP_TIMEOUT_INIT);
13294 + return;
13296 +drop_and_release:
13297 + dst_release(dst);
13298 +drop_and_free:
13299 + reqsk_free(req);
13300 + return;
13303 +int mptcp_v6_rem_raddress(struct mptcp_cb *mpcb, u8 id)
13305 + int i;
13307 + for (i = 0; i < MPTCP_MAX_ADDR; i++) {
13308 + if (!((1 << i) & mpcb->rem6_bits))
13309 + continue;
13311 + if (mpcb->remaddr6[i].rem6_id == id) {
13312 + /* remove address from bitfield */
13313 + mpcb->rem6_bits &= ~(1 << i);
13315 + return 0;
13319 + return -1;
13322 +/* Returns -1 if there is no more space to store an additional
13323 + * address
13324 + */
13325 +int mptcp_v6_add_raddress(struct mptcp_cb *mpcb, const struct in6_addr *addr,
13326 + __be16 port, u8 id)
13328 + int i;
13329 + struct mptcp_rem6 *rem6;
13331 + mptcp_for_each_bit_set(mpcb->rem6_bits, i) {
13332 + rem6 = &mpcb->remaddr6[i];
13334 + /* Address is already in the list --- continue */
13335 + if (rem6->rem6_id == id &&
13336 + ipv6_addr_equal(&rem6->addr, addr) && rem6->port == port)
13337 + return 0;
13339 + /* This may be the case when the peer is behind a NAT. It is
13340 + * trying to JOIN, and thus sends the JOIN with a certain ID.
13341 + * However, the src_addr of the IP packet has been changed. We
13342 + * update the addr in the list, because this is the address as
13343 + * our box sees it.
13344 + */
13345 + if (rem6->rem6_id == id) {
13346 + /* update the address */
13347 + mptcp_debug("%s: updating old addr: %pI6 to addr %pI6 with id:%d\n",
13348 + __func__, &rem6->addr, addr, id);
13349 + rem6->addr = *addr;
13350 + rem6->port = port;
13351 + mpcb->list_rcvd = 1;
13352 + return 0;
13356 + i = mptcp_find_free_index(mpcb->rem6_bits);
13357 + /* Do we already have the maximum number of local/remote addresses? */
13358 + if (i < 0) {
13359 + mptcp_debug("%s: At max num of remote addresses: %d --- not adding address: %pI6\n",
13360 + __func__, MPTCP_MAX_ADDR, addr);
13361 + return -1;
13364 + rem6 = &mpcb->remaddr6[i];
13366 + /* Address is not known yet, store it */
13367 + rem6->addr = *addr;
13368 + rem6->port = port;
13369 + rem6->bitfield = 0;
13370 + rem6->retry_bitfield = 0;
13371 + rem6->rem6_id = id;
13372 + mpcb->list_rcvd = 1;
13373 + mpcb->rem6_bits |= (1 << i);
13375 + return 0;
13378 +/* Sets the bitfield of the remote-address field
13379 + * local address is not set as it will disappear with the global address-list
13380 + */
13381 +void mptcp_v6_set_init_addr_bit(struct mptcp_cb *mpcb,
13382 + const struct in6_addr *daddr, int index)
13384 + int i;
13385 + mptcp_for_each_bit_set(mpcb->rem6_bits, i) {
13386 + if (ipv6_addr_equal(&mpcb->remaddr6[i].addr, daddr)) {
13387 + mpcb->remaddr6[i].bitfield |= (1 << index);
13388 + return;
13393 +int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb)
13395 + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
13396 + struct sock *child, *rsk = NULL;
13397 + int ret;
13399 + if (!(TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_JOIN)) {
13400 + struct tcphdr *th = tcp_hdr(skb);
13401 + const struct ipv6hdr *ip6h = ipv6_hdr(skb);
13402 + struct sock *sk;
13404 + sk = __inet6_lookup_established(sock_net(meta_sk),
13405 + &tcp_hashinfo,
13406 + &ip6h->saddr, th->source,
13407 + &ip6h->daddr, ntohs(th->dest),
13408 + inet6_iif(skb));
13410 + if (!sk) {
13411 + kfree_skb(skb);
13412 + return 0;
13414 + if (is_meta_sk(sk)) {
13415 + WARN("%s Did not find a sub-sk!\n", __func__);
13416 + kfree_skb(skb);
13417 + sock_put(sk);
13418 + return 0;
13421 + if (sk->sk_state == TCP_TIME_WAIT) {
13422 + inet_twsk_put(inet_twsk(sk));
13423 + kfree_skb(skb);
13424 + return 0;
13427 + ret = tcp_v6_do_rcv(sk, skb);
13428 + sock_put(sk);
13430 + return ret;
13432 + TCP_SKB_CB(skb)->mptcp_flags = 0;
13434 + /* Has been removed from the tk-table. Thus, no new subflows.
13436 + * Check for close-state is necessary, because we may have been closed
13437 + * without passing by mptcp_close().
13439 + * When falling back, no new subflows are allowed either.
13440 + */
13441 + if (meta_sk->sk_state == TCP_CLOSE || !tcp_sk(meta_sk)->inside_tk_table ||
13442 + mpcb->infinite_mapping_rcv || mpcb->send_infinite_mapping)
13443 + goto reset_and_discard;
13445 + child = tcp_v6_hnd_req(meta_sk, skb);
13447 + if (!child)
13448 + goto discard;
13450 + if (child != meta_sk) {
13451 + sock_rps_save_rxhash(child, skb);
13452 + /* We don't call tcp_child_process here, because we already
13453 + * hold the meta-sk-lock and are sure that it is not owned
13454 + * by the user.
13455 + */
13456 + ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb), skb->len);
13457 + bh_unlock_sock(child);
13458 + sock_put(child);
13459 + if (ret) {
13460 + rsk = child;
13461 + goto reset_and_discard;
13463 + } else {
13464 + if (tcp_hdr(skb)->syn) {
13465 + struct mp_join *join_opt = mptcp_find_join(skb);
13466 + /* Currently we make two calls to mptcp_find_join(). This
13467 + * can probably be optimized. */
13468 + if (mptcp_v6_add_raddress(mpcb,
13469 + (struct in6_addr *)&ipv6_hdr(skb)->saddr,
13470 + 0,
13471 + join_opt->addr_id) < 0)
13472 + goto reset_and_discard;
13473 + mpcb->list_rcvd = 0;
13475 + mptcp_v6_join_request(meta_sk, skb);
13476 + goto discard;
13478 + goto reset_and_discard;
13480 + return 0;
13482 +reset_and_discard:
13483 + tcp_v6_send_reset(rsk, skb);
13484 +discard:
13485 + kfree_skb(skb);
13486 + return 0;
13489 +/* After this, the ref count of the meta_sk associated with the request_sock
13490 + * is incremented. Thus it is the responsibility of the caller
13491 + * to call sock_put() when the reference is not needed anymore.
13492 + */
13493 +struct sock *mptcp_v6_search_req(const __be16 rport, const struct in6_addr *raddr,
13494 + const struct in6_addr *laddr, const struct net *net)
13496 + struct mptcp_request_sock *mtreq;
13497 + struct sock *meta_sk = NULL;
13499 + spin_lock(&mptcp_reqsk_hlock);
13500 + list_for_each_entry(mtreq,
13501 + &mptcp_reqsk_htb[inet6_synq_hash(raddr, rport, 0,
13502 + MPTCP_HASH_SIZE)],
13503 + collide_tuple) {
13504 + struct inet_request_sock *treq = inet_rsk(rev_mptcp_rsk(mtreq));
13505 + meta_sk = mtreq->mpcb->meta_sk;
13507 + if (inet_rsk(rev_mptcp_rsk(mtreq))->ir_rmt_port == rport &&
13508 + rev_mptcp_rsk(mtreq)->rsk_ops->family == AF_INET6 &&
13509 + ipv6_addr_equal(&treq->ir_v6_rmt_addr, raddr) &&
13510 + ipv6_addr_equal(&treq->ir_v6_loc_addr, laddr) &&
13511 + net_eq(net, sock_net(meta_sk)))
13512 + break;
13513 + meta_sk = NULL;
13516 + if (meta_sk && unlikely(!atomic_inc_not_zero(&meta_sk->sk_refcnt)))
13517 + meta_sk = NULL;
13518 + spin_unlock(&mptcp_reqsk_hlock);
13520 + return meta_sk;
13523 +/* Create a new IPv6 subflow.
13525 + * We are in user-context and the meta-sock-lock is held.
13526 + */
13527 +int mptcp_init6_subsockets(struct sock *meta_sk, const struct mptcp_loc6 *loc,
13528 + struct mptcp_rem6 *rem)
13530 + struct tcp_sock *tp;
13531 + struct sock *sk;
13532 + struct sockaddr_in6 loc_in, rem_in;
13533 + struct socket sock;
13534 + int ulid_size = 0, ret;
13536 + /** First, create and prepare the new socket */
13538 + sock.type = meta_sk->sk_socket->type;
13539 + sock.state = SS_UNCONNECTED;
13540 + sock.wq = meta_sk->sk_socket->wq;
13541 + sock.file = meta_sk->sk_socket->file;
13542 + sock.ops = NULL;
13544 + ret = inet6_create(sock_net(meta_sk), &sock, IPPROTO_TCP, 1);
13545 + if (unlikely(ret < 0)) {
13546 + mptcp_debug("%s inet6_create failed ret: %d\n", __func__, ret);
13547 + return ret;
13550 + sk = sock.sk;
13551 + tp = tcp_sk(sk);
13553 + /* All subsockets need the MPTCP-lock-class */
13554 + lockdep_set_class_and_name(&(sk)->sk_lock.slock, &meta_slock_key, "slock-AF_INET-MPTCP");
13555 + lockdep_init_map(&(sk)->sk_lock.dep_map, "sk_lock-AF_INET-MPTCP", &meta_key, 0);
13557 + if (mptcp_add_sock(meta_sk, sk, loc->loc6_id, rem->rem6_id, GFP_KERNEL))
13558 + goto error;
13560 + tp->mptcp->slave_sk = 1;
13561 + tp->mptcp->low_prio = loc->low_prio;
13563 + /* Initializing the timer for an MPTCP subflow */
13564 + setup_timer(&tp->mptcp->mptcp_ack_timer, mptcp_ack_handler, (unsigned long)sk);
13566 + /** Then, connect the socket to the peer */
13568 + ulid_size = sizeof(struct sockaddr_in6);
13569 + loc_in.sin6_family = AF_INET6;
13570 + rem_in.sin6_family = AF_INET6;
13571 + loc_in.sin6_port = 0;
13572 + if (rem->port)
13573 + rem_in.sin6_port = rem->port;
13574 + else
13575 + rem_in.sin6_port = inet_sk(meta_sk)->inet_dport;
13576 + loc_in.sin6_addr = loc->addr;
13577 + rem_in.sin6_addr = rem->addr;
13579 + ret = sock.ops->bind(&sock, (struct sockaddr *)&loc_in, ulid_size);
13580 + if (ret < 0) {
13581 + mptcp_debug("%s: MPTCP subsocket bind() failed, error %d\n",
13582 + __func__, ret);
13583 + goto error;
13586 + mptcp_debug("%s: token %#x pi %d src_addr:%pI6:%d dst_addr:%pI6:%d\n",
13587 + __func__, tcp_sk(meta_sk)->mpcb->mptcp_loc_token,
13588 + tp->mptcp->path_index, &loc_in.sin6_addr,
13589 + ntohs(loc_in.sin6_port), &rem_in.sin6_addr,
13590 + ntohs(rem_in.sin6_port));
13592 + ret = sock.ops->connect(&sock, (struct sockaddr *)&rem_in,
13593 + ulid_size, O_NONBLOCK);
13594 + if (ret < 0 && ret != -EINPROGRESS) {
13595 + mptcp_debug("%s: MPTCP subsocket connect() failed, error %d\n",
13596 + __func__, ret);
13597 + goto error;
13600 + sk_set_socket(sk, meta_sk->sk_socket);
13601 + sk->sk_wq = meta_sk->sk_wq;
13603 + return 0;
13605 +error:
13606 + /* May happen if mptcp_add_sock fails first */
13607 + if (!tp->mpc) {
13608 + tcp_close(sk, 0);
13609 + } else {
13610 + local_bh_disable();
13611 + mptcp_sub_force_close(sk);
13612 + local_bh_enable();
13614 + return ret;
13616 +EXPORT_SYMBOL(mptcp_init6_subsockets);
13618 +int mptcp_pm_v6_init(void)
13620 + int ret = 0;
13621 + struct request_sock_ops *ops = &mptcp6_request_sock_ops;
13623 + ops->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", "MPTCP6");
13624 + if (ops->slab_name == NULL) {
13625 + ret = -ENOMEM;
13626 + goto out;
13629 + ops->slab = kmem_cache_create(ops->slab_name, ops->obj_size, 0,
13630 + SLAB_DESTROY_BY_RCU|SLAB_HWCACHE_ALIGN,
13631 + NULL);
13633 + if (ops->slab == NULL) {
13634 + ret = -ENOMEM;
13635 + goto err_reqsk_create;
13638 +out:
13639 + return ret;
13641 +err_reqsk_create:
13642 + kfree(ops->slab_name);
13643 + ops->slab_name = NULL;
13644 + goto out;
13647 +void mptcp_pm_v6_undo(void)
13649 + kmem_cache_destroy(mptcp6_request_sock_ops.slab);
13650 + kfree(mptcp6_request_sock_ops.slab_name);
13652 diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_ndiffports.c linux-3.14.45/net/mptcp/mptcp_ndiffports.c
13653 --- linux-3.14.45.orig/net/mptcp/mptcp_ndiffports.c 1970-01-01 01:00:00.000000000 +0100
13654 +++ linux-3.14.45/net/mptcp/mptcp_ndiffports.c 2015-06-24 14:15:48.931862523 +0200
13655 @@ -0,0 +1,171 @@
13656 +#include <linux/module.h>
13658 +#include <net/mptcp.h>
13659 +#include <net/mptcp_v4.h>
13661 +#if IS_ENABLED(CONFIG_IPV6)
13662 +#include <net/mptcp_v6.h>
13663 +#endif
13665 +struct ndiffports_priv {
13666 + /* Worker struct for subflow establishment */
13667 + struct work_struct subflow_work;
13669 + struct mptcp_cb *mpcb;
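+/* Number of subflows the ndiffports path-manager tries to establish over the
+ * same pair of addresses; tunable via the net.mptcp.mptcp_ndiffports sysctl.
+ */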
13672 +static int sysctl_mptcp_ndiffports __read_mostly = 2;
13674 +/**
13675 + * Create all new subflows by calling mptcp_initX_subsockets.
13677 + * This function uses a goto next_subflow to allow releasing the lock between
13678 + * new subflows, giving other processes a chance to do some work on the
13679 + * socket and potentially finish the communication.
13680 + **/
13681 +static void create_subflow_worker(struct work_struct *work)
13683 + struct ndiffports_priv *pm_priv = container_of(work,
13684 + struct ndiffports_priv,
13685 + subflow_work);
13686 + struct mptcp_cb *mpcb = pm_priv->mpcb;
13687 + struct sock *meta_sk = mpcb->meta_sk;
13688 + int iter = 0;
13690 +next_subflow:
13691 + if (iter) {
13692 + release_sock(meta_sk);
13693 + mutex_unlock(&mpcb->mpcb_mutex);
13695 + yield();
13697 + mutex_lock(&mpcb->mpcb_mutex);
13698 + lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);
13700 + iter++;
13702 + if (sock_flag(meta_sk, SOCK_DEAD))
13703 + goto exit;
13705 + if (mpcb->master_sk &&
13706 + !tcp_sk(mpcb->master_sk)->mptcp->fully_established)
13707 + goto exit;
13709 + if (sysctl_mptcp_ndiffports > iter &&
13710 + sysctl_mptcp_ndiffports > mpcb->cnt_subflows) {
13711 + if (meta_sk->sk_family == AF_INET ||
13712 + mptcp_v6_is_v4_mapped(meta_sk)) {
13713 + struct mptcp_loc4 loc;
13715 + loc.addr.s_addr = inet_sk(meta_sk)->inet_saddr;
13716 + loc.loc4_id = 0;
13717 + loc.low_prio = 0;
13719 + mptcp_init4_subsockets(meta_sk, &loc, &mpcb->remaddr4[0]);
13720 + } else {
13721 +#if IS_ENABLED(CONFIG_IPV6)
13722 + struct mptcp_loc6 loc;
13724 + loc.addr = inet6_sk(meta_sk)->saddr;
13725 + loc.loc6_id = 0;
13726 + loc.low_prio = 0;
13728 + mptcp_init6_subsockets(meta_sk, &loc, &mpcb->remaddr6[0]);
13729 +#endif
13731 + goto next_subflow;
13734 +exit:
13735 + release_sock(meta_sk);
13736 + mutex_unlock(&mpcb->mpcb_mutex);
13737 + sock_put(meta_sk);
13740 +static void ndiffports_new_session(struct sock *meta_sk, int index)
13742 + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
13743 + struct ndiffports_priv *fmp = (struct ndiffports_priv *)&mpcb->mptcp_pm[0];
13745 + /* Initialize workqueue-struct */
13746 + INIT_WORK(&fmp->subflow_work, create_subflow_worker);
13747 + fmp->mpcb = mpcb;
13750 +static void ndiffports_create_subflows(struct sock *meta_sk)
13752 + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
13753 + struct ndiffports_priv *pm_priv = (struct ndiffports_priv *)&mpcb->mptcp_pm[0];
13755 + if (mpcb->infinite_mapping_snd || mpcb->infinite_mapping_rcv ||
13756 + mpcb->send_infinite_mapping ||
13757 + mpcb->server_side || sock_flag(meta_sk, SOCK_DEAD))
13758 + return;
13760 + if (!work_pending(&pm_priv->subflow_work)) {
13761 + sock_hold(meta_sk);
13762 + queue_work(mptcp_wq, &pm_priv->subflow_work);
13766 +static int ndiffports_get_local_index(sa_family_t family, union inet_addr *addr,
13767 + struct net *net)
13769 + return 0;
13772 +static struct mptcp_pm_ops ndiffports __read_mostly = {
13773 + .new_session = ndiffports_new_session,
13774 + .fully_established = ndiffports_create_subflows,
13775 + .get_local_index = ndiffports_get_local_index,
13776 + .get_local_id = ndiffports_get_local_index,
13777 + .name = "ndiffports",
13778 + .owner = THIS_MODULE,
13781 +static struct ctl_table ndiff_table[] = {
13783 + .procname = "mptcp_ndiffports",
13784 + .data = &sysctl_mptcp_ndiffports,
13785 + .maxlen = sizeof(int),
13786 + .mode = 0644,
13787 + .proc_handler = &proc_dointvec
13788 + },
13789 + { }
13792 +struct ctl_table_header *mptcp_sysctl;
13794 +/* General initialization of MPTCP_PM */
13795 +static int __init ndiffports_register(void)
13797 + BUILD_BUG_ON(sizeof(struct ndiffports_priv) > MPTCP_PM_SIZE);
13799 + mptcp_sysctl = register_net_sysctl(&init_net, "net/mptcp", ndiff_table);
13800 + if (!mptcp_sysctl)
13801 + goto exit;
13803 + if (mptcp_register_path_manager(&ndiffports))
13804 + goto pm_failed;
13806 + return 0;
13808 +pm_failed:
13809 + unregister_net_sysctl_table(mptcp_sysctl);
13810 +exit:
13811 + return -1;
13814 +static void ndiffports_unregister(void)
13816 + mptcp_unregister_path_manager(&ndiffports);
13817 + unregister_net_sysctl_table(mptcp_sysctl);
13820 +module_init(ndiffports_register);
13821 +module_exit(ndiffports_unregister);
13823 +MODULE_AUTHOR("Christoph Paasch");
13824 +MODULE_LICENSE("GPL");
13825 +MODULE_DESCRIPTION("NDIFF-PORTS MPTCP");
13826 +MODULE_VERSION("0.88");
13827 diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_ofo_queue.c linux-3.14.45/net/mptcp/mptcp_ofo_queue.c
13828 --- linux-3.14.45.orig/net/mptcp/mptcp_ofo_queue.c 1970-01-01 01:00:00.000000000 +0100
13829 +++ linux-3.14.45/net/mptcp/mptcp_ofo_queue.c 2015-06-24 14:15:48.931862523 +0200
13830 @@ -0,0 +1,278 @@
13832 + * MPTCP implementation - Fast algorithm for MPTCP meta-reordering
13834 + * Initial Design & Implementation:
13835 + * Sébastien Barré <sebastien.barre@uclouvain.be>
13837 + * Current Maintainer & Author:
13838 + * Christoph Paasch <christoph.paasch@uclouvain.be>
13840 + * Additional authors:
13841 + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
13842 + * Gregory Detal <gregory.detal@uclouvain.be>
13843 + * Fabien Duchêne <fabien.duchene@uclouvain.be>
13844 + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
13845 + * Lavkesh Lahngir <lavkesh51@gmail.com>
13846 + * Andreas Ripke <ripke@neclab.eu>
13847 + * Vlad Dogaru <vlad.dogaru@intel.com>
13848 + * Octavian Purdila <octavian.purdila@intel.com>
13849 + * John Ronan <jronan@tssg.org>
13850 + * Catalin Nicutar <catalin.nicutar@gmail.com>
13851 + * Brandon Heller <brandonh@stanford.edu>
13853 + * This program is free software; you can redistribute it and/or
13854 + * modify it under the terms of the GNU General Public License
13855 + * as published by the Free Software Foundation; either version
13856 + * 2 of the License, or (at your option) any later version.
13857 + */
13859 +#include <linux/skbuff.h>
13860 +#include <linux/slab.h>
13861 +#include <net/tcp.h>
13862 +#include <net/mptcp.h>
13864 +void mptcp_remove_shortcuts(const struct mptcp_cb *mpcb,
13865 + const struct sk_buff *skb)
13867 + struct tcp_sock *tp;
13869 + mptcp_for_each_tp(mpcb, tp) {
13870 + if (tp->mptcp->shortcut_ofoqueue == skb) {
13871 + tp->mptcp->shortcut_ofoqueue = NULL;
13872 + return;
13877 +/* Does 'skb' fit after 'here' in the queue 'head'?
13878 + * If yes, we queue it and return 1
13879 + */
13880 +static int mptcp_ofo_queue_after(struct sk_buff_head *head,
13881 + struct sk_buff *skb, struct sk_buff *here,
13882 + struct tcp_sock *tp)
13884 + struct sock *meta_sk = tp->meta_sk;
13885 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
13886 + u32 seq = TCP_SKB_CB(skb)->seq;
13887 + u32 end_seq = TCP_SKB_CB(skb)->end_seq;
13889 + /* We want to queue skb after here, thus seq >= end_seq */
13890 + if (before(seq, TCP_SKB_CB(here)->end_seq))
13891 + return 0;
13893 + if (seq == TCP_SKB_CB(here)->end_seq) {
13894 + bool fragstolen = false;
13896 + if (!tcp_try_coalesce(meta_sk, here, skb, &fragstolen)) {
13897 + __skb_queue_after(&meta_tp->out_of_order_queue, here, skb);
13898 + return 1;
13899 + } else {
13900 + kfree_skb_partial(skb, fragstolen);
13901 + return -1;
13905 + /* If here is the last one, we can always queue it */
13906 + if (skb_queue_is_last(head, here)) {
13907 + __skb_queue_after(head, here, skb);
13908 + return 1;
13909 + } else {
13910 + struct sk_buff *skb1 = skb_queue_next(head, here);
13911 + /* It's not the last one, but does it fit between 'here' and
13912 + * the one after 'here'? That is, is end_seq <= after_here->seq
13913 + */
13914 + if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) {
13915 + __skb_queue_after(head, here, skb);
13916 + return 1;
13920 + return 0;
13923 +static void try_shortcut(struct sk_buff *shortcut, struct sk_buff *skb,
13924 + struct sk_buff_head *head, struct tcp_sock *tp)
13926 + struct sock *meta_sk = tp->meta_sk;
13927 + struct tcp_sock *tp_it, *meta_tp = tcp_sk(meta_sk);
13928 + struct mptcp_cb *mpcb = meta_tp->mpcb;
13929 + struct sk_buff *skb1, *best_shortcut = NULL;
13930 + u32 seq = TCP_SKB_CB(skb)->seq;
13931 + u32 end_seq = TCP_SKB_CB(skb)->end_seq;
13932 + u32 distance = 0xffffffff;
13934 + /* First, check the tp's shortcut */
13935 + if (!shortcut) {
13936 + if (skb_queue_empty(head)) {
13937 + __skb_queue_head(head, skb);
13938 + goto end;
13940 + } else {
13941 + int ret = mptcp_ofo_queue_after(head, skb, shortcut, tp);
13942 + /* Is the tp's shortcut a hit? If yes, we insert. */
13944 + if (ret) {
13945 + skb = (ret > 0) ? skb : NULL;
13946 + goto end;
13950 + /* Check the shortcuts of the other subsockets. */
13951 + mptcp_for_each_tp(mpcb, tp_it) {
13952 + shortcut = tp_it->mptcp->shortcut_ofoqueue;
13953 + /* Can we queue it here? If yes, do so! */
13954 + if (shortcut) {
13955 + int ret = mptcp_ofo_queue_after(head, skb, shortcut, tp);
13957 + if (ret) {
13958 + skb = (ret > 0) ? skb : NULL;
13959 + goto end;
13963 + /* Could not queue it, check if we are close.
13964 + * We are looking for a shortcut, close enough to seq to
13965 + * set skb1 prematurely and thus improve the subsequent lookup,
13966 + * which tries to find a skb1 so that skb1->seq <= seq.
13968 + * So, here we only consider shortcuts whose shortcut->seq > seq,
13969 + * minimize the distance between shortcut->seq and seq, and
13970 + * set best_shortcut to the one with the minimal distance.
13972 + * That way, the subsequent while-loop is shortest.
13973 + */
13974 + if (shortcut && after(TCP_SKB_CB(shortcut)->seq, seq)) {
13975 + /* Are we closer than the current best shortcut? */
13976 + if ((u32)(TCP_SKB_CB(shortcut)->seq - seq) < distance) {
13977 + distance = (u32)(TCP_SKB_CB(shortcut)->seq - seq);
13978 + best_shortcut = shortcut;
13983 + if (best_shortcut)
13984 + skb1 = best_shortcut;
13985 + else
13986 + skb1 = skb_peek_tail(head);
13988 + if (seq == TCP_SKB_CB(skb1)->end_seq) {
13989 + bool fragstolen = false;
13991 + if (!tcp_try_coalesce(meta_sk, skb1, skb, &fragstolen)) {
13992 + __skb_queue_after(&meta_tp->out_of_order_queue, skb1, skb);
13993 + } else {
13994 + kfree_skb_partial(skb, fragstolen);
13995 + skb = NULL;
13998 + goto end;
14001 + /* Find the insertion point, starting from best_shortcut if available.
14003 + * Inspired by tcp_data_queue_ofo.
14004 + */
14005 + while (1) {
14006 + /* skb1->seq <= seq */
14007 + if (!after(TCP_SKB_CB(skb1)->seq, seq))
14008 + break;
14009 + if (skb_queue_is_first(head, skb1)) {
14010 + skb1 = NULL;
14011 + break;
14013 + skb1 = skb_queue_prev(head, skb1);
14016 + /* Does skb overlap the previous one? */
14017 + if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
14018 + if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
14019 + /* All the bits are present. */
14020 + __kfree_skb(skb);
14021 + skb = NULL;
14022 + goto end;
14024 + if (seq == TCP_SKB_CB(skb1)->seq) {
14025 + if (skb_queue_is_first(head, skb1))
14026 + skb1 = NULL;
14027 + else
14028 + skb1 = skb_queue_prev(head, skb1);
14031 + if (!skb1)
14032 + __skb_queue_head(head, skb);
14033 + else
14034 + __skb_queue_after(head, skb1, skb);
14036 + /* And clean segments covered by new one as whole. */
14037 + while (!skb_queue_is_last(head, skb)) {
14038 + skb1 = skb_queue_next(head, skb);
14040 + if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
14041 + break;
14043 + __skb_unlink(skb1, head);
14044 + mptcp_remove_shortcuts(mpcb, skb1);
14045 + __kfree_skb(skb1);
14048 +end:
14049 + if (skb) {
14050 + skb_set_owner_r(skb, meta_sk);
14051 + tp->mptcp->shortcut_ofoqueue = skb;
14054 + return;
14057 +/**
14058 + * @sk: the subflow that received this skb.
14059 + */
14060 +void mptcp_add_meta_ofo_queue(struct sock *meta_sk, struct sk_buff *skb,
14061 + struct sock *sk)
14063 + struct tcp_sock *tp = tcp_sk(sk);
14065 + try_shortcut(tp->mptcp->shortcut_ofoqueue, skb,
14066 + &tcp_sk(meta_sk)->out_of_order_queue, tp);
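+/* Move in-sequence segments from the meta out-of-order queue to the meta
+ * receive queue, dropping segments already fully covered by rcv_nxt and
+ * handling a FIN when one is dequeued.
+ */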
14069 +void mptcp_ofo_queue(struct sock *meta_sk)
14071 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
14072 + struct sk_buff *skb;
14074 + while ((skb = skb_peek(&meta_tp->out_of_order_queue)) != NULL) {
14075 + u32 old_rcv_nxt = meta_tp->rcv_nxt;
14076 + if (after(TCP_SKB_CB(skb)->seq, meta_tp->rcv_nxt))
14077 + break;
14079 + if (!after(TCP_SKB_CB(skb)->end_seq, meta_tp->rcv_nxt)) {
14080 + __skb_unlink(skb, &meta_tp->out_of_order_queue);
14081 + mptcp_remove_shortcuts(meta_tp->mpcb, skb);
14082 + __kfree_skb(skb);
14083 + continue;
14086 + __skb_unlink(skb, &meta_tp->out_of_order_queue);
14087 + mptcp_remove_shortcuts(meta_tp->mpcb, skb);
14089 + __skb_queue_tail(&meta_sk->sk_receive_queue, skb);
14090 + meta_tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
14091 + mptcp_check_rcvseq_wrap(meta_tp, old_rcv_nxt);
14093 + if (tcp_hdr(skb)->fin)
14094 + mptcp_fin(meta_sk);
14098 +void mptcp_purge_ofo_queue(struct tcp_sock *meta_tp)
14100 + struct sk_buff_head *head = &meta_tp->out_of_order_queue;
14101 + struct sk_buff *skb, *tmp;
14103 + skb_queue_walk_safe(head, skb, tmp) {
14104 + __skb_unlink(skb, head);
14105 + mptcp_remove_shortcuts(meta_tp->mpcb, skb);
14106 + kfree_skb(skb);
14109 diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_olia.c linux-3.14.45/net/mptcp/mptcp_olia.c
14110 --- linux-3.14.45.orig/net/mptcp/mptcp_olia.c 1970-01-01 01:00:00.000000000 +0100
14111 +++ linux-3.14.45/net/mptcp/mptcp_olia.c 2015-06-24 14:15:48.931862523 +0200
14112 @@ -0,0 +1,314 @@
14114 + * MPTCP implementation - OPPORTUNISTIC LINKED INCREASES CONGESTION CONTROL:
14116 + * Algorithm design:
14117 + * Ramin Khalili <ramin.khalili@epfl.ch>
14118 + * Nicolas Gast <nicolas.gast@epfl.ch>
14119 + * Jean-Yves Le Boudec <jean-yves.leboudec@epfl.ch>
14121 + * Implementation:
14122 + * Ramin Khalili <ramin.khalili@epfl.ch>
14124 + * Ported to the official MPTCP-kernel:
14125 + * Christoph Paasch <christoph.paasch@uclouvain.be>
14127 + * This program is free software; you can redistribute it and/or
14128 + * modify it under the terms of the GNU General Public License
14129 + * as published by the Free Software Foundation; either version
14130 + * 2 of the License, or (at your option) any later version.
14131 + */
14134 +#include <net/tcp.h>
14135 +#include <net/mptcp.h>
14137 +#include <linux/module.h>
14139 +static int scale = 10;
14141 +struct mptcp_olia {
14142 + u32 mptcp_loss1;
14143 + u32 mptcp_loss2;
14144 + u32 mptcp_loss3;
14145 + int epsilon_num;
14146 + u32 epsilon_den;
14147 + int mptcp_snd_cwnd_cnt;
14150 +static inline int mptcp_olia_sk_can_send(const struct sock *sk)
14152 + return mptcp_sk_can_send(sk) && tcp_sk(sk)->srtt;
14155 +static inline u64 mptcp_olia_scale(u64 val, int scale)
14157 + return (u64) val << scale;
14160 +/* Take care of the artificial inflation of cwnd (see RFC 5681)
14161 + * during the fast-retransmit phase
14162 + */
14163 +static u32 mptcp_get_crt_cwnd(struct sock *sk)
14165 + struct inet_connection_sock *icsk = inet_csk(sk);
14167 + if (icsk->icsk_ca_state == TCP_CA_Recovery)
14168 + return tcp_sk(sk)->snd_ssthresh;
14169 + else
14170 + return tcp_sk(sk)->snd_cwnd;
14173 +/* return the denominator of the first term of the increase formula */
14174 +static u64 mptcp_get_rate(struct mptcp_cb *mpcb , u32 path_rtt)
14176 + struct sock *sk;
14177 + u64 rate = 1; /* We have to avoid a zero-rate because it is used as a divisor */
14179 + mptcp_for_each_sk(mpcb, sk) {
14180 + struct tcp_sock *tp = tcp_sk(sk);
14181 + u64 scaled_num;
14182 + u32 tmp_cwnd;
14184 + if (!mptcp_olia_sk_can_send(sk))
14185 + continue;
14187 + tmp_cwnd = mptcp_get_crt_cwnd(sk);
14188 + scaled_num = mptcp_olia_scale(tmp_cwnd, scale) * path_rtt;
14189 + rate += div_u64(scaled_num , tp->srtt);
14191 + rate *= rate;
14192 + return rate;
14195 +/* find the maximum cwnd, used to find set M */
14196 +static u32 mptcp_get_max_cwnd(struct mptcp_cb *mpcb)
14198 + struct sock *sk;
14199 + u32 best_cwnd = 0;
14201 + mptcp_for_each_sk(mpcb, sk) {
14202 + u32 tmp_cwnd;
14204 + if (!mptcp_olia_sk_can_send(sk))
14205 + continue;
14207 + tmp_cwnd = mptcp_get_crt_cwnd(sk);
14208 + if (tmp_cwnd > best_cwnd)
14209 + best_cwnd = tmp_cwnd;
14211 + return best_cwnd;
14214 +static void mptcp_get_epsilon(struct mptcp_cb *mpcb)
14216 + struct mptcp_olia *ca;
14217 + struct tcp_sock *tp;
14218 + struct sock *sk;
14219 + u64 tmp_int, tmp_rtt, best_int = 0, best_rtt = 1;
14220 + u32 max_cwnd = 1, best_cwnd = 1, tmp_cwnd;
14221 + u8 M = 0, B_not_M = 0;
14223 + /* TODO - integrate this in the following loop - we just want to iterate once */
14225 + max_cwnd = mptcp_get_max_cwnd(mpcb);
14227 + /* find the best path */
14228 + mptcp_for_each_sk(mpcb, sk) {
14229 + tp = tcp_sk(sk);
14230 + ca = inet_csk_ca(sk);
14232 + if (!mptcp_olia_sk_can_send(sk))
14233 + continue;
14235 + tmp_rtt = tp->srtt * tp->srtt;
14236 + /* TODO - check here and rename variables */
14237 + tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2,
14238 + ca->mptcp_loss2 - ca->mptcp_loss1);
14240 + tmp_cwnd = mptcp_get_crt_cwnd(sk);
14241 + if (tmp_int * best_rtt >= best_int * tmp_rtt) {
14242 + best_rtt = tmp_rtt;
14243 + best_int = tmp_int;
14244 + best_cwnd = tmp_cwnd;
14248 + /* TODO - integrate this here in mptcp_get_max_cwnd and in the previous loop */
14249 + /* find the size of M and B_not_M */
14250 + mptcp_for_each_sk(mpcb, sk) {
14251 + tp = tcp_sk(sk);
14252 + ca = inet_csk_ca(sk);
14254 + if (!mptcp_olia_sk_can_send(sk))
14255 + continue;
14257 + tmp_cwnd = mptcp_get_crt_cwnd(sk);
14258 + if (tmp_cwnd == max_cwnd) {
14259 + M++;
14260 + } else {
14261 + tmp_rtt = tp->srtt * tp->srtt;
14262 + tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2,
14263 + ca->mptcp_loss2 - ca->mptcp_loss1);
14265 + if (tmp_int * best_rtt == best_int * tmp_rtt)
14266 + B_not_M++;
14270 + /* check if the path is in M or B_not_M and set the value of epsilon accordingly */
14271 + mptcp_for_each_sk(mpcb, sk) {
14272 + tp = tcp_sk(sk);
14273 + ca = inet_csk_ca(sk);
14275 + if (!mptcp_olia_sk_can_send(sk))
14276 + continue;
14278 + if (B_not_M == 0) {
14279 + ca->epsilon_num = 0;
14280 + ca->epsilon_den = 1;
14281 + } else {
14282 + tmp_rtt = tp->srtt * tp->srtt;
14283 + tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2,
14284 + ca->mptcp_loss2 - ca->mptcp_loss1);
14285 + tmp_cwnd = mptcp_get_crt_cwnd(sk);
14287 + if (tmp_cwnd < max_cwnd &&
14288 + tmp_int * best_rtt == best_int * tmp_rtt){
14289 + ca->epsilon_num = 1;
14290 + ca->epsilon_den = mpcb->cnt_established * B_not_M;
14291 + } else if (tmp_cwnd == max_cwnd) {
14292 + ca->epsilon_num = -1;
14293 + ca->epsilon_den = mpcb->cnt_established * M;
14294 + } else {
14295 + ca->epsilon_num = 0;
14296 + ca->epsilon_den = 1;
14303 +/* setting the initial values */
14304 +static void mptcp_olia_init(struct sock *sk)
14306 + struct tcp_sock *tp = tcp_sk(sk);
14307 + struct mptcp_olia *ca = inet_csk_ca(sk);
14309 + if (tp->mpc) {
14310 + ca->mptcp_loss1 = tp->snd_una;
14311 + ca->mptcp_loss2 = tp->snd_una;
14312 + ca->mptcp_loss3 = tp->snd_una;
14313 + ca->mptcp_snd_cwnd_cnt = 0;
14314 + ca->epsilon_num = 0;
14315 + ca->epsilon_den = 1;
14319 +/* updating inter-loss distance and ssthresh */
14320 +static void mptcp_olia_set_state(struct sock *sk, u8 new_state)
14322 + if (!tcp_sk(sk)->mpc)
14323 + return;
14325 + if (new_state == TCP_CA_Loss ||
14326 + new_state == TCP_CA_Recovery || new_state == TCP_CA_CWR) {
14327 + struct mptcp_olia *ca = inet_csk_ca(sk);
14329 + if (ca->mptcp_loss3 != ca->mptcp_loss2 &&
14330 + !inet_csk(sk)->icsk_retransmits) {
14331 + ca->mptcp_loss1 = ca->mptcp_loss2;
14332 + ca->mptcp_loss2 = ca->mptcp_loss3;
14338 +/* main algorithm */
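+/* Per ACK, OLIA aims to grow cwnd_r by roughly
+ * (w_r / rtt_r^2) / (sum_p w_p / rtt_p)^2 + epsilon_r / w_r
+ * (see the OLIA paper by Khalili et al.); the fixed-point code below
+ * accumulates this increase in mptcp_snd_cwnd_cnt before bumping snd_cwnd.
+ */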
14339 +static void mptcp_olia_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
14341 + struct tcp_sock *tp = tcp_sk(sk);
14342 + struct mptcp_olia *ca = inet_csk_ca(sk);
14343 + struct mptcp_cb *mpcb = tp->mpcb;
14345 + u64 inc_num, inc_den, rate, cwnd_scaled;
14347 + if (!tp->mpc) {
14348 + tcp_reno_cong_avoid(sk, ack, acked, in_flight);
14349 + return;
14352 + ca->mptcp_loss3 = tp->snd_una;
14354 + if (!tcp_is_cwnd_limited(sk, in_flight))
14355 + return;
14357 + /* slow start if it is in the safe area */
14358 + if (tp->snd_cwnd <= tp->snd_ssthresh) {
14359 + tcp_slow_start(tp, acked);
14360 + return;
14363 + mptcp_get_epsilon(mpcb);
14364 + rate = mptcp_get_rate(mpcb, tp->srtt);
14365 + cwnd_scaled = mptcp_olia_scale(tp->snd_cwnd, scale);
14366 + inc_den = ca->epsilon_den * tp->snd_cwnd * rate ? : 1;
14368 + /* calculate the increase term; scaling is used to reduce the rounding effect */
14369 + if (ca->epsilon_num == -1) {
14370 + if (ca->epsilon_den * cwnd_scaled * cwnd_scaled < rate) {
14371 + inc_num = rate - ca->epsilon_den *
14372 + cwnd_scaled * cwnd_scaled;
14373 + ca->mptcp_snd_cwnd_cnt -= div64_u64(
14374 + mptcp_olia_scale(inc_num , scale) , inc_den);
14375 + } else {
14376 + inc_num = ca->epsilon_den *
14377 + cwnd_scaled * cwnd_scaled - rate;
14378 + ca->mptcp_snd_cwnd_cnt += div64_u64(
14379 + mptcp_olia_scale(inc_num , scale) , inc_den);
14381 + } else {
14382 + inc_num = ca->epsilon_num * rate +
14383 + ca->epsilon_den * cwnd_scaled * cwnd_scaled;
14384 + ca->mptcp_snd_cwnd_cnt += div64_u64(
14385 + mptcp_olia_scale(inc_num , scale) , inc_den);
14389 + if (ca->mptcp_snd_cwnd_cnt >= (1 << scale) - 1) {
14390 + if (tp->snd_cwnd < tp->snd_cwnd_clamp)
14391 + tp->snd_cwnd++;
14392 + ca->mptcp_snd_cwnd_cnt = 0;
14393 + } else if (ca->mptcp_snd_cwnd_cnt <= 0 - (1 << scale) + 1) {
14394 + tp->snd_cwnd = max((int) 1 , (int) tp->snd_cwnd - 1);
14395 + ca->mptcp_snd_cwnd_cnt = 0;
14399 +static struct tcp_congestion_ops mptcp_olia = {
14400 + .init = mptcp_olia_init,
14401 + .ssthresh = tcp_reno_ssthresh,
14402 + .cong_avoid = mptcp_olia_cong_avoid,
14403 + .set_state = mptcp_olia_set_state,
14404 + .min_cwnd = tcp_reno_min_cwnd,
14405 + .owner = THIS_MODULE,
14406 + .name = "olia",
14409 +static int __init mptcp_olia_register(void)
14411 + BUILD_BUG_ON(sizeof(struct mptcp_olia) > ICSK_CA_PRIV_SIZE);
14412 + return tcp_register_congestion_control(&mptcp_olia);
14415 +static void __exit mptcp_olia_unregister(void)
14417 + tcp_unregister_congestion_control(&mptcp_olia);
14420 +module_init(mptcp_olia_register);
14421 +module_exit(mptcp_olia_unregister);
14423 +MODULE_AUTHOR("Ramin Khalili, Nicolas Gast, Jean-Yves Le Boudec");
14424 +MODULE_LICENSE("GPL");
14425 +MODULE_DESCRIPTION("MPTCP COUPLED CONGESTION CONTROL");
14426 +MODULE_VERSION("0.1");
14427 diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_output.c linux-3.14.45/net/mptcp/mptcp_output.c
14428 --- linux-3.14.45.orig/net/mptcp/mptcp_output.c 1970-01-01 01:00:00.000000000 +0100
14429 +++ linux-3.14.45/net/mptcp/mptcp_output.c 2015-06-24 14:15:48.931862523 +0200
14430 @@ -0,0 +1,2255 @@
14432 + * MPTCP implementation - Sending side
14434 + * Initial Design & Implementation:
14435 + * Sébastien Barré <sebastien.barre@uclouvain.be>
14437 + * Current Maintainer & Author:
14438 + * Christoph Paasch <christoph.paasch@uclouvain.be>
14440 + * Additional authors:
14441 + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
14442 + * Gregory Detal <gregory.detal@uclouvain.be>
14443 + * Fabien Duchêne <fabien.duchene@uclouvain.be>
14444 + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
14445 + * Lavkesh Lahngir <lavkesh51@gmail.com>
14446 + * Andreas Ripke <ripke@neclab.eu>
14447 + * Vlad Dogaru <vlad.dogaru@intel.com>
14448 + * Octavian Purdila <octavian.purdila@intel.com>
14449 + * John Ronan <jronan@tssg.org>
14450 + * Catalin Nicutar <catalin.nicutar@gmail.com>
14451 + * Brandon Heller <brandonh@stanford.edu>
14454 + * This program is free software; you can redistribute it and/or
14455 + * modify it under the terms of the GNU General Public License
14456 + * as published by the Free Software Foundation; either version
14457 + * 2 of the License, or (at your option) any later version.
14458 + */
14460 +#include <linux/kconfig.h>
14461 +#include <linux/skbuff.h>
14462 +#include <linux/tcp.h>
14464 +#include <net/mptcp.h>
14465 +#include <net/mptcp_v4.h>
14466 +#include <net/mptcp_v6.h>
14467 +#include <net/sock.h>
14469 +static inline int mptcp_pi_to_flag(int pi)
14471 + return 1 << (pi - 1);
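+/* Length of a REMOVE_ADDR option carrying one address-id per bit set in
+ * 'bitfield'; the loop below is a classic popcount.
+ */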
14474 +static inline int mptcp_sub_len_remove_addr(u16 bitfield)
14476 + unsigned int c;
14477 + for (c = 0; bitfield; c++)
14478 + bitfield &= bitfield - 1;
14479 + return MPTCP_SUB_LEN_REMOVE_ADDR + c - 1;
14482 +int mptcp_sub_len_remove_addr_align(u16 bitfield)
14484 + return ALIGN(mptcp_sub_len_remove_addr(bitfield), 4);
14486 +EXPORT_SYMBOL(mptcp_sub_len_remove_addr_align);
14488 +/* Is the sub-socket sk available to send the skb? */
14489 +static int mptcp_is_available(struct sock *sk, struct sk_buff *skb,
14490 + unsigned int *mss)
14492 + struct tcp_sock *tp = tcp_sk(sk);
14493 + unsigned int mss_now;
14495 + /* Set of states for which we are allowed to send data */
14496 + if (!mptcp_sk_can_send(sk))
14497 + return 0;
14499 + /* We do not send data on this subflow unless it is
14500 + * fully established, i.e. the 4th ack has been received.
14501 + */
14502 + if (tp->mptcp->pre_established)
14503 + return 0;
14505 + if (tp->pf ||
14506 + (tp->mpcb->noneligible & mptcp_pi_to_flag(tp->mptcp->path_index)))
14507 + return 0;
14509 + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) {
14510 + /* If SACK is disabled, and we got a loss, TCP does not exit
14511 + * the loss-state until something above high_seq has been acked.
14512 + * (see tcp_try_undo_recovery)
14514 + * high_seq is the snd_nxt at the moment of the RTO. As soon
14515 + * as we have an RTO, we won't push data on the subflow.
14516 + * Thus, snd_una can never go beyond high_seq.
14517 + */
14518 + if (!tcp_is_reno(tp))
14519 + return 0;
14520 + else if (tp->snd_una != tp->high_seq)
14521 + return 0;
14524 + if (!tp->mptcp->fully_established) {
14525 + /* Make sure that we send in-order data */
14526 + if (skb && tp->mptcp->second_packet &&
14527 + tp->mptcp->last_end_data_seq != TCP_SKB_CB(skb)->seq)
14528 + return 0;
14531 + if (!tcp_cwnd_test(tp, skb))
14532 + return 0;
14534 + mss_now = tcp_current_mss(sk);
14535 + /* Don't send on this subflow if we bypass the allowed send-window at
14536 + * the per-subflow level. Similar to tcp_snd_wnd_test, but with a manually
14537 + * calculated end_seq (because at this point end_seq is still at
14538 + * the meta-level).
14539 + */
14540 + if (skb && after(tp->write_seq + min(skb->len, mss_now), tcp_wnd_end(tp)))
14541 + return 0;
14543 + if (mss)
14544 + *mss = mss_now;
14546 + return 1;
14549 +/* Are we not allowed to reinject this skb on tp? */
14550 +static int mptcp_dont_reinject_skb(struct tcp_sock *tp, struct sk_buff *skb)
14552 + /* If the skb has already been enqueued in this sk, try to find
14553 + * another one.
14554 + */
14555 + return skb &&
14556 + /* Has the skb already been enqueued into this subsocket? */
14557 + mptcp_pi_to_flag(tp->mptcp->path_index) & TCP_SKB_CB(skb)->path_mask;
14560 +/* This is the scheduler. This function decides on which flow to send
14561 + * a given MSS. If all subflows are found to be busy, NULL is returned.
14562 + * The flow is selected based on the shortest RTT.
14563 + * If all paths have full congestion windows, we simply return NULL.
14565 + * Additionally, this function is aware of the backup-subflows.
14566 + */
14567 +static struct sock *get_available_subflow(struct sock *meta_sk,
14568 + struct sk_buff *skb,
14569 + unsigned int *mss_now)
14571 + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
14572 + struct sock *sk, *bestsk = NULL, *lowpriosk = NULL, *backupsk = NULL;
14573 + unsigned int mss = 0, mss_lowprio = 0, mss_backup = 0;
14574 + u32 min_time_to_peer = 0xffffffff, lowprio_min_time_to_peer = 0xffffffff;
14575 + int cnt_backups = 0;
14577 + /* if there is only one subflow, bypass the scheduling function */
14578 + if (mpcb->cnt_subflows == 1) {
14579 + bestsk = (struct sock *)mpcb->connection_list;
14580 + if (!mptcp_is_available(bestsk, skb, mss_now))
14581 + bestsk = NULL;
14582 + return bestsk;
14585 + /* Answer data_fin on same subflow!!! */
14586 + if (meta_sk->sk_shutdown & RCV_SHUTDOWN &&
14587 + skb && mptcp_is_data_fin(skb)) {
14588 + mptcp_for_each_sk(mpcb, sk) {
14589 + if (tcp_sk(sk)->mptcp->path_index == mpcb->dfin_path_index &&
14590 + mptcp_is_available(sk, skb, mss_now))
14591 + return sk;
14595 + /* First, find the best subflow */
14596 + mptcp_for_each_sk(mpcb, sk) {
14597 + struct tcp_sock *tp = tcp_sk(sk);
14598 + int this_mss;
14600 + if (tp->mptcp->rcv_low_prio || tp->mptcp->low_prio)
14601 + cnt_backups++;
14603 + if ((tp->mptcp->rcv_low_prio || tp->mptcp->low_prio) &&
14604 + tp->srtt < lowprio_min_time_to_peer) {
14606 + if (!mptcp_is_available(sk, skb, &this_mss))
14607 + continue;
14609 + if (mptcp_dont_reinject_skb(tp, skb)) {
14610 + mss_backup = this_mss;
14611 + backupsk = sk;
14612 + continue;
14615 + lowprio_min_time_to_peer = tp->srtt;
14616 + lowpriosk = sk;
14617 + mss_lowprio = this_mss;
14618 + } else if (!(tp->mptcp->rcv_low_prio || tp->mptcp->low_prio) &&
14619 + tp->srtt < min_time_to_peer) {
14620 + if (!mptcp_is_available(sk, skb, &this_mss))
14621 + continue;
14623 + if (mptcp_dont_reinject_skb(tp, skb)) {
14624 + mss_backup = this_mss;
14625 + backupsk = sk;
14626 + continue;
14629 + min_time_to_peer = tp->srtt;
14630 + bestsk = sk;
14631 + mss = this_mss;
14635 + if (mpcb->cnt_established == cnt_backups && lowpriosk) {
14636 + mss = mss_lowprio;
14637 + sk = lowpriosk;
14638 + } else if (bestsk) {
14639 + sk = bestsk;
14640 + } else if (backupsk){
14641 + /* It has been sent on all subflows once - let's give it a
14642 + * chance again by restarting its pathmask.
14643 + */
14644 + if (skb)
14645 + TCP_SKB_CB(skb)->path_mask = 0;
14646 + mss = mss_backup;
14647 + sk = backupsk;
14650 + if (mss_now)
14651 + *mss_now = mss;
14653 + return sk;
14656 +static struct mp_dss *mptcp_skb_find_dss(const struct sk_buff *skb)
14658 + if (!mptcp_is_data_seq(skb))
14659 + return NULL;
14661 + return (struct mp_dss *)(skb->data - (MPTCP_SUB_LEN_DSS_ALIGN +
14662 + MPTCP_SUB_LEN_ACK_ALIGN +
14663 + MPTCP_SUB_LEN_SEQ_ALIGN));
14666 +/* get the data-seq and end-data-seq and store them again in the
14667 + * tcp_skb_cb
14668 + */
14669 +static int mptcp_reconstruct_mapping(struct sk_buff *skb, struct sk_buff *orig_skb)
14671 + struct mp_dss *mpdss = mptcp_skb_find_dss(orig_skb);
14672 + u32 *p32;
14673 + u16 *p16;
14675 + if (!mpdss || !mpdss->M)
14676 + return 1;
14678 + /* Move the pointer to the data-seq */
14679 + p32 = (u32 *)mpdss;
14680 + p32++;
14681 + if (mpdss->A) {
14682 + p32++;
14683 + if (mpdss->a)
14684 + p32++;
14687 + TCP_SKB_CB(skb)->seq = ntohl(*p32);
14689 + /* Get the data_len to calculate the end_data_seq */
14690 + p32++;
14691 + p32++;
14692 + p16 = (u16 *)p32;
14693 + TCP_SKB_CB(skb)->end_seq = ntohs(*p16) + TCP_SKB_CB(skb)->seq;
14695 + return 0;
14698 +/* Similar to __pskb_copy and sk_stream_alloc_skb. */
14699 +static struct sk_buff *mptcp_pskb_copy(struct sk_buff *skb)
14701 + struct sk_buff *n;
14702 + /* The TCP header must be at least 32-bit aligned. */
14703 + int size = ALIGN(skb_headlen(skb), 4);
14705 + n = alloc_skb_fclone(size + MAX_TCP_HEADER, GFP_ATOMIC);
14706 + if (!n)
14707 + return NULL;
14709 + /* Set the data pointer */
14710 + skb_reserve(n, MAX_TCP_HEADER);
14711 + /* Set the tail pointer and length */
14712 + skb_put(n, skb_headlen(skb));
14713 + /* Copy the bytes */
14714 + skb_copy_from_linear_data(skb, n->data, n->len);
14716 + n->truesize += skb->data_len;
14717 + n->data_len = skb->data_len;
14718 + n->len = skb->len;
14720 + if (skb_shinfo(skb)->nr_frags) {
14721 + int i;
14723 + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
14724 + if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
14725 + kfree_skb(n);
14726 + n = NULL;
14727 + goto out;
14730 + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
14731 + skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
14732 + skb_frag_ref(skb, i);
14734 + skb_shinfo(n)->nr_frags = i;
14737 + if (skb_has_frag_list(skb)) {
14738 + skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
14739 + skb_clone_fraglist(n);
14742 + copy_skb_header(n, skb);
14743 +out:
14744 + return n;
14747 +/* Reinject data from one TCP subflow to the meta_sk. If sk == NULL, we are
14748 + * coming from the meta-retransmit-timer
14749 + */
14750 +static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk,
14751 + struct sock *sk, int clone_it)
14753 + struct sk_buff *skb, *skb1;
14754 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
14755 + struct mptcp_cb *mpcb = meta_tp->mpcb;
14756 + u32 seq, end_seq;
14758 + if (clone_it) {
14759 + /* pskb_copy is necessary here, because the TCP/IP-headers
14760 + * will be changed when it's going to be reinjected on another
14761 + * subflow.
14762 + */
14763 + skb = mptcp_pskb_copy(orig_skb);
14764 + } else {
14765 + __skb_unlink(orig_skb, &sk->sk_write_queue);
14766 + sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
14767 + sk->sk_wmem_queued -= orig_skb->truesize;
14768 + sk_mem_uncharge(sk, orig_skb->truesize);
14769 + skb = orig_skb;
14771 + if (unlikely(!skb))
14772 + return;
14774 + if (sk && mptcp_reconstruct_mapping(skb, orig_skb)) {
14775 + __kfree_skb(skb);
14776 + return;
14779 + skb->sk = meta_sk;
14781 + /* If it has already reached the destination, we don't have to reinject it */
14782 + if (!after(TCP_SKB_CB(skb)->end_seq, meta_tp->snd_una)) {
14783 + __kfree_skb(skb);
14784 + return;
14787 + /* Only reinject segments that are fully covered by the mapping */
14788 + if (skb->len + (mptcp_is_data_fin(skb) ? 1 : 0) !=
14789 + TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq) {
14790 + u32 seq = TCP_SKB_CB(skb)->seq;
14791 + u32 end_seq = TCP_SKB_CB(skb)->end_seq;
14793 + __kfree_skb(skb);
14795 + /* Ok, now we have to look for the full mapping in the meta
14796 + * send-queue :S
14797 + */
14798 + tcp_for_write_queue(skb, meta_sk) {
14799 + /* Not yet at the mapping? */
14800 + if (before(TCP_SKB_CB(skb)->seq, seq))
14801 + continue;
14802 + /* We have passed by the mapping */
14803 + if (after(TCP_SKB_CB(skb)->end_seq, end_seq))
14804 + return;
14806 + __mptcp_reinject_data(skb, meta_sk, NULL, 1);
14808 + return;
14811 + /* If it's empty, just add */
14812 + if (skb_queue_empty(&mpcb->reinject_queue)) {
14813 + skb_queue_head(&mpcb->reinject_queue, skb);
14814 + return;
14817 + /* Find the place to insert skb - or we may even 'drop' it, if the
14818 + * data is already covered by other skbs in the reinject-queue.
14820 + * This is inspired by code from tcp_data_queue.
14821 + */
14823 + skb1 = skb_peek_tail(&mpcb->reinject_queue);
14824 + seq = TCP_SKB_CB(skb)->seq;
14825 + while (1) {
14826 + if (!after(TCP_SKB_CB(skb1)->seq, seq))
14827 + break;
14828 + if (skb_queue_is_first(&mpcb->reinject_queue, skb1)) {
14829 + skb1 = NULL;
14830 + break;
14832 + skb1 = skb_queue_prev(&mpcb->reinject_queue, skb1);
14835 + /* Does skb overlap the previous one? */
14836 + end_seq = TCP_SKB_CB(skb)->end_seq;
14837 + if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
14838 + if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
14839 + /* All the bits are present. Don't reinject */
14840 + __kfree_skb(skb);
14841 + return;
14843 + if (seq == TCP_SKB_CB(skb1)->seq) {
14844 + if (skb_queue_is_first(&mpcb->reinject_queue, skb1))
14845 + skb1 = NULL;
14846 + else
14847 + skb1 = skb_queue_prev(&mpcb->reinject_queue, skb1);
14850 + if (!skb1)
14851 + __skb_queue_head(&mpcb->reinject_queue, skb);
14852 + else
14853 + __skb_queue_after(&mpcb->reinject_queue, skb1, skb);
14855 + /* And clean segments covered by new one as whole. */
14856 + while (!skb_queue_is_last(&mpcb->reinject_queue, skb)) {
14857 + skb1 = skb_queue_next(&mpcb->reinject_queue, skb);
14859 + if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
14860 + break;
14862 + __skb_unlink(skb1, &mpcb->reinject_queue);
14863 + __kfree_skb(skb1);
14865 + return;
14868 +/* Inserts data into the reinject queue */
14869 +void mptcp_reinject_data(struct sock *sk, int clone_it)
14871 + struct sk_buff *skb_it, *tmp;
14872 + struct tcp_sock *tp = tcp_sk(sk);
14873 + struct sock *meta_sk = tp->meta_sk;
14875 + /* It has already been closed - there is really no point in reinjecting */
14876 + if (meta_sk->sk_state == TCP_CLOSE)
14877 + return;
14879 + skb_queue_walk_safe(&sk->sk_write_queue, skb_it, tmp) {
14880 + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb_it);
14881 + /* Subflow SYNs and FINs are not reinjected,
14883 + * nor are empty subflow-FINs carrying a data-fin.
14884 + * The latter are reinjected below (without the subflow-FIN flag).
14885 + */
14886 + if (tcb->tcp_flags & TCPHDR_SYN ||
14887 + (tcb->tcp_flags & TCPHDR_FIN && !mptcp_is_data_fin(skb_it)) ||
14888 + (tcb->tcp_flags & TCPHDR_FIN && mptcp_is_data_fin(skb_it) && !skb_it->len))
14889 + continue;
14891 + __mptcp_reinject_data(skb_it, meta_sk, sk, clone_it);
14894 + skb_it = tcp_write_queue_tail(meta_sk);
14895 + /* If sk has sent the empty data-fin, we have to reinject it too. */
14896 + if (skb_it && mptcp_is_data_fin(skb_it) && skb_it->len == 0 &&
14897 + TCP_SKB_CB(skb_it)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index)) {
14898 + __mptcp_reinject_data(skb_it, meta_sk, NULL, 1);
14901 + mptcp_push_pending_frames(meta_sk);
14903 + tp->pf = 1;
14905 +EXPORT_SYMBOL(mptcp_reinject_data);
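+/* Decide whether the DATA_FIN in skb may be combined with a subflow FIN on
+ * subsk: we combine when in infinite-mapping mode, when no other subflow
+ * still has data queued, or when all data has already been DATA_ACKed.
+ */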
14907 +static void mptcp_combine_dfin(struct sk_buff *skb, struct sock *meta_sk,
14908 + struct sock *subsk)
14910 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
14911 + struct mptcp_cb *mpcb = meta_tp->mpcb;
14912 + struct sock *sk_it;
14913 + int all_empty = 1, all_acked;
14915 + /* In infinite mapping we always try to combine */
14916 + if (mpcb->infinite_mapping_snd && tcp_close_state(subsk)) {
14917 + subsk->sk_shutdown |= SEND_SHUTDOWN;
14918 + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
14919 + return;
14922 + /* Don't combine if the peer didn't combine - otherwise we end up in
14923 + * TIME_WAIT, even if our app is smart enough to avoid it.
14924 + */
14925 + if (meta_sk->sk_shutdown & RCV_SHUTDOWN) {
14926 + if (!mpcb->dfin_combined)
14927 + return;
14930 + /* If no other subflow has data to send, we can combine */
14931 + mptcp_for_each_sk(mpcb, sk_it) {
14932 + if (!mptcp_sk_can_send(sk_it))
14933 + continue;
14935 + if (!tcp_write_queue_empty(sk_it))
14936 + all_empty = 0;
14939 + /* If all data has been DATA_ACKed, we can combine.
14940 + * -1, because the data_fin consumed one byte
14941 + */
14942 + all_acked = (meta_tp->snd_una == (meta_tp->write_seq - 1));
14944 + if ((all_empty || all_acked) && tcp_close_state(subsk)) {
14945 + subsk->sk_shutdown |= SEND_SHUTDOWN;
14946 + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
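+/* Copy the meta-level skb, write the DSS mapping into the copy's header space
+ * and queue it on the subflow send-queue. Returns the subflow skb, or NULL if
+ * the copy could not be allocated.
+ */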
14950 +static struct sk_buff *mptcp_skb_entail(struct sock *sk, struct sk_buff *skb,
14951 + int reinject)
14953 + __be32 *ptr;
14954 + __u16 data_len;
14955 + struct mp_dss *mdss;
14956 + struct tcp_sock *tp = tcp_sk(sk);
14957 + struct sock *meta_sk = mptcp_meta_sk(sk);
14958 + struct mptcp_cb *mpcb = tp->mpcb;
14959 + struct tcp_skb_cb *tcb;
14960 + struct sk_buff *subskb = NULL;
14962 + if (!reinject)
14963 + TCP_SKB_CB(skb)->mptcp_flags |= (mpcb->snd_hiseq_index ?
14964 + MPTCPHDR_SEQ64_INDEX : 0);
14966 + subskb = mptcp_pskb_copy(skb);
14967 + if (!subskb)
14968 + return NULL;
14970 + TCP_SKB_CB(skb)->path_mask |= mptcp_pi_to_flag(tp->mptcp->path_index);
14972 + if (!(sk->sk_route_caps & NETIF_F_ALL_CSUM) &&
14973 + skb->ip_summed == CHECKSUM_PARTIAL) {
14974 + subskb->csum = skb->csum = skb_checksum(skb, 0, skb->len, 0);
14975 + subskb->ip_summed = skb->ip_summed = CHECKSUM_NONE;
14978 + /* The subskb is going into the subflow send-queue. Its path-mask
14979 + * is not needed anymore and MUST be set to 0, as the path-mask
14980 + * is a union with inet_skb_param.
14981 + */
14982 + tcb = TCP_SKB_CB(subskb);
14983 + tcb->path_mask = 0;
14985 + if (mptcp_is_data_fin(subskb))
14986 + mptcp_combine_dfin(subskb, meta_sk, sk);
14988 + if (tp->mpcb->infinite_mapping_snd)
14989 + goto no_data_seq;
14991 + if (tp->mpcb->send_infinite_mapping &&
14992 + !before(tcb->seq, mptcp_meta_tp(tp)->snd_nxt)) {
14993 + tp->mptcp->fully_established = 1;
14994 + tp->mpcb->infinite_mapping_snd = 1;
14995 + tp->mptcp->infinite_cutoff_seq = tp->write_seq;
14996 + tcb->mptcp_flags |= MPTCPHDR_INF;
14997 + data_len = 0;
14998 + } else {
14999 + data_len = tcb->end_seq - tcb->seq;
15002 + /**** Write MPTCP DSS-option to the packet. ****/
15003 + ptr = (__be32 *)(subskb->data - (MPTCP_SUB_LEN_DSS_ALIGN +
15004 + MPTCP_SUB_LEN_ACK_ALIGN +
15005 + MPTCP_SUB_LEN_SEQ_ALIGN));
15007 + /* Then we start writing it from the start */
15008 + mdss = (struct mp_dss *)ptr;
15010 + mdss->kind = TCPOPT_MPTCP;
15011 + mdss->sub = MPTCP_SUB_DSS;
15012 + mdss->rsv1 = 0;
15013 + mdss->rsv2 = 0;
15014 + mdss->F = (mptcp_is_data_fin(subskb) ? 1 : 0);
15015 + mdss->m = 0;
15016 + mdss->M = 1;
15017 + mdss->a = 0;
15018 + mdss->A = 1;
15019 + mdss->len = mptcp_sub_len_dss(mdss, tp->mpcb->dss_csum);
15021 + ptr++;
15022 + ptr++; /* data_ack will be set in mptcp_options_write */
15023 + *ptr++ = htonl(tcb->seq); /* data_seq */
15025 + /* If it's a non-data DATA_FIN, we set subseq to 0 (draft v7) */
15026 + if (mptcp_is_data_fin(subskb) && subskb->len == 0)
15027 + *ptr++ = 0; /* subseq */
15028 + else
15029 + *ptr++ = htonl(tp->write_seq - tp->mptcp->snt_isn); /* subseq */
15031 + if (tp->mpcb->dss_csum && data_len) {
15032 + __be16 *p16 = (__be16 *)ptr;
15033 + __be32 hdseq = mptcp_get_highorder_sndbits(subskb, tp->mpcb);
15034 + __wsum csum;
15035 + *ptr = htonl(((data_len) << 16) |
15036 + (TCPOPT_EOL << 8) |
15037 + (TCPOPT_EOL));
15039 + csum = csum_partial(ptr - 2, 12, subskb->csum);
15040 + p16++;
15041 + *p16++ = csum_fold(csum_partial(&hdseq, sizeof(hdseq), csum));
15042 + } else {
15043 + *ptr++ = htonl(((data_len) << 16) |
15044 + (TCPOPT_NOP << 8) |
15045 + (TCPOPT_NOP));
15048 +no_data_seq:
15049 + tcb->seq = tp->write_seq;
15050 + tcb->sacked = 0; /* reset the sacked field: from the point of view
15051 + * of this subflow, we are sending a brand new
15052 + * segment */
15053 + /* Take into account seg len */
15054 + tp->write_seq += subskb->len + ((tcb->tcp_flags & TCPHDR_FIN) ? 1 : 0);
15055 + tcb->end_seq = tp->write_seq;
15057 + /* If it's a non-payload DATA_FIN (and no subflow-FIN either), the
15058 + * segment is not part of the subflow but exists only at the meta-level.
15059 + */
15060 + if (!mptcp_is_data_fin(subskb) || tcb->end_seq != tcb->seq) {
15061 + tcp_add_write_queue_tail(sk, subskb);
15062 + sk->sk_wmem_queued += subskb->truesize;
15063 + sk_mem_charge(sk, subskb->truesize);
15066 + return subskb;
15069 +static void mptcp_sub_event_new_data_sent(struct sock *sk,
15070 + struct sk_buff *subskb,
15071 + struct sk_buff *skb)
15073 + /* If it's a non-payload DATA_FIN (and no subflow-FIN either), the
15074 + * segment is not part of the subflow but exists only at the meta-level.
15076 + * We free it, because it has not been queued anywhere.
15077 + */
15078 + if (!mptcp_is_data_fin(subskb) ||
15079 + (TCP_SKB_CB(subskb)->end_seq != TCP_SKB_CB(subskb)->seq)) {
15080 + tcp_event_new_data_sent(sk, subskb);
15081 + tcp_sk(sk)->mptcp->second_packet = 1;
15082 + tcp_sk(sk)->mptcp->last_end_data_seq = TCP_SKB_CB(skb)->end_seq;
15083 + } else {
15084 + kfree_skb(subskb);
15088 +/* Handle the packets and sockets after a tcp_transmit_skb failed */
15089 +static void mptcp_transmit_skb_failed(struct sock *sk, struct sk_buff *skb,
15090 + struct sk_buff *subskb)
15092 + struct tcp_sock *tp = tcp_sk(sk);
15093 + struct mptcp_cb *mpcb = tp->mpcb;
15095 + /* No work to do if we are in infinite-mapping mode:
15096 + * there is only one subflow left and we cannot send this segment on
15097 + * another subflow.
15098 + */
15099 + if (mpcb->infinite_mapping_snd)
15100 + return;
15102 + TCP_SKB_CB(skb)->path_mask &= ~mptcp_pi_to_flag(tp->mptcp->path_index);
15104 + if (TCP_SKB_CB(subskb)->tcp_flags & TCPHDR_FIN) {
15105 + /* If it is a subflow-fin we must leave it on the
15106 + * subflow-send-queue, so that the probe-timer
15107 + * can retransmit it.
15108 + */
15109 + if (!tp->packets_out && !inet_csk(sk)->icsk_pending)
15110 + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
15111 + inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
15112 + } else if (mptcp_is_data_fin(subskb) &&
15113 + TCP_SKB_CB(subskb)->end_seq == TCP_SKB_CB(subskb)->seq) {
15114 + /* An empty data-fin has not been enqueued on the subflow
15115 + * and thus we free it.
15116 + */
15118 + kfree_skb(subskb);
15119 + } else {
15120 + /* In all other cases we remove it from the sub-queue.
15121 + * Other subflows may send it, or the probe-timer will
15122 + * handle it.
15123 + */
15124 + tcp_advance_send_head(sk, subskb);
15126 + /* tcp_add_write_queue_tail initialized highest_sack. We have
15127 + * to reset it, if necessary.
15128 + */
15129 + if (tp->highest_sack == subskb)
15130 + tp->highest_sack = NULL;
15132 + tcp_unlink_write_queue(subskb, sk);
15133 + tp->write_seq -= subskb->len;
15134 + sk_wmem_free_skb(sk, subskb);
15138 +/* Function to create two new TCP segments. Shrinks the given segment
15139 + * to the specified size and appends a new segment with the rest of the
15140 + * packet to the list. This won't be called frequently, I hope.
15141 + * Remember, these are still headerless SKBs at this point.
15142 + */
15143 +int mptcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
15144 + unsigned int mss_now, int reinject)
15146 + struct tcp_sock *tp = tcp_sk(sk);
15147 + struct sk_buff *buff;
15148 + int nsize, old_factor;
15149 + int nlen;
15150 + u8 flags;
15151 + int dsslen = MPTCP_SUB_LEN_DSS_ALIGN + MPTCP_SUB_LEN_ACK_ALIGN +
15152 + MPTCP_SUB_LEN_SEQ_ALIGN;
15153 + char dss[MPTCP_SUB_LEN_DSS_ALIGN + MPTCP_SUB_LEN_ACK_ALIGN +
15154 + MPTCP_SUB_LEN_SEQ_ALIGN];
15156 + if (WARN_ON(len > skb->len))
15157 + return -EINVAL;
15159 + /* DSS-option must be recovered afterwards. */
15160 + if (!is_meta_sk(sk))
15161 + memcpy(dss, skb->data - dsslen, dsslen);
15163 + nsize = skb_headlen(skb) - len;
15164 + if (nsize < 0)
15165 + nsize = 0;
15167 + if (skb_cloned(skb)) {
15168 + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
15169 + return -ENOMEM;
15170 + /* Recover dss-option */
15171 + if (!is_meta_sk(sk))
15172 + memcpy(skb->data - dsslen, dss, dsslen);
15175 + /* Get a new skb... force flag on. */
15176 + buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
15177 + if (buff == NULL)
15178 + return -ENOMEM; /* We'll just try again later. */
15180 + /* See below - if reinject == 1, the buff will be added to the reinject-
15181 + * queue, which is currently not part of the memory-accounting.
15182 + */
15183 + if (reinject != 1) {
15184 + sk->sk_wmem_queued += buff->truesize;
15185 + sk_mem_charge(sk, buff->truesize);
15187 + nlen = skb->len - len - nsize;
15188 + buff->truesize += nlen;
15189 + skb->truesize -= nlen;
15191 + /* Correct the sequence numbers. */
15192 + TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
15193 + TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
15194 + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
15196 + /* PSH and FIN should only be set in the second packet. */
15197 + flags = TCP_SKB_CB(skb)->tcp_flags;
15198 + TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
15199 + TCP_SKB_CB(buff)->tcp_flags = flags;
15200 + TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
15202 + flags = TCP_SKB_CB(skb)->mptcp_flags;
15203 + TCP_SKB_CB(skb)->mptcp_flags = flags & ~(MPTCPHDR_FIN);
15204 + TCP_SKB_CB(buff)->mptcp_flags = flags;
15206 + if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
15207 + /* Copy and checksum data tail into the new buffer. */
15208 + buff->csum = csum_partial_copy_nocheck(skb->data + len,
15209 + skb_put(buff, nsize),
15210 + nsize, 0);
15212 + skb_trim(skb, len);
15214 + skb->csum = csum_block_sub(skb->csum, buff->csum, len);
15215 + } else {
15216 + skb->ip_summed = CHECKSUM_PARTIAL;
15217 + skb_split(skb, buff, len);
15220 + /* We lost the dss-option when creating buff - put it back! */
15221 + if (!is_meta_sk(sk))
15222 + memcpy(buff->data - dsslen, dss, dsslen);
15224 + buff->ip_summed = skb->ip_summed;
15226 + /* Looks stupid, but our code really uses when of
15227 + * skbs, which it never sent before. --ANK
15228 + */
15229 + TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
15230 + buff->tstamp = skb->tstamp;
15232 + old_factor = tcp_skb_pcount(skb);
15234 + /* Fix up tso_factor for both original and new SKB. */
15235 + tcp_set_skb_tso_segs(sk, skb, mss_now);
15236 + tcp_set_skb_tso_segs(sk, buff, mss_now);
15238 + /* If this packet has been sent out already, we must
15239 + * adjust the various packet counters.
15240 + */
15241 + if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq) && reinject != 1) {
15242 + int diff = old_factor - tcp_skb_pcount(skb) -
15243 + tcp_skb_pcount(buff);
15245 + if (diff)
15246 + tcp_adjust_pcount(sk, skb, diff);
15249 + /* Link BUFF into the send queue. */
15250 + skb_header_release(buff);
15251 + if (reinject == 1)
15252 + __skb_queue_after(&tcp_sk(sk)->mpcb->reinject_queue, skb, buff);
15253 + else
15254 + tcp_insert_write_queue_after(skb, buff, sk);
15256 + return 0;
15259 +int mptso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
15260 + unsigned int mss_now, gfp_t gfp, int reinject)
15262 + struct sk_buff *buff;
15263 + int nlen = skb->len - len, old_factor;
15264 + u8 flags;
15265 + int dsslen = MPTCP_SUB_LEN_DSS_ALIGN + MPTCP_SUB_LEN_ACK_ALIGN +
15266 + MPTCP_SUB_LEN_SEQ_ALIGN;
15268 + /* All of a TSO frame must be composed of paged data. */
15269 + if (skb->len != skb->data_len)
15270 + return mptcp_fragment(sk, skb, len, mss_now, reinject);
15272 + buff = sk_stream_alloc_skb(sk, 0, gfp);
15273 + if (unlikely(buff == NULL))
15274 + return -ENOMEM;
15276 + /* See below - if reinject == 1, the buff will be added to the reinject-
15277 + * queue, which is currently not part of the memory-accounting.
15278 + */
15279 + if (reinject != 1) {
15280 + sk->sk_wmem_queued += buff->truesize;
15281 + sk_mem_charge(sk, buff->truesize);
15283 + buff->truesize += nlen;
15284 + skb->truesize -= nlen;
15286 + /* Correct the sequence numbers. */
15287 + TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
15288 + TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
15289 + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
15291 + /* PSH and FIN should only be set in the second packet. */
15292 + flags = TCP_SKB_CB(skb)->tcp_flags;
15293 + TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
15294 + TCP_SKB_CB(buff)->tcp_flags = flags;
15296 + flags = TCP_SKB_CB(skb)->mptcp_flags;
15297 + TCP_SKB_CB(skb)->mptcp_flags = flags & ~(MPTCPHDR_FIN);
15298 + TCP_SKB_CB(buff)->mptcp_flags = flags;
15300 + /* This packet was never sent out yet, so no SACK bits. */
15301 + TCP_SKB_CB(buff)->sacked = 0;
15303 + buff->ip_summed = CHECKSUM_PARTIAL;
15304 + skb->ip_summed = CHECKSUM_PARTIAL;
15305 + skb_split(skb, buff, len);
15307 + /* We lost the dss-option when creating buff - put it back! */
15308 + if (!is_meta_sk(sk))
15309 + memcpy(buff->data - dsslen, skb->data - dsslen, dsslen);
15311 + old_factor = tcp_skb_pcount(skb);
15313 + /* Fix up tso_factor for both original and new SKB. */
15314 + tcp_set_skb_tso_segs(sk, skb, mss_now);
15315 + tcp_set_skb_tso_segs(sk, buff, mss_now);
15317 + /* If this packet has been sent out already, we must
15318 + * adjust the various packet counters.
15319 + */
15320 + if (!before(tcp_sk(sk)->snd_nxt, TCP_SKB_CB(buff)->end_seq) && reinject != 1) {
15321 + int diff = old_factor - tcp_skb_pcount(skb) -
15322 + tcp_skb_pcount(buff);
15324 + if (diff)
15325 + tcp_adjust_pcount(sk, skb, diff);
15328 + /* Link BUFF into the send queue. */
15329 + skb_header_release(buff);
15330 + if (reinject == 1)
15331 + __skb_queue_after(&tcp_sk(sk)->mpcb->reinject_queue, skb, buff);
15332 + else
15333 + tcp_insert_write_queue_after(skb, buff, sk);
15335 + return 0;
15338 +/* Inspired by tcp_write_wakeup */
15339 +int mptcp_write_wakeup(struct sock *meta_sk)
15341 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
15342 + struct sk_buff *skb, *subskb;
15344 + skb = tcp_send_head(meta_sk);
15345 + if (skb &&
15346 + before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(meta_tp))) {
15347 + int err;
15348 + unsigned int mss;
15349 + unsigned int seg_size = tcp_wnd_end(meta_tp) - TCP_SKB_CB(skb)->seq;
15350 + struct sock *subsk = get_available_subflow(meta_sk, skb, &mss);
15351 + if (!subsk)
15352 + return -1;
15354 + if (before(meta_tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
15355 + meta_tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
15357 + /* We are probing the opening of a window
15358 + * but the window size is != 0
15359 + * must have been a result of SWS avoidance (sender)
15360 + */
15361 + if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
15362 + skb->len > mss) {
15363 + seg_size = min(seg_size, mss);
15364 + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
15365 + if (mptcp_fragment(meta_sk, skb, seg_size, mss, 0))
15366 + return -1;
15367 + } else if (!tcp_skb_pcount(skb)) {
15368 + tcp_set_skb_tso_segs(meta_sk, skb, mss);
15371 + subskb = mptcp_skb_entail(subsk, skb, 0);
15372 + if (!subskb)
15373 + return -1;
15375 + TCP_SKB_CB(subskb)->tcp_flags |= TCPHDR_PSH;
15376 + TCP_SKB_CB(skb)->when = tcp_time_stamp;
15377 + TCP_SKB_CB(subskb)->when = tcp_time_stamp;
15378 + err = tcp_transmit_skb(subsk, subskb, 1, GFP_ATOMIC);
15379 + if (unlikely(err)) {
15380 + mptcp_transmit_skb_failed(subsk, skb, subskb);
15381 + return err;
15384 + mptcp_check_sndseq_wrap(meta_tp, TCP_SKB_CB(skb)->end_seq -
15385 + TCP_SKB_CB(skb)->seq);
15386 + tcp_event_new_data_sent(meta_sk, skb);
15387 + mptcp_sub_event_new_data_sent(subsk, subskb, skb);
15389 + return 0;
15390 + } else {
15391 + struct sock *sk_it;
15392 + int ans = 0;
15394 + if (between(meta_tp->snd_up, meta_tp->snd_una + 1,
15395 + meta_tp->snd_una + 0xFFFF)) {
15396 + mptcp_for_each_sk(meta_tp->mpcb, sk_it) {
15397 + if (mptcp_sk_can_send_ack(sk_it))
15398 + tcp_xmit_probe_skb(sk_it, 1);
15402 + /* At least one of the tcp_xmit_probe_skb's has to succeed */
15403 + mptcp_for_each_sk(meta_tp->mpcb, sk_it) {
15404 + int ret;
15406 + if (!mptcp_sk_can_send_ack(sk_it))
15407 + continue;
15409 + ret = tcp_xmit_probe_skb(sk_it, 0);
15410 + if (unlikely(ret > 0))
15411 + ans = ret;
15413 + return ans;
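+/* Look up the segment with the same data-seq in the meta write-queue and copy
+ * its path_mask, so the scheduler knows on which subflows it was already sent.
+ */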
15417 +static void mptcp_find_and_set_pathmask(struct sock *meta_sk, struct sk_buff *skb)
15419 + struct sk_buff *skb_it;
15421 + skb_it = tcp_write_queue_head(meta_sk);
15423 + tcp_for_write_queue_from(skb_it, meta_sk) {
15424 + if (skb_it == tcp_send_head(meta_sk))
15425 + break;
15427 + if (TCP_SKB_CB(skb_it)->seq == TCP_SKB_CB(skb)->seq) {
15428 + TCP_SKB_CB(skb)->path_mask = TCP_SKB_CB(skb_it)->path_mask;
15429 + break;
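+/* Receive-buffer optimization: if the head of the meta write-queue is stuck on
+ * a slower subflow, optionally penalize (halve the cwnd of) that subflow and
+ * return the head so it can be retransmitted on the current one.
+ */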
15434 +static struct sk_buff *mptcp_rcv_buf_optimization(struct sock *sk, int penal)
15436 + struct sock *meta_sk;
15437 + struct tcp_sock *tp = tcp_sk(sk), *tp_it;
15438 + struct sk_buff *skb_head;
15440 + if (tp->mpcb->cnt_subflows == 1)
15441 + return NULL;
15443 + meta_sk = mptcp_meta_sk(sk);
15444 + skb_head = tcp_write_queue_head(meta_sk);
15446 + if (!skb_head || skb_head == tcp_send_head(meta_sk))
15447 + return NULL;
15449 + /* If penalization is optional (coming from mptcp_next_segment()) and
15450 + * we are not send-buffer-limited, we do not penalize. The retransmission
15451 + * is just an optimization to fix the idle-time due to the delay before
15452 + * we wake up the application.
15453 + */
15454 + if (!penal && sk_stream_memory_free(meta_sk))
15455 + goto retrans;
15457 + /* Only penalize again after an RTT has elapsed */
15458 + if (tcp_time_stamp - tp->mptcp->last_rbuf_opti < tp->srtt >> 3)
15459 + goto retrans;
15461 + /* Half the cwnd of the slow flow */
15462 + mptcp_for_each_tp(tp->mpcb, tp_it) {
15463 + if (tp_it != tp &&
15464 + TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
15465 + if (tp->srtt < tp_it->srtt && inet_csk((struct sock *)tp_it)->icsk_ca_state == TCP_CA_Open) {
15466 + tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U);
15467 + if (tp_it->snd_ssthresh != TCP_INFINITE_SSTHRESH)
15468 + tp_it->snd_ssthresh = max(tp_it->snd_ssthresh >> 1U, 2U);
15470 + tp->mptcp->last_rbuf_opti = tcp_time_stamp;
15472 + break;
15476 +retrans:
15478 + /* Segment not yet injected into this path? Take it!!! */
15479 + if (!(TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index))) {
15480 + bool do_retrans = false;
15481 + mptcp_for_each_tp(tp->mpcb, tp_it) {
15482 + if (tp_it != tp &&
15483 + TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
15484 + if (tp_it->snd_cwnd <= 4) {
15485 + do_retrans = true;
15486 + break;
15489 + if (4 * tp->srtt >= tp_it->srtt) {
15490 + do_retrans = false;
15491 + break;
15492 + } else {
15493 + do_retrans = true;
15498 + if (do_retrans)
15499 + return skb_head;
15501 + return NULL;
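+/* Meta-level transmit loop: pick the next segment (regular or reinjected),
+ * ask the scheduler for a subflow, entail a per-subflow copy carrying the DSS
+ * mapping and transmit it, until the window or the scheduler stops us.
+ */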
15504 +int mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle,
15505 + int push_one, gfp_t gfp)
15507 + struct tcp_sock *meta_tp = tcp_sk(meta_sk), *subtp;
15508 + struct sock *subsk;
15509 + struct mptcp_cb *mpcb = meta_tp->mpcb;
15510 + struct sk_buff *skb;
15511 + unsigned int tso_segs, old_factor, sent_pkts;
15512 + int cwnd_quota;
15513 + int result;
15514 + int reinject = 0;
15516 + sent_pkts = 0;
15518 + /* Currently mtu-probing is not done in MPTCP */
15519 + if (!push_one && 0) {
15520 + /* Do MTU probing. */
15521 + result = tcp_mtu_probe(meta_sk);
15522 + if (!result)
15523 + return 0;
15524 + else if (result > 0)
15525 + sent_pkts = 1;
15528 + while ((skb = mptcp_next_segment(meta_sk, &reinject))) {
15529 + unsigned int limit;
15530 + struct sk_buff *subskb = NULL;
15531 + u32 noneligible = mpcb->noneligible;
15533 + if (reinject == 1) {
15534 + if (!after(TCP_SKB_CB(skb)->end_seq, meta_tp->snd_una)) {
15535 + /* Segment already reached the peer, take the next one */
15536 + __skb_unlink(skb, &mpcb->reinject_queue);
15537 + __kfree_skb(skb);
15538 + continue;
15541 + /* Reinjection and it is coming from a subflow? We need
15542 + * to find out the path-mask from the meta-write-queue
15543 + * to properly select a subflow.
15544 + */
15545 + if (!TCP_SKB_CB(skb)->path_mask)
15546 + mptcp_find_and_set_pathmask(meta_sk, skb);
15549 +subflow:
15550 + subsk = get_available_subflow(meta_sk, skb, &mss_now);
15551 + if (!subsk)
15552 + break;
15553 + subtp = tcp_sk(subsk);
15555 + /* Since all subsocks are locked before calling the scheduler,
15556 + * the tcp_send_head should not change.
15557 + */
15558 + BUG_ON(!reinject && tcp_send_head(meta_sk) != skb);
15559 +retry:
15560 + /* If the segment was cloned (e.g. a meta retransmission),
15561 + * the header must be expanded/copied so that there is no
15562 + * corruption of TSO information.
15563 + */
15564 + if (skb_unclone(skb, GFP_ATOMIC))
15565 + break;
15567 + old_factor = tcp_skb_pcount(skb);
15568 + tcp_set_skb_tso_segs(meta_sk, skb, mss_now);
15569 + tso_segs = tcp_skb_pcount(skb);
15571 + if (reinject == -1) {
15572 + /* The packet has already been sent once, so if we
15573 + * change the pcount here we have to adjust packets_out
15574 + * in the meta-sk
15575 + */
15576 + int diff = old_factor - tso_segs;
15578 + if (diff)
15579 + tcp_adjust_pcount(meta_sk, skb, diff);
15582 + cwnd_quota = tcp_cwnd_test(subtp, skb);
15583 + if (!cwnd_quota) {
15584 + /* May happen due to two cases:
15586 + * - if at the first selection we circumvented
15587 + * the test due to a DATA_FIN (and got rejected at
15588 + * tcp_snd_wnd_test), but the reinjected segment is not
15589 + * a DATA_FIN.
15590 + * - if we take a DATA_FIN with data, but
15591 + * tcp_set_skb_tso_segs() increases the number of
15592 + * tso_segs to something > 1. Then, cwnd_test might
15593 + * reject it.
15594 + */
15595 + mpcb->noneligible |= mptcp_pi_to_flag(subtp->mptcp->path_index);
15596 + continue;
15599 + if (!reinject && unlikely(!tcp_snd_wnd_test(meta_tp, skb, mss_now))) {
15600 + skb = mptcp_rcv_buf_optimization(subsk, 1);
15601 + if (skb) {
15602 + reinject = -1;
15603 + goto retry;
15605 + break;
15608 + if (tso_segs == 1) {
15609 + if (unlikely(!tcp_nagle_test(meta_tp, skb, mss_now,
15610 + (tcp_skb_is_last(meta_sk, skb) ?
15611 + nonagle : TCP_NAGLE_PUSH))))
15612 + break;
15613 + } else {
15614 + /* Do not try to defer the transmission of a reinjected
15615 + * segment. Send it directly.
15616 + * If it is not possible to send the TSO segment on the
15617 + * best subflow right now, try to look for another subflow.
15618 + * If there is no subflow available, defer the segment to avoid
15619 + * the call to mptso_fragment.
15620 + */
15621 + if (!push_one && !reinject && tcp_tso_should_defer(subsk, skb)) {
15622 + mpcb->noneligible |= mptcp_pi_to_flag(subtp->mptcp->path_index);
15623 + goto subflow;
15627 + limit = mss_now;
15628 + if (tso_segs > 1 && !tcp_urg_mode(meta_tp))
15629 + limit = tcp_mss_split_point(subsk, skb, mss_now,
15630 + min_t(unsigned int,
15631 + cwnd_quota,
15632 + subsk->sk_gso_max_segs),
15633 + nonagle);
15635 + if (skb->len > limit &&
15636 + unlikely(mptso_fragment(meta_sk, skb, limit, mss_now, gfp, reinject)))
15637 + break;
15639 + subskb = mptcp_skb_entail(subsk, skb, reinject);
15640 + if (!subskb)
15641 + break;
15643 + mpcb->noneligible = noneligible;
15644 + TCP_SKB_CB(skb)->when = tcp_time_stamp;
15645 + TCP_SKB_CB(subskb)->when = tcp_time_stamp;
15646 + if (unlikely(tcp_transmit_skb(subsk, subskb, 1, gfp))) {
15647 + mptcp_transmit_skb_failed(subsk, skb, subskb);
15648 + mpcb->noneligible |= mptcp_pi_to_flag(subtp->mptcp->path_index);
15649 + continue;
15652 + if (!reinject) {
15653 + mptcp_check_sndseq_wrap(meta_tp,
15654 + TCP_SKB_CB(skb)->end_seq -
15655 + TCP_SKB_CB(skb)->seq);
15656 + tcp_event_new_data_sent(meta_sk, skb);
15659 + tcp_minshall_update(meta_tp, mss_now, skb);
15660 + sent_pkts += tcp_skb_pcount(skb);
15661 + tcp_sk(subsk)->mptcp->sent_pkts += tcp_skb_pcount(skb);
15663 + mptcp_sub_event_new_data_sent(subsk, subskb, skb);
15665 + if (reinject > 0) {
15666 + __skb_unlink(skb, &mpcb->reinject_queue);
15667 + kfree_skb(skb);
15670 + if (push_one)
15671 + break;
15674 + mpcb->noneligible = 0;
15676 + if (likely(sent_pkts)) {
15677 + mptcp_for_each_sk(mpcb, subsk) {
15678 + subtp = tcp_sk(subsk);
15679 + if (subtp->mptcp->sent_pkts) {
15680 + if (tcp_in_cwnd_reduction(subsk))
15681 + subtp->prr_out += subtp->mptcp->sent_pkts;
15682 + tcp_cwnd_validate(subsk);
15683 + subtp->mptcp->sent_pkts = 0;
15686 + return 0;
15689 + return !meta_tp->packets_out && tcp_send_head(meta_sk);
15692 +void mptcp_write_space(struct sock *sk)
15694 + mptcp_push_pending_frames(mptcp_meta_sk(sk));
15697 +u32 __mptcp_select_window(struct sock *sk)
15699 + struct inet_connection_sock *icsk = inet_csk(sk);
15700 + struct tcp_sock *tp = tcp_sk(sk), *meta_tp = mptcp_meta_tp(tp);
15701 + int mss, free_space, full_space, window;
15703 + /* MSS for the peer's data. Previous versions used mss_clamp
15704 + * here. I don't know if the value based on our guesses
15705 + * of peer's MSS is better for the performance. It's more correct
15706 + * but may be worse for the performance because of rcv_mss
15707 + * fluctuations. --SAW 1998/11/1
15708 + */
15709 + mss = icsk->icsk_ack.rcv_mss;
15710 + free_space = tcp_space(sk);
15711 + full_space = min_t(int, meta_tp->window_clamp,
15712 + tcp_full_space(sk));
15714 + if (mss > full_space)
15715 + mss = full_space;
15717 + if (free_space < (full_space >> 1)) {
15718 + icsk->icsk_ack.quick = 0;
15720 + if (tcp_memory_pressure)
15721 + /* TODO this has to be adapted when we support different
15722 + * MSS's among the subflows.
15723 + */
15724 + meta_tp->rcv_ssthresh = min(meta_tp->rcv_ssthresh,
15725 + 4U * meta_tp->advmss);
15727 + if (free_space < mss)
15728 + return 0;
15731 + if (free_space > meta_tp->rcv_ssthresh)
15732 + free_space = meta_tp->rcv_ssthresh;
15734 + /* Don't do rounding if we are using window scaling, since the
15735 + * scaled window will not line up with the MSS boundary anyway.
15736 + */
15737 + window = meta_tp->rcv_wnd;
15738 + if (tp->rx_opt.rcv_wscale) {
15739 + window = free_space;
15741 + /* Advertise enough space so that it won't get scaled away.
15742 + * Important case: prevent zero window announcement if
15743 + * 1<<rcv_wscale > mss.
15744 + */
15745 + if (((window >> tp->rx_opt.rcv_wscale) << tp->
15746 + rx_opt.rcv_wscale) != window)
15747 + window = (((window >> tp->rx_opt.rcv_wscale) + 1)
15748 + << tp->rx_opt.rcv_wscale);
15749 + } else {
15750 + /* Get the largest window that is a nice multiple of mss.
15751 + * Window clamp already applied above.
15752 + * If our current window offering is within 1 mss of the
15753 + * free space we just keep it. This prevents the divide
15754 + * and multiply from happening most of the time.
15755 + * We also don't do any window rounding when the free space
15756 + * is too small.
15757 + */
15758 + if (window <= free_space - mss || window > free_space)
15759 + window = (free_space / mss) * mss;
15760 + else if (mss == full_space &&
15761 + free_space > window + (full_space >> 1))
15762 + window = free_space;
15765 + return window;
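+/* MPTCP options for an outgoing SYN: MP_CAPABLE on the master subflow,
+ * MP_JOIN on any additional subflow.
+ */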
15768 +void mptcp_syn_options(struct sock *sk, struct tcp_out_options *opts,
15769 + unsigned *remaining)
15771 + struct tcp_sock *tp = tcp_sk(sk);
15773 + opts->options |= OPTION_MPTCP;
15774 + if (is_master_tp(tp)) {
15775 + opts->mptcp_options |= OPTION_MP_CAPABLE | OPTION_TYPE_SYN;
15776 + *remaining -= MPTCP_SUB_LEN_CAPABLE_SYN_ALIGN;
15777 + opts->mp_capable.sender_key = tp->mptcp_loc_key;
15778 + opts->dss_csum = !!sysctl_mptcp_checksum;
15779 + } else {
15780 + struct mptcp_cb *mpcb = tp->mpcb;
15782 + opts->mptcp_options |= OPTION_MP_JOIN | OPTION_TYPE_SYN;
15783 + *remaining -= MPTCP_SUB_LEN_JOIN_SYN_ALIGN;
15784 + opts->mp_join_syns.token = mpcb->mptcp_rem_token;
15785 + opts->addr_id = tp->mptcp->loc_id;
15786 + opts->mp_join_syns.sender_nonce = tp->mptcp->mptcp_loc_nonce;
15790 +void mptcp_synack_options(struct request_sock *req,
15791 + struct tcp_out_options *opts, unsigned *remaining)
15793 + struct mptcp_request_sock *mtreq;
15794 + mtreq = mptcp_rsk(req);
15796 + opts->options |= OPTION_MPTCP;
15797 + /* MPCB not yet set - thus it's a new MPTCP-session */
15798 + if (!mtreq->mpcb) {
15799 + opts->mptcp_options |= OPTION_MP_CAPABLE | OPTION_TYPE_SYNACK;
15800 + opts->mp_capable.sender_key = mtreq->mptcp_loc_key;
15801 + opts->dss_csum = !!sysctl_mptcp_checksum || mtreq->dss_csum;
15802 + *remaining -= MPTCP_SUB_LEN_CAPABLE_SYN_ALIGN;
15803 + } else {
15804 + opts->mptcp_options |= OPTION_MP_JOIN | OPTION_TYPE_SYNACK;
15805 + opts->mp_join_syns.sender_truncated_mac =
15806 + mtreq->mptcp_hash_tmac;
15807 + opts->mp_join_syns.sender_nonce = mtreq->mptcp_loc_nonce;
15808 + opts->addr_id = mtreq->loc_id;
15809 + *remaining -= MPTCP_SUB_LEN_JOIN_SYNACK_ALIGN;
15813 +void mptcp_established_options(struct sock *sk, struct sk_buff *skb,
15814 + struct tcp_out_options *opts, unsigned *size)
15816 + struct tcp_sock *tp = tcp_sk(sk), *meta_tp = mptcp_meta_tp(tp);
15817 + struct mptcp_cb *mpcb = tp->mpcb;
15818 + struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
15820 + /* In fallback mp_fail-mode, we have to repeat it until the fallback
15821 + * has been done by the sender
15822 + */
15823 + if (unlikely(tp->mptcp->send_mp_fail)) {
15824 + opts->options |= OPTION_MPTCP;
15825 + opts->mptcp_options |= OPTION_MP_FAIL;
15826 + opts->data_ack = (__u32)(mpcb->csum_cutoff_seq >> 32);
15827 + opts->data_seq = (__u32)mpcb->csum_cutoff_seq;
15828 + *size += MPTCP_SUB_LEN_FAIL;
15829 + return;
15832 + if (unlikely(tp->send_mp_fclose)) {
15833 + opts->options |= OPTION_MPTCP;
15834 + opts->mptcp_options |= OPTION_MP_FCLOSE;
15835 + opts->mp_capable.receiver_key = mpcb->mptcp_rem_key;
15836 + *size += MPTCP_SUB_LEN_FCLOSE_ALIGN;
15837 + return;
15840 + /* 1. If we are the sender of the infinite-mapping, we need the
15841 + * MPTCPHDR_INF-flag, because a retransmission of the
15842 + * infinite-announcement still needs the mptcp-option.
15844 + * We need infinite_cutoff_seq, because retransmissions from before
15845 + * the infinite-cutoff-moment still need the MPTCP-signalling to stay
15846 + * consistent.
15848 + * 2. If we are the receiver of the infinite-mapping, we always skip
15849 + * mptcp-options, because acknowledgments from before the
15850 + * infinite-mapping point have already been sent out.
15852 + * I know, the whole infinite-mapping stuff is ugly...
15854 + * TODO: Handle wrapped data-sequence numbers
15855 + * (even if it's very unlikely)
15856 + */
15857 + if (unlikely(mpcb->infinite_mapping_snd) &&
15858 + tp->mptcp->fully_established &&
15859 + ((mpcb->send_infinite_mapping && tcb &&
15860 + !(tcb->mptcp_flags & MPTCPHDR_INF) &&
15861 + !before(tcb->seq, tp->mptcp->infinite_cutoff_seq)) ||
15862 + !mpcb->send_infinite_mapping))
15863 + return;
15865 + if (unlikely(tp->mptcp->include_mpc)) {
15866 + opts->options |= OPTION_MPTCP;
15867 + opts->mptcp_options |= OPTION_MP_CAPABLE |
15868 + OPTION_TYPE_ACK;
15869 + *size += MPTCP_SUB_LEN_CAPABLE_ACK_ALIGN;
15870 + opts->mp_capable.sender_key = mpcb->mptcp_loc_key;
15871 + opts->mp_capable.receiver_key = mpcb->mptcp_rem_key;
15872 + opts->dss_csum = mpcb->dss_csum;
15874 + if (skb)
15875 + tp->mptcp->include_mpc = 0;
15877 + if (unlikely(tp->mptcp->pre_established)) {
15878 + opts->options |= OPTION_MPTCP;
15879 + opts->mptcp_options |= OPTION_MP_JOIN | OPTION_TYPE_ACK;
15880 + *size += MPTCP_SUB_LEN_JOIN_ACK_ALIGN;
15883 + if (!tp->mptcp->include_mpc && !tp->mptcp->pre_established) {
15884 + opts->options |= OPTION_MPTCP;
15885 + opts->mptcp_options |= OPTION_DATA_ACK;
15886 + /* If !skb, we come from tcp_current_mss and thus we always
15887 + * assume that the DSS-option will be set for the data-packet.
15888 + */
15889 + if (skb && !mptcp_is_data_seq(skb)) {
15890 + opts->data_ack = meta_tp->rcv_nxt;
15892 + *size += MPTCP_SUB_LEN_ACK_ALIGN;
15893 + } else {
15894 + opts->data_ack = meta_tp->rcv_nxt;
15896 + /* Doesn't matter whether the csum is included or not. It will be
15897 + * either 10 or 12, and thus aligned = 12
15898 + */
15899 + *size += MPTCP_SUB_LEN_ACK_ALIGN +
15900 + MPTCP_SUB_LEN_SEQ_ALIGN;
15903 + *size += MPTCP_SUB_LEN_DSS_ALIGN;
15906 + if (mpcb->pm_ops->addr_signal)
15907 + mpcb->pm_ops->addr_signal(sk, size, opts, skb);
15909 + if (unlikely(tp->mptcp->send_mp_prio) &&
15910 + MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_PRIO_ALIGN) {
15911 + opts->options |= OPTION_MPTCP;
15912 + opts->mptcp_options |= OPTION_MP_PRIO;
15913 + if (skb)
15914 + tp->mptcp->send_mp_prio = 0;
15915 + *size += MPTCP_SUB_LEN_PRIO_ALIGN;
15918 + return;
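+/* Select the window for this subflow and mirror it at the meta-level, so both
+ * levels advertise the same receive window.
+ */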
15921 +u16 mptcp_select_window(struct sock *sk)
15923 + u16 new_win = tcp_select_window(sk);
15924 + struct tcp_sock *tp = tcp_sk(sk);
15925 + struct tcp_sock *meta_tp = mptcp_meta_tp(tp);
15927 + meta_tp->rcv_wnd = tp->rcv_wnd;
15928 + meta_tp->rcv_wup = meta_tp->rcv_nxt;
15930 + return new_win;
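+/* Write the MPTCP options selected in opts (MP_CAPABLE, MP_JOIN, ADD_ADDR,
+ * REMOVE_ADDR, MP_FAIL, MP_FCLOSE, DSS data_ack and MP_PRIO) into the TCP
+ * option space at ptr.
+ */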
15933 +void mptcp_options_write(__be32 *ptr, struct tcp_sock *tp,
15934 + struct tcp_out_options *opts,
15935 + struct sk_buff *skb)
15937 + if (unlikely(OPTION_MP_CAPABLE & opts->mptcp_options)) {
15938 + struct mp_capable *mpc = (struct mp_capable *)ptr;
15940 + mpc->kind = TCPOPT_MPTCP;
15942 + if ((OPTION_TYPE_SYN & opts->mptcp_options) ||
15943 + (OPTION_TYPE_SYNACK & opts->mptcp_options)) {
15944 + mpc->sender_key = opts->mp_capable.sender_key;
15945 + mpc->len = MPTCP_SUB_LEN_CAPABLE_SYN;
15946 + ptr += MPTCP_SUB_LEN_CAPABLE_SYN_ALIGN >> 2;
15947 + } else if (OPTION_TYPE_ACK & opts->mptcp_options) {
15948 + mpc->sender_key = opts->mp_capable.sender_key;
15949 + mpc->receiver_key = opts->mp_capable.receiver_key;
15950 + mpc->len = MPTCP_SUB_LEN_CAPABLE_ACK;
15951 + ptr += MPTCP_SUB_LEN_CAPABLE_ACK_ALIGN >> 2;
15954 + mpc->sub = MPTCP_SUB_CAPABLE;
15955 + mpc->ver = 0;
15956 + mpc->a = opts->dss_csum;
15957 + mpc->b = 0;
15958 + mpc->rsv = 0;
15959 + mpc->h = 1;
15962 + if (unlikely(OPTION_MP_JOIN & opts->mptcp_options)) {
15963 + struct mp_join *mpj = (struct mp_join *)ptr;
15965 + mpj->kind = TCPOPT_MPTCP;
15966 + mpj->sub = MPTCP_SUB_JOIN;
15967 + mpj->rsv = 0;
15968 + mpj->addr_id = opts->addr_id;
15970 + if (OPTION_TYPE_SYN & opts->mptcp_options) {
15971 + mpj->len = MPTCP_SUB_LEN_JOIN_SYN;
15972 + mpj->u.syn.token = opts->mp_join_syns.token;
15973 + mpj->u.syn.nonce = opts->mp_join_syns.sender_nonce;
15974 + mpj->b = tp->mptcp->low_prio;
15975 + ptr += MPTCP_SUB_LEN_JOIN_SYN_ALIGN >> 2;
15976 + } else if (OPTION_TYPE_SYNACK & opts->mptcp_options) {
15977 + mpj->len = MPTCP_SUB_LEN_JOIN_SYNACK;
15978 + mpj->u.synack.mac =
15979 + opts->mp_join_syns.sender_truncated_mac;
15980 + mpj->u.synack.nonce = opts->mp_join_syns.sender_nonce;
15981 + mpj->b = tp->mptcp->low_prio;
15982 + ptr += MPTCP_SUB_LEN_JOIN_SYNACK_ALIGN >> 2;
15983 + } else if (OPTION_TYPE_ACK & opts->mptcp_options) {
15984 + mpj->len = MPTCP_SUB_LEN_JOIN_ACK;
15985 + memcpy(mpj->u.ack.mac, &tp->mptcp->sender_mac[0], 20);
15986 + ptr += MPTCP_SUB_LEN_JOIN_ACK_ALIGN >> 2;
15989 + if (unlikely(OPTION_ADD_ADDR & opts->mptcp_options)) {
15990 + struct mp_add_addr *mpadd = (struct mp_add_addr *)ptr;
15992 + mpadd->kind = TCPOPT_MPTCP;
15993 + if (opts->add_addr_v4) {
15994 + mpadd->len = MPTCP_SUB_LEN_ADD_ADDR4;
15995 + mpadd->sub = MPTCP_SUB_ADD_ADDR;
15996 + mpadd->ipver = 4;
15997 + mpadd->addr_id = opts->add_addr4.addr_id;
15998 + mpadd->u.v4.addr = opts->add_addr4.addr;
15999 + ptr += MPTCP_SUB_LEN_ADD_ADDR4_ALIGN >> 2;
16000 + } else if (opts->add_addr_v6) {
16001 + mpadd->len = MPTCP_SUB_LEN_ADD_ADDR6;
16002 + mpadd->sub = MPTCP_SUB_ADD_ADDR;
16003 + mpadd->ipver = 6;
16004 + mpadd->addr_id = opts->add_addr6.addr_id;
16005 + memcpy(&mpadd->u.v6.addr, &opts->add_addr6.addr,
16006 + sizeof(mpadd->u.v6.addr));
16007 + ptr += MPTCP_SUB_LEN_ADD_ADDR6_ALIGN >> 2;
16010 + if (unlikely(OPTION_REMOVE_ADDR & opts->mptcp_options)) {
16011 + struct mp_remove_addr *mprem = (struct mp_remove_addr *)ptr;
16012 + u8 *addrs_id;
16013 + int id, len, len_align;
16015 + len = mptcp_sub_len_remove_addr(opts->remove_addrs);
16016 + len_align = mptcp_sub_len_remove_addr_align(opts->remove_addrs);
16018 + mprem->kind = TCPOPT_MPTCP;
16019 + mprem->len = len;
16020 + mprem->sub = MPTCP_SUB_REMOVE_ADDR;
16021 + mprem->rsv = 0;
16022 + addrs_id = &mprem->addrs_id;
16024 + mptcp_for_each_bit_set(opts->remove_addrs, id)
16025 + *(addrs_id++) = id;
16027 + /* Fill the rest with NOP's */
16028 + if (len_align > len) {
16029 + int i;
16030 + for (i = 0; i < len_align - len; i++)
16031 + *(addrs_id++) = TCPOPT_NOP;
16034 + ptr += len_align >> 2;
16036 + if (unlikely(OPTION_MP_FAIL & opts->mptcp_options)) {
16037 + struct mp_fail *mpfail = (struct mp_fail *)ptr;
16039 + mpfail->kind = TCPOPT_MPTCP;
16040 + mpfail->len = MPTCP_SUB_LEN_FAIL;
16041 + mpfail->sub = MPTCP_SUB_FAIL;
16042 + mpfail->rsv1 = 0;
16043 + mpfail->rsv2 = 0;
16044 + mpfail->data_seq = htonll(((u64)opts->data_ack << 32) | opts->data_seq);
16046 + ptr += MPTCP_SUB_LEN_FAIL_ALIGN >> 2;
16048 + if (unlikely(OPTION_MP_FCLOSE & opts->mptcp_options)) {
16049 + struct mp_fclose *mpfclose = (struct mp_fclose *)ptr;
16051 + mpfclose->kind = TCPOPT_MPTCP;
16052 + mpfclose->len = MPTCP_SUB_LEN_FCLOSE;
16053 + mpfclose->sub = MPTCP_SUB_FCLOSE;
16054 + mpfclose->rsv1 = 0;
16055 + mpfclose->rsv2 = 0;
16056 + mpfclose->key = opts->mp_capable.receiver_key;
16058 + ptr += MPTCP_SUB_LEN_FCLOSE_ALIGN >> 2;
16061 + if (OPTION_DATA_ACK & opts->mptcp_options) {
16062 + if (!mptcp_is_data_seq(skb)) {
16063 + struct mp_dss *mdss = (struct mp_dss *)ptr;
16065 + mdss->kind = TCPOPT_MPTCP;
16066 + mdss->sub = MPTCP_SUB_DSS;
16067 + mdss->rsv1 = 0;
16068 + mdss->rsv2 = 0;
16069 + mdss->F = 0;
16070 + mdss->m = 0;
16071 + mdss->M = 0;
16072 + mdss->a = 0;
16073 + mdss->A = 1;
16074 + mdss->len = mptcp_sub_len_dss(mdss, tp->mpcb->dss_csum);
16076 + ptr++;
16077 + *ptr++ = htonl(opts->data_ack);
16078 + } else {
16079 + /**** Just update the data_ack ****/
16081 + /* Get pointer to data_ack-field. MPTCP is always at
16082 + * the end of the TCP-options.
16083 + */
16084 + /* TODO if we allow sending 64-bit dseq's we have to change "16" */
16085 + __be32 *dack = (__be32 *)(skb->data + (tcp_hdr(skb)->doff << 2) - 16);
16087 + *dack = htonl(opts->data_ack);
16090 + if (unlikely(OPTION_MP_PRIO & opts->mptcp_options)) {
16091 + struct mp_prio *mpprio = (struct mp_prio *)ptr;
16093 + mpprio->kind = TCPOPT_MPTCP;
16094 + mpprio->len = MPTCP_SUB_LEN_PRIO;
16095 + mpprio->sub = MPTCP_SUB_PRIO;
16096 + mpprio->rsv = 0;
16097 + mpprio->b = tp->mptcp->low_prio;
16098 + mpprio->addr_id = TCPOPT_NOP;
16100 + ptr += MPTCP_SUB_LEN_PRIO_ALIGN >> 2;
16104 +/* Returns the next segment to be sent from the mptcp meta-queue.
16105 + * (chooses the reinject queue if any segment is waiting in it, otherwise,
16106 + * chooses the normal write queue).
16107 + * Sets *@reinject to 1 if the returned segment comes from the
16108 + * reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk,
16109 + * and sets it to -1 if it is a meta-level retransmission to optimize the
16110 + * receive-buffer.
16111 + */
16112 +struct sk_buff *mptcp_next_segment(struct sock *meta_sk, int *reinject)
16114 + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
16115 + struct sk_buff *skb = NULL;
16116 + if (reinject)
16117 + *reinject = 0;
16119 + /* If we are in fallback-mode, just take from the meta-send-queue */
16120 + if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping)
16121 + return tcp_send_head(meta_sk);
16123 + skb = skb_peek(&mpcb->reinject_queue);
16125 + if (skb) {
16126 + if (reinject)
16127 + *reinject = 1;
16128 + } else {
16129 + skb = tcp_send_head(meta_sk);
16131 + if (!skb && meta_sk->sk_socket &&
16132 + test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
16133 + sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
16134 + struct sock *subsk = get_available_subflow(meta_sk, NULL, NULL);
16135 + if (!subsk)
16136 + return NULL;
16138 + skb = mptcp_rcv_buf_optimization(subsk, 0);
16139 + if (skb && reinject)
16140 + *reinject = -1;
16143 + return skb;
16146 +/* Sends the datafin */
16147 +void mptcp_send_fin(struct sock *meta_sk)
16149 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
16150 + struct sk_buff *skb = tcp_write_queue_tail(meta_sk);
16151 + int mss_now;
16153 + if ((1 << meta_sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
16154 + meta_tp->mpcb->passive_close = 1;
16156 + /* Optimization, tack on the FIN if we have a queue of
16157 + * unsent frames. But be careful about outgoing SACKS
16158 + * and IP options.
16159 + */
16160 + mss_now = mptcp_current_mss(meta_sk);
16162 + if (tcp_send_head(meta_sk) != NULL) {
16163 + TCP_SKB_CB(skb)->mptcp_flags |= MPTCPHDR_FIN;
16164 + TCP_SKB_CB(skb)->end_seq++;
16165 + meta_tp->write_seq++;
16166 + } else {
16167 + /* Socket is locked, keep trying until memory is available. */
16168 + for (;;) {
16169 + skb = alloc_skb_fclone(MAX_TCP_HEADER,
16170 + meta_sk->sk_allocation);
16171 + if (skb)
16172 + break;
16173 + yield();
16175 + /* Reserve space for headers and prepare control bits. */
16176 + skb_reserve(skb, MAX_TCP_HEADER);
16178 + tcp_init_nondata_skb(skb, meta_tp->write_seq, TCPHDR_ACK);
16179 + TCP_SKB_CB(skb)->end_seq++;
16180 + TCP_SKB_CB(skb)->mptcp_flags |= MPTCPHDR_FIN | MPTCPHDR_SEQ;
16181 + tcp_queue_skb(meta_sk, skb);
16183 + __tcp_push_pending_frames(meta_sk, mss_now, TCP_NAGLE_OFF);
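+/* Actively close the MPTCP-connection: send an MP_FASTCLOSE on one subflow
+ * and a TCP reset on all the others.
+ */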
16186 +void mptcp_send_active_reset(struct sock *meta_sk, gfp_t priority)
16188 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
16189 + struct mptcp_cb *mpcb = meta_tp->mpcb;
16190 + struct sock *sk = NULL, *sk_it = NULL, *tmpsk;
16192 + if (!mpcb->cnt_subflows)
16193 + return;
16195 + WARN_ON(meta_tp->send_mp_fclose);
16197 + /* First - select a socket */
16198 + sk = mptcp_select_ack_sock(meta_sk, 0);
16200 + /* May happen if no subflow is in an appropriate state */
16201 + if (!sk)
16202 + return;
16204 + /* We are in infinite mode - just send a reset */
16205 + if (mpcb->infinite_mapping_snd || mpcb->infinite_mapping_rcv) {
16206 + sk->sk_err = ECONNRESET;
16207 + if (tcp_need_reset(sk->sk_state))
16208 + tcp_send_active_reset(sk, priority);
16209 + mptcp_sub_force_close(sk);
16210 + return;
16214 + tcp_sk(sk)->send_mp_fclose = 1;
16215 + /** Reset all other subflows */
16217 + /* tcp_done must be handled with bh disabled */
16218 + if (!in_serving_softirq())
16219 + local_bh_disable();
16221 + mptcp_for_each_sk_safe(mpcb, sk_it, tmpsk) {
16222 + if (tcp_sk(sk_it)->send_mp_fclose)
16223 + continue;
16225 + sk_it->sk_err = ECONNRESET;
16226 + if (tcp_need_reset(sk_it->sk_state))
16227 + tcp_send_active_reset(sk_it, GFP_ATOMIC);
16228 + mptcp_sub_force_close(sk_it);
16231 + if (!in_serving_softirq())
16232 + local_bh_enable();
16234 + tcp_send_ack(sk);
16235 + inet_csk_reset_keepalive_timer(sk, inet_csk(sk)->icsk_rto);
16237 + meta_tp->send_mp_fclose = 1;
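+/* Retransmit a pure ACK (used while the subflow is still pre-established)
+ * with exponential backoff; on write-timeout give up and reset the subflow.
+ */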
16240 +static void mptcp_ack_retransmit_timer(struct sock *sk)
16242 + struct sk_buff *skb;
16243 + struct tcp_sock *tp = tcp_sk(sk);
16244 + struct inet_connection_sock *icsk = inet_csk(sk);
16246 + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
16247 + goto out; /* Routing failure or similar */
16249 + if (!tp->retrans_stamp)
16250 + tp->retrans_stamp = tcp_time_stamp ? : 1;
16252 + if (tcp_write_timeout(sk)) {
16253 + tp->mptcp->pre_established = 0;
16254 + sk_stop_timer(sk, &tp->mptcp->mptcp_ack_timer);
16255 + tcp_send_active_reset(sk, GFP_ATOMIC);
16256 + goto out;
16259 + skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
16260 + if (skb == NULL) {
16261 + sk_reset_timer(sk, &tp->mptcp->mptcp_ack_timer,
16262 + jiffies + icsk->icsk_rto);
16263 + return;
16266 + /* Reserve space for headers and prepare control bits */
16267 + skb_reserve(skb, MAX_TCP_HEADER);
16268 + tcp_init_nondata_skb(skb, tp->snd_una, TCPHDR_ACK);
16270 + TCP_SKB_CB(skb)->when = tcp_time_stamp;
16271 + if (tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC) > 0) {
16272 + /* Retransmission failed because of local congestion,
16273 + * do not backoff.
16274 + */
16275 + if (!icsk->icsk_retransmits)
16276 + icsk->icsk_retransmits = 1;
16277 + sk_reset_timer(sk, &tp->mptcp->mptcp_ack_timer,
16278 + jiffies + icsk->icsk_rto);
16279 + return;
16283 + icsk->icsk_retransmits++;
16284 + icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
16285 + sk_reset_timer(sk, &tp->mptcp->mptcp_ack_timer,
16286 + jiffies + icsk->icsk_rto);
16287 + if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0, 0)) {
16288 + __sk_dst_reset(sk);
16291 +out:;
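+/* Timer callback for the MPTCP ACK-retransmit timer. If the meta-socket is
+ * owned by the user, simply rearm the timer and try again later.
+ */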
16294 +void mptcp_ack_handler(unsigned long data)
16296 + struct sock *sk = (struct sock *)data;
16297 + struct sock *meta_sk = mptcp_meta_sk(sk);
16299 + bh_lock_sock(meta_sk);
16300 + if (sock_owned_by_user(meta_sk)) {
16301 + /* Try again later */
16302 + sk_reset_timer(sk, &tcp_sk(sk)->mptcp->mptcp_ack_timer,
16303 + jiffies + (HZ / 20));
16304 + goto out_unlock;
16307 + if (sk->sk_state == TCP_CLOSE)
16308 + goto out_unlock;
16310 + mptcp_ack_retransmit_timer(sk);
16312 + sk_mem_reclaim(sk);
16314 +out_unlock:
16315 + bh_unlock_sock(meta_sk);
16316 + sock_put(sk);
16319 +/* Similar to tcp_retransmit_skb
16321 + * The diff is that we handle the retransmission-stats (retrans_stamp) at the
16322 + * meta-level.
16323 + */
16324 +int mptcp_retransmit_skb(struct sock *meta_sk, struct sk_buff *skb)
16326 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
16327 + struct sock *subsk;
16328 + struct sk_buff *subskb;
16329 + unsigned int limit, tso_segs, mss_now;
16330 + int err = -1, oldpcount;
16332 + /* Do not send more than we queued. 1/4 is reserved for possible
16333 + * copying overhead: fragmentation, tunneling, mangling etc.
16335 + * This is a meta-retransmission thus we check on the meta-socket.
16336 + */
16337 + if (atomic_read(&meta_sk->sk_wmem_alloc) >
16338 + min(meta_sk->sk_wmem_queued + (meta_sk->sk_wmem_queued >> 2), meta_sk->sk_sndbuf)) {
16339 + return -EAGAIN;
16342 + /* We need to make sure that the retransmitted segment can be sent on a
16343 + * subflow right now. If it is too big, it needs to be fragmented.
16344 + */
16345 + subsk = get_available_subflow(meta_sk, skb, &mss_now);
16346 + if (!subsk) {
16347 + /* We want to increase icsk_retransmits, thus return 0, so that
16348 + * mptcp_retransmit_timer enters the desired branch.
16349 + */
16350 + err = 0;
16351 + goto failed;
16354 + /* If the segment was cloned (e.g. a meta retransmission), the header
16355 + * must be expanded/copied so that there is no corruption of TSO
16356 + * information.
16357 + */
16358 + if (skb_unclone(skb, GFP_ATOMIC)) {
16359 + err = ENOMEM;
16360 + goto failed;
16363 + oldpcount = tcp_skb_pcount(skb);
16364 + tcp_set_skb_tso_segs(meta_sk, skb, mss_now);
16365 + tso_segs = tcp_skb_pcount(skb);
16366 + BUG_ON(!tso_segs);
16368 + /* The MSS might have changed and so the number of segments. We
16369 + * need to account for this change.
16370 + */
16371 + if (unlikely(oldpcount != tso_segs))
16372 + tcp_adjust_pcount(meta_sk, skb, oldpcount - tso_segs);
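+ /* Work out how much of this skb the chosen subflow can carry right now
+  * (cwnd and GSO limits); if the skb is larger than that, fragment it at
+  * the meta-level before handing it to the subflow.
+  */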
16374 + limit = mss_now;
16375 + if (tso_segs > 1 && !tcp_urg_mode(meta_tp))
16376 + limit = tcp_mss_split_point(subsk, skb, mss_now,
16377 + min_t(unsigned int,
16378 + tcp_cwnd_test(tcp_sk(subsk), skb),
16379 + subsk->sk_gso_max_segs),
16380 + TCP_NAGLE_OFF);
16382 + if (skb->len > limit &&
16383 + unlikely(mptso_fragment(meta_sk, skb, limit, mss_now,
16384 + GFP_ATOMIC, 0)))
16385 + goto failed;
16387 + subskb = mptcp_skb_entail(subsk, skb, -1);
16388 + if (!subskb)
16389 + goto failed;
16391 + TCP_SKB_CB(skb)->when = tcp_time_stamp;
16392 + TCP_SKB_CB(subskb)->when = tcp_time_stamp;
16393 + err = tcp_transmit_skb(subsk, subskb, 1, GFP_ATOMIC);
16394 + if (!err) {
16395 + /* Update global TCP statistics. */
16396 + TCP_INC_STATS(sock_net(meta_sk), TCP_MIB_RETRANSSEGS);
16398 + /* Diff to tcp_retransmit_skb */
16400 + /* Save stamp of the first retransmit. */
16401 + if (!meta_tp->retrans_stamp)
16402 + meta_tp->retrans_stamp = TCP_SKB_CB(subskb)->when;
16403 + mptcp_sub_event_new_data_sent(subsk, subskb, skb);
16404 + } else {
16405 + mptcp_transmit_skb_failed(subsk, skb, subskb);
16408 +failed:
16409 + return err;
16412 +/* Similar to tcp_retransmit_timer
16414 + * The diff is that we have to handle retransmissions of the FAST_CLOSE-message
16415 + * and that we don't have an srtt estimation at the meta-level.
16416 + */
16417 +void mptcp_retransmit_timer(struct sock *meta_sk)
16419 + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
16420 + struct mptcp_cb *mpcb = meta_tp->mpcb;
16421 + struct inet_connection_sock *meta_icsk = inet_csk(meta_sk);
16422 + int err;
16424 + /* In fallback, retransmission is handled at the subflow-level */
16425 + if (!meta_tp->packets_out || mpcb->infinite_mapping_snd ||
16426 + mpcb->send_infinite_mapping)
16427 + return;
16429 + WARN_ON(tcp_write_queue_empty(meta_sk));
16431 + if (!meta_tp->snd_wnd && !sock_flag(meta_sk, SOCK_DEAD) &&
16432 + !((1 << meta_sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
16433 + /* Receiver dastardly shrinks window. Our retransmits
16434 + * become zero probes, but we should not timeout this
16435 + * connection. If the socket is an orphan, time it out,
16436 + * we cannot allow such beasts to hang infinitely.
16437 + */
16438 + struct inet_sock *meta_inet = inet_sk(meta_sk);
16439 + if (meta_sk->sk_family == AF_INET) {
16440 + LIMIT_NETDEBUG(KERN_DEBUG "MPTCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
16441 + &meta_inet->inet_daddr,
16442 + ntohs(meta_inet->inet_dport),
16443 + meta_inet->inet_num, meta_tp->snd_una,
16444 + meta_tp->snd_nxt);
16446 +#if IS_ENABLED(CONFIG_IPV6)
16447 + else if (meta_sk->sk_family == AF_INET6) {
16448 + LIMIT_NETDEBUG(KERN_DEBUG "MPTCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
16449 + &meta_sk->sk_v6_daddr,
16450 + ntohs(meta_inet->inet_dport),
16451 + meta_inet->inet_num, meta_tp->snd_una,
16452 + meta_tp->snd_nxt);
16454 +#endif
16455 + if (tcp_time_stamp - meta_tp->rcv_tstamp > TCP_RTO_MAX) {
16456 + tcp_write_err(meta_sk);
16457 + return;
16460 + mptcp_retransmit_skb(meta_sk, tcp_write_queue_head(meta_sk));
16461 + goto out_reset_timer;
16464 + if (tcp_write_timeout(meta_sk))
16465 + return;
16467 + if (meta_icsk->icsk_retransmits == 0)
16468 + NET_INC_STATS_BH(sock_net(meta_sk), LINUX_MIB_TCPTIMEOUTS);
16470 + meta_icsk->icsk_ca_state = TCP_CA_Loss;
16472 + err = mptcp_retransmit_skb(meta_sk, tcp_write_queue_head(meta_sk));
16473 + if (err > 0) {
16474 + /* Retransmission failed because of local congestion,
16475 + * do not backoff.
16476 + */
16477 + if (!meta_icsk->icsk_retransmits)
16478 + meta_icsk->icsk_retransmits = 1;
16479 + inet_csk_reset_xmit_timer(meta_sk, ICSK_TIME_RETRANS,
16480 + min(meta_icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL),
16481 + TCP_RTO_MAX);
16482 + return;
16485 + /* Increase the timeout each time we retransmit. Note that
16486 + * we do not increase the rtt estimate. rto is initialized
16487 + * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests
16488 + * that doubling rto each time is the least we can get away with.
16489 + * In KA9Q, Karn uses this for the first few times, and then
16490 + * goes to quadratic. netBSD doubles, but only goes up to *64,
16491 + * and clamps at 1 to 64 sec afterwards. Note that 120 sec is
16492 + * defined in the protocol as the maximum possible RTT. I guess
16493 + * we'll have to use something other than TCP to talk to the
16494 + * University of Mars.
16496 + * PAWS allows us longer timeouts and large windows, so once
16497 + * implemented ftp to mars will work nicely. We will have to fix
16498 + * the 120 second clamps though!
16499 + */
16500 + meta_icsk->icsk_backoff++;
16501 + meta_icsk->icsk_retransmits++;
16503 +out_reset_timer:
16504 + /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
16505 + * used to reset timer, set to 0. Recalculate 'icsk_rto' as this
16506 + * might be increased if the stream oscillates between thin and thick,
16507 + * thus the old value might already be too high compared to the value
16508 + * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
16509 + * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
16510 + exponential backoff behaviour, to avoid continually hammering
16511 + * linear-timeout retransmissions into a black hole
16512 + */
16513 + if (meta_sk->sk_state == TCP_ESTABLISHED &&
16514 + (meta_tp->thin_lto || sysctl_tcp_thin_linear_timeouts) &&
16515 + tcp_stream_is_thin(meta_tp) &&
16516 + meta_icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
16517 + meta_icsk->icsk_backoff = 0;
16518 + /* We cannot do the same as in tcp_write_timer because the
16519 + * srtt is not set here.
16520 + */
16521 + mptcp_set_rto(meta_sk);
16522 + } else {
16523 + /* Use normal (exponential) backoff */
16524 + meta_icsk->icsk_rto = min(meta_icsk->icsk_rto << 1, TCP_RTO_MAX);
16526 + inet_csk_reset_xmit_timer(meta_sk, ICSK_TIME_RETRANS, meta_icsk->icsk_rto, TCP_RTO_MAX);
16528 + return;
16531 +/* Modify values to an mptcp-level for the initial window of new subflows */
16532 +void mptcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd,
16533 + __u32 *window_clamp, int wscale_ok,
16534 + __u8 *rcv_wscale, __u32 init_rcv_wnd,
16535 + const struct sock *sk)
16537 + struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb;
16539 + *window_clamp = mpcb->orig_window_clamp;
16540 + __space = tcp_win_from_space(mpcb->orig_sk_rcvbuf);
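+ /* Both values come from the meta-level buffers saved in the mpcb, so
+  * every new subflow advertises a window consistent with the
+  * connection-level receive buffer rather than its own.
+  */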
16542 + tcp_select_initial_window(__space, mss, rcv_wnd, window_clamp,
16543 + wscale_ok, rcv_wscale, init_rcv_wnd, sk);
16546 +unsigned int mptcp_current_mss(struct sock *meta_sk)
16548 + unsigned int mss = 0;
16549 + struct sock *sk;
16551 + mptcp_for_each_sk(tcp_sk(meta_sk)->mpcb, sk) {
16552 + int this_mss;
16554 + if (!mptcp_sk_can_send(sk))
16555 + continue;
16557 + this_mss = tcp_current_mss(sk);
16558 + if (this_mss > mss)
16559 + mss = this_mss;
16562 + /* If no subflow is available, we take a default-mss from the
16563 + * meta-socket.
16564 + */
16565 + return !mss ? tcp_current_mss(meta_sk) : mss;
16568 +int mptcp_select_size(const struct sock *meta_sk, bool sg)
16570 + int mss = 0; /* We look for the largest MSS */
16571 + struct sock *sk;
16573 + mptcp_for_each_sk(tcp_sk(meta_sk)->mpcb, sk) {
16574 + int this_mss;
16576 + if (!mptcp_sk_can_send(sk))
16577 + continue;
16579 + this_mss = tcp_sk(sk)->mss_cache;
16580 + if (this_mss > mss)
16581 + mss = this_mss;
16584 + if (sg) {
16585 + if (mptcp_sk_can_gso(meta_sk)) {
16586 + mss = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
16587 + } else {
16588 + int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
16590 + if (mss >= pgbreak &&
16591 + mss <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
16592 + mss = pgbreak;
16596 + return !mss ? tcp_sk(meta_sk)->mss_cache : mss;
16599 +int mptcp_check_snd_buf(const struct tcp_sock *tp)
16601 + struct sock *sk;
16602 + u32 rtt_max = tp->srtt;
16603 + u64 bw_est;
16605 + if (!tp->srtt)
16606 + return tp->reordering + 1;
16608 + mptcp_for_each_sk(tp->mpcb, sk) {
16609 + if (!mptcp_sk_can_send(sk))
16610 + continue;
16612 + if (rtt_max < tcp_sk(sk)->srtt)
16613 + rtt_max = tcp_sk(sk)->srtt;
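+ /* Rough estimate of how many packets are needed to keep the pipe full:
+  * this socket's rate (snd_cwnd / srtt) multiplied by the largest srtt
+  * among all subflows, kept in 16-bit fixed point until the shift back
+  * below. The send buffer must hold at least that many packets.
+  */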
16616 + bw_est = div64_u64(((u64)tp->snd_cwnd * rtt_max) << 16,
16617 + (u64)tp->srtt);
16619 + return max_t(unsigned int, (u32)(bw_est >> 16),
16620 + tp->reordering + 1);
16624 +unsigned int mptcp_xmit_size_goal(struct sock *meta_sk, u32 mss_now,
16625 + int large_allowed)
16627 + struct sock *sk;
16628 + u32 xmit_size_goal = 0;
16630 + if (large_allowed && mptcp_sk_can_gso(meta_sk)) {
16631 + mptcp_for_each_sk(tcp_sk(meta_sk)->mpcb, sk) {
16632 + int this_size_goal;
16634 + if (!mptcp_sk_can_send(sk))
16635 + continue;
16637 + this_size_goal = tcp_xmit_size_goal(sk, mss_now, 1);
16638 + if (this_size_goal > xmit_size_goal)
16639 + xmit_size_goal = this_size_goal;
16643 + return max(xmit_size_goal, mss_now);
16646 +/* Similar to tcp_trim_head - but we correctly copy the DSS-option */
16647 +int mptcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
16649 + int dsslen = MPTCP_SUB_LEN_DSS_ALIGN + MPTCP_SUB_LEN_ACK_ALIGN +
16650 + MPTCP_SUB_LEN_SEQ_ALIGN;
16651 + char dss[dsslen];
16653 + /* DSS-option must be recovered afterwards. */
16654 + memcpy(dss, skb->data - dsslen, dsslen);
16656 + if (skb_cloned(skb)) {
16657 + /* pskb_expand_head will delete our DSS-option. We have to copy
16658 + * it back if pskb_expand_head succeeds.
16659 + */
16661 + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
16662 + return -ENOMEM;
16664 + memcpy(skb->data - dsslen, dss, dsslen);
16667 + __pskb_trim_head(skb, len);
16669 + /* Put the DSS-option back in our header */
16670 + memcpy(skb->data - dsslen, dss, dsslen);
16672 + TCP_SKB_CB(skb)->seq += len;
16673 + skb->ip_summed = CHECKSUM_PARTIAL;
16675 + skb->truesize -= len;
16676 + sk->sk_wmem_queued -= len;
16677 + sk_mem_uncharge(sk, len);
16678 + sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
16680 + /* Any change of skb->len requires recalculation of tso factor. */
16681 + if (tcp_skb_pcount(skb) > 1)
16682 + tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
16684 + return 0;
16686 diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_pm.c linux-3.14.45/net/mptcp/mptcp_pm.c
16687 --- linux-3.14.45.orig/net/mptcp/mptcp_pm.c 1970-01-01 01:00:00.000000000 +0100
16688 +++ linux-3.14.45/net/mptcp/mptcp_pm.c 2015-06-24 14:15:48.931862523 +0200
16689 @@ -0,0 +1,170 @@
16691 + * MPTCP implementation - MPTCP-subflow-management
16693 + * Initial Design & Implementation:
16694 + * Sébastien Barré <sebastien.barre@uclouvain.be>
16696 + * Current Maintainer & Author:
16697 + * Christoph Paasch <christoph.paasch@uclouvain.be>
16699 + * Additional authors:
16700 + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
16701 + * Gregory Detal <gregory.detal@uclouvain.be>
16702 + * Fabien Duchêne <fabien.duchene@uclouvain.be>
16703 + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
16704 + * Lavkesh Lahngir <lavkesh51@gmail.com>
16705 + * Andreas Ripke <ripke@neclab.eu>
16706 + * Vlad Dogaru <vlad.dogaru@intel.com>
16707 + * Octavian Purdila <octavian.purdila@intel.com>
16708 + * John Ronan <jronan@tssg.org>
16709 + * Catalin Nicutar <catalin.nicutar@gmail.com>
16710 + * Brandon Heller <brandonh@stanford.edu>
16713 + * This program is free software; you can redistribute it and/or
16714 + * modify it under the terms of the GNU General Public License
16715 + * as published by the Free Software Foundation; either version
16716 + * 2 of the License, or (at your option) any later version.
16717 + */
16720 +#include <linux/module.h>
16721 +#include <net/mptcp.h>
16723 +static DEFINE_SPINLOCK(mptcp_pm_list_lock);
16724 +static LIST_HEAD(mptcp_pm_list);
16726 +static int mptcp_default_index(sa_family_t family, union inet_addr *addr,
16727 + struct net *net)
16729 + return 0;
16732 +struct mptcp_pm_ops mptcp_pm_default = {
16733 + .get_local_index = mptcp_default_index,
16734 + .get_local_id = mptcp_default_index, /* We do not care */
16735 + .name = "default",
16736 + .owner = THIS_MODULE,
16739 +static struct mptcp_pm_ops *mptcp_pm_find(const char *name)
16741 + struct mptcp_pm_ops *e;
16743 + list_for_each_entry_rcu(e, &mptcp_pm_list, list) {
16744 + if (strcmp(e->name, name) == 0)
16745 + return e;
16748 + return NULL;
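+ /* A minimal, purely illustrative sketch of a path-manager registration
+  * (the "my_pm" names are hypothetical). Both callbacks must be set or
+  * mptcp_register_path_manager() below rejects it with -EINVAL:
+  *
+  *   static struct mptcp_pm_ops my_pm = {
+  *       .get_local_index = my_index_fn,
+  *       .get_local_id    = my_index_fn,
+  *       .name            = "my_pm",
+  *       .owner           = THIS_MODULE,
+  *   };
+  *
+  * Such a module would call mptcp_register_path_manager(&my_pm) from its
+  * init function and mptcp_unregister_path_manager(&my_pm) on exit.
+  */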
16751 +int mptcp_register_path_manager(struct mptcp_pm_ops *pm)
16753 + int ret = 0;
16755 + if (!pm->get_local_index || !pm->get_local_id)
16756 + return -EINVAL;
16758 + spin_lock(&mptcp_pm_list_lock);
16759 + if (mptcp_pm_find(pm->name)) {
16760 + pr_notice("%s already registered\n", pm->name);
16761 + ret = -EEXIST;
16762 + } else {
16763 + list_add_tail_rcu(&pm->list, &mptcp_pm_list);
16764 + pr_info("%s registered\n", pm->name);
16766 + spin_unlock(&mptcp_pm_list_lock);
16768 + return ret;
16770 +EXPORT_SYMBOL_GPL(mptcp_register_path_manager);
16772 +void mptcp_unregister_path_manager(struct mptcp_pm_ops *pm)
16774 + spin_lock(&mptcp_pm_list_lock);
16775 + list_del_rcu(&pm->list);
16776 + spin_unlock(&mptcp_pm_list_lock);
16778 +EXPORT_SYMBOL_GPL(mptcp_unregister_path_manager);
16780 +void mptcp_get_default_path_manager(char *name)
16782 + struct mptcp_pm_ops *pm;
16784 + BUG_ON(list_empty(&mptcp_pm_list));
16786 + rcu_read_lock();
16787 + pm = list_entry(mptcp_pm_list.next, struct mptcp_pm_ops, list);
16788 + strncpy(name, pm->name, MPTCP_PM_NAME_MAX);
16789 + rcu_read_unlock();
16792 +int mptcp_set_default_path_manager(const char *name)
16794 + struct mptcp_pm_ops *pm;
16795 + int ret = -ENOENT;
16797 + spin_lock(&mptcp_pm_list_lock);
16798 + pm = mptcp_pm_find(name);
16799 +#ifdef CONFIG_MODULES
16800 + if (!pm && capable(CAP_NET_ADMIN)) {
16801 + spin_unlock(&mptcp_pm_list_lock);
16803 + request_module("mptcp_%s", name);
16804 + spin_lock(&mptcp_pm_list_lock);
16805 + pm = mptcp_pm_find(name);
16807 +#endif
16809 + if (pm) {
16810 + list_move(&pm->list, &mptcp_pm_list);
16811 + ret = 0;
16812 + } else {
16813 + pr_info("%s is not available\n", name);
16815 + spin_unlock(&mptcp_pm_list_lock);
16817 + return ret;
16820 +void mptcp_init_path_manager(struct mptcp_cb *mpcb)
16822 + struct mptcp_pm_ops *pm;
16824 + rcu_read_lock();
16825 + list_for_each_entry_rcu(pm, &mptcp_pm_list, list) {
16826 + if (try_module_get(pm->owner)) {
16827 + mpcb->pm_ops = pm;
16828 + break;
16831 + rcu_read_unlock();
16834 +/* Manage refcounts on socket close. */
16835 +void mptcp_cleanup_path_manager(struct mptcp_cb *mpcb)
16837 + module_put(mpcb->pm_ops->owner);
16840 +/* Fallback to the default path-manager. */
16841 +void mptcp_fallback_default(struct mptcp_cb *mpcb)
16843 + struct mptcp_pm_ops *pm;
16845 + mptcp_cleanup_path_manager(mpcb);
16846 + pm = mptcp_pm_find("default");
16848 + /* Cannot fail - it's the default module */
16849 + try_module_get(pm->owner);
16850 + mpcb->pm_ops = pm;
16852 +EXPORT_SYMBOL_GPL(mptcp_fallback_default);
16854 +/* Set default value from kernel configuration at bootup */
16855 +static int __init mptcp_path_manager_default(void)
16857 + return mptcp_set_default_path_manager(CONFIG_DEFAULT_MPTCP_PM);
16859 +late_initcall(mptcp_path_manager_default);
16860 diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_wvegas.c linux-3.14.45/net/mptcp/mptcp_wvegas.c
16861 --- linux-3.14.45.orig/net/mptcp/mptcp_wvegas.c 1970-01-01 01:00:00.000000000 +0100
16862 +++ linux-3.14.45/net/mptcp/mptcp_wvegas.c 2015-06-24 14:15:48.931862523 +0200
16863 @@ -0,0 +1,270 @@
16865 + * MPTCP implementation - WEIGHTED VEGAS
16867 + * Algorithm design:
16868 + * Yu Cao <cyAnalyst@126.com>
16869 + * Mingwei Xu <xmw@csnet1.cs.tsinghua.edu.cn>
16870 + * Xiaoming Fu <fu@cs.uni-goettingen.de>
16872 + * Implementation:
16873 + * Yu Cao <cyAnalyst@126.com>
16874 + * Enhuan Dong <deh13@mails.tsinghua.edu.cn>
16876 + * Ported to the official MPTCP-kernel:
16877 + * Christoph Paasch <christoph.paasch@uclouvain.be>
16879 + * This program is free software; you can redistribute it and/or
16880 + * modify it under the terms of the GNU General Public License
16881 + * as published by the Free Software Foundation; either version
16882 + * 2 of the License, or (at your option) any later version.
16883 + */
16885 +#include <linux/skbuff.h>
16886 +#include <net/tcp.h>
16887 +#include <net/mptcp.h>
16888 +#include <linux/module.h>
16889 +#include <linux/tcp.h>
16891 +static int initial_alpha = 2;
16892 +static int total_alpha = 10;
16893 +static int gamma = 1;
16895 +module_param(initial_alpha, int, 0644);
16896 +MODULE_PARM_DESC(initial_alpha, "initial alpha for all subflows");
16897 +module_param(total_alpha, int, 0644);
16898 +MODULE_PARM_DESC(total_alpha, "total alpha for all subflows");
16899 +module_param(gamma, int, 0644);
16900 +MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)");
16902 +#define MPTCP_WVEGAS_SCALE 16
16904 +/* wVegas variables */
16905 +struct wvegas {
16906 + u32 beg_snd_nxt; /* right edge during last RTT */
16907 + u8 doing_wvegas_now;/* if true, do wvegas for this RTT */
16909 + u16 cnt_rtt; /* # of RTTs measured within last RTT */
16910 + u32 sampled_rtt; /* cumulative RTTs measured within last RTT (in usec) */
16911 + u32 base_rtt; /* the min of all wVegas RTT measurements seen (in usec) */
16913 + u64 instant_rate; /* cwnd / srtt_us, unit: pkts/us * 2^16 */
16914 + u64 weight; /* the ratio of subflow's rate to the total rate, * 2^16 */
16915 + int alpha; /* alpha for each subflows */
16917 + u32 queue_delay; /* queue delay*/
16921 +static inline u64 mptcp_wvegas_scale(u32 val, int scale)
16923 + return (u64) val << scale;
16926 +static void wvegas_enable(struct sock *sk)
16928 + const struct tcp_sock *tp = tcp_sk(sk);
16929 + struct wvegas *wvegas = inet_csk_ca(sk);
16931 + wvegas->doing_wvegas_now = 1;
16933 + wvegas->beg_snd_nxt = tp->snd_nxt;
16935 + wvegas->cnt_rtt = 0;
16936 + wvegas->sampled_rtt = 0;
16938 + wvegas->instant_rate = 0;
16939 + wvegas->alpha = initial_alpha;
16940 + wvegas->weight = mptcp_wvegas_scale(1, MPTCP_WVEGAS_SCALE);
16942 + wvegas->queue_delay = 0;
16945 +static inline void wvegas_disable(struct sock *sk)
16947 + struct wvegas *wvegas = inet_csk_ca(sk);
16949 + wvegas->doing_wvegas_now = 0;
16952 +static void mptcp_wvegas_init(struct sock *sk)
16954 + struct wvegas *wvegas = inet_csk_ca(sk);
16956 + wvegas->base_rtt = 0x7fffffff;
16957 + wvegas_enable(sk);
16960 +static inline u64 mptcp_wvegas_rate(u32 cwnd, u32 rtt_us)
16962 + return div_u64(mptcp_wvegas_scale(cwnd, MPTCP_WVEGAS_SCALE), rtt_us);
16965 +static void mptcp_wvegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us)
16967 + struct wvegas *wvegas = inet_csk_ca(sk);
16968 + u32 vrtt;
16970 + if (rtt_us < 0)
16971 + return;
16973 + vrtt = rtt_us + 1;
16975 + if (vrtt < wvegas->base_rtt)
16976 + wvegas->base_rtt = vrtt;
16978 + wvegas->sampled_rtt += vrtt;
16979 + wvegas->cnt_rtt++;
16982 +static void mptcp_wvegas_state(struct sock *sk, u8 ca_state)
16984 + if (ca_state == TCP_CA_Open)
16985 + wvegas_enable(sk);
16986 + else
16987 + wvegas_disable(sk);
16990 +static void mptcp_wvegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
16992 + if (event == CA_EVENT_CWND_RESTART) {
16993 + mptcp_wvegas_init(sk);
16994 + } else if (event == CA_EVENT_LOSS) {
16995 + struct wvegas *wvegas = inet_csk_ca(sk);
16996 + wvegas->instant_rate = 0;
17000 +static inline u32 mptcp_wvegas_ssthresh(struct tcp_sock *tp)
17002 + return min(tp->snd_ssthresh, tp->snd_cwnd - 1);
17005 +static u64 mptcp_wvegas_weight(struct mptcp_cb *mpcb, struct sock *sk)
17007 + u64 total_rate = 0;
17008 + struct sock *sub_sk;
17009 + struct wvegas *wvegas = inet_csk_ca(sk);
17011 + if (!mpcb)
17012 + return wvegas->weight;
17015 + mptcp_for_each_sk(mpcb, sub_sk) {
17016 + struct wvegas *sub_wvegas = inet_csk_ca(sub_sk);
17018 + /* sampled_rtt is initialized to 0 */
17019 + if (mptcp_sk_can_send(sub_sk) && (sub_wvegas->sampled_rtt > 0))
17020 + total_rate += sub_wvegas->instant_rate;
17023 + if (total_rate && wvegas->instant_rate)
17024 + return div64_u64(mptcp_wvegas_scale(wvegas->instant_rate, MPTCP_WVEGAS_SCALE), total_rate);
17025 + else
17026 + return wvegas->weight;
17029 +static void mptcp_wvegas_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
17031 + struct tcp_sock *tp = tcp_sk(sk);
17032 + struct wvegas *wvegas = inet_csk_ca(sk);
17034 + if (!wvegas->doing_wvegas_now) {
17035 + tcp_reno_cong_avoid(sk, ack, acked, in_flight);
17036 + return;
17039 + if (after(ack, wvegas->beg_snd_nxt)) {
17040 + wvegas->beg_snd_nxt = tp->snd_nxt;
17042 + if (wvegas->cnt_rtt <= 2) {
17043 + tcp_reno_cong_avoid(sk, ack, acked, in_flight);
17044 + } else {
17045 + u32 rtt, diff, q_delay;
17046 + u64 target_cwnd;
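+ /* Standard Vegas accounting on this subflow: rtt is the average of the
+  * samples from the last RTT, target_cwnd is what the window would be if
+  * only base_rtt were ever seen, and diff estimates how many packets the
+  * subflow currently keeps queued in the network.
+  */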
17048 + rtt = wvegas->sampled_rtt / wvegas->cnt_rtt;
17049 + target_cwnd = div_u64(((u64)tp->snd_cwnd * wvegas->base_rtt), rtt);
17051 + diff = div_u64((u64)tp->snd_cwnd * (rtt - wvegas->base_rtt), rtt);
17053 + if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) {
17054 + tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
17055 + tp->snd_ssthresh = mptcp_wvegas_ssthresh(tp);
17057 + } else if (tp->snd_cwnd <= tp->snd_ssthresh) {
17058 + tcp_slow_start(tp, acked);
17059 + } else {
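+ /* Congestion avoidance: once the backlog reaches alpha, re-derive the
+  * per-subflow alpha from this subflow's share of the total MPTCP rate
+  * (its weight), so faster subflows may keep more packets queued; then
+  * nudge cwnd by one towards that target.
+  */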
17060 + if (diff >= wvegas->alpha) {
17061 + wvegas->instant_rate = mptcp_wvegas_rate(tp->snd_cwnd, rtt);
17062 + wvegas->weight = mptcp_wvegas_weight(tp->mpcb, sk);
17063 + wvegas->alpha = max(2U, (u32)((wvegas->weight * total_alpha) >> MPTCP_WVEGAS_SCALE));
17065 + if (diff > wvegas->alpha) {
17066 + tp->snd_cwnd--;
17067 + tp->snd_ssthresh = mptcp_wvegas_ssthresh(tp);
17068 + } else if (diff < wvegas->alpha) {
17069 + tp->snd_cwnd++;
17072 + /* Try to drain link queue if needed*/
17073 + q_delay = rtt - wvegas->base_rtt;
17074 + if ((wvegas->queue_delay == 0) || (wvegas->queue_delay > q_delay))
17075 + wvegas->queue_delay = q_delay;
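+ /* If the observed queueing delay has at least doubled compared to the
+  * smallest delay seen, back the window off by base_rtt / (2 * rtt)
+  * (16-bit fixed point) to help drain the bottleneck queue.
+  */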
17077 + if (q_delay >= 2 * wvegas->queue_delay) {
17078 + u32 backoff_factor = div_u64(mptcp_wvegas_scale(wvegas->base_rtt, MPTCP_WVEGAS_SCALE), 2 * rtt);
17079 + tp->snd_cwnd = ((u64)tp->snd_cwnd * backoff_factor) >> MPTCP_WVEGAS_SCALE;
17080 + wvegas->queue_delay = 0;
17084 + if (tp->snd_cwnd < 2)
17085 + tp->snd_cwnd = 2;
17086 + else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
17087 + tp->snd_cwnd = tp->snd_cwnd_clamp;
17089 + tp->snd_ssthresh = tcp_current_ssthresh(sk);
17092 + wvegas->cnt_rtt = 0;
17093 + wvegas->sampled_rtt = 0;
17095 + /* Use normal slow start */
17096 + else if (tp->snd_cwnd <= tp->snd_ssthresh)
17097 + tcp_slow_start(tp, acked);
17101 +static struct tcp_congestion_ops mptcp_wvegas __read_mostly = {
17102 + .flags = TCP_CONG_RTT_STAMP,
17103 + .init = mptcp_wvegas_init,
17104 + .ssthresh = tcp_reno_ssthresh,
17105 + .cong_avoid = mptcp_wvegas_cong_avoid,
17106 + .min_cwnd = tcp_reno_min_cwnd,
17107 + .pkts_acked = mptcp_wvegas_pkts_acked,
17108 + .set_state = mptcp_wvegas_state,
17109 + .cwnd_event = mptcp_wvegas_cwnd_event,
17111 + .owner = THIS_MODULE,
17112 + .name = "wvegas",
17115 +static int __init mptcp_wvegas_register(void)
17117 + BUILD_BUG_ON(sizeof(struct wvegas) > ICSK_CA_PRIV_SIZE);
17118 + tcp_register_congestion_control(&mptcp_wvegas);
17119 + return 0;
17122 +static void __exit mptcp_wvegas_unregister(void)
17124 + tcp_unregister_congestion_control(&mptcp_wvegas);
17127 +module_init(mptcp_wvegas_register);
17128 +module_exit(mptcp_wvegas_unregister);
17130 +MODULE_AUTHOR("Yu Cao, Enhuan Dong");
17131 +MODULE_LICENSE("GPL");
17132 +MODULE_DESCRIPTION("MPTCP wVegas");
17133 +MODULE_VERSION("0.1");