target/linux/patches/2.6.30.4/natt.patch

   1 diff -Nur linux-2.6.30.1.orig/include/net/xfrmudp.h linux-2.6.30.1/include/net/xfrmudp.h
   2 --- linux-2.6.30.1.orig/include/net/xfrmudp.h   1970-01-01 01:00:00.000000000 +0100
   3 +++ linux-2.6.30.1/include/net/xfrmudp.h        2009-07-24 22:00:56.771280384 +0200
   4 @@ -0,0 +1,10 @@
   5 +/*
   6 + * Function-pointer type for the receive handler that xfrm4_input wants,
   7 + * so that XFRM can be decoupled from udp.c.
   8 + */
   9 +#define HAVE_XFRM4_UDP_REGISTER
  10 +
  11 +typedef int (*xfrm4_rcv_encap_t)(struct sk_buff *skb, __u16 encap_type);
  12 +extern int udp4_register_esp_rcvencap(xfrm4_rcv_encap_t func
  13 +                                     , xfrm4_rcv_encap_t *oldfunc);
  14 +extern int udp4_unregister_esp_rcvencap(xfrm4_rcv_encap_t func);
  15 diff -Nur linux-2.6.30.1.orig/net/ipv4/Kconfig linux-2.6.30.1/net/ipv4/Kconfig
  16 --- linux-2.6.30.1.orig/net/ipv4/Kconfig        2009-07-03 01:52:38.000000000 +0200
  17 +++ linux-2.6.30.1/net/ipv4/Kconfig     2009-07-24 22:00:56.751278392 +0200
  18 @@ -379,6 +379,12 @@
  19         tristate
  20         default n
  21
  22 +config IPSEC_NAT_TRAVERSAL
  23 +       bool "IPSEC NAT-Traversal (KLIPS compatible)"
  24 +       depends on INET
  25 +       ---help---
  26 +          Includes support for RFC3947/RFC3948 NAT-Traversal of ESP over UDP.
  27 +
  28  config INET_XFRM_MODE_TRANSPORT
  29         tristate "IP: IPsec transport mode"
  30         default y
  31 diff -Nur linux-2.6.30.1.orig/net/ipv4/Kconfig.orig linux-2.6.30.1/net/ipv4/Kconfig.orig
  32 --- linux-2.6.30.1.orig/net/ipv4/Kconfig.orig   1970-01-01 01:00:00.000000000 +0100
  33 +++ linux-2.6.30.1/net/ipv4/Kconfig.orig        2009-07-03 01:52:38.000000000 +0200
  34 @@ -0,0 +1,638 @@
  35 +#
  36 +# IP configuration
  37 +#
  38 +config IP_MULTICAST
  39 +       bool "IP: multicasting"
  40 +       help
  41 +         This is code for addressing several networked computers at once,
  42 +         enlarging your kernel by about 2 KB. You need multicasting if you
  43 +         intend to participate in the MBONE, a high bandwidth network on top
  44 +         of the Internet which carries audio and video broadcasts. More
  45 +         information about the MBONE is on the WWW at
  46 +         <http://www.savetz.com/mbone/>. Information about the multicast
  47 +         capabilities of the various network cards is contained in
  48 +         <file:Documentation/networking/multicast.txt>. For most people, it's
  49 +         safe to say N.
  50 +
  51 +config IP_ADVANCED_ROUTER
  52 +       bool "IP: advanced router"
  53 +       ---help---
  54 +         If you intend to run your Linux box mostly as a router, i.e. as a
  55 +         computer that forwards and redistributes network packets, say Y; you
  56 +         will then be presented with several options that allow more precise
  57 +         control about the routing process.
  58 +
  59 +         The answer to this question won't directly affect the kernel:
  60 +         answering N will just cause the configurator to skip all the
  61 +         questions about advanced routing.
  62 +
  63 +         Note that your box can only act as a router if you enable IP
  64 +         forwarding in your kernel; you can do that by saying Y to "/proc
  65 +         file system support" and "Sysctl support" below and executing the
  66 +         line
  67 +
  68 +         echo "1" > /proc/sys/net/ipv4/ip_forward
  69 +
  70 +         at boot time after the /proc file system has been mounted.
  71 +
  72 +         If you turn on IP forwarding, you should consider the rp_filter, which
  73 +         automatically rejects incoming packets if the routing table entry
  74 +         for their source address doesn't match the network interface they're
  75 +         arriving on. This has security advantages because it prevents the
  76 +         so-called IP spoofing, however it can pose problems if you use
  77 +         asymmetric routing (packets from you to a host take a different path
  78 +         than packets from that host to you) or if you operate a non-routing
  79 +         host which has several IP addresses on different interfaces. To turn
  80 +         rp_filter on use:
  81 +
  82 +         echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter
  83 +          and
  84 +         echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter
  85 +
  86 +         Note that some distributions enable it in startup scripts.
  87 +         For details about rp_filter strict and loose mode read
  88 +         <file:Documentation/networking/ip-sysctl.txt>.
  89 +
  90 +         If unsure, say N here.
  91 +
  92 +choice
  93 +       prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)"
  94 +       depends on IP_ADVANCED_ROUTER
  95 +       default ASK_IP_FIB_HASH
  96 +
  97 +config ASK_IP_FIB_HASH
  98 +       bool "FIB_HASH"
  99 +       ---help---
 100 +         Current FIB is very proven and good enough for most users.
 101 +
 102 +config IP_FIB_TRIE
 103 +       bool "FIB_TRIE"
 104 +       ---help---
 105 +         Use new experimental LC-trie as FIB lookup algorithm.
 106 +         This improves lookup performance if you have a large
 107 +         number of routes.
 108 +
 109 +         LC-trie is a longest matching prefix lookup algorithm which
 110 +         performs better than FIB_HASH for large routing tables.
 111 +         But, it consumes more memory and is more complex.
 112 +
 113 +         LC-trie is described in:
 114 +
 115 +         IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
 116 +         IEEE Journal on Selected Areas in Communications, 17(6):1083-1092,
 117 +         June 1999
 118 +
 119 +         An experimental study of compression methods for dynamic tries
 120 +         Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
 121 +         http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
 122 +
 123 +endchoice
 124 +
 125 +config IP_FIB_HASH
 126 +       def_bool ASK_IP_FIB_HASH || !IP_ADVANCED_ROUTER
 127 +
 128 +config IP_FIB_TRIE_STATS
 129 +       bool "FIB TRIE statistics"
 130 +       depends on IP_FIB_TRIE
 131 +       ---help---
 132 +         Keep track of statistics on structure of FIB TRIE table.
 133 +         Useful for testing and measuring TRIE performance.
 134 +
 135 +config IP_MULTIPLE_TABLES
 136 +       bool "IP: policy routing"
 137 +       depends on IP_ADVANCED_ROUTER
 138 +       select FIB_RULES
 139 +       ---help---
 140 +         Normally, a router decides what to do with a received packet based
 141 +         solely on the packet's final destination address. If you say Y here,
 142 +         the Linux router will also be able to take the packet's source
 143 +         address into account. Furthermore, the TOS (Type-Of-Service) field
 144 +         of the packet can be used for routing decisions as well.
 145 +
 146 +         If you are interested in this, please see the preliminary
 147 +         documentation at <http://www.compendium.com.ar/policy-routing.txt>
 148 +         and <ftp://post.tepkom.ru/pub/vol2/Linux/docs/advanced-routing.tex>.
 149 +         You will need supporting software from
 150 +         <ftp://ftp.tux.org/pub/net/ip-routing/>.
 151 +
 152 +         If unsure, say N.
 153 +
 154 +config IP_ROUTE_MULTIPATH
 155 +       bool "IP: equal cost multipath"
 156 +       depends on IP_ADVANCED_ROUTER
 157 +       help
 158 +         Normally, the routing tables specify a single action to be taken in
 159 +         a deterministic manner for a given packet. If you say Y here
 160 +         however, it becomes possible to attach several actions to a packet
 161 +         pattern, in effect specifying several alternative paths to travel
 162 +         for those packets. The router considers all these paths to be of
 163 +         equal "cost" and chooses one of them in a non-deterministic fashion
 164 +         if a matching packet arrives.
 165 +
 166 +config IP_ROUTE_VERBOSE
 167 +       bool "IP: verbose route monitoring"
 168 +       depends on IP_ADVANCED_ROUTER
 169 +       help
 170 +         If you say Y here, which is recommended, then the kernel will print
 171 +         verbose messages regarding the routing, for example warnings about
 172 +         received packets which look strange and could be evidence of an
 173 +         attack or a misconfigured system somewhere. The information is
 174 +         handled by the klogd daemon which is responsible for kernel messages
 175 +         ("man klogd").
 176 +
 177 +config IP_PNP
 178 +       bool "IP: kernel level autoconfiguration"
 179 +       help
 180 +         This enables automatic configuration of IP addresses of devices and
 181 +         of the routing table during kernel boot, based on either information
 182 +         supplied on the kernel command line or by BOOTP or RARP protocols.
 183 +         You need to say Y only for diskless machines requiring network
 184 +         access to boot (in which case you want to say Y to "Root file system
 185 +         on NFS" as well), because all other machines configure the network
 186 +         in their startup scripts.
 187 +
 188 +config IP_PNP_DHCP
 189 +       bool "IP: DHCP support"
 190 +       depends on IP_PNP
 191 +       ---help---
 192 +         If you want your Linux box to mount its whole root file system (the
 193 +         one containing the directory /) from some other computer over the
 194 +         net via NFS and you want the IP address of your computer to be
 195 +         discovered automatically at boot time using the DHCP protocol (a
 196 +         special protocol designed for doing this job), say Y here. In case
 197 +         the boot ROM of your network card was designed for booting Linux and
 198 +         does DHCP itself, providing all necessary information on the kernel
 199 +         command line, you can say N here.
 200 +
 201 +         If unsure, say Y. Note that if you want to use DHCP, a DHCP server
 202 +         must be operating on your network.  Read
 203 +         <file:Documentation/filesystems/nfsroot.txt> for details.
 204 +
 205 +config IP_PNP_BOOTP
 206 +       bool "IP: BOOTP support"
 207 +       depends on IP_PNP
 208 +       ---help---
 209 +         If you want your Linux box to mount its whole root file system (the
 210 +         one containing the directory /) from some other computer over the
 211 +         net via NFS and you want the IP address of your computer to be
 212 +         discovered automatically at boot time using the BOOTP protocol (a
 213 +         special protocol designed for doing this job), say Y here. In case
 214 +         the boot ROM of your network card was designed for booting Linux and
 215 +         does BOOTP itself, providing all necessary information on the kernel
 216 +         command line, you can say N here. If unsure, say Y. Note that if you
 217 +         want to use BOOTP, a BOOTP server must be operating on your network.
 218 +         Read <file:Documentation/filesystems/nfsroot.txt> for details.
 219 +
 220 +config IP_PNP_RARP
 221 +       bool "IP: RARP support"
 222 +       depends on IP_PNP
 223 +       help
 224 +         If you want your Linux box to mount its whole root file system (the
 225 +         one containing the directory /) from some other computer over the
 226 +         net via NFS and you want the IP address of your computer to be
 227 +         discovered automatically at boot time using the RARP protocol (an
 228 +         older protocol which is being obsoleted by BOOTP and DHCP), say Y
 229 +         here. Note that if you want to use RARP, a RARP server must be
 230 +         operating on your network. Read
 231 +         <file:Documentation/filesystems/nfsroot.txt> for details.
 232 +
 233 +# not yet ready..
 234 +#   bool '    IP: ARP support' CONFIG_IP_PNP_ARP
 235 +config NET_IPIP
 236 +       tristate "IP: tunneling"
 237 +       select INET_TUNNEL
 238 +       ---help---
 239 +         Tunneling means encapsulating data of one protocol type within
 240 +         another protocol and sending it over a channel that understands the
 241 +         encapsulating protocol. This particular tunneling driver implements
 242 +         encapsulation of IP within IP, which sounds kind of pointless, but
 243 +         can be useful if you want to make your (or some other) machine
 244 +         appear on a different network than it physically is, or to use
 245 +         mobile-IP facilities (allowing laptops to seamlessly move between
 246 +         networks without changing their IP addresses).
 247 +
 248 +         Saying Y to this option will produce two modules ( = code which can
 249 +         be inserted in and removed from the running kernel whenever you
 250 +         want). Most people won't need this and can say N.
 251 +
 252 +config NET_IPGRE
 253 +       tristate "IP: GRE tunnels over IP"
 254 +       help
 255 +         Tunneling means encapsulating data of one protocol type within
 256 +         another protocol and sending it over a channel that understands the
 257 +         encapsulating protocol. This particular tunneling driver implements
 258 +         GRE (Generic Routing Encapsulation) and at this time allows
 259 +         encapsulating of IPv4 or IPv6 over existing IPv4 infrastructure.
 260 +         This driver is useful if the other endpoint is a Cisco router: Cisco
 261 +         likes GRE much better than the other Linux tunneling driver ("IP
 262 +         tunneling" above). In addition, GRE allows multicast redistribution
 263 +         through the tunnel.
 264 +
 265 +config NET_IPGRE_BROADCAST
 266 +       bool "IP: broadcast GRE over IP"
 267 +       depends on IP_MULTICAST && NET_IPGRE
 268 +       help
 269 +         One application of GRE/IP is to construct a broadcast WAN (Wide Area
 270 +         Network), which looks like a normal Ethernet LAN (Local Area
 271 +         Network), but can be distributed all over the Internet. If you want
 272 +         to do that, say Y here and to "IP multicast routing" below.
 273 +
 274 +config IP_MROUTE
 275 +       bool "IP: multicast routing"
 276 +       depends on IP_MULTICAST
 277 +       help
 278 +         This is used if you want your machine to act as a router for IP
 279 +         packets that have several destination addresses. It is needed on the
 280 +         MBONE, a high bandwidth network on top of the Internet which carries
 281 +         audio and video broadcasts. In order to do that, you would most
 282 +         likely run the program mrouted. Information about the multicast
 283 +         capabilities of the various network cards is contained in
 284 +         <file:Documentation/networking/multicast.txt>. If you haven't heard
 285 +         about it, you don't need it.
 286 +
 287 +config IP_PIMSM_V1
 288 +       bool "IP: PIM-SM version 1 support"
 289 +       depends on IP_MROUTE
 290 +       help
 291 +         Kernel side support for Sparse Mode PIM (Protocol Independent
 292 +         Multicast) version 1. This multicast routing protocol is used widely
 293 +         because Cisco supports it. You need special software to use it
 294 +         (pimd-v1). Please see <http://netweb.usc.edu/pim/> for more
 295 +         information about PIM.
 296 +
 297 +         Say Y if you want to use PIM-SM v1. Note that you can say N here if
 298 +         you just want to use Dense Mode PIM.
 299 +
 300 +config IP_PIMSM_V2
 301 +       bool "IP: PIM-SM version 2 support"
 302 +       depends on IP_MROUTE
 303 +       help
 304 +         Kernel side support for Sparse Mode PIM version 2. In order to use
 305 +         this, you need an experimental routing daemon supporting it (pimd or
 306 +         gated-5). This routing protocol is not used widely, so say N unless
 307 +         you want to play with it.
 308 +
 309 +config ARPD
 310 +       bool "IP: ARP daemon support (EXPERIMENTAL)"
 311 +       depends on EXPERIMENTAL
 312 +       ---help---
 313 +         Normally, the kernel maintains an internal cache which maps IP
 314 +         addresses to hardware addresses on the local network, so that
 315 +         Ethernet/Token Ring/ etc. frames are sent to the proper address on
 316 +         the physical networking layer. For small networks having a few
 317 +         hundred directly connected hosts or less, keeping this address
 318 +         resolution (ARP) cache inside the kernel works well. However,
 319 +         maintaining an internal ARP cache does not work well for very large
 320 +         switched networks, and will use a lot of kernel memory if TCP/IP
 321 +         connections are made to many machines on the network.
 322 +
 323 +         If you say Y here, the kernel's internal ARP cache will never grow
 324 +         to more than 256 entries (the oldest entries are expired in a LIFO
 325 +         manner) and communication will be attempted with the user space ARP
 326 +         daemon arpd. Arpd then answers the address resolution request either
 327 +         from its own cache or by asking the net.
 328 +
 329 +         This code is experimental and also obsolete. If you want to use it,
 330 +         you need to find a version of the daemon arpd on the net somewhere,
 331 +         and you should also say Y to "Kernel/User network link driver",
 332 +         below. If unsure, say N.
 333 +
 334 +config SYN_COOKIES
 335 +       bool "IP: TCP syncookie support (disabled per default)"
 336 +       ---help---
 337 +         Normal TCP/IP networking is open to an attack known as "SYN
 338 +         flooding". This denial-of-service attack prevents legitimate remote
 339 +         users from being able to connect to your computer during an ongoing
 340 +         attack and requires very little work from the attacker, who can
 341 +         operate from anywhere on the Internet.
 342 +
 343 +         SYN cookies provide protection against this type of attack. If you
 344 +         say Y here, the TCP/IP stack will use a cryptographic challenge
 345 +         protocol known as "SYN cookies" to enable legitimate users to
 346 +         continue to connect, even when your machine is under attack. There
 347 +         is no need for the legitimate users to change their TCP/IP software;
 348 +         SYN cookies work transparently to them. For technical information
 349 +         about SYN cookies, check out <http://cr.yp.to/syncookies.html>.
 350 +
 351 +         If you are SYN flooded, the source address reported by the kernel is
 352 +         likely to have been forged by the attacker; it is only reported as
 353 +         an aid in tracing the packets to their actual source and should not
 354 +         be taken as absolute truth.
 355 +
 356 +         SYN cookies may prevent correct error reporting on clients when the
 357 +         server is really overloaded. If this happens frequently better turn
 358 +         them off.
 359 +
 360 +         If you say Y here, note that SYN cookies aren't enabled by default;
 361 +         you can enable them by saying Y to "/proc file system support" and
 362 +         "Sysctl support" below and executing the command
 363 +
 364 +         echo 1 >/proc/sys/net/ipv4/tcp_syncookies
 365 +
 366 +         at boot time after the /proc file system has been mounted.
 367 +
 368 +         If unsure, say N.
 369 +
 370 +config INET_AH
 371 +       tristate "IP: AH transformation"
 372 +       select XFRM
 373 +       select CRYPTO
 374 +       select CRYPTO_HMAC
 375 +       select CRYPTO_MD5
 376 +       select CRYPTO_SHA1
 377 +       ---help---
 378 +         Support for IPsec AH.
 379 +
 380 +         If unsure, say Y.
 381 +
 382 +config INET_ESP
 383 +       tristate "IP: ESP transformation"
 384 +       select XFRM
 385 +       select CRYPTO
 386 +       select CRYPTO_AUTHENC
 387 +       select CRYPTO_HMAC
 388 +       select CRYPTO_MD5
 389 +       select CRYPTO_CBC
 390 +       select CRYPTO_SHA1
 391 +       select CRYPTO_DES
 392 +       ---help---
 393 +         Support for IPsec ESP.
 394 +
 395 +         If unsure, say Y.
 396 +
 397 +config INET_IPCOMP
 398 +       tristate "IP: IPComp transformation"
 399 +       select INET_XFRM_TUNNEL
 400 +       select XFRM_IPCOMP
 401 +       ---help---
 402 +         Support for IP Payload Compression Protocol (IPComp) (RFC3173),
 403 +         typically needed for IPsec.
 404 +
 405 +         If unsure, say Y.
 406 +
 407 +config INET_XFRM_TUNNEL
 408 +       tristate
 409 +       select INET_TUNNEL
 410 +       default n
 411 +
 412 +config INET_TUNNEL
 413 +       tristate
 414 +       default n
 415 +
 416 +config INET_XFRM_MODE_TRANSPORT
 417 +       tristate "IP: IPsec transport mode"
 418 +       default y
 419 +       select XFRM
 420 +       ---help---
 421 +         Support for IPsec transport mode.
 422 +
 423 +         If unsure, say Y.
 424 +
 425 +config INET_XFRM_MODE_TUNNEL
 426 +       tristate "IP: IPsec tunnel mode"
 427 +       default y
 428 +       select XFRM
 429 +       ---help---
 430 +         Support for IPsec tunnel mode.
 431 +
 432 +         If unsure, say Y.
 433 +
 434 +config INET_XFRM_MODE_BEET
 435 +       tristate "IP: IPsec BEET mode"
 436 +       default y
 437 +       select XFRM
 438 +       ---help---
 439 +         Support for IPsec BEET mode.
 440 +
 441 +         If unsure, say Y.
 442 +
 443 +config INET_LRO
 444 +       bool "Large Receive Offload (ipv4/tcp)"
 445 +       default y
 446 +       ---help---
 447 +         Support for Large Receive Offload (ipv4/tcp).
 448 +
 449 +         If unsure, say Y.
 450 +
 451 +config INET_DIAG
 452 +       tristate "INET: socket monitoring interface"
 453 +       default y
 454 +       ---help---
 455 +         Support for INET (TCP, DCCP, etc) socket monitoring interface used by
 456 +         native Linux tools such as ss. ss is included in iproute2, currently
 457 +         downloadable at <http://linux-net.osdl.org/index.php/Iproute2>.
 458 +
 459 +         If unsure, say Y.
 460 +
 461 +config INET_TCP_DIAG
 462 +       depends on INET_DIAG
 463 +       def_tristate INET_DIAG
 464 +
 465 +menuconfig TCP_CONG_ADVANCED
 466 +       bool "TCP: advanced congestion control"
 467 +       ---help---
 468 +         Support for selection of various TCP congestion control
 469 +         modules.
 470 +
 471 +         Nearly all users can safely say no here, and a safe default
 472 +         selection will be made (CUBIC with new Reno as a fallback).
 473 +
 474 +         If unsure, say N.
 475 +
 476 +if TCP_CONG_ADVANCED
 477 +
 478 +config TCP_CONG_BIC
 479 +       tristate "Binary Increase Congestion (BIC) control"
 480 +       default m
 481 +       ---help---
 482 +       BIC-TCP is a sender-side only change that ensures a linear RTT
 483 +       fairness under large windows while offering both scalability and
 484 +       bounded TCP-friendliness. The protocol combines two schemes
 485 +       called additive increase and binary search increase. When the
 486 +       congestion window is large, additive increase with a large
 487 +       increment ensures linear RTT fairness as well as good
 488 +       scalability. Under small congestion windows, binary search
 489 +       increase provides TCP friendliness.
 490 +       See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/
 491 +
 492 +config TCP_CONG_CUBIC
 493 +       tristate "CUBIC TCP"
 494 +       default y
 495 +       ---help---
 496 +       This is version 2.0 of BIC-TCP which uses a cubic growth function
 497 +       among other techniques.
 498 +       See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf
 499 +
 500 +config TCP_CONG_WESTWOOD
 501 +       tristate "TCP Westwood+"
 502 +       default m
 503 +       ---help---
 504 +       TCP Westwood+ is a sender-side only modification of the TCP Reno
 505 +       protocol stack that optimizes the performance of TCP congestion
 506 +       control. It is based on end-to-end bandwidth estimation to set
 507 +       congestion window and slow start threshold after a congestion
 508 +       episode. Using this estimation, TCP Westwood+ adaptively sets a
 509 +       slow start threshold and a congestion window which takes into
 510 +       account the bandwidth used  at the time congestion is experienced.
 511 +       TCP Westwood+ significantly increases fairness wrt TCP Reno in
 512 +       wired networks and throughput over wireless links.
 513 +
 514 +config TCP_CONG_HTCP
 515 +        tristate "H-TCP"
 516 +        default m
 517 +       ---help---
 518 +       H-TCP is a sender-side only modification of the TCP Reno
 519 +       protocol stack that optimizes the performance of TCP
 520 +       congestion control for high speed network links. It uses a
 521 +       modeswitch to change the alpha and beta parameters of TCP Reno
 522 +       based on network conditions and in a way so as to be fair with
 523 +       other Reno and H-TCP flows.
 524 +
 525 +config TCP_CONG_HSTCP
 526 +       tristate "High Speed TCP"
 527 +       depends on EXPERIMENTAL
 528 +       default n
 529 +       ---help---
 530 +       Sally Floyd's High Speed TCP (RFC 3649) congestion control.
 531 +       A modification to TCP's congestion control mechanism for use
 532 +       with large congestion windows. A table indicates how much to
 533 +       increase the congestion window by when an ACK is received.
 534 +       For more detail see http://www.icir.org/floyd/hstcp.html
 535 +
 536 +config TCP_CONG_HYBLA
 537 +       tristate "TCP-Hybla congestion control algorithm"
 538 +       depends on EXPERIMENTAL
 539 +       default n
 540 +       ---help---
 541 +       TCP-Hybla is a sender-side only change that eliminates penalization of
 542 +       long-RTT, large-bandwidth connections, like when satellite legs are
 543 +       involved, especially when sharing a common bottleneck with normal
 544 +       terrestrial connections.
 545 +
 546 +config TCP_CONG_VEGAS
 547 +       tristate "TCP Vegas"
 548 +       depends on EXPERIMENTAL
 549 +       default n
 550 +       ---help---
 551 +       TCP Vegas is a sender-side only change to TCP that anticipates
 552 +       the onset of congestion by estimating the bandwidth. TCP Vegas
 553 +       adjusts the sending rate by modifying the congestion
 554 +       window. TCP Vegas should provide less packet loss, but it is
 555 +       not as aggressive as TCP Reno.
 556 +
 557 +config TCP_CONG_SCALABLE
 558 +       tristate "Scalable TCP"
 559 +       depends on EXPERIMENTAL
 560 +       default n
 561 +       ---help---
 562 +       Scalable TCP is a sender-side only change to TCP which uses a
 563 +       MIMD congestion control algorithm which has some nice scaling
 564 +       properties, though is known to have fairness issues.
 565 +       See http://www.deneholme.net/tom/scalable/
 566 +
 567 +config TCP_CONG_LP
 568 +       tristate "TCP Low Priority"
 569 +       depends on EXPERIMENTAL
 570 +       default n
 571 +       ---help---
 572 +       TCP Low Priority (TCP-LP), a distributed algorithm whose goal is
 573 +       to utilize only the excess network bandwidth as compared to the
 574 +       ``fair share`` of bandwidth as targeted by TCP.
 575 +       See http://www-ece.rice.edu/networks/TCP-LP/
 576 +
 577 +config TCP_CONG_VENO
 578 +       tristate "TCP Veno"
 579 +       depends on EXPERIMENTAL
 580 +       default n
 581 +       ---help---
 582 +       TCP Veno is a sender-side only enhancement of TCP to obtain better
 583 +       throughput over wireless networks. TCP Veno makes use of state
 584 +       distinguishing to circumvent the difficult judgment of the packet loss
 585 +       type. TCP Veno cuts down less congestion window in response to random
 586 +       loss packets.
 587 +       See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
 588 +
 589 +config TCP_CONG_YEAH
 590 +       tristate "YeAH TCP"
 591 +       depends on EXPERIMENTAL
 592 +       select TCP_CONG_VEGAS
 593 +       default n
 594 +       ---help---
 595 +       YeAH-TCP is a sender-side high-speed enabled TCP congestion control
 596 +       algorithm, which uses a mixed loss/delay approach to compute the
 597 +       congestion window. Its design goals target high efficiency,
 598 +       internal, RTT and Reno fairness, resilience to link loss while
 599 +       keeping network elements load as low as possible.
 600 +
 601 +       For further details look here:
 602 +         http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
 603 +
 604 +config TCP_CONG_ILLINOIS
 605 +       tristate "TCP Illinois"
 606 +       depends on EXPERIMENTAL
 607 +       default n
 608 +       ---help---
 609 +       TCP-Illinois is a sender-side modification of TCP Reno for
 610 +       high speed long delay links. It uses round-trip-time to
 611 +       adjust the alpha and beta parameters to achieve a higher average
 612 +       throughput and maintain fairness.
 613 +
 614 +       For further details see:
 615 +         http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
 616 +
 617 +choice
 618 +       prompt "Default TCP congestion control"
 619 +       default DEFAULT_CUBIC
 620 +       help
 621 +         Select the TCP congestion control that will be used by default
 622 +         for all connections.
 623 +
 624 +       config DEFAULT_BIC
 625 +               bool "Bic" if TCP_CONG_BIC=y
 626 +
 627 +       config DEFAULT_CUBIC
 628 +               bool "Cubic" if TCP_CONG_CUBIC=y
 629 +
 630 +       config DEFAULT_HTCP
 631 +               bool "Htcp" if TCP_CONG_HTCP=y
 632 +
 633 +       config DEFAULT_VEGAS
 634 +               bool "Vegas" if TCP_CONG_VEGAS=y
 635 +
 636 +       config DEFAULT_WESTWOOD
 637 +               bool "Westwood" if TCP_CONG_WESTWOOD=y
 638 +
 639 +       config DEFAULT_RENO
 640 +               bool "Reno"
 641 +
 642 +endchoice
 643 +
 644 +endif
 645 +
 646 +config TCP_CONG_CUBIC
 647 +       tristate
 648 +       depends on !TCP_CONG_ADVANCED
 649 +       default y
 650 +
 651 +config DEFAULT_TCP_CONG
 652 +       string
 653 +       default "bic" if DEFAULT_BIC
 654 +       default "cubic" if DEFAULT_CUBIC
 655 +       default "htcp" if DEFAULT_HTCP
 656 +       default "vegas" if DEFAULT_VEGAS
 657 +       default "westwood" if DEFAULT_WESTWOOD
 658 +       default "reno" if DEFAULT_RENO
 659 +       default "cubic"
 660 +
 661 +config TCP_MD5SIG
 662 +       bool "TCP: MD5 Signature Option support (RFC2385) (EXPERIMENTAL)"
 663 +       depends on EXPERIMENTAL
 664 +       select CRYPTO
 665 +       select CRYPTO_MD5
 666 +       ---help---
 667 +         RFC2385 specifies a method of giving MD5 protection to TCP sessions.
 668 +         Its main (only?) use is to protect BGP sessions between core routers
 669 +         on the Internet.
 670 +
 671 +         If unsure, say N.
 672 +
 673 diff -Nur linux-2.6.30.1.orig/net/ipv4/udp.c linux-2.6.30.1/net/ipv4/udp.c
 674 --- linux-2.6.30.1.orig/net/ipv4/udp.c  2009-07-03 01:52:38.000000000 +0200
 675 +++ linux-2.6.30.1/net/ipv4/udp.c       2009-07-24 22:00:56.755270521 +0200
 676 @@ -104,6 +104,7 @@
 677  #include <net/route.h>
 678  #include <net/checksum.h>
 679  #include <net/xfrm.h>
 680 +#include <net/xfrmudp.h>
 681  #include "udp_impl.h"
 682
 683  struct udp_table udp_table;
 684 @@ -1035,6 +1036,128 @@
 685         return -1;
 686  }
 687
 688 +#if defined(CONFIG_XFRM) || defined(CONFIG_IPSEC_NAT_TRAVERSAL)
 689 +
 690 +static xfrm4_rcv_encap_t xfrm4_rcv_encap_func = NULL;
 691 +
 692 +/*
 693 + * De-encapsulate ESP-in-UDP and pass it to xfrm4_rcv_encap_func.  Returns 1
 694 + * when the skb is not consumed here, 0 when it was freed here, otherwise the
 695 + * handler's result.  Most of this code stolen from net/ipv4/xfrm4_input.c,
 696 + * attributed to YOSHIFUJI Hideaki @USAGI and Derek Atkins <derek@ihtfp.com>
 697 + */
 698 +
 699 +static int xfrm4_udp_encap_rcv_wrapper(struct sock *sk, struct sk_buff *skb)
 700 +{
 701 +       struct udp_sock *up = udp_sk(sk);
 702 +       struct udphdr *uh;
 703 +       struct iphdr *iph;
 704 +       int iphlen, len;
 705 +       int ret;
 706 +
 707 +       __u8 *udpdata;
 708 +       __be32 *udpdata32;
 709 +       __u16 encap_type = up->encap_type;
 710 +
 711 +       /* not an encapsulated socket, or no handler registered: return now */
 712 +       if (!encap_type || !xfrm4_rcv_encap_func)
 713 +               return 1;
 714 +
 715 +       /* If this is a paged skb, make sure we pull up
 716 +        * whatever data we need to look at. */
 717 +       len = skb->len - sizeof(struct udphdr);
 718 +       if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8)))
 719 +               return 1;
 720 +
 721 +       /* Now we can get the pointers */
 722 +       uh = udp_hdr(skb);
 723 +       udpdata = (__u8 *)uh + sizeof(struct udphdr);
 724 +       udpdata32 = (__be32 *)udpdata;
 725 +
 726 +       switch (encap_type) {
 727 +       default:
 728 +       case UDP_ENCAP_ESPINUDP:
 729 +               /* Check if this is a keepalive packet.  If so, eat it. */
 730 +               if (len == 1 && udpdata[0] == 0xff) {
 731 +                       goto drop;
 732 +               } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
 733 +                       /* ESP Packet without Non-ESP header */
 734 +                       len = sizeof(struct udphdr);
 735 +               } else
 736 +                       /* Must be an IKE packet.. pass it through */
 737 +                       return 1;
 738 +               break;
 739 +       case UDP_ENCAP_ESPINUDP_NON_IKE:
 740 +               /* Check if this is a keepalive packet.  If so, eat it. */
 741 +               if (len == 1 && udpdata[0] == 0xff) {
 742 +                       goto drop;
 743 +               } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
 744 +                          udpdata32[0] == 0 && udpdata32[1] == 0) {
 745 +
 746 +                       /* ESP Packet with Non-IKE marker */
 747 +                       len = sizeof(struct udphdr) + 2 * sizeof(u32);
 748 +               } else
 749 +                       /* Must be an IKE packet.. pass it through */
 750 +                       return 1;
 751 +               break;
 752 +       }
 753 +
 754 +       /* At this point we are sure that this is an ESPinUDP packet,
 755 +        * so we need to remove 'len' bytes from the packet (the UDP
 756 +        * header and optional ESP marker bytes) and then modify the
 757 +        * protocol to ESP, and then call into the transform receiver.
 758 +        */
 759 +       if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 760 +               goto drop;
 761 +
 762 +       /* Now we can update and verify the packet length... */
 763 +       iph = ip_hdr(skb);
 764 +       iphlen = iph->ihl << 2;
 765 +       iph->tot_len = htons(ntohs(iph->tot_len) - len);
 766 +       if (skb->len < iphlen + len) {
 767 +               /* packet is too small!?! */
 768 +               goto drop;
 769 +       }
 770 +
 771 +       /* pull the data buffer up to the ESP header and set the
 772 +        * transport header to point to ESP.  Keep UDP on the stack
 773 +        * for later.
 774 +        */
 775 +       __skb_pull(skb, len);
 776 +       skb_reset_transport_header(skb);
 777 +
 778 +       /* modify the protocol (it's ESP!) */
 779 +       iph->protocol = IPPROTO_ESP;
 780 +
 781 +       /* process ESP */
 782 +       ret = (*xfrm4_rcv_encap_func)(skb, encap_type);
 783 +       return ret;
 784 +
 785 +drop:
 786 +       kfree_skb(skb);
 787 +       return 0;
 788 +}
 789 +
 790 +int udp4_register_esp_rcvencap(xfrm4_rcv_encap_t func,
 791 +               xfrm4_rcv_encap_t *oldfunc)
 792 +{
 793 +       if (oldfunc != NULL)
 794 +               *oldfunc = xfrm4_rcv_encap_func; /* hand back previous handler */
 795 +       xfrm4_rcv_encap_func = func;    /* replaces any handler already set */
 796 +       return 0;                       /* always reports success */
 797 +}
 798 +
 799 +int udp4_unregister_esp_rcvencap(xfrm4_rcv_encap_t func)
 800 +{
 801 +       if (xfrm4_rcv_encap_func != func)
 802 +               return -1;              /* func is not the current handler */
 803 +
 804 +       xfrm4_rcv_encap_func = NULL;    /* no handler registered from here on */
 805 +       return 0;
 806 +}
 807 +
 808 +#endif /* CONFIG_XFRM || CONFIG_IPSEC_NAT_TRAVERSAL */
 809 +
 810  /* returns:
 811   *  -1: error
 812   *   0: success
 813 @@ -1377,6 +1500,11 @@
 814                 case 0:
 815                 case UDP_ENCAP_ESPINUDP:
 816                 case UDP_ENCAP_ESPINUDP_NON_IKE:
 817 +#if defined(CONFIG_XFRM) || defined(CONFIG_IPSEC_NAT_TRAVERSAL)
 818 +                       if (xfrm4_rcv_encap_func)
 819 +                               up->encap_rcv = xfrm4_udp_encap_rcv_wrapper;
 820 +                       else
 821 +#endif
 822                         up->encap_rcv = xfrm4_udp_encap_rcv;
 823                         /* FALLTHROUGH */
 824                 case UDP_ENCAP_L2TPINUDP:
 825 @@ -1828,3 +1956,9 @@
 826  EXPORT_SYMBOL(udp_proc_register);
 827  EXPORT_SYMBOL(udp_proc_unregister);
 828  #endif
 829 +
 830 +#if defined(CONFIG_IPSEC_NAT_TRAVERSAL)
 831 +EXPORT_SYMBOL(udp4_register_esp_rcvencap);
 832 +EXPORT_SYMBOL(udp4_unregister_esp_rcvencap);
 833 +#endif
 834 +
 835 diff -Nur linux-2.6.30.1.orig/net/ipv4/udp.c.orig linux-2.6.30.1/net/ipv4/udp.c.orig
 836 --- linux-2.6.30.1.orig/net/ipv4/udp.c.orig     1970-01-01 01:00:00.000000000 +0100
 837 +++ linux-2.6.30.1/net/ipv4/udp.c.orig  2009-07-03 01:52:38.000000000 +0200
 838 @@ -0,0 +1,1830 @@
 839 +/*
 840 + * INET                An implementation of the TCP/IP protocol suite for the LINUX
 841 + *             operating system.  INET is implemented using the  BSD Socket
 842 + *             interface as the means of communication with the user level.
 843 + *
 844 + *             The User Datagram Protocol (UDP).
 845 + *
 846 + * Authors:    Ross Biro
 847 + *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 848 + *             Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 849 + *             Alan Cox, <alan@lxorguk.ukuu.org.uk>
 850 + *             Hirokazu Takahashi, <taka@valinux.co.jp>
 851 + *
 852 + * Fixes:
 853 + *             Alan Cox        :       verify_area() calls
 854 + *             Alan Cox        :       stopped close while in use off icmp
 855 + *                                     messages. Not a fix but a botch that
 856 + *                                     for udp at least is 'valid'.
 857 + *             Alan Cox        :       Fixed icmp handling properly
 858 + *             Alan Cox        :       Correct error for oversized datagrams
 859 + *             Alan Cox        :       Tidied select() semantics.
 860 + *             Alan Cox        :       udp_err() fixed properly, also now
 861 + *                                     select and read wake correctly on errors
 862 + *             Alan Cox        :       udp_send verify_area moved to avoid mem leak
 863 + *             Alan Cox        :       UDP can count its memory
 864 + *             Alan Cox        :       send to an unknown connection causes
 865 + *                                     an ECONNREFUSED off the icmp, but
 866 + *                                     does NOT close.
 867 + *             Alan Cox        :       Switched to new sk_buff handlers. No more backlog!
 868 + *             Alan Cox        :       Using generic datagram code. Even smaller and the PEEK
 869 + *                                     bug no longer crashes it.
 870 + *             Fred Van Kempen :       Net2e support for sk->broadcast.
 871 + *             Alan Cox        :       Uses skb_free_datagram
 872 + *             Alan Cox        :       Added get/set sockopt support.
 873 + *             Alan Cox        :       Broadcasting without option set returns EACCES.
 874 + *             Alan Cox        :       No wakeup calls. Instead we now use the callbacks.
 875 + *             Alan Cox        :       Use ip_tos and ip_ttl
 876 + *             Alan Cox        :       SNMP Mibs
 877 + *             Alan Cox        :       MSG_DONTROUTE, and 0.0.0.0 support.
 878 + *             Matt Dillon     :       UDP length checks.
 879 + *             Alan Cox        :       Smarter af_inet used properly.
 880 + *             Alan Cox        :       Use new kernel side addressing.
 881 + *             Alan Cox        :       Incorrect return on truncated datagram receive.
 882 + *     Arnt Gulbrandsen        :       New udp_send and stuff
 883 + *             Alan Cox        :       Cache last socket
 884 + *             Alan Cox        :       Route cache
 885 + *             Jon Peatfield   :       Minor efficiency fix to sendto().
 886 + *             Mike Shaver     :       RFC1122 checks.
 887 + *             Alan Cox        :       Nonblocking error fix.
 888 + *     Willy Konynenberg       :       Transparent proxying support.
 889 + *             Mike McLagan    :       Routing by source
 890 + *             David S. Miller :       New socket lookup architecture.
 891 + *                                     Last socket cache retained as it
 892 + *                                     does have a high hit rate.
 893 + *             Olaf Kirch      :       Don't linearise iovec on sendmsg.
 894 + *             Andi Kleen      :       Some cleanups, cache destination entry
 895 + *                                     for connect.
 896 + *     Vitaly E. Lavrov        :       Transparent proxy revived after year coma.
 897 + *             Melvin Smith    :       Check msg_name not msg_namelen in sendto(),
 898 + *                                     return ENOTCONN for unconnected sockets (POSIX)
 899 + *             Janos Farkas    :       don't deliver multi/broadcasts to a different
 900 + *                                     bound-to-device socket
 901 + *     Hirokazu Takahashi      :       HW checksumming for outgoing UDP
 902 + *                                     datagrams.
 903 + *     Hirokazu Takahashi      :       sendfile() on UDP works now.
 904 + *             Arnaldo C. Melo :       convert /proc/net/udp to seq_file
 905 + *     YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 906 + *     Alexey Kuznetsov:               allow both IPv4 and IPv6 sockets to bind
 907 + *                                     a single port at the same time.
 908 + *     Derek Atkins <derek@ihtfp.com>: Add Encapulation Support
 909 + *     James Chapman           :       Add L2TP encapsulation type.
 910 + *
 911 + *
 912 + *             This program is free software; you can redistribute it and/or
 913 + *             modify it under the terms of the GNU General Public License
 914 + *             as published by the Free Software Foundation; either version
 915 + *             2 of the License, or (at your option) any later version.
 916 + */
 917 +
 918 +#include <asm/system.h>
 919 +#include <asm/uaccess.h>
 920 +#include <asm/ioctls.h>
 921 +#include <linux/bootmem.h>
 922 +#include <linux/highmem.h>
 923 +#include <linux/swap.h>
 924 +#include <linux/types.h>
 925 +#include <linux/fcntl.h>
 926 +#include <linux/module.h>
 927 +#include <linux/socket.h>
 928 +#include <linux/sockios.h>
 929 +#include <linux/igmp.h>
 930 +#include <linux/in.h>
 931 +#include <linux/errno.h>
 932 +#include <linux/timer.h>
 933 +#include <linux/mm.h>
 934 +#include <linux/inet.h>
 935 +#include <linux/netdevice.h>
 936 +#include <net/tcp_states.h>
 937 +#include <linux/skbuff.h>
 938 +#include <linux/proc_fs.h>
 939 +#include <linux/seq_file.h>
 940 +#include <net/net_namespace.h>
 941 +#include <net/icmp.h>
 942 +#include <net/route.h>
 943 +#include <net/checksum.h>
 944 +#include <net/xfrm.h>
 945 +#include "udp_impl.h"
 946 +
 947 +struct udp_table udp_table;
 948 +EXPORT_SYMBOL(udp_table);
 949 +
 950 +int sysctl_udp_mem[3] __read_mostly;
 951 +int sysctl_udp_rmem_min __read_mostly;
 952 +int sysctl_udp_wmem_min __read_mostly;
 953 +
 954 +EXPORT_SYMBOL(sysctl_udp_mem);
 955 +EXPORT_SYMBOL(sysctl_udp_rmem_min);
 956 +EXPORT_SYMBOL(sysctl_udp_wmem_min);
 957 +
 958 +atomic_t udp_memory_allocated;
 959 +EXPORT_SYMBOL(udp_memory_allocated);
 960 +
 961 +#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE)
 962 +
 963 +static int udp_lib_lport_inuse(struct net *net, __u16 num,
 964 +                              const struct udp_hslot *hslot,
 965 +                              unsigned long *bitmap,
 966 +                              struct sock *sk,
 967 +                              int (*saddr_comp)(const struct sock *sk1,
 968 +                                                const struct sock *sk2))
 969 +{
 970 +       struct sock *sk2;
 971 +       struct hlist_nulls_node *node;
 972 +
 973 +       sk_nulls_for_each(sk2, node, &hslot->head)
 974 +               if (net_eq(sock_net(sk2), net)                  &&
 975 +                   sk2 != sk                                   &&
 976 +                   (bitmap || sk2->sk_hash == num)             &&
 977 +                   (!sk2->sk_reuse || !sk->sk_reuse)           &&
 978 +                   (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
 979 +                       || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
 980 +                   (*saddr_comp)(sk, sk2)) {
 981 +                       if (bitmap)
 982 +                               __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE,
 983 +                                         bitmap);
 984 +                       else
 985 +                               return 1;
 986 +               }
 987 +       return 0;
 988 +}
 989 +
 990 +/**
 991 + *  udp_lib_get_port  -  UDP/-Lite port lookup for IPv4 and IPv6
 992 + *
 993 + *  @sk:          socket struct in question
 994 + *  @snum:        port number to look up
 995 + *  @saddr_comp:  AF-dependent comparison of bound local IP addresses
 996 + */
 997 +int udp_lib_get_port(struct sock *sk, unsigned short snum,
 998 +                      int (*saddr_comp)(const struct sock *sk1,
 999 +                                        const struct sock *sk2 )    )
1000 +{
1001 +       struct udp_hslot *hslot;
1002 +       struct udp_table *udptable = sk->sk_prot->h.udp_table;
1003 +       int    error = 1;
1004 +       struct net *net = sock_net(sk);
1005 +
1006 +       if (!snum) {
1007 +               int low, high, remaining;
1008 +               unsigned rand;
1009 +               unsigned short first, last;
1010 +               DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
1011 +
1012 +               inet_get_local_port_range(&low, &high);
1013 +               remaining = (high - low) + 1;
1014 +
1015 +               rand = net_random();
1016 +               first = (((u64)rand * remaining) >> 32) + low;
1017 +               /*
1018 +                * force rand to be an odd multiple of UDP_HTABLE_SIZE
1019 +                */
1020 +               rand = (rand | 1) * UDP_HTABLE_SIZE;
1021 +               for (last = first + UDP_HTABLE_SIZE; first != last; first++) {
1022 +                       hslot = &udptable->hash[udp_hashfn(net, first)];
1023 +                       bitmap_zero(bitmap, PORTS_PER_CHAIN);
1024 +                       spin_lock_bh(&hslot->lock);
1025 +                       udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
1026 +                                           saddr_comp);
1027 +
1028 +                       snum = first;
1029 +                       /*
1030 +                        * Iterate on all possible values of snum for this hash.
1031 +                        * Using steps of an odd multiple of UDP_HTABLE_SIZE
1032 +                        * give us randomization and full range coverage.
1033 +                        */
1034 +                       do {
1035 +                               if (low <= snum && snum <= high &&
1036 +                                   !test_bit(snum / UDP_HTABLE_SIZE, bitmap))
1037 +                                       goto found;
1038 +                               snum += rand;
1039 +                       } while (snum != first);
1040 +                       spin_unlock_bh(&hslot->lock);
1041 +               }
1042 +               goto fail;
1043 +       } else {
1044 +               hslot = &udptable->hash[udp_hashfn(net, snum)];
1045 +               spin_lock_bh(&hslot->lock);
1046 +               if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp))
1047 +                       goto fail_unlock;
1048 +       }
1049 +found:
1050 +       inet_sk(sk)->num = snum;
1051 +       sk->sk_hash = snum;
1052 +       if (sk_unhashed(sk)) {
1053 +               sk_nulls_add_node_rcu(sk, &hslot->head);
1054 +               sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
1055 +       }
1056 +       error = 0;
1057 +fail_unlock:
1058 +       spin_unlock_bh(&hslot->lock);
1059 +fail:
1060 +       return error;
1061 +}
1062 +
1063 +static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
1064 +{
1065 +       struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
1066 +
1067 +       return  ( !ipv6_only_sock(sk2)  &&
1068 +                 (!inet1->rcv_saddr || !inet2->rcv_saddr ||
1069 +                  inet1->rcv_saddr == inet2->rcv_saddr      ));
1070 +}
1071 +
1072 +int udp_v4_get_port(struct sock *sk, unsigned short snum)
1073 +{
1074 +       return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal);
1075 +}
1076 +
1077 +static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
1078 +                        unsigned short hnum,
1079 +                        __be16 sport, __be32 daddr, __be16 dport, int dif)
1080 +{
1081 +       int score = -1;
1082 +
1083 +       if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
1084 +                       !ipv6_only_sock(sk)) {
1085 +               struct inet_sock *inet = inet_sk(sk);
1086 +
1087 +               score = (sk->sk_family == PF_INET ? 1 : 0);
1088 +               if (inet->rcv_saddr) {
1089 +                       if (inet->rcv_saddr != daddr)
1090 +                               return -1;
1091 +                       score += 2;
1092 +               }
1093 +               if (inet->daddr) {
1094 +                       if (inet->daddr != saddr)
1095 +                               return -1;
1096 +                       score += 2;
1097 +               }
1098 +               if (inet->dport) {
1099 +                       if (inet->dport != sport)
1100 +                               return -1;
1101 +                       score += 2;
1102 +               }
1103 +               if (sk->sk_bound_dev_if) {
1104 +                       if (sk->sk_bound_dev_if != dif)
1105 +                               return -1;
1106 +                       score += 2;
1107 +               }
1108 +       }
1109 +       return score;
1110 +}
1111 +
1112 +/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
1113 + * harder than this. -DaveM
1114 + */
1115 +static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
1116 +               __be16 sport, __be32 daddr, __be16 dport,
1117 +               int dif, struct udp_table *udptable)
1118 +{
1119 +       struct sock *sk, *result;
1120 +       struct hlist_nulls_node *node;
1121 +       unsigned short hnum = ntohs(dport);
1122 +       unsigned int hash = udp_hashfn(net, hnum);
1123 +       struct udp_hslot *hslot = &udptable->hash[hash];
1124 +       int score, badness;
1125 +
1126 +       rcu_read_lock();
1127 +begin:
1128 +       result = NULL;
1129 +       badness = -1;
1130 +       sk_nulls_for_each_rcu(sk, node, &hslot->head) {
1131 +               score = compute_score(sk, net, saddr, hnum, sport,
1132 +                                     daddr, dport, dif);
1133 +               if (score > badness) {
1134 +                       result = sk;
1135 +                       badness = score;
1136 +               }
1137 +       }
1138 +       /*
1139 +        * if the nulls value we got at the end of this lookup is
1140 +        * not the expected one, we must restart lookup.
1141 +        * We probably met an item that was moved to another chain.
1142 +        */
1143 +       if (get_nulls_value(node) != hash)
1144 +               goto begin;
1145 +
1146 +       if (result) {
1147 +               if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
1148 +                       result = NULL;
1149 +               else if (unlikely(compute_score(result, net, saddr, hnum, sport,
1150 +                                 daddr, dport, dif) < badness)) {
1151 +                       sock_put(result);
1152 +                       goto begin;
1153 +               }
1154 +       }
1155 +       rcu_read_unlock();
1156 +       return result;
1157 +}
1158 +
1159 +static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
1160 +                                                __be16 sport, __be16 dport,
1161 +                                                struct udp_table *udptable)
1162 +{
1163 +       struct sock *sk;
1164 +       const struct iphdr *iph = ip_hdr(skb);
1165 +
1166 +       if (unlikely(sk = skb_steal_sock(skb)))
1167 +               return sk;
1168 +       else
1169 +               return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport,
1170 +                                        iph->daddr, dport, inet_iif(skb),
1171 +                                        udptable);
1172 +}
1173 +
1174 +struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
1175 +                            __be32 daddr, __be16 dport, int dif)
1176 +{
1177 +       return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
1178 +}
1179 +EXPORT_SYMBOL_GPL(udp4_lib_lookup);
1180 +
1181 +static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
1182 +                                            __be16 loc_port, __be32 loc_addr,
1183 +                                            __be16 rmt_port, __be32 rmt_addr,
1184 +                                            int dif)
1185 +{
1186 +       struct hlist_nulls_node *node;
1187 +       struct sock *s = sk;
1188 +       unsigned short hnum = ntohs(loc_port);
1189 +
1190 +       sk_nulls_for_each_from(s, node) {
1191 +               struct inet_sock *inet = inet_sk(s);
1192 +
1193 +               if (!net_eq(sock_net(s), net)                           ||
1194 +                   s->sk_hash != hnum                                  ||
1195 +                   (inet->daddr && inet->daddr != rmt_addr)            ||
1196 +                   (inet->dport != rmt_port && inet->dport)            ||
1197 +                   (inet->rcv_saddr && inet->rcv_saddr != loc_addr)    ||
1198 +                   ipv6_only_sock(s)                                   ||
1199 +                   (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
1200 +                       continue;
1201 +               if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
1202 +                       continue;
1203 +               goto found;
1204 +       }
1205 +       s = NULL;
1206 +found:
1207 +       return s;
1208 +}
1209 +
1210 +/*
1211 + * This routine is called by the ICMP module when it gets some
1212 + * sort of error condition.  If err < 0 then the socket should
1213 + * be closed and the error returned to the user.  If err > 0
1214 + * it's just the icmp type << 8 | icmp code.
1215 + * Header points to the ip header of the error packet. We move
1216 + * on past this. Then (as it used to claim before adjustment)
1217 + * header points to the first 8 bytes of the udp header.  We need
1218 + * to find the appropriate port.
1219 + */
1220 +
1221 +void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
1222 +{
1223 +       struct inet_sock *inet;
1224 +       struct iphdr *iph = (struct iphdr*)skb->data;
1225 +       struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
1226 +       const int type = icmp_hdr(skb)->type;
1227 +       const int code = icmp_hdr(skb)->code;
1228 +       struct sock *sk;
1229 +       int harderr;
1230 +       int err;
1231 +       struct net *net = dev_net(skb->dev);
1232 +
1233 +       sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
1234 +                       iph->saddr, uh->source, skb->dev->ifindex, udptable);
1235 +       if (sk == NULL) {
1236 +               ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
1237 +               return; /* No socket for error */
1238 +       }
1239 +
1240 +       err = 0;
1241 +       harderr = 0;
1242 +       inet = inet_sk(sk);
1243 +
1244 +       switch (type) {
1245 +       default:
1246 +       case ICMP_TIME_EXCEEDED:
1247 +               err = EHOSTUNREACH;
1248 +               break;
1249 +       case ICMP_SOURCE_QUENCH:
1250 +               goto out;
1251 +       case ICMP_PARAMETERPROB:
1252 +               err = EPROTO;
1253 +               harderr = 1;
1254 +               break;
1255 +       case ICMP_DEST_UNREACH:
1256 +               if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
1257 +                       if (inet->pmtudisc != IP_PMTUDISC_DONT) {
1258 +                               err = EMSGSIZE;
1259 +                               harderr = 1;
1260 +                               break;
1261 +                       }
1262 +                       goto out;
1263 +               }
1264 +               err = EHOSTUNREACH;
1265 +               if (code <= NR_ICMP_UNREACH) {
1266 +                       harderr = icmp_err_convert[code].fatal;
1267 +                       err = icmp_err_convert[code].errno;
1268 +               }
1269 +               break;
1270 +       }
1271 +
1272 +       /*
1273 +        *      RFC1122: OK.  Passes ICMP errors back to application, as per
1274 +        *      4.1.3.3.
1275 +        */
1276 +       if (!inet->recverr) {
1277 +               if (!harderr || sk->sk_state != TCP_ESTABLISHED)
1278 +                       goto out;
1279 +       } else {
1280 +               ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
1281 +       }
1282 +       sk->sk_err = err;
1283 +       sk->sk_error_report(sk);
1284 +out:
1285 +       sock_put(sk);
1286 +}
1287 +
1288 +void udp_err(struct sk_buff *skb, u32 info)
1289 +{
1290 +       __udp4_lib_err(skb, info, &udp_table);
1291 +}
1292 +
1293 +/*
1294 + * Throw away all pending data and cancel the corking. Socket is locked.
1295 + */
1296 +void udp_flush_pending_frames(struct sock *sk)
1297 +{
1298 +       struct udp_sock *up = udp_sk(sk);
1299 +
1300 +       if (up->pending) {
1301 +               up->len = 0;
1302 +               up->pending = 0;
1303 +               ip_flush_pending_frames(sk);
1304 +       }
1305 +}
1306 +EXPORT_SYMBOL(udp_flush_pending_frames);
1307 +
1308 +/**
1309 + *     udp4_hwcsum_outgoing  -  handle outgoing HW checksumming
1310 + *     @sk:    socket we are sending on
1311 + *     @skb:   sk_buff containing the filled-in UDP header
1312 + *             (checksum field must be zeroed out)
1313 + */
1314 +static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
1315 +                                __be32 src, __be32 dst, int len      )
1316 +{
1317 +       unsigned int offset;
1318 +       struct udphdr *uh = udp_hdr(skb);
1319 +       __wsum csum = 0;
1320 +
1321 +       if (skb_queue_len(&sk->sk_write_queue) == 1) {
1322 +               /*
1323 +                * Only one fragment on the socket.
1324 +                */
1325 +               skb->csum_start = skb_transport_header(skb) - skb->head;
1326 +               skb->csum_offset = offsetof(struct udphdr, check);
1327 +               uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
1328 +       } else {
1329 +               /*
1330 +                * HW-checksum won't work as there are two or more
1331 +                * fragments on the socket so that all csums of sk_buffs
1332 +                * should be together
1333 +                */
1334 +               offset = skb_transport_offset(skb);
1335 +               skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
1336 +
1337 +               skb->ip_summed = CHECKSUM_NONE;
1338 +
1339 +               skb_queue_walk(&sk->sk_write_queue, skb) {
1340 +                       csum = csum_add(csum, skb->csum);
1341 +               }
1342 +
1343 +               uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
1344 +               if (uh->check == 0)
1345 +                       uh->check = CSUM_MANGLED_0;
1346 +       }
1347 +}
1348 +
1349 +/*
1350 + * Push out all pending data as one UDP datagram. Socket is locked.
1351 + */
1352 +static int udp_push_pending_frames(struct sock *sk)
1353 +{
1354 +       struct udp_sock  *up = udp_sk(sk);
1355 +       struct inet_sock *inet = inet_sk(sk);
1356 +       struct flowi *fl = &inet->cork.fl;
1357 +       struct sk_buff *skb;
1358 +       struct udphdr *uh;
1359 +       int err = 0;
1360 +       int is_udplite = IS_UDPLITE(sk);
1361 +       __wsum csum = 0;
1362 +
1363 +       /* Grab the skbuff where UDP header space exists. */
1364 +       if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
1365 +               goto out;
1366 +
1367 +       /*
1368 +        * Create a UDP header
1369 +        */
1370 +       uh = udp_hdr(skb);
1371 +       uh->source = fl->fl_ip_sport;
1372 +       uh->dest = fl->fl_ip_dport;
1373 +       uh->len = htons(up->len);
1374 +       uh->check = 0;
1375 +
1376 +       if (is_udplite)                                  /*     UDP-Lite      */
1377 +               csum  = udplite_csum_outgoing(sk, skb);
1378 +
1379 +       else if (sk->sk_no_check == UDP_CSUM_NOXMIT) {   /* UDP csum disabled */
1380 +
1381 +               skb->ip_summed = CHECKSUM_NONE;
1382 +               goto send;
1383 +
1384 +       } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
1385 +
1386 +               udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
1387 +               goto send;
1388 +
1389 +       } else                                           /*   `normal' UDP    */
1390 +               csum = udp_csum_outgoing(sk, skb);
1391 +
1392 +       /* add protocol-dependent pseudo-header */
1393 +       uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
1394 +                                     sk->sk_protocol, csum             );
1395 +       if (uh->check == 0)
1396 +               uh->check = CSUM_MANGLED_0;
1397 +
1398 +send:
1399 +       err = ip_push_pending_frames(sk);
1400 +out:
1401 +       up->len = 0;
1402 +       up->pending = 0;
1403 +       if (!err)
1404 +               UDP_INC_STATS_USER(sock_net(sk),
1405 +                               UDP_MIB_OUTDATAGRAMS, is_udplite);
1406 +       return err;
1407 +}
1408 +
1409 +int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1410 +               size_t len)
1411 +{      /* Send (or cork) one IPv4 UDP datagram; returns len on success, else a negative errno. */
1412 +       struct inet_sock *inet = inet_sk(sk);
1413 +       struct udp_sock *up = udp_sk(sk);
1414 +       int ulen = len;
1415 +       struct ipcm_cookie ipc;
1416 +       struct rtable *rt = NULL;
1417 +       int free = 0;           /* 1 when ipc.opt was produced by ip_cmsg_send() and is kfree()d at out: */
1418 +       int connected = 0;      /* 1 while the route cached on the socket may be used */
1419 +       __be32 daddr, faddr, saddr;
1420 +       __be16 dport;
1421 +       u8  tos;
1422 +       int err, is_udplite = IS_UDPLITE(sk);
1423 +       int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; /* caller asked to hold the data back */
1424 +       int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
1425 +
1426 +       if (len > 0xFFFF)
1427 +               return -EMSGSIZE;
1428 +
1429 +       /*
1430 +        *      Check the flags.
1431 +        */
1432 +
1433 +       if (msg->msg_flags&MSG_OOB)     /* Mirror BSD error message compatibility */
1434 +               return -EOPNOTSUPP;
1435 +
1436 +       ipc.opt = NULL;
1437 +       ipc.shtx.flags = 0;
1438 +
1439 +       if (up->pending) {
1440 +               /*
1441 +                * There are pending frames.
1442 +                * The socket lock must be held while it's corked.
1443 +                */
1444 +               lock_sock(sk);
1445 +               if (likely(up->pending)) {      /* re-check now that the lock is held */
1446 +                       if (unlikely(up->pending != AF_INET)) {
1447 +                               release_sock(sk);
1448 +                               return -EINVAL;
1449 +                       }
1450 +                       goto do_append_data;    /* jumps with the socket lock still held */
1451 +               }
1452 +               release_sock(sk);
1453 +       }
1454 +       ulen += sizeof(struct udphdr);  /* ulen now includes the UDP header */
1455 +
1456 +       /*
1457 +        *      Get and verify the address.
1458 +        */
1459 +       if (msg->msg_name) {
1460 +               struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
1461 +               if (msg->msg_namelen < sizeof(*usin))
1462 +                       return -EINVAL;
1463 +               if (usin->sin_family != AF_INET) {
1464 +                       if (usin->sin_family != AF_UNSPEC)      /* AF_UNSPEC is accepted like AF_INET */
1465 +                               return -EAFNOSUPPORT;
1466 +               }
1467 +
1468 +               daddr = usin->sin_addr.s_addr;
1469 +               dport = usin->sin_port;
1470 +               if (dport == 0)
1471 +                       return -EINVAL;
1472 +       } else {
1473 +               if (sk->sk_state != TCP_ESTABLISHED)
1474 +                       return -EDESTADDRREQ;
1475 +               daddr = inet->daddr;
1476 +               dport = inet->dport;
1477 +               /* Open fast path for connected socket.
1478 +                  Route will not be used, if at least one option is set.
1479 +                */
1480 +               connected = 1;
1481 +       }
1482 +       ipc.addr = inet->saddr;
1483 +
1484 +       ipc.oif = sk->sk_bound_dev_if;
1485 +       err = sock_tx_timestamp(msg, sk, &ipc.shtx);
1486 +       if (err)
1487 +               return err;
1488 +       if (msg->msg_controllen) {
1489 +               err = ip_cmsg_send(sock_net(sk), msg, &ipc);
1490 +               if (err)
1491 +                       return err;
1492 +               if (ipc.opt)
1493 +                       free = 1;
1494 +               connected = 0;
1495 +       }
1496 +       if (!ipc.opt)
1497 +               ipc.opt = inet->opt;    /* fall back to the socket's own options */
1498 +
1499 +       saddr = ipc.addr;
1500 +       ipc.addr = faddr = daddr;
1501 +
1502 +       if (ipc.opt && ipc.opt->srr) {
1503 +               if (!daddr)
1504 +                       return -EINVAL;
1505 +               faddr = ipc.opt->faddr; /* route towards the option's faddr instead of daddr */
1506 +               connected = 0;
1507 +       }
1508 +       tos = RT_TOS(inet->tos);
1509 +       if (sock_flag(sk, SOCK_LOCALROUTE) ||
1510 +           (msg->msg_flags & MSG_DONTROUTE) ||
1511 +           (ipc.opt && ipc.opt->is_strictroute)) {
1512 +               tos |= RTO_ONLINK;
1513 +               connected = 0;
1514 +       }
1515 +
1516 +       if (ipv4_is_multicast(daddr)) {
1517 +               if (!ipc.oif)
1518 +                       ipc.oif = inet->mc_index;
1519 +               if (!saddr)
1520 +                       saddr = inet->mc_addr;
1521 +               connected = 0;
1522 +       }
1523 +
1524 +       if (connected)
1525 +               rt = (struct rtable*)sk_dst_check(sk, 0);
1526 +
1527 +       if (rt == NULL) {       /* no route obtained from the socket: do a full lookup */
1528 +               struct flowi fl = { .oif = ipc.oif,
1529 +                                   .nl_u = { .ip4_u =
1530 +                                             { .daddr = faddr,
1531 +                                               .saddr = saddr,
1532 +                                               .tos = tos } },
1533 +                                   .proto = sk->sk_protocol,
1534 +                                   .flags = inet_sk_flowi_flags(sk),
1535 +                                   .uli_u = { .ports =
1536 +                                              { .sport = inet->sport,
1537 +                                                .dport = dport } } };
1538 +               struct net *net = sock_net(sk);
1539 +
1540 +               security_sk_classify_flow(sk, &fl);
1541 +               err = ip_route_output_flow(net, &rt, &fl, sk, 1);
1542 +               if (err) {
1543 +                       if (err == -ENETUNREACH)
1544 +                               IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
1545 +                       goto out;
1546 +               }
1547 +
1548 +               err = -EACCES;
1549 +               if ((rt->rt_flags & RTCF_BROADCAST) &&
1550 +                   !sock_flag(sk, SOCK_BROADCAST))
1551 +                       goto out;       /* broadcast route but SOCK_BROADCAST not set: -EACCES */
1552 +               if (connected)
1553 +                       sk_dst_set(sk, dst_clone(&rt->u.dst));
1554 +       }
1555 +
1556 +       if (msg->msg_flags&MSG_CONFIRM)
1557 +               goto do_confirm;
1558 +back_from_confirm:
1559 +
1560 +       saddr = rt->rt_src;
1561 +       if (!ipc.addr)
1562 +               daddr = ipc.addr = rt->rt_dst;
1563 +
1564 +       lock_sock(sk);
1565 +       if (unlikely(up->pending)) {
1566 +               /* The socket is already corked while preparing it. */
1567 +               /* ... which is an evident application bug. --ANK */
1568 +               release_sock(sk);
1569 +
1570 +               LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
1571 +               err = -EINVAL;
1572 +               goto out;
1573 +       }
1574 +       /*
1575 +        *      Now cork the socket to pend data.
1576 +        */
1577 +       inet->cork.fl.fl4_dst = daddr;
1578 +       inet->cork.fl.fl_ip_dport = dport;
1579 +       inet->cork.fl.fl4_src = saddr;
1580 +       inet->cork.fl.fl_ip_sport = inet->sport;
1581 +       up->pending = AF_INET;
1582 +
1583 +do_append_data:
1584 +       up->len += ulen;
1585 +       getfrag  =  is_udplite ?  udplite_getfrag : ip_generic_getfrag;
1586 +       err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
1587 +                       sizeof(struct udphdr), &ipc, &rt,
1588 +                       corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
1589 +       if (err)
1590 +               udp_flush_pending_frames(sk);
1591 +       else if (!corkreq)
1592 +               err = udp_push_pending_frames(sk);
1593 +       else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
1594 +               up->pending = 0;        /* corked but nothing was queued: clear the pending state */
1595 +       release_sock(sk);
1596 +
1597 +out:
1598 +       ip_rt_put(rt);          /* rt is still NULL when the route lookup failed or was skipped */
1599 +       if (free)
1600 +               kfree(ipc.opt);
1601 +       if (!err)
1602 +               return len;
1603 +       /*
1604 +        * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
1605 +        * ENOBUFS might not be good (it's not tunable per se), but otherwise
1606 +        * we don't have a good statistic (IpOutDiscards but it can be too many
1607 +        * things).  We could add another new stat but at least for now that
1608 +        * seems like overkill.
1609 +        */
1610 +       if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
1611 +               UDP_INC_STATS_USER(sock_net(sk),
1612 +                               UDP_MIB_SNDBUFERRORS, is_udplite);
1613 +       }
1614 +       return err;
1615 +
1616 +do_confirm:
1617 +       dst_confirm(&rt->u.dst);
1618 +       if (!(msg->msg_flags&MSG_PROBE) || len)
1619 +               goto back_from_confirm;
1620 +       err = 0;                /* MSG_PROBE with no data: nothing is sent */
1621 +       goto out;
1622 +}
1623 +
1624 +int udp_sendpage(struct sock *sk, struct page *page, int offset,
1625 +                size_t size, int flags)
1626 +{      /* Append one page to a corked UDP datagram; on the normal success path returns size. */
1627 +       struct udp_sock *up = udp_sk(sk);
1628 +       int ret;
1629 +
1630 +       if (!up->pending) {
1631 +               struct msghdr msg = {   .msg_flags = flags|MSG_MORE };
1632 +
1633 +               /* Call udp_sendmsg to specify destination address which
1634 +                * sendpage interface can't pass.
1635 +                * This will succeed only when the socket is connected.
1636 +                */
1637 +               ret = udp_sendmsg(NULL, sk, &msg, 0);   /* zero-length send with MSG_MORE forced on */
1638 +               if (ret < 0)
1639 +                       return ret;
1640 +       }
1641 +
1642 +       lock_sock(sk);
1643 +
1644 +       if (unlikely(!up->pending)) {
1645 +               release_sock(sk);
1646 +
1647 +               LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
1648 +               return -EINVAL;
1649 +       }
1650 +
1651 +       ret = ip_append_page(sk, page, offset, size, flags);
1652 +       if (ret == -EOPNOTSUPP) {       /* fall back to sock_no_sendpage() */
1653 +               release_sock(sk);
1654 +               return sock_no_sendpage(sk->sk_socket, page, offset,
1655 +                                       size, flags);
1656 +       }
1657 +       if (ret < 0) {
1658 +               udp_flush_pending_frames(sk);
1659 +               goto out;
1660 +       }
1661 +
1662 +       up->len += size;
1663 +       if (!(up->corkflag || (flags&MSG_MORE)))        /* not corked: push the datagram out now */
1664 +               ret = udp_push_pending_frames(sk);
1665 +       if (!ret)
1666 +               ret = size;
1667 +out:
1668 +       release_sock(sk);
1669 +       return ret;
1670 +}
1671 +
1672 +/*
1673 + *     IOCTL requests applicable to the UDP protocol
1674 + */
1675 +
1676 +int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
1677 +{      /* Handles SIOCOUTQ and SIOCINQ; any other cmd yields -ENOIOCTLCMD. */
1678 +       switch (cmd) {
1679 +       case SIOCOUTQ:
1680 +       {
1681 +               int amount = atomic_read(&sk->sk_wmem_alloc);  /* current value of sk_wmem_alloc */
1682 +               return put_user(amount, (int __user *)arg);
1683 +       }
1684 +
1685 +       case SIOCINQ:
1686 +       {
1687 +               struct sk_buff *skb;
1688 +               unsigned long amount;
1689 +
1690 +               amount = 0;     /* reported when the receive queue is empty */
1691 +               spin_lock_bh(&sk->sk_receive_queue.lock);
1692 +               skb = skb_peek(&sk->sk_receive_queue);
1693 +               if (skb != NULL) {
1694 +                       /*
1695 +                        * We will only return the amount
1696 +                        * of this packet since that is all
1697 +                        * that will be read.
1698 +                        */
1699 +                       amount = skb->len - sizeof(struct udphdr);
1700 +               }
1701 +               spin_unlock_bh(&sk->sk_receive_queue.lock);
1702 +               return put_user(amount, (int __user *)arg);
1703 +       }
1704 +
1705 +       default:
1706 +               return -ENOIOCTLCMD;
1707 +       }
1708 +
1709 +       return 0;       /* not reached: every switch branch above returns */
1710 +}
1711 +
1712 +/*
1713 + *     This should be easy, if there is something there we
1714 + *     return it, otherwise we block.
1715 + */
1716 +
1717 +int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1718 +               size_t len, int noblock, int flags, int *addr_len)
1719 +{      /* Receive one datagram into msg; returns the byte count or a negative errno. */
1720 +       struct inet_sock *inet = inet_sk(sk);
1721 +       struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
1722 +       struct sk_buff *skb;
1723 +       unsigned int ulen, copied;
1724 +       int peeked;
1725 +       int err;
1726 +       int is_udplite = IS_UDPLITE(sk);
1727 +
1728 +       /*
1729 +        *      Check any passed addresses
1730 +        */
1731 +       if (addr_len)
1732 +               *addr_len=sizeof(*sin);
1733 +
1734 +       if (flags & MSG_ERRQUEUE)
1735 +               return ip_recv_error(sk, msg, len);
1736 +
1737 +try_again:
1738 +       skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
1739 +                                 &peeked, &err);
1740 +       if (!skb)
1741 +               goto out;       /* err was filled in by __skb_recv_datagram() */
1742 +
1743 +       ulen = skb->len - sizeof(struct udphdr);        /* payload length of this datagram */
1744 +       copied = len;
1745 +       if (copied > ulen)
1746 +               copied = ulen;
1747 +       else if (copied < ulen)
1748 +               msg->msg_flags |= MSG_TRUNC;    /* caller's buffer is smaller than the payload */
1749 +
1750 +       /*
1751 +        * If checksum is needed at all, try to do it while copying the
1752 +        * data.  If the data is truncated, or if we only want a partial
1753 +        * coverage checksum (UDP-Lite), do it before the copy.
1754 +        */
1755 +
1756 +       if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
1757 +               if (udp_lib_checksum_complete(skb))
1758 +                       goto csum_copy_err;
1759 +       }
1760 +
1761 +       if (skb_csum_unnecessary(skb))
1762 +               err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
1763 +                                             msg->msg_iov, copied       );
1764 +       else {
1765 +               err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
1766 +
1767 +               if (err == -EINVAL)     /* treated as a checksum failure */
1768 +                       goto csum_copy_err;
1769 +       }
1770 +
1771 +       if (err)
1772 +               goto out_free;
1773 +
1774 +       if (!peeked)
1775 +               UDP_INC_STATS_USER(sock_net(sk),
1776 +                               UDP_MIB_INDATAGRAMS, is_udplite);
1777 +
1778 +       sock_recv_timestamp(msg, sk, skb);
1779 +
1780 +       /* Copy the address. */
1781 +       if (sin)
1782 +       {
1783 +               sin->sin_family = AF_INET;
1784 +               sin->sin_port = udp_hdr(skb)->source;
1785 +               sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
1786 +               memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
1787 +       }
1788 +       if (inet->cmsg_flags)
1789 +               ip_cmsg_recv(msg, skb);
1790 +
1791 +       err = copied;
1792 +       if (flags & MSG_TRUNC)
1793 +               err = ulen;     /* with MSG_TRUNC report the full payload length instead */
1794 +
1795 +out_free:
1796 +       lock_sock(sk);
1797 +       skb_free_datagram(sk, skb);
1798 +       release_sock(sk);
1799 +out:
1800 +       return err;
1801 +
1802 +csum_copy_err:
1803 +       lock_sock(sk);
1804 +       if (!skb_kill_datagram(sk, skb, flags))
1805 +               UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1806 +       release_sock(sk);
1807 +
1808 +       if (noblock)
1809 +               return -EAGAIN;
1810 +       goto try_again;         /* blocking caller: wait for the next datagram */
1811 +}
1812 +
1813 +
1814 +int udp_disconnect(struct sock *sk, int flags)
1815 +{      /* Clear the peer association of sk; always returns 0. flags is unused here. */
1816 +       struct inet_sock *inet = inet_sk(sk);
1817 +       /*
1818 +        *      1003.1g - break association.
1819 +        */
1820 +
1821 +       sk->sk_state = TCP_CLOSE;
1822 +       inet->daddr = 0;
1823 +       inet->dport = 0;
1824 +       sk->sk_bound_dev_if = 0;
1825 +       if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))   /* source address lock not set */
1826 +               inet_reset_saddr(sk);
1827 +
1828 +       if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { /* port lock not set: unhash, drop sport */
1829 +               sk->sk_prot->unhash(sk);
1830 +               inet->sport = 0;
1831 +       }
1832 +       sk_dst_reset(sk);
1833 +       return 0;
1834 +}
1835 +
1836 +void udp_lib_unhash(struct sock *sk)
1837 +{      /* Remove sk from its protocol's UDP hash table; does nothing if sk is not hashed. */
1838 +       if (sk_hashed(sk)) {
1839 +               struct udp_table *udptable = sk->sk_prot->h.udp_table;
1840 +               unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash);
1841 +               struct udp_hslot *hslot = &udptable->hash[hash];
1842 +
1843 +               spin_lock_bh(&hslot->lock);     /* the slot lock covers the list removal */
1844 +               if (sk_nulls_del_node_init_rcu(sk)) {
1845 +                       inet_sk(sk)->num = 0;
1846 +                       sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
1847 +               }
1848 +               spin_unlock_bh(&hslot->lock);
1849 +       }
1850 +}
1851 +EXPORT_SYMBOL(udp_lib_unhash);
1852 +
1853 +static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1854 +{      /* Queue skb on sk; returns 0 on success, or frees skb and returns -1 on failure. */
1855 +       int is_udplite = IS_UDPLITE(sk);
1856 +       int rc;
1857 +
1858 +       if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
1859 +               /* Note that an ENOMEM error is charged twice */
1860 +               if (rc == -ENOMEM) {
1861 +                       UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
1862 +                                        is_udplite);
1863 +                       atomic_inc(&sk->sk_drops);
1864 +               }
1865 +               goto drop;
1866 +       }
1867 +
1868 +       return 0;
1869 +
1870 +drop:
1871 +       UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);  /* counted for every failure */
1872 +       kfree_skb(skb);
1873 +       return -1;
1874 +}
1875 +
1876 +/* returns:
1877 + *  -1: error
1878 + *   0: success
1879 + *  >0: "udp encap" protocol resubmission
1880 + *
1881 + * Note that in the success and error cases, the skb is assumed to
1882 + * have either been requeued or freed.
1883 + */
1884 +int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
1885 +{
1886 +       struct udp_sock *up = udp_sk(sk);
1887 +       int rc;
1888 +       int is_udplite = IS_UDPLITE(sk);
1889 +
1890 +       /*
1891 +        *      Charge it to the socket, dropping if the queue is full.
1892 +        */
1893 +       if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1894 +               goto drop;
1895 +       nf_reset(skb);
1896 +
1897 +       if (up->encap_type) {
1898 +               /*
1899 +                * This is an encapsulation socket so pass the skb to
1900 +                * the socket's udp_encap_rcv() hook. Otherwise, just
1901 +                * fall through and pass this up the UDP socket.
1902 +                * up->encap_rcv() returns the following value:
1903 +                * =0 if skb was successfully passed to the encap
1904 +                *    handler or was discarded by it.
1905 +                * >0 if skb should be passed on to UDP.
1906 +                * <0 if skb should be resubmitted as proto -N
1907 +                */
1908 +
1909 +               /* if we're overly short, let UDP handle it */
1910 +               if (skb->len > sizeof(struct udphdr) &&
1911 +                   up->encap_rcv != NULL) {
1912 +                       int ret;
1913 +
1914 +                       ret = (*up->encap_rcv)(sk, skb);
1915 +                       if (ret <= 0) {
1916 +                               UDP_INC_STATS_BH(sock_net(sk),
1917 +                                                UDP_MIB_INDATAGRAMS,
1918 +                                                is_udplite);
1919 +                               return -ret;    /* ret <= 0 here, so the result is 0 or a positive N */
1920 +                       }
1921 +               }
1922 +
1923 +               /* FALLTHROUGH -- it's a UDP Packet */
1924 +       }
1925 +
1926 +       /*
1927 +        *      UDP-Lite specific tests, ignored on UDP sockets
1928 +        */
1929 +       if ((is_udplite & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
1930 +
1931 +               /*
1932 +                * MIB statistics other than incrementing the error count are
1933 +                * disabled for the following two types of errors: these depend
1934 +                * on the application settings, not on the functioning of the
1935 +                * protocol stack as such.
1936 +                *
1937 +                * RFC 3828 here recommends (sec 3.3): "There should also be a
1938 +                * way ... to ... at least let the receiving application block
1939 +                * delivery of packets with coverage values less than a value
1940 +                * provided by the application."
1941 +                */
1942 +               if (up->pcrlen == 0) {          /* full coverage was set  */
1943 +                       LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
1944 +                               "%d while full coverage %d requested\n",
1945 +                               UDP_SKB_CB(skb)->cscov, skb->len);
1946 +                       goto drop;
1947 +               }
1948 +               /* The next case involves violating the min. coverage requested
1949 +                * by the receiver. This is subtle: if receiver wants x and x is
1950 +                * greater than the buffersize/MTU then receiver will complain
1951 +                * that it wants x while sender emits packets of smaller size y.
1952 +                * Therefore the above ...()->partial_cov statement is essential.
1953 +                */
1954 +               if (UDP_SKB_CB(skb)->cscov  <  up->pcrlen) {
1955 +                       LIMIT_NETDEBUG(KERN_WARNING
1956 +                               "UDPLITE: coverage %d too small, need min %d\n",
1957 +                               UDP_SKB_CB(skb)->cscov, up->pcrlen);
1958 +                       goto drop;
1959 +               }
1960 +       }
1961 +
1962 +       if (sk->sk_filter) {    /* a socket filter is attached: finish the checksum first */
1963 +               if (udp_lib_checksum_complete(skb))
1964 +                       goto drop;
1965 +       }
1966 +
1967 +       rc = 0;
1968 +
1969 +       bh_lock_sock(sk);
1970 +       if (!sock_owned_by_user(sk))
1971 +               rc = __udp_queue_rcv_skb(sk, skb);
1972 +       else
1973 +               sk_add_backlog(sk, skb);        /* socket owned by user: use the backlog, rc stays 0 */
1974 +       bh_unlock_sock(sk);
1975 +
1976 +       return rc;
1977 +
1978 +drop:
1979 +       UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1980 +       kfree_skb(skb);
1981 +       return -1;
1982 +}
1983 +
1984 +/*
1985 + *     Multicasts and broadcasts go to each listener.
1986 + *
1987 + *     Note: called only from the BH handler context,
1988 + *     so we don't need to lock the hashes.
1989 + */
1990 +static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
1991 +                                   struct udphdr  *uh,
1992 +                                   __be32 saddr, __be32 daddr,
1993 +                                   struct udp_table *udptable)
1994 +{      /* Always returns 0; skb is handed to a listener or consumed here. */
1995 +       struct sock *sk;
1996 +       struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))];
1997 +       int dif;
1998 +
1999 +       spin_lock(&hslot->lock);
2000 +       sk = sk_nulls_head(&hslot->head);
2001 +       dif = skb->dev->ifindex;
2002 +       sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
2003 +       if (sk) {
2004 +               struct sock *sknext = NULL;
2005 +
2006 +               do {
2007 +                       struct sk_buff *skb1 = skb;     /* the last listener gets the original skb */
2008 +
2009 +                       sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest,
2010 +                                                  daddr, uh->source, saddr,
2011 +                                                  dif);
2012 +                       if (sknext)     /* another listener follows: deliver a clone to this one */
2013 +                               skb1 = skb_clone(skb, GFP_ATOMIC);
2014 +
2015 +                       if (skb1) {     /* NULL when the clone failed: this listener is skipped */
2016 +                               int ret = udp_queue_rcv_skb(sk, skb1);
2017 +                               if (ret > 0)
2018 +                                       /* we should probably re-process instead
2019 +                                        * of dropping packets here. */
2020 +                                       kfree_skb(skb1);
2021 +                       }
2022 +                       sk = sknext;
2023 +               } while (sknext);
2024 +       } else
2025 +               consume_skb(skb);       /* no matching listener */
2026 +       spin_unlock(&hslot->lock);
2027 +       return 0;
2028 +}
2029 +
2030 +/* Initialize UDP checksum. If exited with zero value (success),
2031 + * CHECKSUM_UNNECESSARY means that no more checks are required.
2032 + * Otherwise, csum completion requires checksumming the packet body,
2033 + * including the udp header, and folding it into skb->csum.
2034 + */
2035 +static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
2036 +                                int proto)
2037 +{
2038 +       const struct iphdr *iph;
2039 +       int err;
2040 +
2041 +       UDP_SKB_CB(skb)->partial_cov = 0;
2042 +       UDP_SKB_CB(skb)->cscov = skb->len;     /* default: coverage spans the whole skb */
2043 +
2044 +       if (proto == IPPROTO_UDPLITE) {
2045 +               err = udplite_checksum_init(skb, uh);
2046 +               if (err)
2047 +                       return err;
2048 +       }
2049 +
2050 +       iph = ip_hdr(skb);
2051 +       if (uh->check == 0) {   /* zero checksum field: nothing to verify */
2052 +               skb->ip_summed = CHECKSUM_UNNECESSARY;
2053 +       } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
2054 +              if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
2055 +                                     proto, skb->csum))
2056 +                       skb->ip_summed = CHECKSUM_UNNECESSARY;
2057 +       }
2058 +       if (!skb_csum_unnecessary(skb)) /* still unverified: seed csum with the pseudo-header sum */
2059 +               skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
2060 +                                              skb->len, proto, 0);
2061 +       /* Probably, we should checksum udp header (it should be in cache
2062 +        * in any case) and data in tiny packets (< rx copybreak).
2063 +        */
2064 +
2065 +       return 0;
2066 +}
2067 +
2068 +/*
2069 + *     Validate an incoming UDP/UDP-Lite skb, look up its socket and queue it.
2070 + *     Returns 0 once the skb is consumed, or -N to resubmit it as protocol N.
2071 + */
2072 +int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
2073 +                  int proto)
2074 +{
2075 +       struct sock *sk;
2076 +       struct udphdr *uh;
2077 +       unsigned short ulen;
2078 +       struct rtable *rt = (struct rtable*)skb->dst;
2079 +       __be32 saddr, daddr;
2080 +       struct net *net = dev_net(skb->dev);
2081 +
2082 +       /*
2083 +        *  Validate the packet.
2084 +        */
2085 +       if (!pskb_may_pull(skb, sizeof(struct udphdr)))
2086 +               goto drop;              /* No space for header. */
2087 +
2088 +       uh   = udp_hdr(skb);
2089 +       ulen = ntohs(uh->len);
2090 +       saddr = ip_hdr(skb)->saddr;     /* read early: short_packet/csum_error log both */
2091 +       daddr = ip_hdr(skb)->daddr;
2092 +
2093 +       if (ulen > skb->len)
2094 +               goto short_packet;
2095 +
2096 +       if (proto == IPPROTO_UDP) {
2097 +               /* UDP validates ulen. */
2098 +               if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
2099 +                       goto short_packet;
2100 +               uh = udp_hdr(skb);      /* re-read the header pointer after the trim */
2101 +       }
2102 +
2103 +       if (udp4_csum_init(skb, uh, proto))
2104 +               goto csum_error;
2105 +
2106 +       if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
2107 +               return __udp4_lib_mcast_deliver(net, skb, uh,
2108 +                               saddr, daddr, udptable);
2109 +
2110 +       sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
2111 +
2112 +       if (sk != NULL) {
2113 +               int ret = udp_queue_rcv_skb(sk, skb);
2114 +               sock_put(sk);
2115 +
2116 +               /* a return value > 0 means to resubmit the input, but
2117 +                * it wants the return to be -protocol, or 0
2118 +                */
2119 +               if (ret > 0)
2120 +                       return -ret;
2121 +               return 0;
2122 +       }
2123 +
2124 +       if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
2125 +               goto drop;
2126 +       nf_reset(skb);
2127 +
2128 +       /* No socket. Drop packet silently, if checksum is wrong */
2129 +       if (udp_lib_checksum_complete(skb))
2130 +               goto csum_error;
2131 +
2132 +       UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
2133 +       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
2134 +
2135 +       /*
2136 +        * Hmm.  We got an UDP packet to a port to which we
2137 +        * don't wanna listen.  Ignore it.
2138 +        */
2139 +       kfree_skb(skb);
2140 +       return 0;
2141 +
2142 +short_packet:
2143 +       LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
2144 +                      proto == IPPROTO_UDPLITE ? "-Lite" : "",
2145 +                      &saddr,
2146 +                      ntohs(uh->source),
2147 +                      ulen,
2148 +                      skb->len,
2149 +                      &daddr,
2150 +                      ntohs(uh->dest));
2151 +       goto drop;
2152 +
2153 +csum_error:
2154 +       /*
2155 +        * RFC1122: OK.  Discards the bad packet silently (as far as
2156 +        * the network is concerned, anyway) as per 4.1.3.4 (MUST).
2157 +        */
2158 +       LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
2159 +                      proto == IPPROTO_UDPLITE ? "-Lite" : "",
2160 +                      &saddr,
2161 +                      ntohs(uh->source),
2162 +                      &daddr,
2163 +                      ntohs(uh->dest),
2164 +                      ulen);
2165 +drop:
2166 +       UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
2167 +       kfree_skb(skb);
2168 +       return 0;
2169 +}
2170 +
2171 +int udp_rcv(struct sk_buff *skb)
2172 +{      /* Plain-UDP entry point: the shared receive routine with udp_table and IPPROTO_UDP. */
2173 +       return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP);
2174 +}
2175 +
2176 +void udp_destroy_sock(struct sock *sk)
2177 +{      /* Flush any pending (corked) frames of sk while holding the socket lock. */
2178 +       lock_sock(sk);
2179 +       udp_flush_pending_frames(sk);
2180 +       release_sock(sk);
2181 +}
2182 +
2183 +/*
2184 + *     Socket option code shared by UDP and UDP-Lite; returns 0 or a negative errno.
2185 + */
2186 +int udp_lib_setsockopt(struct sock *sk, int level, int optname,
2187 +                      char __user *optval, int optlen,
2188 +                      int (*push_pending_frames)(struct sock *))
2189 +{
2190 +       struct udp_sock *up = udp_sk(sk);
2191 +       int val;
2192 +       int err = 0;
2193 +       int is_udplite = IS_UDPLITE(sk);
2194 +
2195 +       if (optlen<sizeof(int))
2196 +               return -EINVAL;
2197 +
2198 +       if (get_user(val, (int __user *)optval))        /* every option here takes one int */
2199 +               return -EFAULT;
2200 +
2201 +       switch (optname) {
2202 +       case UDP_CORK:
2203 +               if (val != 0) {
2204 +                       up->corkflag = 1;
2205 +               } else {
2206 +                       up->corkflag = 0;
2207 +                       lock_sock(sk);
2208 +                       (*push_pending_frames)(sk);     /* uncorking pushes out what was held back */
2209 +                       release_sock(sk);
2210 +               }
2211 +               break;
2212 +
2213 +       case UDP_ENCAP:
2214 +               switch (val) {
2215 +               case 0:         /* 0 shares the ESP-in-UDP path: encap_rcv is set, encap_type becomes 0 */
2216 +               case UDP_ENCAP_ESPINUDP:
2217 +               case UDP_ENCAP_ESPINUDP_NON_IKE:
2218 +                       up->encap_rcv = xfrm4_udp_encap_rcv;
2219 +                       /* FALLTHROUGH */
2220 +               case UDP_ENCAP_L2TPINUDP:
2221 +                       up->encap_type = val;
2222 +                       break;
2223 +               default:
2224 +                       err = -ENOPROTOOPT;
2225 +                       break;
2226 +               }
2227 +               break;
2228 +
2229 +       /*
2230 +        *      UDP-Lite's partial checksum coverage (RFC 3828).
2231 +        */
2232 +       /* The sender sets actual checksum coverage length via this option.
2233 +        * The case coverage > packet length is handled by send module. */
2234 +       case UDPLITE_SEND_CSCOV:
2235 +               if (!is_udplite)         /* Disable the option on UDP sockets */
2236 +                       return -ENOPROTOOPT;
2237 +               if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
2238 +                       val = 8;
2239 +               else if (val > USHORT_MAX)
2240 +                       val = USHORT_MAX;
2241 +               up->pcslen = val;
2242 +               up->pcflag |= UDPLITE_SEND_CC;
2243 +               break;
2244 +
2245 +       /* The receiver specifies a minimum checksum coverage value. To make
2246 +        * sense, this should be set to at least 8 (as done below). If zero is
2247 +        * used, this again means full checksum coverage.                     */
2248 +       case UDPLITE_RECV_CSCOV:
2249 +               if (!is_udplite)         /* Disable the option on UDP sockets */
2250 +                       return -ENOPROTOOPT;
2251 +               if (val != 0 && val < 8) /* Avoid silly minimal values.       */
2252 +                       val = 8;
2253 +               else if (val > USHORT_MAX)
2254 +                       val = USHORT_MAX;
2255 +               up->pcrlen = val;
2256 +               up->pcflag |= UDPLITE_RECV_CC;
2257 +               break;
2258 +
2259 +       default:
2260 +               err = -ENOPROTOOPT;
2261 +               break;
2262 +       }
2263 +
2264 +       return err;
2265 +}
2266 +
2267 +int udp_setsockopt(struct sock *sk, int level, int optname,
2268 +                  char __user *optval, int optlen)
2269 +{      /* SOL_UDP/SOL_UDPLITE options are handled here; every other level goes to ip_setsockopt(). */
2270 +       if (level == SOL_UDP  ||  level == SOL_UDPLITE)
2271 +               return udp_lib_setsockopt(sk, level, optname, optval, optlen,
2272 +                                         udp_push_pending_frames);
2273 +       return ip_setsockopt(sk, level, optname, optval, optlen);
2274 +}
2275 +
2276 +#ifdef CONFIG_COMPAT
2277 +int compat_udp_setsockopt(struct sock *sk, int level, int optname,
2278 +                         char __user *optval, int optlen)
2279 +{
2280 +       if (level == SOL_UDP  ||  level == SOL_UDPLITE) /* UDP-level options take the same path as udp_setsockopt() */
2281 +               return udp_lib_setsockopt(sk, level, optname, optval, optlen,
2282 +                                         udp_push_pending_frames);
2283 +       return compat_ip_setsockopt(sk, level, optname, optval, optlen); /* other levels use the compat IP handler */
2284 +}
2285 +#endif /* CONFIG_COMPAT */
2286 +
2287 +int udp_lib_getsockopt(struct sock *sk, int level, int optname,
2288 +                      char __user *optval, int __user *optlen)
2289 +{
2290 +       struct udp_sock *up = udp_sk(sk);
2291 +       int val, len;
2292 +
2293 +       if (get_user(len,optlen)) /* read the buffer length supplied by the caller */
2294 +               return -EFAULT;
2295 +
2296 +       len = min_t(unsigned int, len, sizeof(int)); /* never copy out more than one int */
2297 +
2298 +       if (len < 0) /* NOTE(review): looks unreachable after the unsigned clamp above - confirm */
2299 +               return -EINVAL;
2300 +
2301 +       switch (optname) {
2302 +       case UDP_CORK:
2303 +               val = up->corkflag;
2304 +               break;
2305 +
2306 +       case UDP_ENCAP:
2307 +               val = up->encap_type;
2308 +               break;
2309 +
2310 +       /* The following two cannot be changed on UDP sockets, the return is
2311 +        * always 0 (which corresponds to the full checksum coverage of UDP). */
2312 +       case UDPLITE_SEND_CSCOV:
2313 +               val = up->pcslen;
2314 +               break;
2315 +
2316 +       case UDPLITE_RECV_CSCOV:
2317 +               val = up->pcrlen;
2318 +               break;
2319 +
2320 +       default:
2321 +               return -ENOPROTOOPT;
2322 +       }
2323 +
2324 +       if (put_user(len, optlen)) /* report the (possibly reduced) length back to the caller */
2325 +               return -EFAULT;
2326 +       if (copy_to_user(optval, &val,len)) /* copy the first len bytes of the option value */
2327 +               return -EFAULT;
2328 +       return 0;
2329 +}
2330 +
2331 +int udp_getsockopt(struct sock *sk, int level, int optname,
2332 +                  char __user *optval, int __user *optlen)
2333 +{
2334 +       if (level == SOL_UDP  ||  level == SOL_UDPLITE) /* UDP/UDP-Lite level: use the shared UDP handler */
2335 +               return udp_lib_getsockopt(sk, level, optname, optval, optlen);
2336 +       return ip_getsockopt(sk, level, optname, optval, optlen); /* every other level is passed to the IP layer */
2337 +}
2338 +
2339 +#ifdef CONFIG_COMPAT
2340 +int compat_udp_getsockopt(struct sock *sk, int level, int optname,
2341 +                                char __user *optval, int __user *optlen)
2342 +{
2343 +       if (level == SOL_UDP  ||  level == SOL_UDPLITE) /* UDP-level options take the same path as udp_getsockopt() */
2344 +               return udp_lib_getsockopt(sk, level, optname, optval, optlen);
2345 +       return compat_ip_getsockopt(sk, level, optname, optval, optlen); /* other levels use the compat IP handler */
2346 +}
2347 +#endif /* CONFIG_COMPAT */
2348 +/**
2349 + *     udp_poll - wait for a UDP event.
2350 + *     @file: file struct
2351 + *     @sock: socket
2352 + *     @wait: poll table
2353 + *
2354 + *     This is the same as datagram poll, except for the special case of
2355 + *     blocking sockets. If an application is using a blocking fd
2356 + *     and a packet with a checksum error is in the queue,
2357 + *     it could get a return from select indicating data is available
2358 + *     but then block when reading it. Add special case code
2359 + *     to work around these arguably broken applications.
2360 + */
2361 +unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
2362 +{
2363 +       unsigned int mask = datagram_poll(file, sock, wait);
2364 +       struct sock *sk = sock->sk;
2365 +       int     is_lite = IS_UDPLITE(sk);
2366 +
2367 +       /* Check for false positives due to checksum errors */
2368 +       if ( (mask & POLLRDNORM) &&
2369 +            !(file->f_flags & O_NONBLOCK) &&
2370 +            !(sk->sk_shutdown & RCV_SHUTDOWN)){
2371 +               struct sk_buff_head *rcvq = &sk->sk_receive_queue;
2372 +               struct sk_buff *skb;
2373 +
2374 +               spin_lock_bh(&rcvq->lock);
2375 +               while ((skb = skb_peek(rcvq)) != NULL && /* look at the packet at the head of the queue */
2376 +                      udp_lib_checksum_complete(skb)) { /* non-zero is treated as a bad packet */
2377 +                       UDP_INC_STATS_BH(sock_net(sk),
2378 +                                       UDP_MIB_INERRORS, is_lite);
2379 +                       __skb_unlink(skb, rcvq); /* drop it and look at the next one */
2380 +                       kfree_skb(skb);
2381 +               }
2382 +               spin_unlock_bh(&rcvq->lock);
2383 +
2384 +               /* nothing to see, move along */
2385 +               if (skb == NULL)
2386 +                       mask &= ~(POLLIN | POLLRDNORM); /* queue is empty: clear the readable bits */
2387 +       }
2388 +
2389 +       return mask;
2390 +
2391 +}
2392 +
2393 +struct proto udp_prot = { /* protocol operations table registered under the name "UDP" */
2394 +       .name              = "UDP",
2395 +       .owner             = THIS_MODULE,
2396 +       .close             = udp_lib_close,
2397 +       .connect           = ip4_datagram_connect,
2398 +       .disconnect        = udp_disconnect,
2399 +       .ioctl             = udp_ioctl,
2400 +       .destroy           = udp_destroy_sock,
2401 +       .setsockopt        = udp_setsockopt,
2402 +       .getsockopt        = udp_getsockopt,
2403 +       .sendmsg           = udp_sendmsg,
2404 +       .recvmsg           = udp_recvmsg,
2405 +       .sendpage          = udp_sendpage,
2406 +       .backlog_rcv       = __udp_queue_rcv_skb,
2407 +       .hash              = udp_lib_hash,
2408 +       .unhash            = udp_lib_unhash,
2409 +       .get_port          = udp_v4_get_port,
2410 +       .memory_allocated  = &udp_memory_allocated,
2411 +       .sysctl_mem        = sysctl_udp_mem,
2412 +       .sysctl_wmem       = &sysctl_udp_wmem_min,
2413 +       .sysctl_rmem       = &sysctl_udp_rmem_min,
2414 +       .obj_size          = sizeof(struct udp_sock),
2415 +       .slab_flags        = SLAB_DESTROY_BY_RCU,
2416 +       .h.udp_table       = &udp_table,
2417 +#ifdef CONFIG_COMPAT
2418 +       .compat_setsockopt = compat_udp_setsockopt,
2419 +       .compat_getsockopt = compat_udp_getsockopt,
2420 +#endif /* CONFIG_COMPAT */
2421 +};
2422 +
2423 +/* ------------------------------------------------------------------------ */
2424 +#ifdef CONFIG_PROC_FS
2425 +
2426 +static struct sock *udp_get_first(struct seq_file *seq, int start)
2427 +{
2428 +       struct sock *sk;
2429 +       struct udp_iter_state *state = seq->private;
2430 +       struct net *net = seq_file_net(seq);
2431 +
2432 +       for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { /* scan buckets from 'start' */
2433 +               struct hlist_nulls_node *node;
2434 +               struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
2435 +               spin_lock_bh(&hslot->lock);
2436 +               sk_nulls_for_each(sk, node, &hslot->head) {
2437 +                       if (!net_eq(sock_net(sk), net)) /* skip sockets of other network namespaces */
2438 +                               continue;
2439 +                       if (sk->sk_family == state->family)
2440 +                               goto found; /* returns with this bucket's lock still held */
2441 +               }
2442 +               spin_unlock_bh(&hslot->lock); /* no match in this bucket */
2443 +       }
2444 +       sk = NULL; /* no match in any bucket; no lock is held here */
2445 +found:
2446 +       return sk;
2447 +}
2448 +
2449 +static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
2450 +{
2451 +       struct udp_iter_state *state = seq->private;
2452 +       struct net *net = seq_file_net(seq);
2453 +
2454 +       do {
2455 +               sk = sk_nulls_next(sk);
2456 +       } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); /* skip other namespaces/families */
2457 +
2458 +       if (!sk) { /* current bucket has no further match */
2459 +               if (state->bucket < UDP_HTABLE_SIZE)
2460 +                       spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); /* release it before moving on */
2461 +               return udp_get_first(seq, state->bucket + 1); /* continue the scan in the following bucket */
2462 +       }
2463 +       return sk;
2464 +}
2465 +
2466 +static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
2467 +{
2468 +       struct sock *sk = udp_get_first(seq, 0);
2469 +
2470 +       if (sk)
2471 +               while (pos && (sk = udp_get_next(seq, sk)) != NULL) /* step forward pos matching sockets */
2472 +                       --pos;
2473 +       return pos ? NULL : sk; /* NULL when the table ran out before pos reached 0 */
2474 +}
2475 +
2476 +static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
2477 +{
2478 +       struct udp_iter_state *state = seq->private;
2479 +       state->bucket = UDP_HTABLE_SIZE; /* out-of-range value: udp_seq_stop() unlocks only when bucket is in range */
2480 +
2481 +       return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; /* position 0 is the header token */
2482 +}
2483 +
2484 +static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2485 +{
2486 +       struct sock *sk;
2487 +
2488 +       if (v == SEQ_START_TOKEN)
2489 +               sk = udp_get_idx(seq, 0); /* first matching socket follows the header token */
2490 +       else
2491 +               sk = udp_get_next(seq, v); /* v is the socket returned by the previous step */
2492 +
2493 +       ++*pos;
2494 +       return sk;
2495 +}
2496 +
2497 +static void udp_seq_stop(struct seq_file *seq, void *v)
2498 +{
2499 +       struct udp_iter_state *state = seq->private;
2500 +
2501 +       if (state->bucket < UDP_HTABLE_SIZE) /* in range means udp_get_first() left this bucket locked */
2502 +               spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
2503 +}
2504 +
2505 +static int udp_seq_open(struct inode *inode, struct file *file)
2506 +{
2507 +       struct udp_seq_afinfo *afinfo = PDE(inode)->data; /* set as entry data by udp_proc_register() */
2508 +       struct udp_iter_state *s;
2509 +       int err;
2510 +
2511 +       err = seq_open_net(inode, file, &afinfo->seq_ops,
2512 +                          sizeof(struct udp_iter_state));
2513 +       if (err < 0)
2514 +               return err;
2515 +
2516 +       s = ((struct seq_file *)file->private_data)->private; /* presumably the state area sized above - confirm */
2517 +       s->family               = afinfo->family;
2518 +       s->udp_table            = afinfo->udp_table;
2519 +       return err; /* non-negative result of seq_open_net() */
2520 +}
2521 +
2522 +/* ------------------------------------------------------------------------ */
2523 +int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
2524 +{
2525 +       struct proc_dir_entry *p;
2526 +       int rc = 0;
2527 +
2528 +       afinfo->seq_fops.open           = udp_seq_open; /* fill in the file operations shared by all callers */
2529 +       afinfo->seq_fops.read           = seq_read;
2530 +       afinfo->seq_fops.llseek         = seq_lseek;
2531 +       afinfo->seq_fops.release        = seq_release_net;
2532 +
2533 +       afinfo->seq_ops.start           = udp_seq_start; /* .show is left as supplied by the caller */
2534 +       afinfo->seq_ops.next            = udp_seq_next;
2535 +       afinfo->seq_ops.stop            = udp_seq_stop;
2536 +
2537 +       p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2538 +                            &afinfo->seq_fops, afinfo); /* afinfo is read back in udp_seq_open() */
2539 +       if (!p)
2540 +               rc = -ENOMEM; /* the proc entry could not be created */
2541 +       return rc;
2542 +}
2543 +
2544 +void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
2545 +{
2546 +       proc_net_remove(net, afinfo->name); /* remove the entry created by udp_proc_register() */
2547 +}
2548 +
2549 +/* ------------------------------------------------------------------------ */
2550 +static void udp4_format_sock(struct sock *sp, struct seq_file *f,
2551 +               int bucket, int *len)
2552 +{
2553 +       struct inet_sock *inet = inet_sk(sp);
2554 +       __be32 dest = inet->daddr;
2555 +       __be32 src  = inet->rcv_saddr;
2556 +       __u16 destp       = ntohs(inet->dport); /* ports are converted to host byte order for printing */
2557 +       __u16 srcp        = ntohs(inet->sport);
2558 +
2559 +       seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2560 +               " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
2561 +               bucket, src, srcp, dest, destp, sp->sk_state,
2562 +               atomic_read(&sp->sk_wmem_alloc),
2563 +               atomic_read(&sp->sk_rmem_alloc),
2564 +               0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), /* zeros fill the tr, tm->when, retrnsmt and timeout columns */
2565 +               atomic_read(&sp->sk_refcnt), sp,
2566 +               atomic_read(&sp->sk_drops), len); /* trailing %n stores the character count in *len */
2567 +}
2568 +
2569 +int udp4_seq_show(struct seq_file *seq, void *v)
2570 +{
2571 +       if (v == SEQ_START_TOKEN) /* first record: print the column header */
2572 +               seq_printf(seq, "%-127s\n",
2573 +                          "  sl  local_address rem_address   st tx_queue "
2574 +                          "rx_queue tr tm->when retrnsmt   uid  timeout "
2575 +                          "inode ref pointer drops");
2576 +       else {
2577 +               struct udp_iter_state *state = seq->private;
2578 +               int len;
2579 +
2580 +               udp4_format_sock(v, seq, state->bucket, &len); /* len receives the width of the row just printed */
2581 +               seq_printf(seq, "%*s\n", 127 - len ,""); /* pad with spaces to the same 127 columns as the header */
2582 +       }
2583 +       return 0;
2584 +}
2585 +
2586 +/* ------------------------------------------------------------------------ */
2587 +static struct udp_seq_afinfo udp4_seq_afinfo = { /* proc entry description for AF_INET sockets in udp_table */
2588 +       .name           = "udp",
2589 +       .family         = AF_INET,
2590 +       .udp_table      = &udp_table,
2591 +       .seq_fops       = {
2592 +               .owner  =       THIS_MODULE, /* remaining fops are filled in by udp_proc_register() */
2593 +       },
2594 +       .seq_ops        = {
2595 +               .show           = udp4_seq_show, /* start/next/stop are filled in by udp_proc_register() */
2596 +       },
2597 +};
2598 +
2599 +static int udp4_proc_init_net(struct net *net)
2600 +{
2601 +       return udp_proc_register(net, &udp4_seq_afinfo); /* create the "udp" proc entry for this struct net */
2602 +}
2603 +
2604 +static void udp4_proc_exit_net(struct net *net)
2605 +{
2606 +       udp_proc_unregister(net, &udp4_seq_afinfo); /* remove the "udp" proc entry for this struct net */
2607 +}
2608 +
2609 +static struct pernet_operations udp4_net_ops = { /* per-namespace hooks, registered by udp4_proc_init() */
2610 +       .init = udp4_proc_init_net,
2611 +       .exit = udp4_proc_exit_net,
2612 +};
2613 +
2614 +int __init udp4_proc_init(void)
2615 +{
2616 +       return register_pernet_subsys(&udp4_net_ops); /* returns the registration result unchanged */
2617 +}
2618 +
2619 +void udp4_proc_exit(void)
2620 +{
2621 +       unregister_pernet_subsys(&udp4_net_ops); /* undoes udp4_proc_init() */
2622 +}
2623 +#endif /* CONFIG_PROC_FS */
2624 +
2625 +void __init udp_table_init(struct udp_table *table)
2626 +{
2627 +       int i;
2628 +
2629 +       for (i = 0; i < UDP_HTABLE_SIZE; i++) { /* set up every hash bucket: list head plus its lock */
2630 +               INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); /* NOTE(review): i is presumably the list's nulls marker - confirm */
2631 +               spin_lock_init(&table->hash[i].lock);
2632 +       }
2633 +}
2634 +
2635 +void __init udp_init(void)
2636 +{
2637 +       unsigned long nr_pages, limit;
2638 +
2639 +       udp_table_init(&udp_table);
2640 +       /* Set the pressure threshold up by the same strategy of TCP. It is a
2641 +        * fraction of global memory that is up to 1/2 at 256 MB, decreasing
2642 +        * toward zero with the amount of memory, with a floor of 128 pages.
2643 +        */
2644 +       nr_pages = totalram_pages - totalhigh_pages; /* highmem pages are excluded from the calculation */
2645 +       limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
2646 +       limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
2647 +       limit = max(limit, 128UL); /* the 128-page floor mentioned above */
2648 +       sysctl_udp_mem[0] = limit / 4 * 3; /* three quarters of limit (integer arithmetic) */
2649 +       sysctl_udp_mem[1] = limit;
2650 +       sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2; /* twice entry [0] */
2651 +
2652 +       sysctl_udp_rmem_min = SK_MEM_QUANTUM; /* both minimum buffer sysctls start at one quantum */
2653 +       sysctl_udp_wmem_min = SK_MEM_QUANTUM;
2654 +}
2655 +
2656 +EXPORT_SYMBOL(udp_disconnect);
2657 +EXPORT_SYMBOL(udp_ioctl);
2658 +EXPORT_SYMBOL(udp_prot);
2659 +EXPORT_SYMBOL(udp_sendmsg);
2660 +EXPORT_SYMBOL(udp_lib_getsockopt);
2661 +EXPORT_SYMBOL(udp_lib_setsockopt);
2662 +EXPORT_SYMBOL(udp_poll);
2663 +EXPORT_SYMBOL(udp_lib_get_port);
2664 +
2665 +#ifdef CONFIG_PROC_FS
2666 +EXPORT_SYMBOL(udp_proc_register);
2667 +EXPORT_SYMBOL(udp_proc_unregister);
2668 +#endif /* CONFIG_PROC_FS */