/* netfilter.c: look after the filters for various protocols.
 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
 *
 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
 * way.
 *
 * Rusty Russell (C)2000 -- This code is GPL.
 *
 * February 2000: Modified by James Morris to have 1 queue per protocol.
 * 15-Mar-2000:   Added NF_REPEAT --RR.
 */
#include <linux/config.h>
#include <linux/netfilter.h>
#include <net/protocol.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/brlock.h>

#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
/* In this code, we can be waiting indefinitely for userspace to
 * service a packet if a hook returns NF_QUEUE.  We could keep a count
 * of skbuffs queued for userspace, and not deregister a hook unless
 * this is zero, but that sucks.  Now, we simply check when the
 * packets come back: if the hook is gone, the packet is discarded. */
#ifdef CONFIG_NETFILTER_DEBUG
#define NFDEBUG(format, args...)  printk(format , ## args)
#else
#define NFDEBUG(format, args...)
#endif
/* Sockopts only registered and called from user context, so
   BR_NETPROTO_LOCK would be overkill.  Also, [gs]etsockopt calls may
   sleep. */
static DECLARE_MUTEX(nf_sockopt_mutex);

struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
static LIST_HEAD(nf_sockopts);
/*
 * A queue handler may be registered for each protocol.  Each is protected by
 * a long term mutex.  The handler must provide an outfn() to accept packets
 * for queueing and must reinject all packets it receives, no matter what.
 */
static struct nf_queue_handler_t {
        nf_queue_outfn_t outfn;
        void *data;
} queue_handler[NPROTO];
int nf_register_hook(struct nf_hook_ops *reg)
{
        struct list_head *i;

        br_write_lock_bh(BR_NETPROTO_LOCK);
        for (i = nf_hooks[reg->pf][reg->hooknum].next;
             i != &nf_hooks[reg->pf][reg->hooknum];
             i = i->next) {
                if (reg->priority < ((struct nf_hook_ops *)i)->priority)
                        break;
        }
        list_add(&reg->list, i->prev);
        br_write_unlock_bh(BR_NETPROTO_LOCK);
        return 0;
}
void nf_unregister_hook(struct nf_hook_ops *reg)
{
        br_write_lock_bh(BR_NETPROTO_LOCK);
        list_del(&reg->list);
        br_write_unlock_bh(BR_NETPROTO_LOCK);
}
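
/* Illustrative sketch (not part of the original file): a minimal module-side
 * use of nf_register_hook()/nf_unregister_hook().  The hook function name,
 * hook point and priority are invented for the example; a real user also
 * includes <linux/netfilter_ipv4.h> for the NF_IP_* constants.
 */
#if 0
static unsigned int example_hook(unsigned int hooknum,
                                 struct sk_buff **pskb,
                                 const struct net_device *in,
                                 const struct net_device *out,
                                 int (*okfn)(struct sk_buff *))
{
        /* Let every packet continue down the hook chain. */
        return NF_ACCEPT;
}

static struct nf_hook_ops example_ops = {
        { NULL, NULL },         /* list: filled in by nf_register_hook() */
        example_hook,           /* called from nf_iterate() below */
        PF_INET,                /* protocol family */
        NF_IP_PRE_ROUTING,      /* hook point */
        NF_IP_PRI_FIRST         /* run before other hooks at this point */
};

/* Typically from a module's init/exit functions:
 *      nf_register_hook(&example_ops);
 *      ...
 *      nf_unregister_hook(&example_ops);
 */
#endif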
/* Do exclusive ranges overlap? */
static inline int overlap(int min1, int max1, int min2, int max2)
{
        return (min1 >= min2 && min1 < max2)
                || (max1 > min2 && max1 <= max2);
}
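
/* Worked example (added for illustration): with exclusive upper bounds,
 * overlap(64, 66, 66, 68) == 0 -- the ranges [64,66) and [66,68) only touch,
 * while overlap(64, 67, 66, 68) == 1 -- they share optname 66. */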
/* Functions to register sockopt ranges (exclusive). */
int nf_register_sockopt(struct nf_sockopt_ops *reg)
{
        struct list_head *i;
        int ret = 0;

        if (down_interruptible(&nf_sockopt_mutex) != 0)
                return -EINTR;

        for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
                struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
                if (ops->pf == reg->pf
                    && (overlap(ops->set_optmin, ops->set_optmax,
                                reg->set_optmin, reg->set_optmax)
                        || overlap(ops->get_optmin, ops->get_optmax,
                                   reg->get_optmin, reg->get_optmax))) {
                        NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
                                ops->set_optmin, ops->set_optmax,
                                ops->get_optmin, ops->get_optmax,
                                reg->set_optmin, reg->set_optmax,
                                reg->get_optmin, reg->get_optmax);
                        ret = -EBUSY;
                        goto out;
                }
        }

        list_add(&reg->list, &nf_sockopts);
 out:
        up(&nf_sockopt_mutex);
        return ret;
}
void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
{
        /* No point being interruptible: we're probably in cleanup_module() */
 restart:
        down(&nf_sockopt_mutex);
        if (reg->use != 0) {
                /* To be woken by nf_sockopt call... */
                reg->cleanup_task = current;
                up(&nf_sockopt_mutex);
                set_current_state(TASK_UNINTERRUPTIBLE);
                schedule();
                goto restart;
        }
        list_del(&reg->list);
        up(&nf_sockopt_mutex);
}
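
/* Illustrative sketch (not part of the original file): filling in a
 * struct nf_sockopt_ops and registering it.  The option numbers and handler
 * names are invented; the handler signatures are inferred from the
 * ops->get()/ops->set() call sites in nf_sockopt() below and may differ in
 * detail from the real header.
 */
#if 0
static int example_get(struct sock *sk, int optval, char *user, int *len)
{
        return 0;       /* copy option data out to *user here */
}

static int example_set(struct sock *sk, int optval, char *user, int len)
{
        return 0;       /* apply option data from *user here */
}

static struct nf_sockopt_ops example_sockopt_ops;

static int __init example_sockopt_init(void)
{
        example_sockopt_ops.pf = PF_INET;
        /* Exclusive ranges: optnames 96 and 97 for set, 96 only for get. */
        example_sockopt_ops.set_optmin = 96;
        example_sockopt_ops.set_optmax = 98;
        example_sockopt_ops.set = example_set;
        example_sockopt_ops.get_optmin = 96;
        example_sockopt_ops.get_optmax = 97;
        example_sockopt_ops.get = example_get;
        return nf_register_sockopt(&example_sockopt_ops);
}
#endif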
#ifdef CONFIG_NETFILTER_DEBUG
#include <net/ip.h>
#include <net/route.h>
#include <net/tcp.h>
#include <linux/netfilter_ipv4.h>
static void debug_print_hooks_ip(unsigned int nf_debug)
{
        if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
                printk("PRE_ROUTING ");
                nf_debug ^= (1 << NF_IP_PRE_ROUTING);
        }
        if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
                printk("LOCAL_IN ");
                nf_debug ^= (1 << NF_IP_LOCAL_IN);
        }
        if (nf_debug & (1 << NF_IP_FORWARD)) {
                printk("FORWARD ");
                nf_debug ^= (1 << NF_IP_FORWARD);
        }
        if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
                printk("LOCAL_OUT ");
                nf_debug ^= (1 << NF_IP_LOCAL_OUT);
        }
        if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
                printk("POST_ROUTING ");
                nf_debug ^= (1 << NF_IP_POST_ROUTING);
        }
        if (nf_debug)
                printk("Crap bits: 0x%04X", nf_debug);
        printk("\n");
}
void nf_dump_skb(int pf, struct sk_buff *skb)
{
        printk("skb: pf=%i %s dev=%s len=%u\n",
               pf,
               skb->sk ? "(owned)" : "(unowned)",
               skb->dev ? skb->dev->name : "(no dev)",
               skb->len);
        switch (pf) {
        case PF_INET: {
                const struct iphdr *ip = skb->nh.iph;
                __u32 *opt = (__u32 *) (ip + 1);
                int opti;
                __u16 src_port = 0, dst_port = 0;

                if (ip->protocol == IPPROTO_TCP
                    || ip->protocol == IPPROTO_UDP) {
                        struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
                        src_port = ntohs(tcp->source);
                        dst_port = ntohs(tcp->dest);
                }

                printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
                       " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
                       ip->protocol, NIPQUAD(ip->saddr),
                       src_port, NIPQUAD(ip->daddr),
                       dst_port,
                       ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
                       ntohs(ip->frag_off), ip->ttl);

                for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
                        printk(" O=0x%8.8X", *opt++);
                printk("\n");
        }
        }
}
void nf_debug_ip_local_deliver(struct sk_buff *skb)
{
        /* If it's a loopback packet, it must have come through
         * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
         * NF_IP_LOCAL_IN.  Otherwise, must have gone through
         * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING.  */
        if (!skb->dev) {
                printk("ip_local_deliver: skb->dev is NULL.\n");
        }
        else if (strcmp(skb->dev->name, "lo") == 0) {
                if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
                                      | (1 << NF_IP_POST_ROUTING)
                                      | (1 << NF_IP_PRE_ROUTING)
                                      | (1 << NF_IP_LOCAL_IN))) {
                        printk("ip_local_deliver: bad loopback skb: ");
                        debug_print_hooks_ip(skb->nf_debug);
                        nf_dump_skb(PF_INET, skb);
                }
        }
        else {
                if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
                                      | (1<<NF_IP_LOCAL_IN))) {
                        printk("ip_local_deliver: bad non-lo skb: ");
                        debug_print_hooks_ip(skb->nf_debug);
                        nf_dump_skb(PF_INET, skb);
                }
        }
}
void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
{
        if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
                                 | (1 << NF_IP_POST_ROUTING))) {
                printk("ip_dev_loopback_xmit: bad owned skb = %p: ",
                       newskb);
                debug_print_hooks_ip(newskb->nf_debug);
                nf_dump_skb(PF_INET, newskb);
        }
        /* Clear to avoid confusing input check */
        newskb->nf_debug = 0;
}
void nf_debug_ip_finish_output2(struct sk_buff *skb)
{
        /* If it's owned, it must have gone through the
         * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
         * Otherwise, must have gone through
         * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
         */
        if (skb->sk) {
                if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
                                      | (1 << NF_IP_POST_ROUTING))) {
                        printk("ip_finish_output: bad owned skb = %p: ", skb);
                        debug_print_hooks_ip(skb->nf_debug);
                        nf_dump_skb(PF_INET, skb);
                }
        } else {
                if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
                                      | (1 << NF_IP_FORWARD)
                                      | (1 << NF_IP_POST_ROUTING))) {
                        /* Fragments, entunnelled packets, TCP RSTs
                           generated by ipt_REJECT will have no
                           owners, but still may be local */
                        if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
                                              | (1 << NF_IP_POST_ROUTING))) {
                                printk("ip_finish_output:"
                                       " bad unowned skb = %p: ",skb);
                                debug_print_hooks_ip(skb->nf_debug);
                                nf_dump_skb(PF_INET, skb);
                        }
                }
        }
}
#endif /*CONFIG_NETFILTER_DEBUG*/
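
/* Note added for orientation: nf_sockopt() below pairs with
 * nf_unregister_sockopt() above.  ops->use is bumped while a handler runs
 * outside the mutex; a pending unregister records itself in
 * ops->cleanup_task, sleeps, and is woken to re-check whenever a user
 * finishes, so the ops cannot be torn down while a [gs]etsockopt call is
 * still inside it. */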
/* Call get/setsockopt() */
static int nf_sockopt(struct sock *sk, int pf, int val,
                      char *opt, int *len, int get)
{
        struct list_head *i;
        struct nf_sockopt_ops *ops;
        int ret;

        if (down_interruptible(&nf_sockopt_mutex) != 0)
                return -EINTR;

        for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
                ops = (struct nf_sockopt_ops *)i;
                if (ops->pf == pf) {
                        if (get) {
                                if (val >= ops->get_optmin
                                    && val < ops->get_optmax) {
                                        ops->use++;
                                        up(&nf_sockopt_mutex);
                                        ret = ops->get(sk, val, opt, len);
                                        goto out;
                                }
                        } else {
                                if (val >= ops->set_optmin
                                    && val < ops->set_optmax) {
                                        ops->use++;
                                        up(&nf_sockopt_mutex);
                                        ret = ops->set(sk, val, opt, *len);
                                        goto out;
                                }
                        }
                }
        }
        up(&nf_sockopt_mutex);
        return -ENOPROTOOPT;

 out:
        down(&nf_sockopt_mutex);
        ops->use--;
        if (ops->cleanup_task)
                wake_up_process(ops->cleanup_task);
        up(&nf_sockopt_mutex);
        return ret;
}
int nf_setsockopt(struct sock *sk, int pf, int val, char *opt,
                  int len)
{
        return nf_sockopt(sk, pf, val, opt, &len, 0);
}
int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len)
{
        return nf_sockopt(sk, pf, val, opt, len, 1);
}
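
/* Note added for orientation: nf_iterate() below walks one hook chain,
 * calling each nf_hook_ops in priority order.  NF_ACCEPT keeps going,
 * NF_REPEAT re-runs the same hook, and NF_DROP/NF_STOLEN/NF_QUEUE stop the
 * walk and are returned to the caller; *i is left pointing at the last
 * element visited so a later nf_reinject() can resume from there. */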
static unsigned int nf_iterate(struct list_head *head,
                               struct sk_buff **skb,
                               int hook,
                               const struct net_device *indev,
                               const struct net_device *outdev,
                               struct list_head **i,
                               int (*okfn)(struct sk_buff *))
{
        for (*i = (*i)->next; *i != head; *i = (*i)->next) {
                struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
                switch (elem->hook(hook, skb, indev, outdev, okfn)) {
                case NF_QUEUE:
                        return NF_QUEUE;

                case NF_STOLEN:
                        return NF_STOLEN;

                case NF_DROP:
                        return NF_DROP;

                case NF_REPEAT:
                        *i = (*i)->prev;
                        break;

#ifdef CONFIG_NETFILTER_DEBUG
                case NF_ACCEPT:
                        break;

                default:
                        NFDEBUG("Evil return from %p(%u).\n",
                                elem->hook, hook);
#endif
                }
        }
        return NF_ACCEPT;
}
int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
{
        int ret;

        br_write_lock_bh(BR_NETPROTO_LOCK);
        if (queue_handler[pf].outfn)
                ret = -EBUSY;
        else {
                queue_handler[pf].outfn = outfn;
                queue_handler[pf].data = data;
                ret = 0;
        }
        br_write_unlock_bh(BR_NETPROTO_LOCK);

        return ret;
}
/* The caller must flush their queue before this */
int nf_unregister_queue_handler(int pf)
{
        br_write_lock_bh(BR_NETPROTO_LOCK);
        queue_handler[pf].outfn = NULL;
        queue_handler[pf].data = NULL;
        br_write_unlock_bh(BR_NETPROTO_LOCK);
        return 0;
}
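
/* Illustrative sketch (not part of the original file): a skeletal queue
 * handler honouring the contract stated above -- every packet handed to
 * outfn() must eventually come back through nf_reinject().  The names, the
 * single-packet stash and the NF_ACCEPT verdict are invented for the
 * example and are not locking-safe; a real handler (such as ip_queue)
 * parks packets properly and consults userspace before reinjecting.
 */
#if 0
static struct sk_buff *example_skb;
static struct nf_info *example_info;

static int example_outfn(struct sk_buff *skb, struct nf_info *info,
                         void *data)
{
        /* Remember the packet; the verdict is delivered later. */
        example_skb = skb;
        example_info = info;
        return 0;       /* < 0 tells nf_queue() to drop the skb itself */
}

/* Later, from another context, the stored packet must come back: */
static void example_flush(void)
{
        if (example_skb)
                nf_reinject(example_skb, example_info, NF_ACCEPT);
        example_skb = NULL;
        example_info = NULL;
}

/* Registration/removal, typically from module init/exit:
 *      nf_register_queue_handler(PF_INET, example_outfn, NULL);
 *      ...
 *      nf_unregister_queue_handler(PF_INET);
 */
#endif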
/*
 * Any packet that leaves via this function must come back
 * through nf_reinject().
 */
static void nf_queue(struct sk_buff *skb,
                     struct list_head *elem,
                     int pf, unsigned int hook,
                     struct net_device *indev,
                     struct net_device *outdev,
                     int (*okfn)(struct sk_buff *))
{
        int status;
        struct nf_info *info;

        if (!queue_handler[pf].outfn) {
                kfree_skb(skb);
                return;
        }

        info = kmalloc(sizeof(*info), GFP_ATOMIC);
        if (!info) {
                if (net_ratelimit())
                        printk(KERN_ERR "OOM queueing packet %p\n",
                               skb);
                kfree_skb(skb);
                return;
        }

        *info = (struct nf_info) {
                (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };

        /* Bump dev refs so they don't vanish while packet is out */
        if (indev) dev_hold(indev);
        if (outdev) dev_hold(outdev);

        status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
        if (status < 0) {
                /* Handler refused the packet: undo our refs and drop it. */
                if (indev) dev_put(indev);
                if (outdev) dev_put(outdev);
                kfree(info);
                kfree_skb(skb);
                return;
        }
}
/* We have BR_NETPROTO_LOCK here */
int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
                 struct net_device *indev,
                 struct net_device *outdev,
                 int (*okfn)(struct sk_buff *))
{
        struct list_head *elem;
        unsigned int verdict;
        int ret = 0;

#ifdef CONFIG_NETFILTER_DEBUG
        if (skb->nf_debug & (1 << hook)) {
                printk("nf_hook: hook %i already set.\n", hook);
                nf_dump_skb(pf, skb);
        }
        skb->nf_debug |= (1 << hook);
#endif

        elem = &nf_hooks[pf][hook];
        verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
                             outdev, &elem, okfn);
        if (verdict == NF_QUEUE) {
                NFDEBUG("nf_hook: Verdict = QUEUE.\n");
                nf_queue(skb, elem, pf, hook, indev, outdev, okfn);
        }

        switch (verdict) {
        case NF_ACCEPT:
                ret = okfn(skb);
                break;

        case NF_DROP:
                kfree_skb(skb);
                ret = -EPERM;
                break;
        }

        return ret;
}
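
/* Note added for orientation: callers normally reach nf_hook_slow() through
 * the NF_HOOK() wrapper macro in <linux/netfilter.h> rather than directly.
 * Typical 2.4-era usage (shown here only as an illustration) is IPv4 input
 * running its pre-routing hooks with something like:
 *
 *      NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish);
 *
 * so okfn (here ip_rcv_finish) is what an NF_ACCEPT verdict falls through to.
 */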
void nf_reinject(struct sk_buff *skb, struct nf_info *info,
                 unsigned int verdict)
{
        struct list_head *elem = &info->elem->list;
        struct list_head *i;

        /* We don't have BR_NETPROTO_LOCK here */
        br_read_lock_bh(BR_NETPROTO_LOCK);
        for (i = nf_hooks[info->pf][info->hook].next; i != elem; i = i->next) {
                if (i == &nf_hooks[info->pf][info->hook]) {
                        /* The module which sent it to userspace is gone. */
                        NFDEBUG("%s: module disappeared, dropping packet.\n",
                                __FUNCTION__);
                        verdict = NF_DROP;
                        break;
                }
        }

        /* Continue traversal iff userspace said ok... */
        if (verdict == NF_REPEAT) {
                elem = elem->prev;
                verdict = NF_ACCEPT;
        }

        if (verdict == NF_ACCEPT) {
                verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
                                     &skb, info->hook,
                                     info->indev, info->outdev, &elem,
                                     info->okfn);
        }

        switch (verdict) {
        case NF_ACCEPT:
                info->okfn(skb);
                break;

        case NF_QUEUE:
                nf_queue(skb, elem, info->pf, info->hook,
                         info->indev, info->outdev, info->okfn);
                break;

        case NF_DROP:
                kfree_skb(skb);
                break;
        }
        br_read_unlock_bh(BR_NETPROTO_LOCK);

        /* Release those devices we held, or Alexey will kill me. */
        if (info->indev) dev_put(info->indev);
        if (info->outdev) dev_put(info->outdev);

        kfree(info);
        return;
}
void __init netfilter_init(void)
{
        int i, h;

        for (i = 0; i < NPROTO; i++) {
                for (h = 0; h < NF_MAX_HOOKS; h++)
                        INIT_LIST_HEAD(&nf_hooks[i][h]);
        }
}