net/ipv4/xfrm4_policy.c

   1 /*
   2  * xfrm4_policy.c
   3  *
   4  * Changes:
   5  *      Kazunori MIYAZAWA @USAGI
   6  *      YOSHIFUJI Hideaki @USAGI
   7  *              Split up af-specific portion
   8  *
   9  */
  10
  11 #include <linux/err.h>
  12 #include <linux/kernel.h>
  13 #include <linux/inetdevice.h>
  14 #include <net/dst.h>
  15 #include <net/xfrm.h>
  16 #include <net/ip.h>
  17
/*
 * Forward declarations: both objects are defined further down in this file
 * but are referenced earlier (e.g. by xfrm4_garbage_collect()).
 */
static struct dst_ops xfrm4_dst_ops;
static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
  20
  21 static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
  22                                           xfrm_address_t *saddr,
  23                                           xfrm_address_t *daddr)
  24 {
  25         struct flowi fl = {
  26                 .nl_u = {
  27                         .ip4_u = {
  28                                 .tos = tos,
  29                                 .daddr = daddr->a4,
  30                         },
  31                 },
  32         };
  33         struct dst_entry *dst;
  34         struct rtable *rt;
  35         int err;
  36
  37         if (saddr)
  38                 fl.fl4_src = saddr->a4;
  39
  40         err = __ip_route_output_key(net, &rt, &fl);
  41         dst = &rt->u.dst;
  42         if (err)
  43                 dst = ERR_PTR(err);
  44         return dst;
  45 }
  46
  47 static int xfrm4_get_saddr(struct net *net,
  48                            xfrm_address_t *saddr, xfrm_address_t *daddr)
  49 {
  50         struct dst_entry *dst;
  51         struct rtable *rt;
  52
  53         dst = xfrm4_dst_lookup(net, 0, NULL, daddr);
  54         if (IS_ERR(dst))
  55                 return -EHOSTUNREACH;
  56
  57         rt = (struct rtable *)dst;
  58         saddr->a4 = rt->rt_src;
  59         dst_release(dst);
  60         return 0;
  61 }
  62
  63 static struct dst_entry *
  64 __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
  65 {
  66         struct dst_entry *dst;
  67
  68         read_lock_bh(&policy->lock);
  69         for (dst = policy->bundles; dst; dst = dst->next) {
  70                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
  71                 if (xdst->u.rt.fl.oif == fl->oif &&     /*XXX*/
  72                     xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
  73                     xdst->u.rt.fl.fl4_src == fl->fl4_src &&
  74                     xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
  75                     xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) {
  76                         dst_clone(dst);
  77                         break;
  78                 }
  79         }
  80         read_unlock_bh(&policy->lock);
  81         return dst;
  82 }
  83
  84 static int xfrm4_get_tos(struct flowi *fl)
  85 {
  86         return fl->fl4_tos;
  87 }
  88
  89 static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
  90                            int nfheader_len)
  91 {
  92         return 0;
  93 }
  94
  95 static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
  96 {
  97         struct rtable *rt = (struct rtable *)xdst->route;
  98
  99         xdst->u.rt.fl = rt->fl;
 100
 101         xdst->u.dst.dev = dev;
 102         dev_hold(dev);
 103
 104         xdst->u.rt.idev = in_dev_get(dev);
 105         if (!xdst->u.rt.idev)
 106                 return -ENODEV;
 107
 108         xdst->u.rt.peer = rt->peer;
 109         if (rt->peer)
 110                 atomic_inc(&rt->peer->refcnt);
 111
 112         /* Sheit... I remember I did this right. Apparently,
 113          * it was magically lost, so this code needs audit */
 114         xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
 115                                               RTCF_LOCAL);
 116         xdst->u.rt.rt_type = rt->rt_type;
 117         xdst->u.rt.rt_src = rt->rt_src;
 118         xdst->u.rt.rt_dst = rt->rt_dst;
 119         xdst->u.rt.rt_gateway = rt->rt_gateway;
 120         xdst->u.rt.rt_spec_dst = rt->rt_spec_dst;
 121
 122         return 0;
 123 }
 124
 125 static void
 126 _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 127 {
 128         struct iphdr *iph = ip_hdr(skb);
 129         u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
 130
 131         memset(fl, 0, sizeof(struct flowi));
 132         if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
 133                 switch (iph->protocol) {
 134                 case IPPROTO_UDP:
 135                 case IPPROTO_UDPLITE:
 136                 case IPPROTO_TCP:
 137                 case IPPROTO_SCTP:
 138                 case IPPROTO_DCCP:
 139                         if (xprth + 4 < skb->data ||
 140                             pskb_may_pull(skb, xprth + 4 - skb->data)) {
 141                                 __be16 *ports = (__be16 *)xprth;
 142
 143                                 fl->fl_ip_sport = ports[!!reverse];
 144                                 fl->fl_ip_dport = ports[!reverse];
 145                         }
 146                         break;
 147
 148                 case IPPROTO_ICMP:
 149                         if (pskb_may_pull(skb, xprth + 2 - skb->data)) {
 150                                 u8 *icmp = xprth;
 151
 152                                 fl->fl_icmp_type = icmp[0];
 153                                 fl->fl_icmp_code = icmp[1];
 154                         }
 155                         break;
 156
 157                 case IPPROTO_ESP:
 158                         if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
 159                                 __be32 *ehdr = (__be32 *)xprth;
 160
 161                                 fl->fl_ipsec_spi = ehdr[0];
 162                         }
 163                         break;
 164
 165                 case IPPROTO_AH:
 166                         if (pskb_may_pull(skb, xprth + 8 - skb->data)) {
 167                                 __be32 *ah_hdr = (__be32*)xprth;
 168
 169                                 fl->fl_ipsec_spi = ah_hdr[1];
 170                         }
 171                         break;
 172
 173                 case IPPROTO_COMP:
 174                         if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
 175                                 __be16 *ipcomp_hdr = (__be16 *)xprth;
 176
 177                                 fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
 178                         }
 179                         break;
 180                 default:
 181                         fl->fl_ipsec_spi = 0;
 182                         break;
 183                 }
 184         }
 185         fl->proto = iph->protocol;
 186         fl->fl4_dst = reverse ? iph->saddr : iph->daddr;
 187         fl->fl4_src = reverse ? iph->daddr : iph->saddr;
 188         fl->fl4_tos = iph->tos;
 189 }
 190
 191 static inline int xfrm4_garbage_collect(struct dst_ops *ops)
 192 {
 193         xfrm4_policy_afinfo.garbage_collect(&init_net);
 194         return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2);
 195 }
 196
 197 static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
 198 {
 199         struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 200         struct dst_entry *path = xdst->route;
 201
 202         path->ops->update_pmtu(path, mtu);
 203 }
 204
 205 static void xfrm4_dst_destroy(struct dst_entry *dst)
 206 {
 207         struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 208
 209         if (likely(xdst->u.rt.idev))
 210                 in_dev_put(xdst->u.rt.idev);
 211         if (likely(xdst->u.rt.peer))
 212                 inet_putpeer(xdst->u.rt.peer);
 213         xfrm_dst_destroy(xdst);
 214 }
 215
 216 static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 217                              int unregister)
 218 {
 219         struct xfrm_dst *xdst;
 220
 221         if (!unregister)
 222                 return;
 223
 224         xdst = (struct xfrm_dst *)dst;
 225         if (xdst->u.rt.idev->dev == dev) {
 226                 struct in_device *loopback_idev =
 227                         in_dev_get(dev_net(dev)->loopback_dev);
 228                 BUG_ON(!loopback_idev);
 229
 230                 do {
 231                         in_dev_put(xdst->u.rt.idev);
 232                         xdst->u.rt.idev = loopback_idev;
 233                         in_dev_hold(loopback_idev);
 234                         xdst = (struct xfrm_dst *)xdst->u.dst.child;
 235                 } while (xdst->u.dst.xfrm);
 236
 237                 __in_dev_put(loopback_idev);
 238         }
 239
 240         xfrm_dst_ifdown(dst, dev);
 241 }
 242
 243 static struct dst_ops xfrm4_dst_ops = {
 244         .family =               AF_INET,
 245         .protocol =             cpu_to_be16(ETH_P_IP),
 246         .gc =                   xfrm4_garbage_collect,
 247         .update_pmtu =          xfrm4_update_pmtu,
 248         .destroy =              xfrm4_dst_destroy,
 249         .ifdown =               xfrm4_dst_ifdown,
 250         .local_out =            __ip_local_out,
 251         .gc_thresh =            1024,
 252         .entries =              ATOMIC_INIT(0),
 253 };
 254
 255 static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 256         .family =               AF_INET,
 257         .dst_ops =              &xfrm4_dst_ops,
 258         .dst_lookup =           xfrm4_dst_lookup,
 259         .get_saddr =            xfrm4_get_saddr,
 260         .find_bundle =          __xfrm4_find_bundle,
 261         .decode_session =       _decode_session4,
 262         .get_tos =              xfrm4_get_tos,
 263         .init_path =            xfrm4_init_path,
 264         .fill_dst =             xfrm4_fill_dst,
 265 };
 266
 267 #ifdef CONFIG_SYSCTL
 268 static struct ctl_table xfrm4_policy_table[] = {
 269         {
 270                 .ctl_name       = CTL_UNNUMBERED,
 271                 .procname       = "xfrm4_gc_thresh",
 272                 .data           = &xfrm4_dst_ops.gc_thresh,
 273                 .maxlen         = sizeof(int),
 274                 .mode           = 0644,
 275                 .proc_handler   = proc_dointvec,
 276         },
 277         { }
 278 };
 279
 280 static struct ctl_table_header *sysctl_hdr;
 281 #endif
 282
 283 static void __init xfrm4_policy_init(void)
 284 {
 285         xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
 286 }
 287
 288 static void __exit xfrm4_policy_fini(void)
 289 {
 290 #ifdef CONFIG_SYSCTL
 291         if (sysctl_hdr)
 292                 unregister_net_sysctl_table(sysctl_hdr);
 293 #endif
 294         xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo);
 295 }
 296
 297 void __init xfrm4_init(int rt_max_size)
 298 {
 299         xfrm4_state_init();
 300         xfrm4_policy_init();
 301         /*
 302          * Select a default value for the gc_thresh based on the main route
 303          * table hash size.  It seems to me the worst case scenario is when
 304          * we have ipsec operating in transport mode, in which we create a
 305          * dst_entry per socket.  The xfrm gc algorithm starts trying to remove
 306          * entries at gc_thresh, and prevents new allocations as 2*gc_thresh
 307          * so lets set an initial xfrm gc_thresh value at the rt_max_size/2.
 308          * That will let us store an ipsec connection per route table entry,
 309          * and start cleaning when were 1/2 full
 310          */
 311         xfrm4_dst_ops.gc_thresh = rt_max_size/2;
 312 #ifdef CONFIG_SYSCTL
 313         sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path,
 314                                                 xfrm4_policy_table);
 315 #endif
 316 }
 317