/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/sock_diag.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/gfp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/flow_dissector.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
#include <net/dst.h>
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
/**
 *	sk_filter_trim_cap - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *	@cap: limit on how short the eBPF program may trim the packet
 *
 * Run the eBPF program and then cut skb->data to the correct size returned
 * by the program. If pkt_len is 0 we toss the packet. If skb->len is smaller
 * than pkt_len we keep the whole skb->data. This is the socket level
 * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 */
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
{
	int err;
	struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
		return -ENOMEM;
	}
	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
	if (err)
		return err;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		struct sock *save_sk = skb->sk;
		unsigned int pkt_len;

		skb->sk = sk;
		pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
		skb->sk = save_sk;
		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
	}
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(sk_filter_trim_cap);
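/* Note for readers: callers normally go through the sk_filter() wrapper
 * from include/linux/filter.h, which is simply
 * sk_filter_trim_cap(sk, skb, 1), i.e. no extra trim limit beyond the
 * length the program itself returns.
 */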
BPF_CALL_1(__skb_get_pay_offset, struct sk_buff *, skb)
{
	return skb_get_poff(skb);
}
BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}
BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = (struct nlattr *) &skb->data[a];
	if (nla->nla_len > skb->len - a)
		return 0;

	nla = nla_find_nested(nla, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}
BPF_CALL_0(__get_raw_cpu_id)
{
	return raw_smp_processor_id();
}

static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
	.func		= __get_raw_cpu_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};
static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
			      struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;

	switch (skb_field) {
	case SKF_AD_MARK:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);

		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
				      offsetof(struct sk_buff, mark));
		break;

	case SKF_AD_PKTTYPE:
		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
		*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
#endif
		break;

	case SKF_AD_QUEUE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);

		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, queue_mapping));
		break;

	case SKF_AD_VLAN_TAG:
	case SKF_AD_VLAN_TAG_PRESENT:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);

		/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, vlan_tci));
		if (skb_field == SKF_AD_VLAN_TAG) {
			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg,
						~VLAN_TAG_PRESENT);
		} else {
			/* dst_reg >>= 12 */
			*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12);
			/* dst_reg &= 1 */
			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
		}
		break;
	}

	return insn - insn_buf;
}
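/* Worked example (the vlan_tci value here is illustrative only): with
 * skb->vlan_tci = 0x3123, the sequence emitted for SKF_AD_VLAN_TAG
 * computes 0x3123 & ~VLAN_TAG_PRESENT = 0x2123, while the one for
 * SKF_AD_VLAN_TAG_PRESENT computes (0x3123 >> 12) & 1 = 1.
 */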
static bool convert_bpf_extensions(struct sock_filter *fp,
				   struct bpf_insn **insnp)
{
	struct bpf_insn *insn = *insnp;
	u32 cnt;

	switch (fp->k) {
	case SKF_AD_OFF + SKF_AD_PROTOCOL:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);

		/* A = *(u16 *) (CTX + offsetof(protocol)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, protocol));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PKTTYPE:
		cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_IFINDEX:
	case SKF_AD_OFF + SKF_AD_HATYPE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
				      BPF_REG_TMP, BPF_REG_CTX,
				      offsetof(struct sk_buff, dev));
		/* if (tmp != 0) goto pc + 1 */
		*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
		*insn++ = BPF_EXIT_INSN();
		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, ifindex));
		else
			*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, type));
		break;

	case SKF_AD_OFF + SKF_AD_MARK:
		cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_RXHASH:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);

		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, hash));
		break;

	case SKF_AD_OFF + SKF_AD_QUEUE:
		cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);

		/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, vlan_proto));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
	case SKF_AD_OFF + SKF_AD_NLATTR:
	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
	case SKF_AD_OFF + SKF_AD_CPU:
	case SKF_AD_OFF + SKF_AD_RANDOM:
		/* arg1 = CTX */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
		/* arg2 = A */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
		/* arg3 = X */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
		/* Emit call(arg1=CTX, arg2=A, arg3=X) */
		switch (fp->k) {
		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
			*insn = BPF_EMIT_CALL(__skb_get_pay_offset);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR:
			*insn = BPF_EMIT_CALL(__skb_get_nlattr);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
			*insn = BPF_EMIT_CALL(__skb_get_nlattr_nest);
			break;
		case SKF_AD_OFF + SKF_AD_CPU:
			*insn = BPF_EMIT_CALL(__get_raw_cpu_id);
			break;
		case SKF_AD_OFF + SKF_AD_RANDOM:
			*insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
			bpf_user_rnd_init_once();
			break;
		}
		break;

	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
		/* A ^= X */
		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
		break;

	default:
		/* This is just a dummy call to avoid letting the compiler
		 * evict __bpf_call_base() as an optimization. Placed here
		 * where no-one bothers.
		 */
		BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
		return false;
	}

	*insnp = insn;
	return true;
}
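/* Example of the call-based extensions above: a classic 'ld #cpu'
 * (BPF_LD | BPF_W | BPF_ABS with k = SKF_AD_OFF + SKF_AD_CPU) expands
 * to four eBPF insns:
 *
 *	BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX)
 *	BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A)
 *	BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X)
 *	BPF_EMIT_CALL(__get_raw_cpu_id)
 *
 * with the result landing in BPF_REG_0, which is mapped onto A.
 */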
/**
 *	bpf_convert_filter - convert filter program
 *	@prog: the user passed filter program
 *	@len: the length of the user passed filter program
 *	@new_prog: allocated 'struct bpf_prog' or NULL
 *	@new_len: pointer to store length of converted program
 *
 * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
 * style extended BPF (eBPF).
 * Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *   bpf_convert_filter(old_prog, old_len, NULL, &new_len)
 *
 * 2) Second invocation to remap in two passes: the first pass finds new
 *    jump offsets, the second pass does the actual remapping:
 *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len);
 */
static int bpf_convert_filter(struct sock_filter *prog, int len,
			      struct bpf_prog *new_prog, int *new_len)
{
	int new_flen = 0, pass = 0, target, i, stack_off;
	struct bpf_insn *new_insn, *first_insn = NULL;
	struct sock_filter *fp;
	int *addrs = NULL;
	u8 bpf_src;

	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);

	if (len <= 0 || len > BPF_MAXINSNS)
		return -EINVAL;

	if (new_prog) {
		first_insn = new_prog->insnsi;
		addrs = kcalloc(len, sizeof(*addrs),
				GFP_KERNEL | __GFP_NOWARN);
		if (!addrs)
			return -ENOMEM;
	}

do_pass:
	new_insn = first_insn;
	fp = prog;

	/* Classic BPF related prologue emission. */
	if (new_prog) {
		/* Classic BPF expects A and X to be reset first. These need
		 * to be guaranteed to be the first two instructions.
		 */
		*new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
		*new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);

		/* All programs must keep CTX in callee saved BPF_REG_CTX.
		 * In the eBPF case it's done by the compiler, here we need
		 * to do this ourselves. Initial CTX is present in
		 * BPF_REG_ARG1.
		 */
		*new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
	} else {
		new_insn += 3;
	}

	for (i = 0; i < len; fp++, i++) {
		struct bpf_insn tmp_insns[6] = { };
		struct bpf_insn *insn = tmp_insns;

		if (addrs)
			addrs[i] = new_insn - first_insn;

		switch (fp->code) {
		/* All arithmetic insns and skb loads map as-is. */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_ABS | BPF_W:
		case BPF_LD | BPF_ABS | BPF_H:
		case BPF_LD | BPF_ABS | BPF_B:
		case BPF_LD | BPF_IND | BPF_W:
		case BPF_LD | BPF_IND | BPF_H:
		case BPF_LD | BPF_IND | BPF_B:
			/* Check for overloaded BPF extension and
			 * directly convert it if found, otherwise
			 * just move on with mapping.
			 */
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    BPF_MODE(fp->code) == BPF_ABS &&
			    convert_bpf_extensions(fp, &insn))
				break;

			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
			break;

		/* Jump transformation cannot use BPF block macros
		 * everywhere as offset calculation and target updates
		 * require a bit more work than the rest, i.e. jump
		 * opcodes map as-is, but offsets need adjustment.
		 */

#define BPF_EMIT_JMP							\
	do {								\
		if (target >= len || target < 0)			\
			goto err;					\
		insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0;	\
		/* Adjust pc relative offset for 2nd or 3rd insn. */	\
		insn->off -= insn - tmp_insns;				\
	} while (0)
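		/* Offset math example (the recorded addresses here are
		 * hypothetical): if classic insn i = 2 jumps to classic
		 * target = 5, and the sizing pass recorded addrs[2] = 4 and
		 * addrs[5] = 9 in the new image, then
		 * insn->off = 9 - 4 - 1 = 4; if the jump is the second insn
		 * emitted into tmp_insns (insn - tmp_insns == 1), the offset
		 * is further reduced to 3.
		 */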
		case BPF_JMP | BPF_JA:
			target = i + fp->k + 1;
			insn->code = fp->code;
			BPF_EMIT_JMP;
			break;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
				/* BPF immediates are signed, zero extend
				 * immediate into tmp register and use it
				 * in compare insn.
				 */
				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);

				insn->dst_reg = BPF_REG_A;
				insn->src_reg = BPF_REG_TMP;
				bpf_src = BPF_X;
			} else {
				insn->dst_reg = BPF_REG_A;
				insn->imm = fp->k;
				bpf_src = BPF_SRC(fp->code);
				insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
			}

			/* Common case where 'jump_false' is next insn. */
			if (fp->jf == 0) {
				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
				target = i + fp->jt + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Convert JEQ into JNE when 'jump_true' is next insn. */
			if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
				insn->code = BPF_JMP | BPF_JNE | bpf_src;
				target = i + fp->jf + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Other jumps are mapped into two insns: Jxx and JA. */
			target = i + fp->jt + 1;
			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
			BPF_EMIT_JMP;
			insn++;

			insn->code = BPF_JMP | BPF_JA;
			target = i + fp->jf + 1;
			BPF_EMIT_JMP;
			break;

		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
		case BPF_LDX | BPF_MSH | BPF_B:
			/* tmp = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
			*insn++ = BPF_LD_ABS(BPF_B, fp->k);
			/* A &= 0xf */
			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
			/* A <<= 2 */
			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = tmp */
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
			break;

		/* RET_K is remapped into 2 insns. RET_A case doesn't need an
		 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
		 */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
			if (BPF_RVAL(fp->code) == BPF_K)
				*insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
							0, fp->k);
			*insn = BPF_EXIT_INSN();
			break;

		/* Store to stack. */
		case BPF_ST:
		case BPF_STX:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
					    BPF_ST ? BPF_REG_A : BPF_REG_X,
					    -stack_off);
			/* check_load_and_stores() verifies that classic BPF can
			 * load from stack only after write, so tracking
			 * stack_depth for ST|STX insns is enough
			 */
			if (new_prog && new_prog->aux->stack_depth < stack_off)
				new_prog->aux->stack_depth = stack_off;
			break;

		/* Load from stack. */
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
					    -stack_off);
			break;

		/* A = K or X = K */
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
					      BPF_REG_A : BPF_REG_X, fp->k);
			break;

		/* X = A */
		case BPF_MISC | BPF_TAX:
			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			break;

		/* A = X */
		case BPF_MISC | BPF_TXA:
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
			break;

		/* A = skb->len or X = skb->len */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LDX | BPF_W | BPF_LEN:
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
					    offsetof(struct sk_buff, len));
			break;

		/* Access seccomp_data fields. */
		case BPF_LDX | BPF_ABS | BPF_W:
			/* A = *(u32 *) (ctx + K) */
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
			break;

		/* Unknown instruction. */
		default:
			goto err;
		}

		insn++;
		if (new_prog)
			memcpy(new_insn, tmp_insns,
			       sizeof(*insn) * (insn - tmp_insns));
		new_insn += insn - tmp_insns;
	}

	if (!new_prog) {
		/* Only calculating new length. */
		*new_len = new_insn - first_insn;
		return 0;
	}

	pass++;
	if (new_flen != new_insn - first_insn) {
		new_flen = new_insn - first_insn;
		if (pass > 2)
			goto err;
		goto do_pass;
	}

	kfree(addrs);
	BUG_ON(*new_len != new_flen);
	return 0;
err:
	kfree(addrs);
	return -EINVAL;
}
/* Security:
 *
 * As we don't want to clear the mem[] array for each packet going through
 * __bpf_prog_run(), we check that a filter loaded by a user never tries to
 * read a cell that was not previously written, and we check all branches
 * to be sure a malicious user doesn't try to abuse us.
 */
static int check_load_and_stores(const struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);

	masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;

	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_ST:
		case BPF_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_JMP | BPF_JA:
			/* A jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* A jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}
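/* Example of what this rejects: the two-insn program
 * { BPF_STMT(BPF_LD | BPF_MEM, 2), BPF_STMT(BPF_RET | BPF_A, 0) } reads
 * mem[2] before any store set bit 2 in memvalid, so it fails with
 * -EINVAL. Because jumps AND memvalid into the masks of both of their
 * targets, a cell only stays valid if it was written on every path
 * that can reach the load.
 */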
static bool chk_code_allowed(u16 code_to_probe)
{
	static const bool codes[] = {
		/* 32 bit ALU operations */
		[BPF_ALU | BPF_ADD | BPF_K] = true,
		[BPF_ALU | BPF_ADD | BPF_X] = true,
		[BPF_ALU | BPF_SUB | BPF_K] = true,
		[BPF_ALU | BPF_SUB | BPF_X] = true,
		[BPF_ALU | BPF_MUL | BPF_K] = true,
		[BPF_ALU | BPF_MUL | BPF_X] = true,
		[BPF_ALU | BPF_DIV | BPF_K] = true,
		[BPF_ALU | BPF_DIV | BPF_X] = true,
		[BPF_ALU | BPF_MOD | BPF_K] = true,
		[BPF_ALU | BPF_MOD | BPF_X] = true,
		[BPF_ALU | BPF_AND | BPF_K] = true,
		[BPF_ALU | BPF_AND | BPF_X] = true,
		[BPF_ALU | BPF_OR | BPF_K] = true,
		[BPF_ALU | BPF_OR | BPF_X] = true,
		[BPF_ALU | BPF_XOR | BPF_K] = true,
		[BPF_ALU | BPF_XOR | BPF_X] = true,
		[BPF_ALU | BPF_LSH | BPF_K] = true,
		[BPF_ALU | BPF_LSH | BPF_X] = true,
		[BPF_ALU | BPF_RSH | BPF_K] = true,
		[BPF_ALU | BPF_RSH | BPF_X] = true,
		[BPF_ALU | BPF_NEG] = true,
		/* Load instructions */
		[BPF_LD | BPF_W | BPF_ABS] = true,
		[BPF_LD | BPF_H | BPF_ABS] = true,
		[BPF_LD | BPF_B | BPF_ABS] = true,
		[BPF_LD | BPF_W | BPF_LEN] = true,
		[BPF_LD | BPF_W | BPF_IND] = true,
		[BPF_LD | BPF_H | BPF_IND] = true,
		[BPF_LD | BPF_B | BPF_IND] = true,
		[BPF_LD | BPF_IMM] = true,
		[BPF_LD | BPF_MEM] = true,
		[BPF_LDX | BPF_W | BPF_LEN] = true,
		[BPF_LDX | BPF_B | BPF_MSH] = true,
		[BPF_LDX | BPF_IMM] = true,
		[BPF_LDX | BPF_MEM] = true,
		/* Store instructions */
		[BPF_ST] = true,
		[BPF_STX] = true,
		/* Misc instructions */
		[BPF_MISC | BPF_TAX] = true,
		[BPF_MISC | BPF_TXA] = true,
		/* Return instructions */
		[BPF_RET | BPF_K] = true,
		[BPF_RET | BPF_A] = true,
		/* Jump instructions */
		[BPF_JMP | BPF_JA] = true,
		[BPF_JMP | BPF_JEQ | BPF_K] = true,
		[BPF_JMP | BPF_JEQ | BPF_X] = true,
		[BPF_JMP | BPF_JGE | BPF_K] = true,
		[BPF_JMP | BPF_JGE | BPF_X] = true,
		[BPF_JMP | BPF_JGT | BPF_K] = true,
		[BPF_JMP | BPF_JGT | BPF_X] = true,
		[BPF_JMP | BPF_JSET | BPF_K] = true,
		[BPF_JMP | BPF_JSET | BPF_X] = true,
	};

	if (code_to_probe >= ARRAY_SIZE(codes))
		return false;

	return codes[code_to_probe];
}
static bool bpf_check_basics_ok(const struct sock_filter *filter,
				unsigned int flen)
{
	if (filter == NULL)
		return false;
	if (flen == 0 || flen > BPF_MAXINSNS)
		return false;

	return true;
}
/**
 *	bpf_check_classic - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through, kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
static int bpf_check_classic(const struct sock_filter *filter,
			     unsigned int flen)
{
	bool anc_found;
	int pc;

	/* Check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		const struct sock_filter *ftest = &filter[pc];

		/* May we actually operate on this code? */
		if (!chk_code_allowed(ftest->code))
			return -EINVAL;

		/* Some instructions need special checks */
		switch (ftest->code) {
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_K:
			/* Check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			break;
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_K:
			if (ftest->k >= 32)
				return -EINVAL;
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
		case BPF_ST:
		case BPF_STX:
			/* Check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JA:
			/* Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned int)(flen - pc - 1))
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* Both conditionals must be safe */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;
		case BPF_LD | BPF_W | BPF_ABS:
		case BPF_LD | BPF_H | BPF_ABS:
		case BPF_LD | BPF_B | BPF_ABS:
			anc_found = false;
			if (bpf_anc_helper(ftest) & BPF_ANC)
				anc_found = true;
			/* Ancillary operation unknown or unsupported */
			if (anc_found == false && ftest->k >= SKF_AD_OFF)
				return -EINVAL;
		}
	}

	/* Last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_RET | BPF_K:
	case BPF_RET | BPF_A:
		return check_load_and_stores(filter, flen);
	}

	return -EINVAL;
}
static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
				      const struct sock_fprog *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct sock_fprog_kern *fkprog;

	fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
	if (!fp->orig_prog)
		return -ENOMEM;

	fkprog = fp->orig_prog;
	fkprog->len = fprog->len;

	fkprog->filter = kmemdup(fp->insns, fsize,
				 GFP_KERNEL | __GFP_NOWARN);
	if (!fkprog->filter) {
		kfree(fp->orig_prog);
		return -ENOMEM;
	}

	return 0;
}
static void bpf_release_orig_filter(struct bpf_prog *fp)
{
	struct sock_fprog_kern *fprog = fp->orig_prog;

	if (fprog) {
		kfree(fprog->filter);
		kfree(fprog);
	}
}
static void __bpf_prog_release(struct bpf_prog *prog)
{
	if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
		bpf_prog_put(prog);
	} else {
		bpf_release_orig_filter(prog);
		bpf_prog_free(prog);
	}
}

static void __sk_filter_release(struct sk_filter *fp)
{
	__bpf_prog_release(fp->prog);
	kfree(fp);
}
/**
 *	sk_filter_release_rcu - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 */
static void sk_filter_release_rcu(struct rcu_head *rcu)
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

	__sk_filter_release(fp);
}

/**
 *	sk_filter_release - release a socket filter
 *	@fp: filter to remove
 *
 *	Remove a filter from a socket and release its resources.
 */
static void sk_filter_release(struct sk_filter *fp)
{
	if (refcount_dec_and_test(&fp->refcnt))
		call_rcu(&fp->rcu, sk_filter_release_rcu);
}
void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	atomic_sub(filter_size, &sk->sk_omem_alloc);
	sk_filter_release(fp);
}

/* try to charge the socket memory if there is space available
 * return true on success
 */
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	/* same check as in sock_kmalloc() */
	if (filter_size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
		atomic_add(filter_size, &sk->sk_omem_alloc);
		return true;
	}
	return false;
}

bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	bool ret = __sk_filter_charge(sk, fp);

	if (ret)
		refcount_inc(&fp->refcnt);
	return ret;
}
static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
{
	struct sock_filter *old_prog;
	struct bpf_prog *old_fp;
	int err, new_len, old_len = fp->len;

	/* We are free to overwrite insns et al right here as it
	 * won't be used at this point in time anymore internally
	 * after the migration to the internal BPF instruction
	 * representation.
	 */
	BUILD_BUG_ON(sizeof(struct sock_filter) !=
		     sizeof(struct bpf_insn));

	/* Conversion cannot happen on overlapping memory areas,
	 * so we need to keep the user BPF around until the 2nd
	 * pass. At this time, the user BPF is stored in fp->insns.
	 */
	old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
			   GFP_KERNEL | __GFP_NOWARN);
	if (!old_prog) {
		err = -ENOMEM;
		goto out_err;
	}

	/* 1st pass: calculate the new program length. */
	err = bpf_convert_filter(old_prog, old_len, NULL, &new_len);
	if (err)
		goto out_err_free;

	/* Expand fp for appending the new filter representation. */
	old_fp = fp;
	fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
	if (!fp) {
		/* The old_fp is still around in case we couldn't
		 * allocate new memory, so uncharge on that one.
		 */
		fp = old_fp;
		err = -ENOMEM;
		goto out_err_free;
	}

	fp->len = new_len;

	/* 2nd pass: remap sock_filter insns into bpf_insn insns. */
	err = bpf_convert_filter(old_prog, old_len, fp, &new_len);
	if (err)
		/* 2nd bpf_convert_filter() can fail only if it fails
		 * to allocate memory, remapping must succeed. Note,
		 * that at this time old_fp has already been released
		 * by krealloc().
		 */
		goto out_err_free;

	/* We are guaranteed to never error here with cBPF to eBPF
	 * transitions, since there's no issue with type compatibility
	 * checks on program arrays.
	 */
	fp = bpf_prog_select_runtime(fp, &err);

	kfree(old_prog);
	return fp;

out_err_free:
	kfree(old_prog);
out_err:
	__bpf_prog_release(fp);
	return ERR_PTR(err);
}
static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
					   bpf_aux_classic_check_t trans)
{
	int err;

	fp->bpf_func = NULL;
	fp->jited = 0;

	err = bpf_check_classic(fp->insns, fp->len);
	if (err) {
		__bpf_prog_release(fp);
		return ERR_PTR(err);
	}

	/* There might be additional checks and transformations
	 * needed on classic filters, f.e. in case of seccomp.
	 */
	if (trans) {
		err = trans(fp->insns, fp->len);
		if (err) {
			__bpf_prog_release(fp);
			return ERR_PTR(err);
		}
	}

	/* Probe if we can JIT compile the filter and if so, do
	 * the compilation of the filter.
	 */
	bpf_jit_compile(fp);

	/* JIT compiler couldn't process this filter, so do the
	 * internal BPF translation for the optimized interpreter.
	 */
	if (!fp->jited)
		fp = bpf_migrate_filter(fp);

	return fp;
}
/**
 *	bpf_prog_create - create an unattached filter
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *fp;

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return -EINVAL;

	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!fp)
		return -ENOMEM;

	memcpy(fp->insns, fprog->filter, fsize);

	fp->len = fprog->len;
	/* Since unattached filters are not copied back to user
	 * space through sk_get_filter(), we do not need to hold
	 * a copy here, and can spare us the work.
	 */
	fp->orig_prog = NULL;

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	fp = bpf_prepare_filter(fp, NULL);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(bpf_prog_create);
/**
 *	bpf_prog_create_from_user - create an unattached filter from user buffer
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *	@trans: post-classic verifier transformation handler
 *	@save_orig: save classic BPF program
 *
 * This function effectively does the same as bpf_prog_create(), only
 * that it builds up its insns buffer from user space provided buffer.
 * It also allows for passing a bpf_aux_classic_check_t handler.
 */
int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
			      bpf_aux_classic_check_t trans, bool save_orig)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *fp;
	int err;

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return -EINVAL;

	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!fp)
		return -ENOMEM;

	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
		__bpf_prog_free(fp);
		return -EFAULT;
	}

	fp->len = fprog->len;
	fp->orig_prog = NULL;

	if (save_orig) {
		err = bpf_prog_store_orig_filter(fp, fprog);
		if (err) {
			__bpf_prog_free(fp);
			return -ENOMEM;
		}
	}

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	fp = bpf_prepare_filter(fp, trans);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);

void bpf_prog_destroy(struct bpf_prog *fp)
{
	__bpf_prog_release(fp);
}
EXPORT_SYMBOL_GPL(bpf_prog_destroy);
static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
{
	struct sk_filter *fp, *old_fp;

	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;

	fp->prog = prog;

	if (!__sk_filter_charge(sk, fp)) {
		kfree(fp);
		return -ENOMEM;
	}
	refcount_set(&fp->refcnt, 1);

	old_fp = rcu_dereference_protected(sk->sk_filter,
					   lockdep_sock_is_held(sk));
	rcu_assign_pointer(sk->sk_filter, fp);

	if (old_fp)
		sk_filter_uncharge(sk, old_fp);

	return 0;
}
static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
{
	struct bpf_prog *old_prog;
	int err;

	if (bpf_prog_size(prog->len) > sysctl_optmem_max)
		return -ENOMEM;

	if (sk_unhashed(sk) && sk->sk_reuseport) {
		err = reuseport_alloc(sk);
		if (err)
			return err;
	} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
		/* The socket wasn't bound with SO_REUSEPORT */
		return -EINVAL;
	}

	old_prog = reuseport_attach_prog(sk, prog);
	if (old_prog)
		bpf_prog_destroy(old_prog);

	return 0;
}
static
struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *prog;
	int err;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return ERR_PTR(-EPERM);

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return ERR_PTR(-EINVAL);

	prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!prog)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(prog->insns, fprog->filter, fsize)) {
		__bpf_prog_free(prog);
		return ERR_PTR(-EFAULT);
	}

	prog->len = fprog->len;

	err = bpf_prog_store_orig_filter(prog, fprog);
	if (err) {
		__bpf_prog_free(prog);
		return ERR_PTR(-ENOMEM);
	}

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	return bpf_prepare_filter(prog, NULL);
}
/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct bpf_prog *prog = __get_filter(fprog, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __sk_attach_prog(prog, sk);
	if (err < 0) {
		__bpf_prog_release(prog);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct bpf_prog *prog = __get_filter(fprog, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __reuseport_attach_prog(prog, sk);
	if (err < 0) {
		__bpf_prog_release(prog);
		return err;
	}

	return 0;
}
static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
{
	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return ERR_PTR(-EPERM);

	return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
}

int sk_attach_bpf(u32 ufd, struct sock *sk)
{
	struct bpf_prog *prog = __get_bpf(ufd, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __sk_attach_prog(prog, sk);
	if (err < 0) {
		bpf_prog_put(prog);
		return err;
	}

	return 0;
}

int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{
	struct bpf_prog *prog = __get_bpf(ufd, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __reuseport_attach_prog(prog, sk);
	if (err < 0) {
		bpf_prog_put(prog);
		return err;
	}

	return 0;
}
struct bpf_scratchpad {
	union {
		__be32 diff[MAX_BPF_STACK / sizeof(__be32)];
		u8     buff[MAX_BPF_STACK];
	};
};

static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);

static inline int __bpf_try_make_writable(struct sk_buff *skb,
					  unsigned int write_len)
{
	return skb_ensure_writable(skb, write_len);
}

static inline int bpf_try_make_writable(struct sk_buff *skb,
					unsigned int write_len)
{
	int err = __bpf_try_make_writable(skb, write_len);

	bpf_compute_data_end(skb);
	return err;
}

static int bpf_try_make_head_writable(struct sk_buff *skb)
{
	return bpf_try_make_writable(skb, skb_headlen(skb));
}
static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
{
	if (skb_at_tc_ingress(skb))
		skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
}

static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
{
	if (skb_at_tc_ingress(skb))
		skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
}
BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
	   const void *, from, u32, len, u64, flags)
{
	void *ptr;

	if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
		return -EINVAL;
	if (unlikely(offset > 0xffff))
		return -EFAULT;
	if (unlikely(bpf_try_make_writable(skb, offset + len)))
		return -EFAULT;

	ptr = skb->data + offset;
	if (flags & BPF_F_RECOMPUTE_CSUM)
		__skb_postpull_rcsum(skb, ptr, len, offset);

	memcpy(ptr, from, len);

	if (flags & BPF_F_RECOMPUTE_CSUM)
		__skb_postpush_rcsum(skb, ptr, len, offset);
	if (flags & BPF_F_INVALIDATE_HASH)
		skb_clear_hash(skb);

	return 0;
}

static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
	.func		= bpf_skb_store_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_MEM,
	.arg4_type	= ARG_CONST_SIZE,
	.arg5_type	= ARG_ANYTHING,
};
BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
	   void *, to, u32, len)
{
	void *ptr;

	if (unlikely(offset > 0xffff))
		goto err_clear;

	ptr = skb_header_pointer(skb, offset, len, to);
	if (unlikely(!ptr))
		goto err_clear;
	if (ptr != to)
		memcpy(to, ptr, len);

	return 0;
err_clear:
	memset(to, 0, len);
	return -EFAULT;
}

static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
	.func		= bpf_skb_load_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};
BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
{
	/* Idea is the following: should the needed direct read/write
	 * test fail during runtime, we can pull in more data and redo
	 * again, since implicitly, we invalidate previous checks here.
	 *
	 * Or, since we know how much we need to make read/writeable,
	 * this can be done once at the program beginning for direct
	 * access case. By this we overcome limitations of only current
	 * headroom being accessible.
	 */
	return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
}

static const struct bpf_func_proto bpf_skb_pull_data_proto = {
	.func		= bpf_skb_pull_data,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};
BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
	   u64, from, u64, to, u64, flags)
{
	__sum16 *ptr;

	if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
		return -EINVAL;
	if (unlikely(offset > 0xffff || offset & 1))
		return -EFAULT;
	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
		return -EFAULT;

	ptr = (__sum16 *)(skb->data + offset);
	switch (flags & BPF_F_HDR_FIELD_MASK) {
	case 0:
		if (unlikely(from != 0))
			return -EINVAL;

		csum_replace_by_diff(ptr, to);
		break;
	case 2:
		csum_replace2(ptr, from, to);
		break;
	case 4:
		csum_replace4(ptr, from, to);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
	.func		= bpf_l3_csum_replace,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_ANYTHING,
	.arg5_type	= ARG_ANYTHING,
};
BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
	   u64, from, u64, to, u64, flags)
{
	bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
	bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
	bool do_mforce = flags & BPF_F_MARK_ENFORCE;
	__sum16 *ptr;

	if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
			       BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
		return -EINVAL;
	if (unlikely(offset > 0xffff || offset & 1))
		return -EFAULT;
	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
		return -EFAULT;

	ptr = (__sum16 *)(skb->data + offset);
	if (is_mmzero && !do_mforce && !*ptr)
		return 0;

	switch (flags & BPF_F_HDR_FIELD_MASK) {
	case 0:
		if (unlikely(from != 0))
			return -EINVAL;

		inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
		break;
	case 2:
		inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
		break;
	case 4:
		inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
		break;
	default:
		return -EINVAL;
	}

	if (is_mmzero && !*ptr)
		*ptr = CSUM_MANGLED_0;
	return 0;
}

static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
	.func		= bpf_l4_csum_replace,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_ANYTHING,
	.arg5_type	= ARG_ANYTHING,
};
BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
	   __be32 *, to, u32, to_size, __wsum, seed)
{
	struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
	u32 diff_size = from_size + to_size;
	int i, j = 0;

	/* This is quite flexible, some examples:
	 *
	 * from_size == 0, to_size > 0,  seed := csum --> pushing data
	 * from_size > 0,  to_size == 0, seed := csum --> pulling data
	 * from_size > 0,  to_size > 0,  seed := 0    --> diffing data
	 *
	 * Even for diffing, from_size and to_size don't need to be equal.
	 */
	if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
		     diff_size > sizeof(sp->diff)))
		return -EINVAL;

	for (i = 0; i < from_size / sizeof(__be32); i++, j++)
		sp->diff[j] = ~from[i];
	for (i = 0; i < to_size / sizeof(__be32); i++, j++)
		sp->diff[j] = to[i];

	return csum_partial(sp->diff, diff_size, seed);
}

static const struct bpf_func_proto bpf_csum_diff_proto = {
	.func		= bpf_csum_diff,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_PTR_TO_MEM,
	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg5_type	= ARG_ANYTHING,
};
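/* Usage sketch (illustrative only; the IP_DST_OFF/TCP_CSUM_OFF offsets
 * are hypothetical program-side constants): rewriting an IPv4 address
 * from a program and fixing up the TCP checksum could look roughly like:
 *
 *	__wsum diff = bpf_csum_diff(&old_ip, 4, &new_ip, 4, 0);
 *	bpf_skb_store_bytes(skb, IP_DST_OFF, &new_ip, 4, 0);
 *	bpf_l4_csum_replace(skb, TCP_CSUM_OFF, 0, diff, BPF_F_PSEUDO_HDR);
 *
 * Passing from == 0 with a zero field-size selects the
 * inet_proto_csum_replace_by_diff() path in bpf_l4_csum_replace().
 */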
BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
{
	/* The interface is to be used in combination with bpf_csum_diff()
	 * for direct packet writes. csum rotation for alignment as well
	 * as emulating csum_sub() can be done from the eBPF program.
	 */
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		return (skb->csum = csum_add(skb->csum, csum));

	return -ENOTSUPP;
}

static const struct bpf_func_proto bpf_csum_update_proto = {
	.func		= bpf_csum_update,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};
static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
{
	return dev_forward_skb(dev, skb);
}

static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
				      struct sk_buff *skb)
{
	int ret = ____dev_forward_skb(dev, skb);

	if (likely(!ret)) {
		skb->dev = dev;
		ret = netif_rx(skb);
	}

	return ret;
}

static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
{
	int ret;

	if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) {
		net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
		kfree_skb(skb);
		return -ENETDOWN;
	}

	skb->dev = dev;

	__this_cpu_inc(xmit_recursion);
	ret = dev_queue_xmit(skb);
	__this_cpu_dec(xmit_recursion);

	return ret;
}
static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
				 u32 flags)
{
	/* skb->mac_len is not set on normal egress */
	unsigned int mlen = skb->network_header - skb->mac_header;

	__skb_pull(skb, mlen);

	/* At ingress, the mac header has already been pulled once.
	 * At egress, skb_postpull_rcsum() has to be done in case the
	 * skb originates from ingress (i.e. a forwarded skb) to
	 * ensure that rcsum starts at the net header.
	 */
	if (!skb_at_tc_ingress(skb))
		skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
	skb_pop_mac_header(skb);
	skb_reset_mac_len(skb);
	return flags & BPF_F_INGRESS ?
	       __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
}

static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
				 u32 flags)
{
	/* Verify that a link layer header is carried */
	if (unlikely(skb->mac_header >= skb->network_header)) {
		kfree_skb(skb);
		return -ERANGE;
	}

	bpf_push_mac_rcsum(skb);
	return flags & BPF_F_INGRESS ?
	       __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
}

static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
			  u32 flags)
{
	if (dev_is_mac_header_xmit(dev))
		return __bpf_redirect_common(skb, dev, flags);
	else
		return __bpf_redirect_no_mac(skb, dev, flags);
}
BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
{
	struct net_device *dev;
	struct sk_buff *clone;
	int ret;

	if (unlikely(flags & ~(BPF_F_INGRESS)))
		return -EINVAL;

	dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
	if (unlikely(!dev))
		return -EINVAL;

	clone = skb_clone(skb, GFP_ATOMIC);
	if (unlikely(!clone))
		return -ENOMEM;

	/* For direct write, we need to keep the invariant that the skbs
	 * we're dealing with need to be uncloned. Should uncloning fail
	 * here, we need to free the just generated clone to unclone once
	 * again.
	 */
	ret = bpf_try_make_head_writable(skb);
	if (unlikely(ret)) {
		kfree_skb(clone);
		return -ENOMEM;
	}

	return __bpf_redirect(clone, dev, flags);
}

static const struct bpf_func_proto bpf_clone_redirect_proto = {
	.func		= bpf_clone_redirect,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
};
struct redirect_info {
	u32 ifindex;
	u32 flags;
};

static DEFINE_PER_CPU(struct redirect_info, redirect_info);

BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
{
	struct redirect_info *ri = this_cpu_ptr(&redirect_info);

	if (unlikely(flags & ~(BPF_F_INGRESS)))
		return TC_ACT_SHOT;

	ri->ifindex = ifindex;
	ri->flags = flags;

	return TC_ACT_REDIRECT;
}

int skb_do_redirect(struct sk_buff *skb)
{
	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
	struct net_device *dev;

	dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
	ri->ifindex = 0;
	if (unlikely(!dev)) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return __bpf_redirect(skb, dev, ri->flags);
}

static const struct bpf_func_proto bpf_redirect_proto = {
	.func		= bpf_redirect,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_ANYTHING,
};
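/* Typical flow: a cls_bpf program calls bpf_redirect(ifindex, flags)
 * and returns the resulting TC_ACT_REDIRECT as its verdict; the tc
 * action layer then invokes skb_do_redirect(), which consumes the
 * per-cpu redirect_info state filled in above.
 */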
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
	return task_get_classid(skb);
}

static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
	.func		= bpf_get_cgroup_classid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
{
	return dst_tclassid(skb);
}

static const struct bpf_func_proto bpf_get_route_realm_proto = {
	.func		= bpf_get_route_realm,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
{
	/* If skb_clear_hash() was called due to mangling, we can
	 * trigger SW recalculation here. Later access to hash
	 * can then use the inline skb->hash via context directly
	 * instead of calling this helper again.
	 */
	return skb_get_hash(skb);
}

static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
	.func		= bpf_get_hash_recalc,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
{
	/* After all direct packet write, this can be used once for
	 * triggering a lazy recalc on next skb_get_hash() invocation.
	 */
	skb_clear_hash(skb);
	return 0;
}

static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
	.func		= bpf_set_hash_invalid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash)
{
	/* Set user specified hash as L4(+), so that it gets returned
	 * on skb_get_hash() call unless BPF prog later on triggers a
	 * skb_clear_hash().
	 */
	__skb_set_sw_hash(skb, hash, true);
	return 0;
}

static const struct bpf_func_proto bpf_set_hash_proto = {
	.func		= bpf_set_hash,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};
BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
	   u16, vlan_tci)
{
	int ret;

	if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
		     vlan_proto != htons(ETH_P_8021AD)))
		vlan_proto = htons(ETH_P_8021Q);

	bpf_push_mac_rcsum(skb);
	ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
	bpf_pull_mac_rcsum(skb);

	bpf_compute_data_end(skb);
	return ret;
}

const struct bpf_func_proto bpf_skb_vlan_push_proto = {
	.func           = bpf_skb_vlan_push,
	.gpl_only       = false,
	.ret_type       = RET_INTEGER,
	.arg1_type      = ARG_PTR_TO_CTX,
	.arg2_type      = ARG_ANYTHING,
	.arg3_type      = ARG_ANYTHING,
};
EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto);

BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
{
	int ret;

	bpf_push_mac_rcsum(skb);
	ret = skb_vlan_pop(skb);
	bpf_pull_mac_rcsum(skb);

	bpf_compute_data_end(skb);
	return ret;
}

const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
	.func           = bpf_skb_vlan_pop,
	.gpl_only       = false,
	.ret_type       = RET_INTEGER,
	.arg1_type      = ARG_PTR_TO_CTX,
};
EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto);
static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
{
	/* Caller already did skb_cow() with len as headroom,
	 * so no need to do it here.
	 */
	skb_push(skb, len);
	memmove(skb->data, skb->data + len, off);
	memset(skb->data + off, 0, len);

	/* No skb_postpush_rcsum(skb, skb->data + off, len)
	 * needed here as it does not change the skb->csum
	 * result for checksum complete when summing over
	 * zeroed blocks.
	 */
	return 0;
}

static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
{
	/* skb_ensure_writable() is not needed here, as we're
	 * already working on an uncloned skb.
	 */
	if (unlikely(!pskb_may_pull(skb, off + len)))
		return -ENOMEM;

	skb_postpull_rcsum(skb, skb->data + off, len);
	memmove(skb->data + len, skb->data, off);
	__skb_pull(skb, len);

	return 0;
}
static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
{
	bool trans_same = skb->transport_header == skb->network_header;
	int ret;

	/* There's no need for __skb_push()/__skb_pull() pair to
	 * get to the start of the mac header as we're guaranteed
	 * to always start from here under eBPF.
	 */
	ret = bpf_skb_generic_push(skb, off, len);
	if (likely(!ret)) {
		skb->mac_header -= len;
		skb->network_header -= len;
		if (trans_same)
			skb->transport_header = skb->network_header;
	}

	return ret;
}

static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
{
	bool trans_same = skb->transport_header == skb->network_header;
	int ret;

	/* Same here, __skb_push()/__skb_pull() pair not needed. */
	ret = bpf_skb_generic_pop(skb, off, len);
	if (likely(!ret)) {
		skb->mac_header += len;
		skb->network_header += len;
		if (trans_same)
			skb->transport_header = skb->network_header;
	}

	return ret;
}
static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
{
	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
	u32 off = skb->network_header - skb->mac_header;
	int ret;

	ret = skb_cow(skb, len_diff);
	if (unlikely(ret < 0))
		return ret;

	ret = bpf_skb_net_hdr_push(skb, off, len_diff);
	if (unlikely(ret < 0))
		return ret;

	if (skb_is_gso(skb)) {
		/* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to
		 * be changed into SKB_GSO_TCPV6.
		 */
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
			skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
			skb_shinfo(skb)->gso_type |=  SKB_GSO_TCPV6;
		}

		/* Due to IPv6 header, MSS needs to be downgraded. */
		skb_shinfo(skb)->gso_size -= len_diff;
		/* Header must be checked, and gso_segs recomputed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;
	}

	skb->protocol = htons(ETH_P_IPV6);
	skb_clear_hash(skb);

	return 0;
}

static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
{
	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
	u32 off = skb->network_header - skb->mac_header;
	int ret;

	ret = skb_unclone(skb, GFP_ATOMIC);
	if (unlikely(ret < 0))
		return ret;

	ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
	if (unlikely(ret < 0))
		return ret;

	if (skb_is_gso(skb)) {
		/* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to
		 * be changed into SKB_GSO_TCPV4.
		 */
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
			skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
			skb_shinfo(skb)->gso_type |=  SKB_GSO_TCPV4;
		}

		/* Due to IPv4 header, MSS can be upgraded. */
		skb_shinfo(skb)->gso_size += len_diff;
		/* Header must be checked, and gso_segs recomputed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;
	}

	skb->protocol = htons(ETH_P_IP);
	skb_clear_hash(skb);

	return 0;
}
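/* The GSO arithmetic above is driven by len_diff =
 * sizeof(struct ipv6hdr) - sizeof(struct iphdr) = 40 - 20 = 20 bytes:
 * e.g. a TCPv4 GSO skb with gso_size 1460 ends up with gso_size 1440
 * after the 4-to-6 translation, and vice versa for 6-to-4.
 */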
static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
{
	__be16 from_proto = skb->protocol;

	if (from_proto == htons(ETH_P_IP) &&
	      to_proto == htons(ETH_P_IPV6))
		return bpf_skb_proto_4_to_6(skb);

	if (from_proto == htons(ETH_P_IPV6) &&
	      to_proto == htons(ETH_P_IP))
		return bpf_skb_proto_6_to_4(skb);

	return -ENOTSUPP;
}
BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
	   u64, flags)
{
	int ret;

	if (unlikely(flags))
		return -EINVAL;

	/* General idea is that this helper does the basic groundwork
	 * needed for changing the protocol, and eBPF program fills the
	 * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace()
	 * and other helpers, rather than passing a raw buffer here.
	 *
	 * The rationale is to keep this minimal and without a need to
	 * deal with raw packet data. F.e. even if we would pass buffers
	 * here, the program still needs to call the bpf_lX_csum_replace()
	 * helpers anyway. Plus, this way we keep also separation of
	 * concerns, since f.e. bpf_skb_store_bytes() should only take
	 * care of writing.
	 *
	 * Currently, additional options and extension header space are
	 * not supported, but flags register is reserved so we can adapt
	 * that. For offloads, we mark packet as dodgy, so that headers
	 * need to be verified first.
	 */
	ret = bpf_skb_proto_xlat(skb, proto);
	bpf_compute_data_end(skb);
	return ret;
}

static const struct bpf_func_proto bpf_skb_change_proto_proto = {
	.func		= bpf_skb_change_proto,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
};
BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
{
	/* We only allow a restricted subset to be changed for now. */
	if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
		     !skb_pkt_type_ok(pkt_type)))
		return -EINVAL;

	skb->pkt_type = pkt_type;
	return 0;
}

static const struct bpf_func_proto bpf_skb_change_type_proto = {
	.func		= bpf_skb_change_type,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};
static u32 __bpf_skb_min_len(const struct sk_buff *skb)
{
	u32 min_len = skb_network_offset(skb);

	if (skb_transport_header_was_set(skb))
		min_len = skb_transport_offset(skb);
	if (skb->ip_summed == CHECKSUM_PARTIAL)
		min_len = skb_checksum_start_offset(skb) +
			  skb->csum_offset + sizeof(__sum16);
	return min_len;
}

static u32 __bpf_skb_max_len(const struct sk_buff *skb)
{
	return skb->dev->mtu + skb->dev->hard_header_len;
}

static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
{
	unsigned int old_len = skb->len;
	int ret;

	ret = __skb_grow_rcsum(skb, new_len);
	if (!ret)
		memset(skb->data + old_len, 0, new_len - old_len);
	return ret;
}

static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
{
	return __skb_trim_rcsum(skb, new_len);
}
BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
	   u64, flags)
{
	u32 max_len = __bpf_skb_max_len(skb);
	u32 min_len = __bpf_skb_min_len(skb);
	int ret;

	if (unlikely(flags || new_len > max_len || new_len < min_len))
		return -EINVAL;
	if (skb->encapsulation)
		return -ENOTSUPP;

	/* The basic idea of this helper is that it's performing the
	 * needed work to either grow or trim an skb, and eBPF program
	 * rewrites the rest via helpers like bpf_skb_store_bytes(),
	 * bpf_lX_csum_replace() and others rather than passing a raw
	 * buffer here. This one is a slow path helper and intended
	 * for replies with control messages.
	 *
	 * Like in bpf_skb_change_proto(), we want to keep this rather
	 * minimal and without protocol specifics so that we are able
	 * to separate concerns as in bpf_skb_store_bytes() should only
	 * be the one responsible for writing buffers.
	 *
	 * It's really expected to be a slow path operation here for
	 * control message replies, so we're implicitly linearizing,
	 * uncloning and drop offloads from the skb by this.
	 */
	ret = __bpf_try_make_writable(skb, skb->len);
	if (!ret) {
		if (new_len > skb->len)
			ret = bpf_skb_grow_rcsum(skb, new_len);
		else if (new_len < skb->len)
			ret = bpf_skb_trim_rcsum(skb, new_len);
		if (!ret && skb_is_gso(skb))
			skb_gso_reset(skb);
	}

	bpf_compute_data_end(skb);
	return ret;
}

static const struct bpf_func_proto bpf_skb_change_tail_proto = {
	.func		= bpf_skb_change_tail,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
};
BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
	   u64, flags)
{
	u32 max_len = __bpf_skb_max_len(skb);
	u32 new_len = skb->len + head_room;
	int ret;

	if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) ||
		     new_len < skb->len))
		return -EINVAL;

	ret = skb_cow(skb, head_room);
	if (likely(!ret)) {
		/* Idea for this helper is that we currently only
		 * allow to expand on mac header. This means that
		 * skb->protocol network header, etc, stay as is.
		 * Compared to bpf_skb_change_tail(), we're more
		 * flexible due to not needing to linearize or
		 * reset GSO. Intention for this helper is to be
		 * used by an L3 skb that needs to push mac header
		 * for redirection into L2 device.
		 */
		__skb_push(skb, head_room);
		memset(skb->data, 0, head_room);
		skb_reset_mac_header(skb);
	}

	bpf_compute_data_end(skb);
	return 0;
}

static const struct bpf_func_proto bpf_skb_change_head_proto = {
	.func		= bpf_skb_change_head,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
{
	void *data = xdp->data + offset;

	if (unlikely(data < xdp->data_hard_start ||
		     data > xdp->data_end - ETH_HLEN))
		return -EINVAL;

	xdp->data = data;

	return 0;
}

static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
	.func		= bpf_xdp_adjust_head,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};

bool bpf_helper_changes_pkt_data(void *func)
{
	if (func == bpf_skb_vlan_push ||
	    func == bpf_skb_vlan_pop ||
	    func == bpf_skb_store_bytes ||
	    func == bpf_skb_change_proto ||
	    func == bpf_skb_change_head ||
	    func == bpf_skb_change_tail ||
	    func == bpf_skb_pull_data ||
	    func == bpf_clone_redirect ||
	    func == bpf_l3_csum_replace ||
	    func == bpf_l4_csum_replace ||
	    func == bpf_xdp_adjust_head)
		return true;

	return false;
}

static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
				  unsigned long off, unsigned long len)
{
	void *ptr = skb_header_pointer(skb, off, len, dst_buff);

	if (unlikely(!ptr))
		return len;
	if (ptr != dst_buff)
		memcpy(dst_buff, ptr, len);

	return 0;
}

BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
	   u64, flags, void *, meta, u64, meta_size)
{
	u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;

	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
		return -EINVAL;
	if (unlikely(skb_size > skb->len))
		return -EFAULT;

	return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
				bpf_skb_copy);
}

static const struct bpf_func_proto bpf_skb_event_output_proto = {
	.func		= bpf_skb_event_output,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE,
};

static unsigned short bpf_tunnel_key_af(u64 flags)
{
	return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
}

BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to,
	   u32, size, u64, flags)
{
	const struct ip_tunnel_info *info = skb_tunnel_info(skb);
	u8 compat[sizeof(struct bpf_tunnel_key)];
	void *to_orig = to;
	int err;

	if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) {
		err = -EINVAL;
		goto err_clear;
	}
	if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) {
		err = -EPROTO;
		goto err_clear;
	}
	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
		err = -EINVAL;
		switch (size) {
		case offsetof(struct bpf_tunnel_key, tunnel_label):
		case offsetof(struct bpf_tunnel_key, tunnel_ext):
			goto set_compat;
		case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
			/* Fixup deprecated structure layouts here, so we have
			 * a common path later on.
			 */
			if (ip_tunnel_info_af(info) != AF_INET)
				goto err_clear;
set_compat:
			to = (struct bpf_tunnel_key *)compat;
			break;
		default:
			goto err_clear;
		}
	}

	to->tunnel_id = be64_to_cpu(info->key.tun_id);
	to->tunnel_tos = info->key.tos;
	to->tunnel_ttl = info->key.ttl;

	if (flags & BPF_F_TUNINFO_IPV6) {
		memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
		       sizeof(to->remote_ipv6));
		to->tunnel_label = be32_to_cpu(info->key.label);
	} else {
		to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
	}

	if (unlikely(size != sizeof(struct bpf_tunnel_key)))
		memcpy(to_orig, to, size);

	return 0;
err_clear:
	memset(to_orig, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
	.func		= bpf_skb_get_tunnel_key,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
{
	const struct ip_tunnel_info *info = skb_tunnel_info(skb);
	int err;

	if (unlikely(!info ||
		     !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
		err = -ENOENT;
		goto err_clear;
	}
	if (unlikely(size < info->options_len)) {
		err = -ENOMEM;
		goto err_clear;
	}

	ip_tunnel_info_opts_get(to, info);
	if (size > info->options_len)
		memset(to + info->options_len, 0, size - info->options_len);

	return info->options_len;
err_clear:
	memset(to, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
	.func		= bpf_skb_get_tunnel_opt,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};

static struct metadata_dst __percpu *md_dst;

BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
	   const struct bpf_tunnel_key *, from, u32, size, u64, flags)
{
	struct metadata_dst *md = this_cpu_ptr(md_dst);
	u8 compat[sizeof(struct bpf_tunnel_key)];
	struct ip_tunnel_info *info;

	if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
			       BPF_F_DONT_FRAGMENT)))
		return -EINVAL;
	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
		switch (size) {
		case offsetof(struct bpf_tunnel_key, tunnel_label):
		case offsetof(struct bpf_tunnel_key, tunnel_ext):
		case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
2280 BPF_CALL_2(bpf_xdp_adjust_head
, struct xdp_buff
*, xdp
, int, offset
)
2282 void *data
= xdp
->data
+ offset
;
2284 if (unlikely(data
< xdp
->data_hard_start
||
2285 data
> xdp
->data_end
- ETH_HLEN
))
2293 static const struct bpf_func_proto bpf_xdp_adjust_head_proto
= {
2294 .func
= bpf_xdp_adjust_head
,
2296 .ret_type
= RET_INTEGER
,
2297 .arg1_type
= ARG_PTR_TO_CTX
,
2298 .arg2_type
= ARG_ANYTHING
,
2301 bool bpf_helper_changes_pkt_data(void *func
)
2303 if (func
== bpf_skb_vlan_push
||
2304 func
== bpf_skb_vlan_pop
||
2305 func
== bpf_skb_store_bytes
||
2306 func
== bpf_skb_change_proto
||
2307 func
== bpf_skb_change_head
||
2308 func
== bpf_skb_change_tail
||
2309 func
== bpf_skb_pull_data
||
2310 func
== bpf_clone_redirect
||
2311 func
== bpf_l3_csum_replace
||
2312 func
== bpf_l4_csum_replace
||
2313 func
== bpf_xdp_adjust_head
)
2319 static unsigned long bpf_skb_copy(void *dst_buff
, const void *skb
,
2320 unsigned long off
, unsigned long len
)
2322 void *ptr
= skb_header_pointer(skb
, off
, len
, dst_buff
);
2326 if (ptr
!= dst_buff
)
2327 memcpy(dst_buff
, ptr
, len
);

BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
	   u64, flags, void *, meta, u64, meta_size)
{
	u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;

	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
		return -EINVAL;
	if (unlikely(skb_size > skb->len))
		return -EFAULT;

	return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
				bpf_skb_copy);
}

static const struct bpf_func_proto bpf_skb_event_output_proto = {
	.func		= bpf_skb_event_output,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE,
};
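
/* Usage sketch (illustrative): a tc program sampling the first
 * sample_len bytes of the packet together with custom metadata md into
 * a BPF_MAP_TYPE_PERF_EVENT_ARRAY map encodes the requested packet
 * length into the upper 32 bits of the flags argument:
 *
 *	u64 flags = BPF_F_CURRENT_CPU | ((u64)sample_len << 32);
 *
 *	bpf_perf_event_output(skb, &perf_map, flags, &md, sizeof(md));
 *
 * Here perf_map, md and sample_len are assumed to be defined by the
 * program.
 */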

static unsigned short bpf_tunnel_key_af(u64 flags)
{
	return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
}

BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to,
	   u32, size, u64, flags)
{
	const struct ip_tunnel_info *info = skb_tunnel_info(skb);
	u8 compat[sizeof(struct bpf_tunnel_key)];
	void *to_orig = to;
	int err;

	if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) {
		err = -EINVAL;
		goto err_clear;
	}
	if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) {
		err = -EPROTO;
		goto err_clear;
	}
	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
		err = -EINVAL;
		switch (size) {
		case offsetof(struct bpf_tunnel_key, tunnel_label):
		case offsetof(struct bpf_tunnel_key, tunnel_ext):
			goto set_compat;
		case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
			/* Fixup deprecated structure layouts here, so we have
			 * a common path later on.
			 */
			if (ip_tunnel_info_af(info) != AF_INET)
				goto err_clear;
set_compat:
			to = (struct bpf_tunnel_key *)compat;
			break;
		default:
			goto err_clear;
		}
	}

	to->tunnel_id = be64_to_cpu(info->key.tun_id);
	to->tunnel_tos = info->key.tos;
	to->tunnel_ttl = info->key.ttl;

	if (flags & BPF_F_TUNINFO_IPV6) {
		memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
		       sizeof(to->remote_ipv6));
		to->tunnel_label = be32_to_cpu(info->key.label);
	} else {
		to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
	}

	if (unlikely(size != sizeof(struct bpf_tunnel_key)))
		memcpy(to_orig, to, size);

	return 0;
err_clear:
	memset(to_orig, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
	.func		= bpf_skb_get_tunnel_key,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
	.arg4_type	= ARG_ANYTHING,
};
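
/* Usage sketch (illustrative): an ingress tc program attached to a
 * collect_md tunnel device reading the outer encapsulation:
 *
 *	struct bpf_tunnel_key key = {};
 *
 *	if (bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0) < 0)
 *		return TC_ACT_SHOT;
 *
 * key.tunnel_id and key.remote_ipv4 are then available in host byte
 * order, as converted above.
 */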

BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
{
	const struct ip_tunnel_info *info = skb_tunnel_info(skb);
	int err;

	if (unlikely(!info ||
		     !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
		err = -ENOENT;
		goto err_clear;
	}
	if (unlikely(size < info->options_len)) {
		err = -ENOMEM;
		goto err_clear;
	}

	ip_tunnel_info_opts_get(to, info);
	if (size > info->options_len)
		memset(to + info->options_len, 0, size - info->options_len);

	return info->options_len;
err_clear:
	memset(to, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
	.func		= bpf_skb_get_tunnel_opt,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};

static struct metadata_dst __percpu *md_dst;

BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
	   const struct bpf_tunnel_key *, from, u32, size, u64, flags)
{
	struct metadata_dst *md = this_cpu_ptr(md_dst);
	u8 compat[sizeof(struct bpf_tunnel_key)];
	struct ip_tunnel_info *info;

	if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
			       BPF_F_DONT_FRAGMENT)))
		return -EINVAL;
	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
		switch (size) {
		case offsetof(struct bpf_tunnel_key, tunnel_label):
		case offsetof(struct bpf_tunnel_key, tunnel_ext):
		case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
			/* Fixup deprecated structure layouts here, so we have
			 * a common path later on.
			 */
			memcpy(compat, from, size);
			memset(compat + size, 0, sizeof(compat) - size);
			from = (const struct bpf_tunnel_key *) compat;
			break;
		default:
			return -EINVAL;
		}
	}
	if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) ||
		     from->tunnel_ext))
		return -EINVAL;

	skb_dst_drop(skb);
	dst_hold((struct dst_entry *) md);
	skb_dst_set(skb, (struct dst_entry *) md);

	info = &md->u.tun_info;
	info->mode = IP_TUNNEL_INFO_TX;

	info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
	if (flags & BPF_F_DONT_FRAGMENT)
		info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;

	info->key.tun_id = cpu_to_be64(from->tunnel_id);
	info->key.tos = from->tunnel_tos;
	info->key.ttl = from->tunnel_ttl;

	if (flags & BPF_F_TUNINFO_IPV6) {
		info->mode |= IP_TUNNEL_INFO_IPV6;
		memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
		       sizeof(from->remote_ipv6));
		info->key.label = cpu_to_be32(from->tunnel_label) &
				  IPV6_FLOWLABEL_MASK;
	} else {
		info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
		if (flags & BPF_F_ZERO_CSUM_TX)
			info->key.tun_flags &= ~TUNNEL_CSUM;
	}

	return 0;
}

static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
	.func		= bpf_skb_set_tunnel_key,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE,
	.arg4_type	= ARG_ANYTHING,
};
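
/* Usage sketch (illustrative): the egress counterpart, populating the
 * metadata dst before the packet reaches a collect_md device; the id
 * and address values are made up for the example:
 *
 *	struct bpf_tunnel_key key = {};
 *
 *	key.tunnel_id = 42;
 *	key.remote_ipv4 = 0xac100164;
 *	if (bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 *				   BPF_F_ZERO_CSUM_TX) < 0)
 *		return TC_ACT_SHOT;
 */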

BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
	   const u8 *, from, u32, size)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	const struct metadata_dst *md = this_cpu_ptr(md_dst);

	if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
		return -EINVAL;
	if (unlikely(size > IP_TUNNEL_OPTS_MAX))
		return -ENOMEM;

	ip_tunnel_info_opts_set(info, from, size);

	return 0;
}

static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
	.func		= bpf_skb_set_tunnel_opt,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};

static const struct bpf_func_proto *
bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
{
	if (!md_dst) {
		/* Race is not possible, since it's called from verifier
		 * that is holding verifier mutex.
		 */
		md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
						   GFP_KERNEL);
		if (!md_dst)
			return NULL;
	}

	switch (which) {
	case BPF_FUNC_skb_set_tunnel_key:
		return &bpf_skb_set_tunnel_key_proto;
	case BPF_FUNC_skb_set_tunnel_opt:
		return &bpf_skb_set_tunnel_opt_proto;
	default:
		return NULL;
	}
}

BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map,
	   u32, idx)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct cgroup *cgrp;
	struct sock *sk;

	sk = skb_to_full_sk(skb);
	if (!sk || !sk_fullsock(sk))
		return -ENOENT;
	if (unlikely(idx >= array->map.max_entries))
		return -E2BIG;

	cgrp = READ_ONCE(array->ptrs[idx]);
	if (unlikely(!cgrp))
		return -EAGAIN;

	return sk_under_cgroup_hierarchy(sk, cgrp);
}

static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
	.func		= bpf_skb_under_cgroup,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};
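
/* Usage sketch (illustrative): with a BPF_MAP_TYPE_CGROUP_ARRAY map
 * named cgroup_map whose slot 0 user space populated with a cgroup fd:
 *
 *	if (bpf_skb_under_cgroup(skb, &cgroup_map, 0) == 1)
 *		return TC_ACT_OK;
 *
 * A return of 1 means the skb's socket belongs to the cgroup hierarchy,
 * 0 that it does not, and a negative value signals an error as above.
 */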

static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
				  unsigned long off, unsigned long len)
{
	memcpy(dst_buff, src_buff + off, len);
	return 0;
}

BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
	   u64, flags, void *, meta, u64, meta_size)
{
	u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32;

	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
		return -EINVAL;
	if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
		return -EFAULT;

	return bpf_event_output(map, flags, meta, meta_size, xdp->data,
				xdp_size, bpf_xdp_copy);
}

static const struct bpf_func_proto bpf_xdp_event_output_proto = {
	.func		= bpf_xdp_event_output,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE,
};

BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
{
	return skb->sk ? sock_gen_cookie(skb->sk) : 0;
}

static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
	.func		= bpf_get_socket_cookie,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
{
	struct sock *sk = sk_to_full_sk(skb->sk);
	kuid_t kuid;

	if (!sk || !sk_fullsock(sk))
		return overflowuid;
	kuid = sock_net_uid(sock_net(sk), sk);
	return from_kuid_munged(sock_net(sk)->user_ns, kuid);
}

static const struct bpf_func_proto bpf_get_socket_uid_proto = {
	.func		= bpf_get_socket_uid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
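
/* Usage sketch (illustrative): a cgroup/skb program doing per-socket,
 * per-user accounting from these two helpers:
 *
 *	u64 cookie = bpf_get_socket_cookie(skb);
 *	u32 uid = bpf_get_socket_uid(skb);
 *
 * The cookie is stable for the socket's lifetime, which makes it a
 * convenient map key; the uid degrades to overflowuid when no full
 * socket is available.
 */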

static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_get_prandom_u32:
		return &bpf_get_prandom_u32_proto;
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_raw_smp_processor_id_proto;
	case BPF_FUNC_get_numa_node_id:
		return &bpf_get_numa_node_id_proto;
	case BPF_FUNC_tail_call:
		return &bpf_tail_call_proto;
	case BPF_FUNC_ktime_get_ns:
		return &bpf_ktime_get_ns_proto;
	case BPF_FUNC_trace_printk:
		if (capable(CAP_SYS_ADMIN))
			return bpf_get_trace_printk_proto();
	default:
		return NULL;
	}
}

static const struct bpf_func_proto *
sk_filter_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_skb_load_bytes:
		return &bpf_skb_load_bytes_proto;
	case BPF_FUNC_get_socket_cookie:
		return &bpf_get_socket_cookie_proto;
	case BPF_FUNC_get_socket_uid:
		return &bpf_get_socket_uid_proto;
	default:
		return bpf_base_func_proto(func_id);
	}
}

static const struct bpf_func_proto *
tc_cls_act_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_skb_store_bytes:
		return &bpf_skb_store_bytes_proto;
	case BPF_FUNC_skb_load_bytes:
		return &bpf_skb_load_bytes_proto;
	case BPF_FUNC_skb_pull_data:
		return &bpf_skb_pull_data_proto;
	case BPF_FUNC_csum_diff:
		return &bpf_csum_diff_proto;
	case BPF_FUNC_csum_update:
		return &bpf_csum_update_proto;
	case BPF_FUNC_l3_csum_replace:
		return &bpf_l3_csum_replace_proto;
	case BPF_FUNC_l4_csum_replace:
		return &bpf_l4_csum_replace_proto;
	case BPF_FUNC_clone_redirect:
		return &bpf_clone_redirect_proto;
	case BPF_FUNC_get_cgroup_classid:
		return &bpf_get_cgroup_classid_proto;
	case BPF_FUNC_skb_vlan_push:
		return &bpf_skb_vlan_push_proto;
	case BPF_FUNC_skb_vlan_pop:
		return &bpf_skb_vlan_pop_proto;
	case BPF_FUNC_skb_change_proto:
		return &bpf_skb_change_proto_proto;
	case BPF_FUNC_skb_change_type:
		return &bpf_skb_change_type_proto;
	case BPF_FUNC_skb_change_tail:
		return &bpf_skb_change_tail_proto;
	case BPF_FUNC_skb_get_tunnel_key:
		return &bpf_skb_get_tunnel_key_proto;
	case BPF_FUNC_skb_set_tunnel_key:
		return bpf_get_skb_set_tunnel_proto(func_id);
	case BPF_FUNC_skb_get_tunnel_opt:
		return &bpf_skb_get_tunnel_opt_proto;
	case BPF_FUNC_skb_set_tunnel_opt:
		return bpf_get_skb_set_tunnel_proto(func_id);
	case BPF_FUNC_redirect:
		return &bpf_redirect_proto;
	case BPF_FUNC_get_route_realm:
		return &bpf_get_route_realm_proto;
	case BPF_FUNC_get_hash_recalc:
		return &bpf_get_hash_recalc_proto;
	case BPF_FUNC_set_hash_invalid:
		return &bpf_set_hash_invalid_proto;
	case BPF_FUNC_set_hash:
		return &bpf_set_hash_proto;
	case BPF_FUNC_perf_event_output:
		return &bpf_skb_event_output_proto;
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_smp_processor_id_proto;
	case BPF_FUNC_skb_under_cgroup:
		return &bpf_skb_under_cgroup_proto;
	case BPF_FUNC_get_socket_cookie:
		return &bpf_get_socket_cookie_proto;
	case BPF_FUNC_get_socket_uid:
		return &bpf_get_socket_uid_proto;
	default:
		return bpf_base_func_proto(func_id);
	}
}

static const struct bpf_func_proto *
xdp_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_xdp_event_output_proto;
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_smp_processor_id_proto;
	case BPF_FUNC_xdp_adjust_head:
		return &bpf_xdp_adjust_head_proto;
	default:
		return bpf_base_func_proto(func_id);
	}
}

static const struct bpf_func_proto *
lwt_inout_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_skb_load_bytes:
		return &bpf_skb_load_bytes_proto;
	case BPF_FUNC_skb_pull_data:
		return &bpf_skb_pull_data_proto;
	case BPF_FUNC_csum_diff:
		return &bpf_csum_diff_proto;
	case BPF_FUNC_get_cgroup_classid:
		return &bpf_get_cgroup_classid_proto;
	case BPF_FUNC_get_route_realm:
		return &bpf_get_route_realm_proto;
	case BPF_FUNC_get_hash_recalc:
		return &bpf_get_hash_recalc_proto;
	case BPF_FUNC_perf_event_output:
		return &bpf_skb_event_output_proto;
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_smp_processor_id_proto;
	case BPF_FUNC_skb_under_cgroup:
		return &bpf_skb_under_cgroup_proto;
	default:
		return bpf_base_func_proto(func_id);
	}
}

static const struct bpf_func_proto *
lwt_xmit_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_skb_get_tunnel_key:
		return &bpf_skb_get_tunnel_key_proto;
	case BPF_FUNC_skb_set_tunnel_key:
		return bpf_get_skb_set_tunnel_proto(func_id);
	case BPF_FUNC_skb_get_tunnel_opt:
		return &bpf_skb_get_tunnel_opt_proto;
	case BPF_FUNC_skb_set_tunnel_opt:
		return bpf_get_skb_set_tunnel_proto(func_id);
	case BPF_FUNC_redirect:
		return &bpf_redirect_proto;
	case BPF_FUNC_clone_redirect:
		return &bpf_clone_redirect_proto;
	case BPF_FUNC_skb_change_tail:
		return &bpf_skb_change_tail_proto;
	case BPF_FUNC_skb_change_head:
		return &bpf_skb_change_head_proto;
	case BPF_FUNC_skb_store_bytes:
		return &bpf_skb_store_bytes_proto;
	case BPF_FUNC_csum_update:
		return &bpf_csum_update_proto;
	case BPF_FUNC_l3_csum_replace:
		return &bpf_l3_csum_replace_proto;
	case BPF_FUNC_l4_csum_replace:
		return &bpf_l4_csum_replace_proto;
	case BPF_FUNC_set_hash_invalid:
		return &bpf_set_hash_invalid_proto;
	default:
		return lwt_inout_func_proto(func_id);
	}
}

static bool __is_valid_access(int off, int size, enum bpf_access_type type,
			      int *ctx_field_size)
{
	if (off < 0 || off >= sizeof(struct __sk_buff))
		return false;

	/* The verifier guarantees that size > 0. */
	if (off % size != 0)
		return false;

	switch (off) {
	case offsetof(struct __sk_buff, cb[0]) ...
	     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
		if (off + size >
		    offsetof(struct __sk_buff, cb[4]) + sizeof(__u32))
			return false;
		break;
	case offsetof(struct __sk_buff, data) ...
	     offsetof(struct __sk_buff, data) + sizeof(__u32) - 1:
	case offsetof(struct __sk_buff, data_end) ...
	     offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1:
		if (size != sizeof(__u32))
			return false;
		break;
	default:
		/* permit narrower load for not cb/data/data_end fields */
		*ctx_field_size = 4;
		if (type == BPF_WRITE) {
			if (size != sizeof(__u32))
				return false;
		} else {
			if (size != sizeof(__u32))
#ifdef __LITTLE_ENDIAN
				return (off & 0x3) == 0 &&
				       (size == 1 || size == 2);
#else
				return (off & 0x3) + size == 4 &&
				       (size == 1 || size == 2);
#endif
		}
	}

	return true;
}
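
/* Example of what the default case above permits (illustrative): for
 * fields other than cb/data/data_end, a narrower load such as a single
 * byte of the 4-byte mark field, e.g.
 *
 *	r0 = *(u8 *)(r1 + offsetof(struct __sk_buff, mark))
 *
 * is accepted (subject to the endianness rule), and the verifier later
 * rewrites it into a full-width load plus shift/mask with the help of
 * the reported ctx_field_size.
 */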

static bool sk_filter_is_valid_access(int off, int size,
				      enum bpf_access_type type,
				      enum bpf_reg_type *reg_type,
				      int *ctx_field_size)
{
	switch (off) {
	case offsetof(struct __sk_buff, tc_classid) ...
	     offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
	case offsetof(struct __sk_buff, data) ...
	     offsetof(struct __sk_buff, data) + sizeof(__u32) - 1:
	case offsetof(struct __sk_buff, data_end) ...
	     offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1:
		return false;
	}

	if (type == BPF_WRITE) {
		switch (off) {
		case offsetof(struct __sk_buff, cb[0]) ...
		     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
			break;
		default:
			return false;
		}
	}

	return __is_valid_access(off, size, type, ctx_field_size);
}

static bool lwt_is_valid_access(int off, int size,
				enum bpf_access_type type,
				enum bpf_reg_type *reg_type,
				int *ctx_field_size)
{
	switch (off) {
	case offsetof(struct __sk_buff, tc_classid) ...
	     offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
		return false;
	}

	if (type == BPF_WRITE) {
		switch (off) {
		case offsetof(struct __sk_buff, mark):
		case offsetof(struct __sk_buff, priority):
		case offsetof(struct __sk_buff, cb[0]) ...
		     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
			break;
		default:
			return false;
		}
	}

	switch (off) {
	case offsetof(struct __sk_buff, data):
		*reg_type = PTR_TO_PACKET;
		break;
	case offsetof(struct __sk_buff, data_end):
		*reg_type = PTR_TO_PACKET_END;
		break;
	}

	return __is_valid_access(off, size, type, ctx_field_size);
}

static bool sock_filter_is_valid_access(int off, int size,
					enum bpf_access_type type,
					enum bpf_reg_type *reg_type,
					int *ctx_field_size)
{
	if (type == BPF_WRITE) {
		switch (off) {
		case offsetof(struct bpf_sock, bound_dev_if):
			break;
		default:
			return false;
		}
	}

	if (off < 0 || off + size > sizeof(struct bpf_sock))
		return false;
	/* The verifier guarantees that size > 0. */
	if (off % size != 0)
		return false;
	if (size != sizeof(__u32))
		return false;

	return true;
}

static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
			       const struct bpf_prog *prog)
{
	struct bpf_insn *insn = insn_buf;

	if (!direct_write)
		return 0;

	/* if (!skb->cloned)
	 *       goto start;
	 *
	 * (Fast-path, otherwise approximation that we might be
	 *  a clone, do the rest in helper.)
	 */
	*insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET());
	*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7);

	/* ret = bpf_skb_pull_data(skb, 0); */
	*insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
	*insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2);
	*insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
			       BPF_FUNC_skb_pull_data);
	/* if (!ret)
	 *      goto restore;
	 * return TC_ACT_SHOT;
	 */
	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
	*insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, TC_ACT_SHOT);
	*insn++ = BPF_EXIT_INSN();

	/* restore: */
	*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
	/* start: */
	*insn++ = prog->insnsi[0];

	return insn - insn_buf;
}
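
/* The prologue above is only generated when the program performs
 * direct packet writes (direct_write). Sketch of a triggering program
 * fragment (illustrative):
 *
 *	struct ethhdr *eth = data;
 *
 *	if (data + sizeof(*eth) > data_end)
 *		return TC_ACT_SHOT;
 *	eth->h_proto = bpf_htons(ETH_P_IP);
 *
 * For a possibly cloned skb, the inserted instructions pull the data in
 * first, so the write cannot touch memory shared with clones.
 */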

static bool tc_cls_act_is_valid_access(int off, int size,
				       enum bpf_access_type type,
				       enum bpf_reg_type *reg_type,
				       int *ctx_field_size)
{
	if (type == BPF_WRITE) {
		switch (off) {
		case offsetof(struct __sk_buff, mark):
		case offsetof(struct __sk_buff, tc_index):
		case offsetof(struct __sk_buff, priority):
		case offsetof(struct __sk_buff, cb[0]) ...
		     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
		case offsetof(struct __sk_buff, tc_classid):
			break;
		default:
			return false;
		}
	}

	switch (off) {
	case offsetof(struct __sk_buff, data):
		*reg_type = PTR_TO_PACKET;
		break;
	case offsetof(struct __sk_buff, data_end):
		*reg_type = PTR_TO_PACKET_END;
		break;
	}

	return __is_valid_access(off, size, type, ctx_field_size);
}

static bool __is_valid_xdp_access(int off, int size)
{
	if (off < 0 || off >= sizeof(struct xdp_md))
		return false;
	if (off % size != 0)
		return false;
	if (size != sizeof(__u32))
		return false;

	return true;
}

static bool xdp_is_valid_access(int off, int size,
				enum bpf_access_type type,
				enum bpf_reg_type *reg_type,
				int *ctx_field_size)
{
	if (type == BPF_WRITE)
		return false;

	switch (off) {
	case offsetof(struct xdp_md, data):
		*reg_type = PTR_TO_PACKET;
		break;
	case offsetof(struct xdp_md, data_end):
		*reg_type = PTR_TO_PACKET_END;
		break;
	}

	return __is_valid_xdp_access(off, size);
}

void bpf_warn_invalid_xdp_action(u32 act)
{
	WARN_ONCE(1, "Illegal XDP return value %u, expect packet loss\n", act);
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);

static u32 bpf_convert_ctx_access(enum bpf_access_type type,
				  const struct bpf_insn *si,
				  struct bpf_insn *insn_buf,
				  struct bpf_prog *prog)
{
	struct bpf_insn *insn = insn_buf;
	int off;

	switch (si->off) {
	case offsetof(struct __sk_buff, len):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);

		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, len));
		break;

	case offsetof(struct __sk_buff, protocol):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);

		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, protocol));
		break;

	case offsetof(struct __sk_buff, vlan_proto):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);

		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, vlan_proto));
		break;

	case offsetof(struct __sk_buff, priority):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4);

		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      offsetof(struct sk_buff, priority));
		else
			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      offsetof(struct sk_buff, priority));
		break;

	case offsetof(struct __sk_buff, ingress_ifindex):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4);

		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, skb_iif));
		break;

	case offsetof(struct __sk_buff, ifindex):
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, dev));
		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct net_device, ifindex));
		break;

	case offsetof(struct __sk_buff, hash):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);

		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, hash));
		break;

	case offsetof(struct __sk_buff, mark):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);

		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      offsetof(struct sk_buff, mark));
		else
			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      offsetof(struct sk_buff, mark));
		break;

	case offsetof(struct __sk_buff, pkt_type):
		return convert_skb_access(SKF_AD_PKTTYPE, si->dst_reg,
					  si->src_reg, insn);

	case offsetof(struct __sk_buff, queue_mapping):
		return convert_skb_access(SKF_AD_QUEUE, si->dst_reg,
					  si->src_reg, insn);

	case offsetof(struct __sk_buff, vlan_present):
		return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
					  si->dst_reg, si->src_reg, insn);

	case offsetof(struct __sk_buff, vlan_tci):
		return convert_skb_access(SKF_AD_VLAN_TAG,
					  si->dst_reg, si->src_reg, insn);

	case offsetof(struct __sk_buff, cb[0]) ...
	     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
		BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
			      offsetof(struct qdisc_skb_cb, data)) %
			     sizeof(__u64));

		prog->cb_access = 1;
		off  = si->off;
		off -= offsetof(struct __sk_buff, cb[0]);
		off += offsetof(struct sk_buff, cb);
		off += offsetof(struct qdisc_skb_cb, data);
		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
					      si->src_reg, off);
		else
			*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
					      si->src_reg, off);
		break;

	case offsetof(struct __sk_buff, tc_classid):
		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, tc_classid) != 2);

		off  = si->off;
		off -= offsetof(struct __sk_buff, tc_classid);
		off += offsetof(struct sk_buff, cb);
		off += offsetof(struct qdisc_skb_cb, tc_classid);
		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
					      si->src_reg, off);
		else
			*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
					      si->src_reg, off);
		break;

	case offsetof(struct __sk_buff, data):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, data));
		break;

	case offsetof(struct __sk_buff, data_end):
		off  = si->off;
		off -= offsetof(struct __sk_buff, data_end);
		off += offsetof(struct sk_buff, cb);
		off += offsetof(struct bpf_skb_data_end, data_end);
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
				      si->src_reg, off);
		break;

	case offsetof(struct __sk_buff, tc_index):
#ifdef CONFIG_NET_SCHED
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);

		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
					      offsetof(struct sk_buff, tc_index));
		else
			*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
					      offsetof(struct sk_buff, tc_index));
#else
		if (type == BPF_WRITE)
			*insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
		else
			*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#endif
		break;

	case offsetof(struct __sk_buff, napi_id):
#if defined(CONFIG_NET_RX_BUSY_POLL)
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, napi_id) != 4);

		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, napi_id));
		*insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#else
		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#endif
		break;
	}

	return insn - insn_buf;
}
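
/* Example of the rewrite done above (illustrative): a program insn
 * loading __sk_buff.len, conceptually
 *
 *	r0 = *(u32 *)(r1 + offsetof(struct __sk_buff, len))
 *
 * is replaced at verification time by a load from the real offset of
 * len within struct sk_buff, so no __sk_buff ever exists at run time.
 */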

static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
					  const struct bpf_insn *si,
					  struct bpf_insn *insn_buf,
					  struct bpf_prog *prog)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_sock, bound_dev_if):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);

		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
					offsetof(struct sock, sk_bound_dev_if));
		else
			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
					offsetof(struct sock, sk_bound_dev_if));
		break;

	case offsetof(struct bpf_sock, family):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);

		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
				      offsetof(struct sock, sk_family));
		break;

	case offsetof(struct bpf_sock, type):
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      offsetof(struct sock, __sk_flags_offset));
		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
		break;

	case offsetof(struct bpf_sock, protocol):
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      offsetof(struct sock, __sk_flags_offset));
		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
		break;
	}

	return insn - insn_buf;
}

static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
					 const struct bpf_insn *si,
					 struct bpf_insn *insn_buf,
					 struct bpf_prog *prog)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct __sk_buff, ifindex):
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, dev));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct net_device, ifindex));
		break;
	default:
		return bpf_convert_ctx_access(type, si, insn_buf, prog);
	}

	return insn - insn_buf;
}

static u32 xdp_convert_ctx_access(enum bpf_access_type type,
				  const struct bpf_insn *si,
				  struct bpf_insn *insn_buf,
				  struct bpf_prog *prog)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct xdp_md, data):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
				      si->dst_reg, si->src_reg,
				      offsetof(struct xdp_buff, data));
		break;
	case offsetof(struct xdp_md, data_end):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
				      si->dst_reg, si->src_reg,
				      offsetof(struct xdp_buff, data_end));
		break;
	}

	return insn - insn_buf;
}

const struct bpf_verifier_ops sk_filter_prog_ops = {
	.get_func_proto		= sk_filter_func_proto,
	.is_valid_access	= sk_filter_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
};

const struct bpf_verifier_ops tc_cls_act_prog_ops = {
	.get_func_proto		= tc_cls_act_func_proto,
	.is_valid_access	= tc_cls_act_is_valid_access,
	.convert_ctx_access	= tc_cls_act_convert_ctx_access,
	.gen_prologue		= tc_cls_act_prologue,
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops xdp_prog_ops = {
	.get_func_proto		= xdp_func_proto,
	.is_valid_access	= xdp_is_valid_access,
	.convert_ctx_access	= xdp_convert_ctx_access,
	.test_run		= bpf_prog_test_run_xdp,
};

const struct bpf_verifier_ops cg_skb_prog_ops = {
	.get_func_proto		= sk_filter_func_proto,
	.is_valid_access	= sk_filter_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops lwt_inout_prog_ops = {
	.get_func_proto		= lwt_inout_func_proto,
	.is_valid_access	= lwt_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops lwt_xmit_prog_ops = {
	.get_func_proto		= lwt_xmit_func_proto,
	.is_valid_access	= lwt_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
	.gen_prologue		= tc_cls_act_prologue,
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops cg_sock_prog_ops = {
	.get_func_proto		= bpf_base_func_proto,
	.is_valid_access	= sock_filter_is_valid_access,
	.convert_ctx_access	= sock_filter_convert_ctx_access,
};

int sk_detach_filter(struct sock *sk)
{
	int ret = -ENOENT;
	struct sk_filter *filter;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	filter = rcu_dereference_protected(sk->sk_filter,
					   lockdep_sock_is_held(sk));
	if (filter) {
		RCU_INIT_POINTER(sk->sk_filter, NULL);
		sk_filter_uncharge(sk, filter);
		ret = 0;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);
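
/* Usage sketch (illustrative): user space detaches a classic filter
 * through the socket layer, which ends up in the function above:
 *
 *	int dummy = 0;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER, &dummy, sizeof(dummy));
 */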

int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
		  unsigned int len)
{
	struct sock_fprog_kern *fprog;
	struct sk_filter *filter;
	int ret = 0;

	lock_sock(sk);
	filter = rcu_dereference_protected(sk->sk_filter,
					   lockdep_sock_is_held(sk));
	if (!filter)
		goto out;

	/* We're copying the filter that has been originally attached,
	 * so no conversion/decode is needed anymore. eBPF programs that
	 * have no original program cannot be dumped through this.
	 */
	ret = -EACCES;
	fprog = filter->prog->orig_prog;
	if (!fprog)
		goto out;

	ret = fprog->len;
	if (!len)
		/* User space only enquires number of filter blocks. */
		goto out;

	ret = -EINVAL;
	if (len < fprog->len)
		goto out;

	ret = -EFAULT;
	if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
		goto out;

	/* Instead of bytes, the API requests to return the number
	 * of filter blocks.
	 */
	ret = fprog->len;
out:
	release_sock(sk);
	return ret;
}