2 * Functions to manage eBPF programs attached to cgroups
4 * Copyright (c) 2016 Daniel Mack
6 * This file is subject to the terms and conditions of version 2 of the GNU
7 * General Public License. See the file COPYING in the main directory of the
8 * Linux distribution for more details.
11 #include <linux/kernel.h>
12 #include <linux/atomic.h>
13 #include <linux/cgroup.h>
14 #include <linux/slab.h>
15 #include <linux/bpf.h>
16 #include <linux/bpf-cgroup.h>
/*
 * cgroup_bpf_enabled_key: static key defined with DEFINE_STATIC_KEY_FALSE and
 * made available to other code with EXPORT_SYMBOL. Within this file it is
 * decremented in cgroup_bpf_put() and both incremented and decremented in
 * __cgroup_bpf_update().
 */
19 DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key
);
20 EXPORT_SYMBOL(cgroup_bpf_enabled_key
);
23 * cgroup_bpf_put() - put references of all bpf programs
24 * @cgrp: the cgroup to modify
/*
 * Visible behaviour: iterate over every slot of cgrp->bpf.prog, load the
 * program pointer stored in that slot, and decrement cgroup_bpf_enabled_key.
 *
 * NOTE(review): the embedded original line numbers jump 26 -> 30 and 31 -> 35
 * and stop at 35, so the opening brace, the declaration of 'type', whatever
 * guards or precedes the static_branch_dec() call (including any release of
 * 'prog'), and the closing braces are absent from this listing. Restore them
 * from the upstream file before relying on this text.
 */
26 void cgroup_bpf_put(struct cgroup
*cgrp
)
/* One iteration per entry of the cgrp->bpf.prog array. */
30 for (type
= 0; type
< ARRAY_SIZE(cgrp
->bpf
.prog
); type
++) {
31 struct bpf_prog
*prog
= cgrp
->bpf
.prog
[type
];
/* NOTE(review): the condition under which this runs is not visible here. */
35 static_branch_dec(&cgroup_bpf_enabled_key
);
41 * cgroup_bpf_inherit() - inherit effective programs from parent
42 * @cgrp: the cgroup to modify
43 * @parent: the parent to inherit from
/*
 * Visible behaviour: for every index of cgrp->bpf.effective, read the
 * parent's effective program with rcu_dereference_protected() (the lockdep
 * expression names cgroup_mutex), publish that pointer into the child's
 * effective slot with rcu_assign_pointer(), and copy the parent's
 * disallow_override flag for the same index into the child.
 *
 * NOTE(review): original lines 46-48, 50-51 and everything after 55 are
 * absent from this listing, so the declarations of 'type' and 'e' and the
 * closing braces are not shown. Confirm against the upstream file.
 */
45 void cgroup_bpf_inherit(struct cgroup
*cgrp
, struct cgroup
*parent
)
49 for (type
= 0; type
< ARRAY_SIZE(cgrp
->bpf
.effective
); type
++) {
/* Fetch the parent's effective program for this attach type. */
52 e
= rcu_dereference_protected(parent
->bpf
.effective
[type
],
53 lockdep_is_held(&cgroup_mutex
));
/* The child starts out with the same effective program as the parent. */
54 rcu_assign_pointer(cgrp
->bpf
.effective
[type
], e
);
/* The override restriction is copied from the parent as well. */
55 cgrp
->bpf
.disallow_override
[type
] = parent
->bpf
.disallow_override
[type
];
60 * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
61 * propagate the change to descendants
62 * @cgrp: The cgroup whose descendants to traverse
63 * @parent: The parent of @cgrp, or %NULL if @cgrp is the root
64 * @prog: A new program to pin
65 * @type: Type of pinning operation (ingress/egress)
67 * Each cgroup has a set of two pointers for bpf programs; one for the eBPF
68 * program it owns, and one for the program which is effective for execution.
70 * If @prog is not %NULL, this function attaches a new program to the cgroup
71 * and releases the one that is currently attached, if any. @prog is then made
72 * the effective program of type @type in that cgroup.
74 * If @prog is %NULL, the currently attached program of type @type is released,
75 * and the effective program of the parent cgroup (if any) is inherited by @cgrp.
78 * Then, the descendants of @cgrp are walked and the effective program for
79 * each of them is set to the effective program of @cgrp unless the
80 * descendant has its own program attached, in which case the subbranch is
81 * skipped. This ensures that delegated subcgroups with own programs are left untouched.
84 * Must be called with cgroup_mutex held.
/*
 * Visible behaviour, in order:
 *  - 'overridable' starts as true and 'effective' as NULL; both are then
 *    reloaded from the parent (negated parent->bpf.disallow_override[type]
 *    and parent->bpf.effective[type] read under cgroup_mutex).
 *  - Two checks on (prog && effective) follow: one for !overridable and one
 *    for overridable != new_overridable. Their comments describe them as
 *    refusals; the statements they guard are not visible.
 *  - old_prog is taken from cgrp->bpf.prog[type]; 'overridable' is replaced
 *    by new_overridable; the cgroup's own disallow_override[type] is compared
 *    with new_overridable; a (!prog && !old_prog) check is described by its
 *    comment as the "nothing attached" error case.
 *  - cgrp->bpf.prog[type] is set to prog, then the descendants of
 *    &cgrp->self are walked in pre-order: a descendant other than cgrp that
 *    has its own program makes the walk jump to css_rightmost_descendant(),
 *    otherwise its effective slot and disallow_override flag are written.
 *  - Finally cgroup_bpf_enabled_key is incremented, old_prog is released
 *    with bpf_prog_put(), and the key is decremented.
 *
 * NOTE(review): this listing is lossy. Original lines 88-89, 93-94, 98-99,
 * 103-105, 109-111, 113-114, 116-117, 121-125, 128-129, 131, 134, 138, 140,
 * 142-145, 147-148 and 151+ are absent. That removes the parameter line that
 * presumably declares new_overridable, every return statement, the if/else
 * framing around the parent lookup, the attach branch and the final
 * inc/put/dec calls, the second argument of the rcu_assign_pointer() call,
 * and the terminators of three comments. Do not infer the conditions on the
 * static key updates from this text; confirm against the upstream file.
 */
86 int __cgroup_bpf_update(struct cgroup
*cgrp
, struct cgroup
*parent
,
87 struct bpf_prog
*prog
, enum bpf_attach_type type
,
90 struct bpf_prog
*old_prog
, *effective
= NULL
;
91 struct cgroup_subsys_state
*pos
;
92 bool overridable
= true;
95 overridable
= !parent
->bpf
.disallow_override
[type
];
96 effective
= rcu_dereference_protected(parent
->bpf
.effective
[type
],
97 lockdep_is_held(&cgroup_mutex
));
100 if (prog
&& effective
&& !overridable
)
101 /* if parent has non-overridable prog attached, disallow
102 * attaching new programs to descendent cgroup
106 if (prog
&& effective
&& overridable
!= new_overridable
)
107 /* if parent has overridable prog attached, only
108 * allow overridable programs in descendent cgroup
112 old_prog
= cgrp
->bpf
.prog
[type
];
115 overridable
= new_overridable
;
118 cgrp
->bpf
.disallow_override
[type
] == new_overridable
)
119 /* disallow attaching non-overridable on top
120 * of existing overridable in this cgroup
126 if (!prog
&& !old_prog
)
127 /* report error when trying to detach and nothing is attached */
130 cgrp
->bpf
.prog
[type
] = prog
;
132 css_for_each_descendant_pre(pos
, &cgrp
->self
) {
133 struct cgroup
*desc
= container_of(pos
, struct cgroup
, self
);
135 /* skip the subtree if the descendant has its own program */
136 if (desc
->bpf
.prog
[type
] && desc
!= cgrp
) {
137 pos
= css_rightmost_descendant(pos
);
139 rcu_assign_pointer(desc
->bpf
.effective
[type
],
141 desc
->bpf
.disallow_override
[type
] = !overridable
;
146 static_branch_inc(&cgroup_bpf_enabled_key
);
149 bpf_prog_put(old_prog
);
150 static_branch_dec(&cgroup_bpf_enabled_key
);
156 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
157 * @sk: The socket sending or receiving traffic
158 * @skb: The skb that is being sent or received
159 * @type: The type of program to be executed
161 * If no socket is passed, or the socket is not of type INET or INET6,
162 * this function does nothing and returns 0.
164 * The program type passed in via @type must be suitable for network
165 * filtering. No further check is performed to assert that.
167 * This function will return %-EPERM if an attached program was found
168 * and if it returned != 1 during execution. In all other cases, 0 is returned.
/*
 * Visible behaviour, in order:
 *  - test for a NULL sk or one for which sk_fullsock() is false, then test
 *    for a family that is neither AF_INET nor AF_INET6 (the statements these
 *    checks guard are not visible);
 *  - look up the socket's cgroup with sock_cgroup_ptr(&sk->sk_cgrp_data) and
 *    read cgrp->bpf.effective[type] with rcu_dereference();
 *  - compute offset = skb->data - skb_network_header(skb), remember skb->sk
 *    in save_sk, __skb_push() the skb by that offset, run the program with
 *    bpf_prog_run_save_cb(), and __skb_pull() by the same offset afterwards;
 *  - ret is 0 when the program returned exactly 1 and -EPERM otherwise.
 * The function is exported with EXPORT_SYMBOL.
 *
 * NOTE(review): the push/pull pair presumably lets the program see the
 * packet starting at its network header; confirm against the skb helpers.
 * NOTE(review): original lines 171, 173, 175-177, 179-180, 183-184, 186-188,
 * 190, 193-194 and 198-204 are absent. That removes the parameter line that
 * presumably declares skb, the declarations of cgrp and ret, the early
 * returns, any RCU read-side locking, the check that prog is non-NULL, every
 * use of save_sk, and the final return. Confirm against the upstream file.
 */
170 int __cgroup_bpf_run_filter_skb(struct sock
*sk
,
172 enum bpf_attach_type type
)
174 struct bpf_prog
*prog
;
178 if (!sk
|| !sk_fullsock(sk
))
181 if (sk
->sk_family
!= AF_INET
&&
182 sk
->sk_family
!= AF_INET6
)
185 cgrp
= sock_cgroup_ptr(&sk
->sk_cgrp_data
);
189 prog
= rcu_dereference(cgrp
->bpf
.effective
[type
]);
191 unsigned int offset
= skb
->data
- skb_network_header(skb
);
192 struct sock
*save_sk
= skb
->sk
;
195 __skb_push(skb
, offset
);
196 ret
= bpf_prog_run_save_cb(prog
, skb
) == 1 ? 0 : -EPERM
;
197 __skb_pull(skb
, offset
);
205 EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb
);
208 * __cgroup_bpf_run_filter_sk() - Run a program on a sock
209 * @sk: sock structure to manipulate
210 * @type: The type of program to be executed
212 * The socket passed is expected to be of type INET or INET6.
214 * The program type passed in via @type must be suitable for sock
215 * filtering. No further check is performed to assert that.
217 * This function will return %-EPERM if an attached program was found
218 * and if it returned != 1 during execution. In all other cases, 0 is returned.
/*
 * Visible behaviour: resolve the socket's cgroup with
 * sock_cgroup_ptr(&sk->sk_cgrp_data), read cgrp->bpf.effective[type] with
 * rcu_dereference(), run that program on sk via BPF_PROG_RUN(), and set ret
 * to 0 when the program returned exactly 1 and to -EPERM otherwise. Unlike
 * __cgroup_bpf_run_filter_skb(), no NULL or address-family check on sk is
 * visible here. The function is exported with EXPORT_SYMBOL.
 *
 * NOTE(review): original lines 222, 225-229, 231 and 233-237 are absent from
 * this listing. That removes the declaration and any initial value of ret,
 * any RCU read-side locking, the check that prog is non-NULL, and the final
 * return. Confirm against the upstream file.
 */
220 int __cgroup_bpf_run_filter_sk(struct sock
*sk
,
221 enum bpf_attach_type type
)
223 struct cgroup
*cgrp
= sock_cgroup_ptr(&sk
->sk_cgrp_data
);
224 struct bpf_prog
*prog
;
230 prog
= rcu_dereference(cgrp
->bpf
.effective
[type
]);
232 ret
= BPF_PROG_RUN(prog
, sk
) == 1 ? 0 : -EPERM
;
238 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk
);