1 // SPDX-License-Identifier: GPL-2.0-only
2
3 #include <linux/stat.h>
4 #include <linux/sysctl.h>
5 #include <linux/slab.h>
6 #include <linux/cred.h>
7 #include <linux/hash.h>
8 #include <linux/kmemleak.h>
9 #include <linux/user_namespace.h>
10
11 struct ucounts init_ucounts = {
12 .ns = &init_user_ns,
13 .uid = GLOBAL_ROOT_UID,
14 .count = ATOMIC_INIT(1),
15 };
16
17 #define UCOUNTS_HASHTABLE_BITS 10
18 static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)];
19 static DEFINE_SPINLOCK(ucounts_lock);
20
21 #define ucounts_hashfn(ns, uid) \
22 hash_long((unsigned long)__kuid_val(uid) + (unsigned long)(ns), \
23 UCOUNTS_HASHTABLE_BITS)
24 #define ucounts_hashentry(ns, uid) \
25 (ucounts_hashtable + ucounts_hashfn(ns, uid))
26
27
28 #ifdef CONFIG_SYSCTL
29 static struct ctl_table_set *
set_lookup(struct ctl_table_root * root)30 set_lookup(struct ctl_table_root *root)
31 {
32 return ¤t_user_ns()->set;
33 }
34
set_is_seen(struct ctl_table_set * set)35 static int set_is_seen(struct ctl_table_set *set)
36 {
37 return ¤t_user_ns()->set == set;
38 }
39
set_permissions(struct ctl_table_header * head,struct ctl_table * table)40 static int set_permissions(struct ctl_table_header *head,
41 struct ctl_table *table)
42 {
43 struct user_namespace *user_ns =
44 container_of(head->set, struct user_namespace, set);
45 int mode;
46
47 /* Allow users with CAP_SYS_RESOURCE unrestrained access */
48 if (ns_capable(user_ns, CAP_SYS_RESOURCE))
49 mode = (table->mode & S_IRWXU) >> 6;
50 else
51 /* Allow all others at most read-only access */
52 mode = table->mode & S_IROTH;
53 return (mode << 6) | (mode << 3) | mode;
54 }
55
56 static struct ctl_table_root set_root = {
57 .lookup = set_lookup,
58 .permissions = set_permissions,
59 };
60
/* Lower and upper bounds referenced by every UCOUNT_ENTRY (.extra1/.extra2). */
static long ue_zero;
static long ue_int_max = INT_MAX;

/*
 * Template for one sysctl entry: a long-sized value, mode 0644, handled
 * by proc_doulongvec_minmax with the bounds [ue_zero, ue_int_max].
 * The .data pointer is filled in per namespace by setup_userns_sysctls().
 */
#define UCOUNT_ENTRY(name)					\
	{							\
		.procname	= name,				\
		.mode		= 0644,				\
		.maxlen		= sizeof(long),			\
		.proc_handler	= proc_doulongvec_minmax,	\
		.extra1		= &ue_zero,			\
		.extra2		= &ue_int_max,			\
	}
73 static struct ctl_table user_table[] = {
74 UCOUNT_ENTRY("max_user_namespaces"),
75 UCOUNT_ENTRY("max_pid_namespaces"),
76 UCOUNT_ENTRY("max_uts_namespaces"),
77 UCOUNT_ENTRY("max_ipc_namespaces"),
78 UCOUNT_ENTRY("max_net_namespaces"),
79 UCOUNT_ENTRY("max_mnt_namespaces"),
80 UCOUNT_ENTRY("max_cgroup_namespaces"),
81 UCOUNT_ENTRY("max_time_namespaces"),
82 #ifdef CONFIG_INOTIFY_USER
83 UCOUNT_ENTRY("max_inotify_instances"),
84 UCOUNT_ENTRY("max_inotify_watches"),
85 #endif
86 #ifdef CONFIG_FANOTIFY
87 UCOUNT_ENTRY("max_fanotify_groups"),
88 UCOUNT_ENTRY("max_fanotify_marks"),
89 #endif
90 { },
91 { },
92 { },
93 { },
94 { }
95 };
96 #endif /* CONFIG_SYSCTL */
97
/*
 * Create the "user" sysctl directory for the user namespace @ns.
 *
 * With CONFIG_SYSCTL the table set of @ns is initialised, user_table is
 * duplicated, each of the first UCOUNT_COUNTS entries of the copy is
 * pointed at the matching ns->ucount_max[] slot and the copy is
 * registered under "user".  If ns->sysctls is still NULL afterwards the
 * copy is freed, the table set is retired and false is returned.
 *
 * Without CONFIG_SYSCTL this does nothing and returns true.
 */
bool setup_userns_sysctls(struct user_namespace *ns)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;
	int idx;

	BUILD_BUG_ON(ARRAY_SIZE(user_table) != UCOUNT_COUNTS + 1);
	setup_sysctl_set(&ns->set, &set_root, set_is_seen);

	tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL);
	if (tbl != NULL) {
		for (idx = 0; idx < UCOUNT_COUNTS; idx++)
			tbl[idx].data = &ns->ucount_max[idx];
		ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl);
	}

	if (ns->sysctls)
		return true;

	/* Allocation or registration failed: undo the setup above. */
	kfree(tbl);
	retire_sysctl_set(&ns->set);
	return false;
#else
	return true;
#endif
}
121
/*
 * Tear down what setup_userns_sysctls() created for @ns: unregister the
 * sysctl table, retire the table set and free the duplicated table.
 * Does nothing without CONFIG_SYSCTL.
 */
void retire_userns_sysctls(struct user_namespace *ns)
{
#ifdef CONFIG_SYSCTL
	/* Fetch the table pointer before the header is unregistered. */
	struct ctl_table *table_copy = ns->sysctls->ctl_table_arg;

	unregister_sysctl_table(ns->sysctls);
	retire_sysctl_set(&ns->set);
	kfree(table_copy);
#endif
}
133
/*
 * Walk the bucket @hashent and return the entry whose uid equals @uid
 * and whose namespace is @ns, or NULL when the bucket holds no such
 * entry.  No reference is taken on the returned entry.
 */
static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid,
				    struct hlist_head *hashent)
{
	struct ucounts *entry;

	hlist_for_each_entry(entry, hashent, node) {
		if (!uid_eq(entry->uid, uid) || entry->ns != ns)
			continue;
		return entry;
	}
	return NULL;
}
144
hlist_add_ucounts(struct ucounts * ucounts)145 static void hlist_add_ucounts(struct ucounts *ucounts)
146 {
147 struct hlist_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid);
148 spin_lock_irq(&ucounts_lock);
149 hlist_add_head(&ucounts->node, hashent);
150 spin_unlock_irq(&ucounts_lock);
151 }
152
get_ucounts_or_wrap(struct ucounts * ucounts)153 static inline bool get_ucounts_or_wrap(struct ucounts *ucounts)
154 {
155 /* Returns true on a successful get, false if the count wraps. */
156 return !atomic_add_negative(1, &ucounts->count);
157 }
158
/*
 * Take a reference on @ucounts.
 *
 * Returns @ucounts on success.  If the reference count wrapped, the
 * reference just taken is dropped again via put_ucounts() and NULL is
 * returned.
 */
struct ucounts *get_ucounts(struct ucounts *ucounts)
{
	if (get_ucounts_or_wrap(ucounts))
		return ucounts;

	put_ucounts(ucounts);
	return NULL;
}
167
/*
 * Look up the ucounts entry for (@ns, @uid), creating it if necessary,
 * and return it with a reference held for the caller.
 *
 * An existing entry has its reference count bumped while ucounts_lock
 * is held.  Otherwise the lock is dropped for a GFP_KERNEL allocation
 * and the bucket is searched a second time after re-taking the lock,
 * because an entry may have been inserted in between; in that case the
 * fresh allocation is freed and the existing entry is used.
 *
 * Returns NULL when the allocation fails or the reference count wraps.
 */
struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
{
	struct hlist_head *bucket = ucounts_hashentry(ns, uid);
	struct ucounts *found, *fresh;
	bool overflowed;

	spin_lock_irq(&ucounts_lock);
	found = find_ucounts(ns, uid, bucket);
	if (found)
		goto take_ref;
	spin_unlock_irq(&ucounts_lock);

	fresh = kzalloc(sizeof(*fresh), GFP_KERNEL);
	if (!fresh)
		return NULL;

	fresh->ns = ns;
	fresh->uid = uid;
	atomic_set(&fresh->count, 1);

	spin_lock_irq(&ucounts_lock);
	found = find_ucounts(ns, uid, bucket);
	if (!found) {
		/* Still absent: publish the new entry, count is already 1. */
		hlist_add_head(&fresh->node, bucket);
		spin_unlock_irq(&ucounts_lock);
		return fresh;
	}
	/* An entry appeared while the lock was dropped; discard ours. */
	kfree(fresh);

take_ref:
	overflowed = !get_ucounts_or_wrap(found);
	spin_unlock_irq(&ucounts_lock);
	if (overflowed) {
		put_ucounts(found);
		return NULL;
	}
	return found;
}
205
put_ucounts(struct ucounts * ucounts)206 void put_ucounts(struct ucounts *ucounts)
207 {
208 unsigned long flags;
209
210 if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) {
211 hlist_del_init(&ucounts->node);
212 spin_unlock_irqrestore(&ucounts_lock, flags);
213 kfree(ucounts);
214 }
215 }
216
/*
 * Atomically increment *@v by one, but only while its value is below @u.
 *
 * Returns true if the increment was performed, false if the current
 * value was already >= @u.
 *
 * @u is a long to match both the counter and the ucount_max[] limits it
 * is compared against; with a narrower type a limit above INT_MAX would
 * be truncated (LONG_MAX would become -1 and reject every increment).
 */
static inline bool atomic_long_inc_below(atomic_long_t *v, long u)
{
	long c, old;

	c = atomic_long_read(v);
	for (;;) {
		if (unlikely(c >= u))
			return false;
		old = atomic_long_cmpxchg(v, c, c + 1);
		if (likely(old == c))
			return true;
		/* Lost a race with another update; retry with the new value. */
		c = old;
	}
}
230
/*
 * Charge one unit of @type to (@ns, @uid) and to every ancestor reached
 * through ns->ucounts.
 *
 * Each level is incremented only while it stays below the
 * ucount_max[@type] of that level's namespace.  If any level refuses,
 * the increments already made on the levels before it are reverted, the
 * reference obtained from alloc_ucounts() is dropped and NULL is
 * returned.  On success the ucounts entry is returned with that
 * reference still held.  NULL is also returned when alloc_ucounts()
 * fails.
 */
struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid,
			   enum ucount_type type)
{
	struct ucounts *ucounts, *cur;

	ucounts = alloc_ucounts(ns, uid);
	cur = ucounts;
	while (cur) {
		struct user_namespace *cur_ns = cur->ns;
		long limit = READ_ONCE(cur_ns->ucount_max[type]);

		if (!atomic_long_inc_below(&cur->ucount[type], limit)) {
			struct ucounts *undo;

			/* Revert every level that was charged before @cur. */
			for (undo = ucounts; undo != cur; undo = undo->ns->ucounts)
				atomic_long_dec(&undo->ucount[type]);

			put_ucounts(ucounts);
			return NULL;
		}
		cur = cur_ns->ucounts;
	}
	return ucounts;
}
253
dec_ucount(struct ucounts * ucounts,enum ucount_type type)254 void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
255 {
256 struct ucounts *iter;
257 for (iter = ucounts; iter; iter = iter->ns->ucounts) {
258 long dec = atomic_long_dec_if_positive(&iter->ucount[type]);
259 WARN_ON_ONCE(dec < 0);
260 }
261 put_ucounts(ucounts);
262 }
263
inc_rlimit_ucounts(struct ucounts * ucounts,enum ucount_type type,long v)264 long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
265 {
266 struct ucounts *iter;
267 long max = LONG_MAX;
268 long ret = 0;
269
270 for (iter = ucounts; iter; iter = iter->ns->ucounts) {
271 long new = atomic_long_add_return(v, &iter->ucount[type]);
272 if (new < 0 || new > max)
273 ret = LONG_MAX;
274 else if (iter == ucounts)
275 ret = new;
276 max = READ_ONCE(iter->ns->ucount_max[type]);
277 }
278 return ret;
279 }
280
dec_rlimit_ucounts(struct ucounts * ucounts,enum ucount_type type,long v)281 bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
282 {
283 struct ucounts *iter;
284 long new = -1; /* Silence compiler warning */
285 for (iter = ucounts; iter; iter = iter->ns->ucounts) {
286 long dec = atomic_long_sub_return(v, &iter->ucount[type]);
287 WARN_ON_ONCE(dec < 0);
288 if (iter == ucounts)
289 new = dec;
290 }
291 return (new == 0);
292 }
293
do_dec_rlimit_put_ucounts(struct ucounts * ucounts,struct ucounts * last,enum ucount_type type)294 static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts,
295 struct ucounts *last, enum ucount_type type)
296 {
297 struct ucounts *iter, *next;
298 for (iter = ucounts; iter != last; iter = next) {
299 long dec = atomic_long_sub_return(1, &iter->ucount[type]);
300 WARN_ON_ONCE(dec < 0);
301 next = iter->ns->ucounts;
302 if (dec == 0)
303 put_ucounts(iter);
304 }
305 }
306
/*
 * Decrement the @type counter by one on @ucounts and on every ancestor,
 * dropping a ucounts reference on each level whose counter hits zero.
 */
void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type)
{
	/* A NULL stop marker makes the walk cover the whole chain. */
	struct ucounts *const stop_at = NULL;

	do_dec_rlimit_put_ucounts(ucounts, stop_at, type);
}
311
inc_rlimit_get_ucounts(struct ucounts * ucounts,enum ucount_type type)312 long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type)
313 {
314 /* Caller must hold a reference to ucounts */
315 struct ucounts *iter;
316 long max = LONG_MAX;
317 long dec, ret = 0;
318
319 for (iter = ucounts; iter; iter = iter->ns->ucounts) {
320 long new = atomic_long_add_return(1, &iter->ucount[type]);
321 if (new < 0 || new > max)
322 goto unwind;
323 if (iter == ucounts)
324 ret = new;
325 max = READ_ONCE(iter->ns->ucount_max[type]);
326 /*
327 * Grab an extra ucount reference for the caller when
328 * the rlimit count was previously 0.
329 */
330 if (new != 1)
331 continue;
332 if (!get_ucounts(iter))
333 goto dec_unwind;
334 }
335 return ret;
336 dec_unwind:
337 dec = atomic_long_sub_return(1, &iter->ucount[type]);
338 WARN_ON_ONCE(dec < 0);
339 unwind:
340 do_dec_rlimit_put_ucounts(ucounts, iter, type);
341 return 0;
342 }
343
is_ucounts_overlimit(struct ucounts * ucounts,enum ucount_type type,unsigned long rlimit)344 bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long rlimit)
345 {
346 struct ucounts *iter;
347 long max = rlimit;
348 if (rlimit > LONG_MAX)
349 max = LONG_MAX;
350 for (iter = ucounts; iter; iter = iter->ns->ucounts) {
351 if (get_ucounts_value(iter, type) > max)
352 return true;
353 max = READ_ONCE(iter->ns->ucount_max[type]);
354 }
355 return false;
356 }
357
user_namespace_sysctl_init(void)358 static __init int user_namespace_sysctl_init(void)
359 {
360 #ifdef CONFIG_SYSCTL
361 static struct ctl_table_header *user_header;
362 static struct ctl_table empty[1];
363 /*
364 * It is necessary to register the user directory in the
365 * default set so that registrations in the child sets work
366 * properly.
367 */
368 user_header = register_sysctl("user", empty);
369 kmemleak_ignore(user_header);
370 BUG_ON(!user_header);
371 BUG_ON(!setup_userns_sysctls(&init_user_ns));
372 #endif
373 hlist_add_ucounts(&init_ucounts);
374 inc_rlimit_ucounts(&init_ucounts, UCOUNT_RLIMIT_NPROC, 1);
375 return 0;
376 }
377 subsys_initcall(user_namespace_sysctl_init);
378