xref: /openbmc/linux/mm/util.c (revision afb46f79)
1 #include <linux/mm.h>
2 #include <linux/slab.h>
3 #include <linux/string.h>
4 #include <linux/compiler.h>
5 #include <linux/export.h>
6 #include <linux/err.h>
7 #include <linux/sched.h>
8 #include <linux/security.h>
9 #include <linux/swap.h>
10 #include <linux/swapops.h>
11 #include <linux/mman.h>
12 #include <linux/hugetlb.h>
13 
14 #include <asm/uaccess.h>
15 
16 #include "internal.h"
17 
18 #define CREATE_TRACE_POINTS
19 #include <trace/events/kmem.h>
20 
21 /**
22  * kstrdup - allocate space for and copy an existing string
23  * @s: the string to duplicate
24  * @gfp: the GFP mask used in the kmalloc() call when allocating memory
25  */
26 char *kstrdup(const char *s, gfp_t gfp)
27 {
28 	size_t len;
29 	char *buf;
30 
31 	if (!s)
32 		return NULL;
33 
34 	len = strlen(s) + 1;
35 	buf = kmalloc_track_caller(len, gfp);
36 	if (buf)
37 		memcpy(buf, s, len);
38 	return buf;
39 }
40 EXPORT_SYMBOL(kstrdup);
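
/*
 * Usage sketch for kstrdup() above; the function name below is hypothetical
 * and only illustrates the expected calling pattern: a NULL @s yields NULL,
 * a failed allocation yields NULL, and the copy is released with kfree().
 */
static int __maybe_unused kstrdup_example(const char *name)
{
	char *copy = kstrdup(name, GFP_KERNEL);	/* may sleep */

	if (name && !copy)
		return -ENOMEM;	/* allocation failed, not a NULL @name */

	/* ... use @copy as an owned, NUL-terminated string ... */

	kfree(copy);		/* kfree(NULL) is a no-op */
	return 0;
}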
41 
42 /**
43  * kstrndup - allocate space for and copy an existing string
44  * @s: the string to duplicate
45  * @max: read at most @max chars from @s
46  * @gfp: the GFP mask used in the kmalloc() call when allocating memory
47  */
48 char *kstrndup(const char *s, size_t max, gfp_t gfp)
49 {
50 	size_t len;
51 	char *buf;
52 
53 	if (!s)
54 		return NULL;
55 
56 	len = strnlen(s, max);
57 	buf = kmalloc_track_caller(len+1, gfp);
58 	if (buf) {
59 		memcpy(buf, s, len);
60 		buf[len] = '\0';
61 	}
62 	return buf;
63 }
64 EXPORT_SYMBOL(kstrndup);
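
/*
 * Usage sketch for kstrndup(): the result holds at most @max characters of
 * the source and is always NUL-terminated. The caller below and the limit
 * of 15 characters are hypothetical.
 */
static char * __maybe_unused kstrndup_example(const char *untrusted)
{
	/* Keep at most 15 characters even if @untrusted is much longer. */
	return kstrndup(untrusted, 15, GFP_KERNEL);
}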
65 
66 /**
67  * kmemdup - duplicate region of memory
68  *
69  * @src: memory region to duplicate
70  * @len: memory region length
71  * @gfp: GFP mask to use
72  */
73 void *kmemdup(const void *src, size_t len, gfp_t gfp)
74 {
75 	void *p;
76 
77 	p = kmalloc_track_caller(len, gfp);
78 	if (p)
79 		memcpy(p, src, len);
80 	return p;
81 }
82 EXPORT_SYMBOL(kmemdup);
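
/*
 * Usage sketch for kmemdup(): duplicating a fixed-size object. The struct
 * and function names are hypothetical.
 */
struct kmemdup_example_cfg {
	u32 flags;
	u8 mac[6];
};

static struct kmemdup_example_cfg * __maybe_unused
kmemdup_example(const struct kmemdup_example_cfg *tmpl)
{
	/* NULL on allocation failure; the caller frees the copy with kfree(). */
	return kmemdup(tmpl, sizeof(*tmpl), GFP_KERNEL);
}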
83 
84 /**
85  * memdup_user - duplicate memory region from user space
86  *
87  * @src: source address in user space
88  * @len: number of bytes to copy
89  *
90  * Returns an ERR_PTR() on failure.
91  */
92 void *memdup_user(const void __user *src, size_t len)
93 {
94 	void *p;
95 
96 	/*
97 	 * Always use GFP_KERNEL, since copy_from_user() can sleep and
98 	 * cause a page fault, which makes it pointless to use GFP_NOFS
99 	 * or GFP_ATOMIC.
100 	 */
101 	p = kmalloc_track_caller(len, GFP_KERNEL);
102 	if (!p)
103 		return ERR_PTR(-ENOMEM);
104 
105 	if (copy_from_user(p, src, len)) {
106 		kfree(p);
107 		return ERR_PTR(-EFAULT);
108 	}
109 
110 	return p;
111 }
112 EXPORT_SYMBOL(memdup_user);
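
/*
 * Usage sketch for memdup_user(): the common pattern of pulling a fixed-size
 * ioctl argument out of user space. Note the ERR_PTR()/PTR_ERR() convention
 * instead of a NULL check. The struct and function names are hypothetical.
 */
struct memdup_example_args {
	u64 offset;
	u32 len;
};

static long __maybe_unused memdup_user_example(const void __user *uarg)
{
	struct memdup_example_args *args;

	args = memdup_user(uarg, sizeof(*args));
	if (IS_ERR(args))
		return PTR_ERR(args);	/* -ENOMEM or -EFAULT */

	/* ... validate and act on *args ... */

	kfree(args);
	return 0;
}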
113 
114 static __always_inline void *__do_krealloc(const void *p, size_t new_size,
115 					   gfp_t flags)
116 {
117 	void *ret;
118 	size_t ks = 0;
119 
120 	if (p)
121 		ks = ksize(p);
122 
123 	if (ks >= new_size)
124 		return (void *)p;
125 
126 	ret = kmalloc_track_caller(new_size, flags);
127 	if (ret && p)
128 		memcpy(ret, p, ks);
129 
130 	return ret;
131 }
132 
133 /**
134  * __krealloc - like krealloc() but don't free @p.
135  * @p: object to reallocate memory for.
136  * @new_size: how many bytes of memory are required.
137  * @flags: the type of memory to allocate.
138  *
139  * This function is like krealloc() except it never frees the originally
140  * allocated buffer. Use this if you don't want the original buffer freed
141  * immediately, for example because it may still be reachable under RCU.
142  */
143 void *__krealloc(const void *p, size_t new_size, gfp_t flags)
144 {
145 	if (unlikely(!new_size))
146 		return ZERO_SIZE_PTR;
147 
148 	return __do_krealloc(p, new_size, flags);
149 
150 }
151 EXPORT_SYMBOL(__krealloc);
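
/*
 * Usage sketch for __krealloc(): growing a buffer that readers may still be
 * dereferencing under RCU, so the old allocation must not be freed until a
 * grace period has passed. The pointer and function names are hypothetical
 * and error handling is reduced to the essentials.
 */
static int __maybe_unused krealloc_rcu_example(int **table_p, size_t new_size)
{
	int *old = *table_p;
	int *newbuf;

	newbuf = __krealloc(old, new_size, GFP_KERNEL);
	if (!newbuf)
		return -ENOMEM;

	if (newbuf != old) {
		rcu_assign_pointer(*table_p, newbuf);	/* publish new buffer */
		synchronize_rcu();			/* wait out old readers */
		kfree(old);				/* now safe to free */
	}
	return 0;
}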
152 
153 /**
154  * krealloc - reallocate memory. The contents will remain unchanged.
155  * @p: object to reallocate memory for.
156  * @new_size: how many bytes of memory are required.
157  * @flags: the type of memory to allocate.
158  *
159  * The contents of the object pointed to are preserved up to the
160  * lesser of the new and old sizes.  If @p is %NULL, krealloc()
161  * behaves exactly like kmalloc().  If @new_size is 0 and @p is not a
162  * %NULL pointer, the object pointed to is freed.
163  */
164 void *krealloc(const void *p, size_t new_size, gfp_t flags)
165 {
166 	void *ret;
167 
168 	if (unlikely(!new_size)) {
169 		kfree(p);
170 		return ZERO_SIZE_PTR;
171 	}
172 
173 	ret = __do_krealloc(p, new_size, flags);
174 	if (ret && p != ret)
175 		kfree(p);
176 
177 	return ret;
178 }
179 EXPORT_SYMBOL(krealloc);
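
/*
 * Usage sketch for krealloc(): growing a kmalloc()ed array. On failure the
 * original buffer is left untouched, so it must not be overwritten before
 * the NULL check. The function name and the zeroing of the new tail are
 * hypothetical.
 */
static int * __maybe_unused krealloc_example(int *arr, size_t old_n, size_t new_n)
{
	int *resized = krealloc(arr, new_n * sizeof(*arr), GFP_KERNEL);

	if (!resized)
		return arr;	/* allocation failed; @arr is still valid */

	if (new_n > old_n)	/* krealloc() does not zero the new tail */
		memset(resized + old_n, 0, (new_n - old_n) * sizeof(*resized));
	return resized;
}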
180 
181 /**
182  * kzfree - like kfree but zero memory
183  * @p: object to free memory of
184  *
185  * The memory of the object @p points to is zeroed before freed.
186  * If @p is %NULL, kzfree() does nothing.
187  *
188  * Note: this function zeroes the whole allocated buffer which can be a good
189  * deal bigger than the requested buffer size passed to kmalloc(). So be
190  * careful when using this function in performance sensitive code.
191  */
192 void kzfree(const void *p)
193 {
194 	size_t ks;
195 	void *mem = (void *)p;
196 
197 	if (unlikely(ZERO_OR_NULL_PTR(mem)))
198 		return;
199 	ks = ksize(mem);
200 	memset(mem, 0, ks);
201 	kfree(mem);
202 }
203 EXPORT_SYMBOL(kzfree);
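
/*
 * Usage sketch for kzfree(): releasing a buffer that held sensitive data
 * (e.g. key material), so it is wiped before going back to the allocator.
 * The function name and buffer size are hypothetical.
 */
static void __maybe_unused kzfree_example(void)
{
	u8 *key = kmalloc(32, GFP_KERNEL);

	if (!key)
		return;

	/* ... fill @key with secret material and use it ... */

	kzfree(key);	/* zeroes the whole allocation, then kfree()s it */
}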
204 
205 /**
206  * strndup_user - duplicate an existing string from user space
207  * @s: The string to duplicate
208  * @n: Maximum number of bytes to copy, including the trailing NUL.
209  */
210 char *strndup_user(const char __user *s, long n)
211 {
212 	char *p;
213 	long length;
214 
215 	length = strnlen_user(s, n);
216 
217 	if (!length)
218 		return ERR_PTR(-EFAULT);
219 
220 	if (length > n)
221 		return ERR_PTR(-EINVAL);
222 
223 	p = memdup_user(s, length);
224 
225 	if (IS_ERR(p))
226 		return p;
227 
228 	p[length - 1] = '\0';
229 
230 	return p;
231 }
232 EXPORT_SYMBOL(strndup_user);
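
/*
 * Usage sketch for strndup_user(): copying a short, NUL-terminated string
 * (e.g. a name passed through an ioctl) out of user space with a hard upper
 * bound. Errors come back as ERR_PTR() values. The names and the 64-byte
 * limit are hypothetical.
 */
static long __maybe_unused strndup_user_example(const char __user *uname)
{
	char *name = strndup_user(uname, 64);	/* at most 64 bytes incl. NUL */

	if (IS_ERR(name))
		return PTR_ERR(name);	/* -EFAULT, -EINVAL or -ENOMEM */

	/* ... @name is now a NUL-terminated kernel copy ... */

	kfree(name);
	return 0;
}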
233 
234 void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
235 		struct vm_area_struct *prev, struct rb_node *rb_parent)
236 {
237 	struct vm_area_struct *next;
238 
239 	vma->vm_prev = prev;
240 	if (prev) {
241 		next = prev->vm_next;
242 		prev->vm_next = vma;
243 	} else {
244 		mm->mmap = vma;
245 		if (rb_parent)
246 			next = rb_entry(rb_parent,
247 					struct vm_area_struct, vm_rb);
248 		else
249 			next = NULL;
250 	}
251 	vma->vm_next = next;
252 	if (next)
253 		next->vm_prev = vma;
254 }
255 
256 /* Check if the vma is being used as a stack by this task */
257 static int vm_is_stack_for_task(struct task_struct *t,
258 				struct vm_area_struct *vma)
259 {
260 	return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
261 }
262 
263 /*
264  * Check if the vma is being used as a stack.
265  * If in_group is non-zero, check the entire thread group; otherwise
266  * check only the given task. Returns the pid of the task the vma is a
267  * stack for, or 0 if it is not.
268  */
269 pid_t vm_is_stack(struct task_struct *task,
270 		  struct vm_area_struct *vma, int in_group)
271 {
272 	pid_t ret = 0;
273 
274 	if (vm_is_stack_for_task(task, vma))
275 		return task->pid;
276 
277 	if (in_group) {
278 		struct task_struct *t;
279 		rcu_read_lock();
280 		if (!pid_alive(task))
281 			goto done;
282 
283 		t = task;
284 		do {
285 			if (vm_is_stack_for_task(t, vma)) {
286 				ret = t->pid;
287 				goto done;
288 			}
289 		} while_each_thread(task, t);
290 done:
291 		rcu_read_unlock();
292 	}
293 
294 	return ret;
295 }
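
/*
 * Usage sketch for vm_is_stack(): a hypothetical caller (something like a
 * /proc-style VMA dumper) asking whether @vma should be reported as a
 * thread's stack. The wrapper name is illustrative only.
 */
static pid_t __maybe_unused vma_stack_owner_example(struct task_struct *task,
						    struct vm_area_struct *vma)
{
	/* Search the whole thread group; 0 means "not a stack". */
	return vm_is_stack(task, vma, 1);
}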
296 
297 #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
298 void arch_pick_mmap_layout(struct mm_struct *mm)
299 {
300 	mm->mmap_base = TASK_UNMAPPED_BASE;
301 	mm->get_unmapped_area = arch_get_unmapped_area;
302 }
303 #endif
304 
305 /*
306  * Like get_user_pages_fast() except it is IRQ-safe in that it won't fall
307  * back to the regular GUP.
308  * If the architecture does not support this function, simply return
309  * with no pages pinned.
310  */
311 int __weak __get_user_pages_fast(unsigned long start,
312 				 int nr_pages, int write, struct page **pages)
313 {
314 	return 0;
315 }
316 EXPORT_SYMBOL_GPL(__get_user_pages_fast);
317 
318 /**
319  * get_user_pages_fast() - pin user pages in memory
320  * @start:	starting user address
321  * @nr_pages:	number of pages from start to pin
322  * @write:	whether pages will be written to
323  * @pages:	array that receives pointers to the pages pinned.
324  *		Should be at least nr_pages long.
325  *
326  * Returns number of pages pinned. This may be fewer than the number
327  * requested. If nr_pages is 0 or negative, returns 0. If no pages
328  * were pinned, returns -errno.
329  *
330  * get_user_pages_fast provides equivalent functionality to get_user_pages,
331  * operating on current and current->mm, with force=0 and vma=NULL. However
332  * unlike get_user_pages, it must be called without mmap_sem held.
333  *
334  * get_user_pages_fast may take mmap_sem and page table locks, so no
335  * assumptions can be made about lack of locking. get_user_pages_fast is to be
336  * implemented in a way that is advantageous (vs get_user_pages()) when the
337  * user memory area is already faulted in and present in ptes. However if the
338  * pages have to be faulted in, it may turn out to be slightly slower so
339  * callers need to carefully consider what to use. On many architectures,
340  * get_user_pages_fast simply falls back to get_user_pages.
341  */
342 int __weak get_user_pages_fast(unsigned long start,
343 				int nr_pages, int write, struct page **pages)
344 {
345 	struct mm_struct *mm = current->mm;
346 	int ret;
347 
348 	down_read(&mm->mmap_sem);
349 	ret = get_user_pages(current, mm, start, nr_pages,
350 					write, 0, pages, NULL);
351 	up_read(&mm->mmap_sem);
352 
353 	return ret;
354 }
355 EXPORT_SYMBOL_GPL(get_user_pages_fast);
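
/*
 * Usage sketch for get_user_pages_fast(): pinning a user buffer before doing
 * I/O to it, then dropping the page references. The partial-pin handling is
 * the important part: fewer pages than requested may come back. The function
 * name is hypothetical and @start is assumed page-aligned.
 */
static int __maybe_unused gup_fast_example(unsigned long start, int nr_pages,
					   struct page **pages)
{
	int i, pinned;

	/* write=1: the pinned pages will be written to. */
	pinned = get_user_pages_fast(start, nr_pages, 1, pages);
	if (pinned < 0)
		return pinned;		/* nothing was pinned */

	/* ... do the I/O against pages[0..pinned-1] ... */

	for (i = 0; i < pinned; i++)
		put_page(pages[i]);	/* drop the extra reference */

	return pinned == nr_pages ? 0 : -EFAULT;
}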
356 
357 unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
358 	unsigned long len, unsigned long prot,
359 	unsigned long flag, unsigned long pgoff)
360 {
361 	unsigned long ret;
362 	struct mm_struct *mm = current->mm;
363 	unsigned long populate;
364 
365 	ret = security_mmap_file(file, prot, flag);
366 	if (!ret) {
367 		down_write(&mm->mmap_sem);
368 		ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
369 				    &populate);
370 		up_write(&mm->mmap_sem);
371 		if (populate)
372 			mm_populate(ret, populate);
373 	}
374 	return ret;
375 }
376 
377 unsigned long vm_mmap(struct file *file, unsigned long addr,
378 	unsigned long len, unsigned long prot,
379 	unsigned long flag, unsigned long offset)
380 {
381 	if (unlikely(offset + PAGE_ALIGN(len) < offset))
382 		return -EINVAL;
383 	if (unlikely(offset & ~PAGE_MASK))
384 		return -EINVAL;
385 
386 	return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
387 }
388 EXPORT_SYMBOL(vm_mmap);
389 
390 struct address_space *page_mapping(struct page *page)
391 {
392 	struct address_space *mapping = page->mapping;
393 
394 	/* This happens if someone calls flush_dcache_page on slab page */
395 	if (unlikely(PageSlab(page)))
396 		return NULL;
397 
398 	if (unlikely(PageSwapCache(page))) {
399 		swp_entry_t entry;
400 
401 		entry.val = page_private(page);
402 		mapping = swap_address_space(entry);
403 	} else if ((unsigned long)mapping & PAGE_MAPPING_ANON)
404 		mapping = NULL;
405 	return mapping;
406 }
407 
408 int overcommit_ratio_handler(struct ctl_table *table, int write,
409 			     void __user *buffer, size_t *lenp,
410 			     loff_t *ppos)
411 {
412 	int ret;
413 
414 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
415 	if (ret == 0 && write)
416 		sysctl_overcommit_kbytes = 0;
417 	return ret;
418 }
419 
420 int overcommit_kbytes_handler(struct ctl_table *table, int write,
421 			     void __user *buffer, size_t *lenp,
422 			     loff_t *ppos)
423 {
424 	int ret;
425 
426 	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
427 	if (ret == 0 && write)
428 		sysctl_overcommit_ratio = 0;
429 	return ret;
430 }
431 
432 /*
433  * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
434  */
435 unsigned long vm_commit_limit(void)
436 {
437 	unsigned long allowed;
438 
439 	if (sysctl_overcommit_kbytes)
440 		allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10);
441 	else
442 		allowed = ((totalram_pages - hugetlb_total_pages())
443 			   * sysctl_overcommit_ratio / 100);
444 	allowed += total_swap_pages;
445 
446 	return allowed;
447 }
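
/*
 * Worked example of the calculation above, with illustrative numbers and
 * 4 KiB pages: 1048576 totalram_pages (4 GiB), no hugetlb pages, the
 * default sysctl_overcommit_ratio of 50, and 262144 pages (1 GiB) of swap:
 *
 *	allowed = (1048576 - 0) * 50 / 100 + 262144
 *	        = 524288 + 262144 = 786432 pages	(3 GiB)
 *
 * If sysctl_overcommit_kbytes is set instead (say to 3145728, i.e. 3 GiB),
 * the ratio is ignored:
 *
 *	allowed = (3145728 >> (PAGE_SHIFT - 10)) + 262144
 *	        = 786432 + 262144 = 1048576 pages	(4 GiB)
 */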
448 
449 /**
450  * get_cmdline() - copy the cmdline value to a buffer.
451  * @task:     the task whose cmdline value to copy.
452  * @buffer:   the buffer to copy to.
453  * @buflen:   the length of the buffer. Larger cmdline values are truncated
454  *            to this length.
455  * Returns the size of the cmdline field copied. Note that the copy is
456  * not guaranteed to be NUL-terminated.
457  */
458 int get_cmdline(struct task_struct *task, char *buffer, int buflen)
459 {
460 	int res = 0;
461 	unsigned int len;
462 	struct mm_struct *mm = get_task_mm(task);
463 	if (!mm)
464 		goto out;
465 	if (!mm->arg_end)
466 		goto out_mm;	/* Shh! No looking before we're done */
467 
468 	len = mm->arg_end - mm->arg_start;
469 
470 	if (len > buflen)
471 		len = buflen;
472 
473 	res = access_process_vm(task, mm->arg_start, buffer, len, 0);
474 
475 	/*
476 	 * If the nul at the end of args has been overwritten, then
477 	 * assume application is using setproctitle(3).
478 	 */
479 	if (res > 0 && buffer[res-1] != '\0' && len < buflen) {
480 		len = strnlen(buffer, res);
481 		if (len < res) {
482 			res = len;
483 		} else {
484 			len = mm->env_end - mm->env_start;
485 			if (len > buflen - res)
486 				len = buflen - res;
487 			res += access_process_vm(task, mm->env_start,
488 						 buffer+res, len, 0);
489 			res = strnlen(buffer, res);
490 		}
491 	}
492 out_mm:
493 	mmput(mm);
494 out:
495 	return res;
496 }
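
/*
 * Usage sketch for get_cmdline(): since the copied data is not guaranteed to
 * be NUL-terminated, a caller that wants a C string must terminate it
 * itself. The buffer size and function name are hypothetical.
 */
static void __maybe_unused get_cmdline_example(struct task_struct *task)
{
	char buf[256];
	int len = get_cmdline(task, buf, sizeof(buf) - 1);

	buf[len] = '\0';	/* len <= buflen, so this stays in bounds */

	/* ... buf now holds the (possibly truncated) cmdline ... */
}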
497 
498 /* Tracepoints definitions. */
499 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
500 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
501 EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
502 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
503 EXPORT_TRACEPOINT_SYMBOL(kfree);
504 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
505