mm: rename mem_cgroup_migrate to mem_cgroup_replace_page
[deliverable/linux.git] / fs / binfmt_elf.c
1 /*
2 * linux/fs/binfmt_elf.c
3 *
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
8 *
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10 */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <asm/uaccess.h>
39 #include <asm/param.h>
40 #include <asm/page.h>
41
/*
 * Default spellings for user_long_t and user_siginfo_t, used only when
 * nothing included above has already defined these names.
 */
#if !defined(user_long_t)
#define user_long_t long
#endif

#if !defined(user_siginfo_t)
#define user_siginfo_t siginfo_t
#endif
48
/* Forward declarations for the handlers referenced by elf_format below. */
static int load_elf_binary(struct linux_binprm *bprm);
static unsigned long elf_map(struct file *filep, unsigned long addr,
			     struct elf_phdr *eppnt, int prot, int type,
			     unsigned long total_size);

/* Without CONFIG_USELIB there is no library loader; register NULL instead. */
#if defined(CONFIG_USELIB)
static int load_elf_library(struct file *file);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(CONFIG_ELF_CORE)
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump NULL
#endif
68
/*
 * ELF_MIN_ALIGN is the alignment granule used for all mappings made by
 * this file: the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE.
 */
#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

/*
 * Round down to, take the offset within, and round up to an
 * ELF_MIN_ALIGN boundary.
 *
 * The mask is widened to unsigned long *before* it is inverted in both
 * ELF_PAGESTART and ELF_PAGEALIGN. Without the cast, an unsigned-int
 * ELF_MIN_ALIGN would yield a 32-bit mask that zero-extends and silently
 * clears the upper half of a 64-bit address.
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) \
	(((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
82
83 static struct linux_binfmt elf_format = {
84 .module = THIS_MODULE,
85 .load_binary = load_elf_binary,
86 .load_shlib = load_elf_library,
87 .core_dump = elf_core_dump,
88 .min_coredump = ELF_EXEC_PAGESIZE,
89 };
90
/* True when @x, converted to unsigned long, is not below TASK_SIZE. */
#define BAD_ADDR(x)	(!((unsigned long)(x) < TASK_SIZE))
92
93 static int set_brk(unsigned long start, unsigned long end)
94 {
95 start = ELF_PAGEALIGN(start);
96 end = ELF_PAGEALIGN(end);
97 if (end > start) {
98 unsigned long addr;
99 addr = vm_brk(start, end - start);
100 if (BAD_ADDR(addr))
101 return addr;
102 }
103 current->mm->start_brk = current->mm->brk = end;
104 return 0;
105 }
106
107 /* We need to explicitly zero any fractional pages
108 after the data section (i.e. bss). This would
109 contain the junk from the file that should not
110 be in memory
111 */
112 static int padzero(unsigned long elf_bss)
113 {
114 unsigned long nbyte;
115
116 nbyte = ELF_PAGEOFFSET(elf_bss);
117 if (nbyte) {
118 nbyte = ELF_MIN_ALIGN - nbyte;
119 if (clear_user((void __user *) elf_bss, nbyte))
120 return -EFAULT;
121 }
122 return 0;
123 }
124
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 * STACK_ADD(sp, items)   - address @items elf_addr_t slots further into the
 *                          stack than @sp (in the growth direction).
 * STACK_ROUND(sp, items) - address @items slots past @sp, rounded to a
 *                          16-byte boundary, as an unsigned long.
 * STACK_ALLOC(sp, len)   - reserve @len units on the stack by moving @sp
 *                          (an lvalue) and yield the start of the reserved
 *                          area.
 *
 * Every macro argument is parenthesised so that expression arguments such
 * as "a + b" keep their intended grouping.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) & ~15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
	(sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) & ~15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
139
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 * Otherwise it defaults to NULL here and no such entry is emitted.
 */
#if !defined(ELF_BASE_PLATFORM)
#define ELF_BASE_PLATFORM NULL
#endif
148
149 static int
150 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
151 unsigned long load_addr, unsigned long interp_load_addr)
152 {
153 unsigned long p = bprm->p;
154 int argc = bprm->argc;
155 int envc = bprm->envc;
156 elf_addr_t __user *argv;
157 elf_addr_t __user *envp;
158 elf_addr_t __user *sp;
159 elf_addr_t __user *u_platform;
160 elf_addr_t __user *u_base_platform;
161 elf_addr_t __user *u_rand_bytes;
162 const char *k_platform = ELF_PLATFORM;
163 const char *k_base_platform = ELF_BASE_PLATFORM;
164 unsigned char k_rand_bytes[16];
165 int items;
166 elf_addr_t *elf_info;
167 int ei_index = 0;
168 const struct cred *cred = current_cred();
169 struct vm_area_struct *vma;
170
171 /*
172 * In some cases (e.g. Hyper-Threading), we want to avoid L1
173 * evictions by the processes running on the same package. One
174 * thing we can do is to shuffle the initial stack for them.
175 */
176
177 p = arch_align_stack(p);
178
179 /*
180 * If this architecture has a platform capability string, copy it
181 * to userspace. In some cases (Sparc), this info is impossible
182 * for userspace to get any other way, in others (i386) it is
183 * merely difficult.
184 */
185 u_platform = NULL;
186 if (k_platform) {
187 size_t len = strlen(k_platform) + 1;
188
189 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
190 if (__copy_to_user(u_platform, k_platform, len))
191 return -EFAULT;
192 }
193
194 /*
195 * If this architecture has a "base" platform capability
196 * string, copy it to userspace.
197 */
198 u_base_platform = NULL;
199 if (k_base_platform) {
200 size_t len = strlen(k_base_platform) + 1;
201
202 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
203 if (__copy_to_user(u_base_platform, k_base_platform, len))
204 return -EFAULT;
205 }
206
207 /*
208 * Generate 16 random bytes for userspace PRNG seeding.
209 */
210 get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
211 u_rand_bytes = (elf_addr_t __user *)
212 STACK_ALLOC(p, sizeof(k_rand_bytes));
213 if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
214 return -EFAULT;
215
216 /* Create the ELF interpreter info */
217 elf_info = (elf_addr_t *)current->mm->saved_auxv;
218 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
219 #define NEW_AUX_ENT(id, val) \
220 do { \
221 elf_info[ei_index++] = id; \
222 elf_info[ei_index++] = val; \
223 } while (0)
224
225 #ifdef ARCH_DLINFO
226 /*
227 * ARCH_DLINFO must come first so PPC can do its special alignment of
228 * AUXV.
229 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
230 * ARCH_DLINFO changes
231 */
232 ARCH_DLINFO;
233 #endif
234 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
235 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
236 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
237 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
238 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
239 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
240 NEW_AUX_ENT(AT_BASE, interp_load_addr);
241 NEW_AUX_ENT(AT_FLAGS, 0);
242 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
243 NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
244 NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
245 NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
246 NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
247 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
248 NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
249 #ifdef ELF_HWCAP2
250 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
251 #endif
252 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
253 if (k_platform) {
254 NEW_AUX_ENT(AT_PLATFORM,
255 (elf_addr_t)(unsigned long)u_platform);
256 }
257 if (k_base_platform) {
258 NEW_AUX_ENT(AT_BASE_PLATFORM,
259 (elf_addr_t)(unsigned long)u_base_platform);
260 }
261 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
262 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
263 }
264 #undef NEW_AUX_ENT
265 /* AT_NULL is zero; clear the rest too */
266 memset(&elf_info[ei_index], 0,
267 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
268
269 /* And advance past the AT_NULL entry. */
270 ei_index += 2;
271
272 sp = STACK_ADD(p, ei_index);
273
274 items = (argc + 1) + (envc + 1) + 1;
275 bprm->p = STACK_ROUND(sp, items);
276
277 /* Point sp at the lowest address on the stack */
278 #ifdef CONFIG_STACK_GROWSUP
279 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
280 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
281 #else
282 sp = (elf_addr_t __user *)bprm->p;
283 #endif
284
285
286 /*
287 * Grow the stack manually; some architectures have a limit on how
288 * far ahead a user-space access may be in order to grow the stack.
289 */
290 vma = find_extend_vma(current->mm, bprm->p);
291 if (!vma)
292 return -EFAULT;
293
294 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
295 if (__put_user(argc, sp++))
296 return -EFAULT;
297 argv = sp;
298 envp = argv + argc + 1;
299
300 /* Populate argv and envp */
301 p = current->mm->arg_end = current->mm->arg_start;
302 while (argc-- > 0) {
303 size_t len;
304 if (__put_user((elf_addr_t)p, argv++))
305 return -EFAULT;
306 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
307 if (!len || len > MAX_ARG_STRLEN)
308 return -EINVAL;
309 p += len;
310 }
311 if (__put_user(0, argv))
312 return -EFAULT;
313 current->mm->arg_end = current->mm->env_start = p;
314 while (envc-- > 0) {
315 size_t len;
316 if (__put_user((elf_addr_t)p, envp++))
317 return -EFAULT;
318 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
319 if (!len || len > MAX_ARG_STRLEN)
320 return -EINVAL;
321 p += len;
322 }
323 if (__put_user(0, envp))
324 return -EFAULT;
325 current->mm->env_end = p;
326
327 /* Put the elf_info on the stack in the right place. */
328 sp = (elf_addr_t __user *)envp + 1;
329 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
330 return -EFAULT;
331 return 0;
332 }
333
334 #ifndef elf_map
335
336 static unsigned long elf_map(struct file *filep, unsigned long addr,
337 struct elf_phdr *eppnt, int prot, int type,
338 unsigned long total_size)
339 {
340 unsigned long map_addr;
341 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
342 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
343 addr = ELF_PAGESTART(addr);
344 size = ELF_PAGEALIGN(size);
345
346 /* mmap() will return -EINVAL if given a zero size, but a
347 * segment with zero filesize is perfectly valid */
348 if (!size)
349 return addr;
350
351 /*
352 * total_size is the size of the ELF (interpreter) image.
353 * The _first_ mmap needs to know the full size, otherwise
354 * randomization might put this image into an overlapping
355 * position with the ELF binary image. (since size < total_size)
356 * So we first map the 'big' image - and unmap the remainder at
357 * the end. (which unmap is needed for ELF images with holes.)
358 */
359 if (total_size) {
360 total_size = ELF_PAGEALIGN(total_size);
361 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
362 if (!BAD_ADDR(map_addr))
363 vm_munmap(map_addr+size, total_size-size);
364 } else
365 map_addr = vm_mmap(filep, addr, size, prot, type, off);
366
367 return(map_addr);
368 }
369
370 #endif /* !elf_map */
371
372 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
373 {
374 int i, first_idx = -1, last_idx = -1;
375
376 for (i = 0; i < nr; i++) {
377 if (cmds[i].p_type == PT_LOAD) {
378 last_idx = i;
379 if (first_idx == -1)
380 first_idx = i;
381 }
382 }
383 if (first_idx == -1)
384 return 0;
385
386 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
387 ELF_PAGESTART(cmds[first_idx].p_vaddr);
388 }
389
390 /**
391 * load_elf_phdrs() - load ELF program headers
392 * @elf_ex: ELF header of the binary whose program headers should be loaded
393 * @elf_file: the opened ELF binary file
394 *
395 * Loads ELF program headers from the binary file elf_file, which has the ELF
396 * header pointed to by elf_ex, into a newly allocated array. The caller is
397 * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
398 */
399 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
400 struct file *elf_file)
401 {
402 struct elf_phdr *elf_phdata = NULL;
403 int retval, size, err = -1;
404
405 /*
406 * If the size of this structure has changed, then punt, since
407 * we will be doing the wrong thing.
408 */
409 if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
410 goto out;
411
412 /* Sanity check the number of program headers... */
413 if (elf_ex->e_phnum < 1 ||
414 elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
415 goto out;
416
417 /* ...and their total size. */
418 size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
419 if (size > ELF_MIN_ALIGN)
420 goto out;
421
422 elf_phdata = kmalloc(size, GFP_KERNEL);
423 if (!elf_phdata)
424 goto out;
425
426 /* Read in the program headers */
427 retval = kernel_read(elf_file, elf_ex->e_phoff,
428 (char *)elf_phdata, size);
429 if (retval != size) {
430 err = (retval < 0) ? retval : -EIO;
431 goto out;
432 }
433
434 /* Success! */
435 err = 0;
436 out:
437 if (err) {
438 kfree(elf_phdata);
439 elf_phdata = NULL;
440 }
441 return elf_phdata;
442 }
443
#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:	The main ELF header
 * @phdr:	The program header to check
 * @elf:	The open ELF file
 * @is_interp:	True if the phdr is from the interpreter of the ELF being
 *		loaded, else false.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

/**
 * arch_check_elf() - final arch-specific check of an ELF before loading
 * @ehdr:	The main ELF header
 * @has_interp:	True if the ELF has an interpreter, else false.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
511
512 /* This is much more generalized than the library routine read function,
513 so we keep this separate. Technically the library read function
514 is only provided so that we can read a.out libraries that have
515 an ELF header */
516
517 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
518 struct file *interpreter, unsigned long *interp_map_addr,
519 unsigned long no_base, struct elf_phdr *interp_elf_phdata)
520 {
521 struct elf_phdr *eppnt;
522 unsigned long load_addr = 0;
523 int load_addr_set = 0;
524 unsigned long last_bss = 0, elf_bss = 0;
525 unsigned long error = ~0UL;
526 unsigned long total_size;
527 int i;
528
529 /* First of all, some simple consistency checks */
530 if (interp_elf_ex->e_type != ET_EXEC &&
531 interp_elf_ex->e_type != ET_DYN)
532 goto out;
533 if (!elf_check_arch(interp_elf_ex))
534 goto out;
535 if (!interpreter->f_op->mmap)
536 goto out;
537
538 total_size = total_mapping_size(interp_elf_phdata,
539 interp_elf_ex->e_phnum);
540 if (!total_size) {
541 error = -EINVAL;
542 goto out;
543 }
544
545 eppnt = interp_elf_phdata;
546 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
547 if (eppnt->p_type == PT_LOAD) {
548 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
549 int elf_prot = 0;
550 unsigned long vaddr = 0;
551 unsigned long k, map_addr;
552
553 if (eppnt->p_flags & PF_R)
554 elf_prot = PROT_READ;
555 if (eppnt->p_flags & PF_W)
556 elf_prot |= PROT_WRITE;
557 if (eppnt->p_flags & PF_X)
558 elf_prot |= PROT_EXEC;
559 vaddr = eppnt->p_vaddr;
560 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
561 elf_type |= MAP_FIXED;
562 else if (no_base && interp_elf_ex->e_type == ET_DYN)
563 load_addr = -vaddr;
564
565 map_addr = elf_map(interpreter, load_addr + vaddr,
566 eppnt, elf_prot, elf_type, total_size);
567 total_size = 0;
568 if (!*interp_map_addr)
569 *interp_map_addr = map_addr;
570 error = map_addr;
571 if (BAD_ADDR(map_addr))
572 goto out;
573
574 if (!load_addr_set &&
575 interp_elf_ex->e_type == ET_DYN) {
576 load_addr = map_addr - ELF_PAGESTART(vaddr);
577 load_addr_set = 1;
578 }
579
580 /*
581 * Check to see if the section's size will overflow the
582 * allowed task size. Note that p_filesz must always be
583 * <= p_memsize so it's only necessary to check p_memsz.
584 */
585 k = load_addr + eppnt->p_vaddr;
586 if (BAD_ADDR(k) ||
587 eppnt->p_filesz > eppnt->p_memsz ||
588 eppnt->p_memsz > TASK_SIZE ||
589 TASK_SIZE - eppnt->p_memsz < k) {
590 error = -ENOMEM;
591 goto out;
592 }
593
594 /*
595 * Find the end of the file mapping for this phdr, and
596 * keep track of the largest address we see for this.
597 */
598 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
599 if (k > elf_bss)
600 elf_bss = k;
601
602 /*
603 * Do the same thing for the memory mapping - between
604 * elf_bss and last_bss is the bss section.
605 */
606 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
607 if (k > last_bss)
608 last_bss = k;
609 }
610 }
611
612 if (last_bss > elf_bss) {
613 /*
614 * Now fill out the bss section. First pad the last page up
615 * to the page boundary, and then perform a mmap to make sure
616 * that there are zero-mapped pages up to and including the
617 * last bss page.
618 */
619 if (padzero(elf_bss)) {
620 error = -EFAULT;
621 goto out;
622 }
623
624 /* What we have mapped so far */
625 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
626
627 /* Map the last of the bss segment */
628 error = vm_brk(elf_bss, last_bss - elf_bss);
629 if (BAD_ADDR(error))
630 goto out;
631 }
632
633 error = load_addr;
634 out:
635 return error;
636 }
637
638 /*
639 * These are the functions used to load ELF style executables and shared
640 * libraries. There is no binary dependent code anywhere else.
641 */
642
643 #ifndef STACK_RND_MASK
644 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
645 #endif
646
647 static unsigned long randomize_stack_top(unsigned long stack_top)
648 {
649 unsigned long random_variable = 0;
650
651 if ((current->flags & PF_RANDOMIZE) &&
652 !(current->personality & ADDR_NO_RANDOMIZE)) {
653 random_variable = (unsigned long) get_random_int();
654 random_variable &= STACK_RND_MASK;
655 random_variable <<= PAGE_SHIFT;
656 }
657 #ifdef CONFIG_STACK_GROWSUP
658 return PAGE_ALIGN(stack_top) + random_variable;
659 #else
660 return PAGE_ALIGN(stack_top) - random_variable;
661 #endif
662 }
663
664 static int load_elf_binary(struct linux_binprm *bprm)
665 {
666 struct file *interpreter = NULL; /* to shut gcc up */
667 unsigned long load_addr = 0, load_bias = 0;
668 int load_addr_set = 0;
669 char * elf_interpreter = NULL;
670 unsigned long error;
671 struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
672 unsigned long elf_bss, elf_brk;
673 int retval, i;
674 unsigned long elf_entry;
675 unsigned long interp_load_addr = 0;
676 unsigned long start_code, end_code, start_data, end_data;
677 unsigned long reloc_func_desc __maybe_unused = 0;
678 int executable_stack = EXSTACK_DEFAULT;
679 struct pt_regs *regs = current_pt_regs();
680 struct {
681 struct elfhdr elf_ex;
682 struct elfhdr interp_elf_ex;
683 } *loc;
684 struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
685
686 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
687 if (!loc) {
688 retval = -ENOMEM;
689 goto out_ret;
690 }
691
692 /* Get the exec-header */
693 loc->elf_ex = *((struct elfhdr *)bprm->buf);
694
695 retval = -ENOEXEC;
696 /* First of all, some simple consistency checks */
697 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
698 goto out;
699
700 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
701 goto out;
702 if (!elf_check_arch(&loc->elf_ex))
703 goto out;
704 if (!bprm->file->f_op->mmap)
705 goto out;
706
707 elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
708 if (!elf_phdata)
709 goto out;
710
711 elf_ppnt = elf_phdata;
712 elf_bss = 0;
713 elf_brk = 0;
714
715 start_code = ~0UL;
716 end_code = 0;
717 start_data = 0;
718 end_data = 0;
719
720 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
721 if (elf_ppnt->p_type == PT_INTERP) {
722 /* This is the program interpreter used for
723 * shared libraries - for now assume that this
724 * is an a.out format binary
725 */
726 retval = -ENOEXEC;
727 if (elf_ppnt->p_filesz > PATH_MAX ||
728 elf_ppnt->p_filesz < 2)
729 goto out_free_ph;
730
731 retval = -ENOMEM;
732 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
733 GFP_KERNEL);
734 if (!elf_interpreter)
735 goto out_free_ph;
736
737 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
738 elf_interpreter,
739 elf_ppnt->p_filesz);
740 if (retval != elf_ppnt->p_filesz) {
741 if (retval >= 0)
742 retval = -EIO;
743 goto out_free_interp;
744 }
745 /* make sure path is NULL terminated */
746 retval = -ENOEXEC;
747 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
748 goto out_free_interp;
749
750 interpreter = open_exec(elf_interpreter);
751 retval = PTR_ERR(interpreter);
752 if (IS_ERR(interpreter))
753 goto out_free_interp;
754
755 /*
756 * If the binary is not readable then enforce
757 * mm->dumpable = 0 regardless of the interpreter's
758 * permissions.
759 */
760 would_dump(bprm, interpreter);
761
762 retval = kernel_read(interpreter, 0, bprm->buf,
763 BINPRM_BUF_SIZE);
764 if (retval != BINPRM_BUF_SIZE) {
765 if (retval >= 0)
766 retval = -EIO;
767 goto out_free_dentry;
768 }
769
770 /* Get the exec headers */
771 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
772 break;
773 }
774 elf_ppnt++;
775 }
776
777 elf_ppnt = elf_phdata;
778 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
779 switch (elf_ppnt->p_type) {
780 case PT_GNU_STACK:
781 if (elf_ppnt->p_flags & PF_X)
782 executable_stack = EXSTACK_ENABLE_X;
783 else
784 executable_stack = EXSTACK_DISABLE_X;
785 break;
786
787 case PT_LOPROC ... PT_HIPROC:
788 retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
789 bprm->file, false,
790 &arch_state);
791 if (retval)
792 goto out_free_dentry;
793 break;
794 }
795
796 /* Some simple consistency checks for the interpreter */
797 if (elf_interpreter) {
798 retval = -ELIBBAD;
799 /* Not an ELF interpreter */
800 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
801 goto out_free_dentry;
802 /* Verify the interpreter has a valid arch */
803 if (!elf_check_arch(&loc->interp_elf_ex))
804 goto out_free_dentry;
805
806 /* Load the interpreter program headers */
807 interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
808 interpreter);
809 if (!interp_elf_phdata)
810 goto out_free_dentry;
811
812 /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
813 elf_ppnt = interp_elf_phdata;
814 for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
815 switch (elf_ppnt->p_type) {
816 case PT_LOPROC ... PT_HIPROC:
817 retval = arch_elf_pt_proc(&loc->interp_elf_ex,
818 elf_ppnt, interpreter,
819 true, &arch_state);
820 if (retval)
821 goto out_free_dentry;
822 break;
823 }
824 }
825
826 /*
827 * Allow arch code to reject the ELF at this point, whilst it's
828 * still possible to return an error to the code that invoked
829 * the exec syscall.
830 */
831 retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
832 if (retval)
833 goto out_free_dentry;
834
835 /* Flush all traces of the currently running executable */
836 retval = flush_old_exec(bprm);
837 if (retval)
838 goto out_free_dentry;
839
840 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
841 may depend on the personality. */
842 SET_PERSONALITY2(loc->elf_ex, &arch_state);
843 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
844 current->personality |= READ_IMPLIES_EXEC;
845
846 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
847 current->flags |= PF_RANDOMIZE;
848
849 setup_new_exec(bprm);
850
851 /* Do this so that we can load the interpreter, if need be. We will
852 change some of these later */
853 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
854 executable_stack);
855 if (retval < 0)
856 goto out_free_dentry;
857
858 current->mm->start_stack = bprm->p;
859
860 /* Now we do a little grungy work by mmapping the ELF image into
861 the correct location in memory. */
862 for(i = 0, elf_ppnt = elf_phdata;
863 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
864 int elf_prot = 0, elf_flags;
865 unsigned long k, vaddr;
866 unsigned long total_size = 0;
867
868 if (elf_ppnt->p_type != PT_LOAD)
869 continue;
870
871 if (unlikely (elf_brk > elf_bss)) {
872 unsigned long nbyte;
873
874 /* There was a PT_LOAD segment with p_memsz > p_filesz
875 before this one. Map anonymous pages, if needed,
876 and clear the area. */
877 retval = set_brk(elf_bss + load_bias,
878 elf_brk + load_bias);
879 if (retval)
880 goto out_free_dentry;
881 nbyte = ELF_PAGEOFFSET(elf_bss);
882 if (nbyte) {
883 nbyte = ELF_MIN_ALIGN - nbyte;
884 if (nbyte > elf_brk - elf_bss)
885 nbyte = elf_brk - elf_bss;
886 if (clear_user((void __user *)elf_bss +
887 load_bias, nbyte)) {
888 /*
889 * This bss-zeroing can fail if the ELF
890 * file specifies odd protections. So
891 * we don't check the return value
892 */
893 }
894 }
895 }
896
897 if (elf_ppnt->p_flags & PF_R)
898 elf_prot |= PROT_READ;
899 if (elf_ppnt->p_flags & PF_W)
900 elf_prot |= PROT_WRITE;
901 if (elf_ppnt->p_flags & PF_X)
902 elf_prot |= PROT_EXEC;
903
904 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
905
906 vaddr = elf_ppnt->p_vaddr;
907 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
908 elf_flags |= MAP_FIXED;
909 } else if (loc->elf_ex.e_type == ET_DYN) {
910 /* Try and get dynamic programs out of the way of the
911 * default mmap base, as well as whatever program they
912 * might try to exec. This is because the brk will
913 * follow the loader, and is not movable. */
914 load_bias = ELF_ET_DYN_BASE - vaddr;
915 if (current->flags & PF_RANDOMIZE)
916 load_bias += arch_mmap_rnd();
917 load_bias = ELF_PAGESTART(load_bias);
918 total_size = total_mapping_size(elf_phdata,
919 loc->elf_ex.e_phnum);
920 if (!total_size) {
921 retval = -EINVAL;
922 goto out_free_dentry;
923 }
924 }
925
926 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
927 elf_prot, elf_flags, total_size);
928 if (BAD_ADDR(error)) {
929 retval = IS_ERR((void *)error) ?
930 PTR_ERR((void*)error) : -EINVAL;
931 goto out_free_dentry;
932 }
933
934 if (!load_addr_set) {
935 load_addr_set = 1;
936 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
937 if (loc->elf_ex.e_type == ET_DYN) {
938 load_bias += error -
939 ELF_PAGESTART(load_bias + vaddr);
940 load_addr += load_bias;
941 reloc_func_desc = load_bias;
942 }
943 }
944 k = elf_ppnt->p_vaddr;
945 if (k < start_code)
946 start_code = k;
947 if (start_data < k)
948 start_data = k;
949
950 /*
951 * Check to see if the section's size will overflow the
952 * allowed task size. Note that p_filesz must always be
953 * <= p_memsz so it is only necessary to check p_memsz.
954 */
955 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
956 elf_ppnt->p_memsz > TASK_SIZE ||
957 TASK_SIZE - elf_ppnt->p_memsz < k) {
958 /* set_brk can never work. Avoid overflows. */
959 retval = -EINVAL;
960 goto out_free_dentry;
961 }
962
963 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
964
965 if (k > elf_bss)
966 elf_bss = k;
967 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
968 end_code = k;
969 if (end_data < k)
970 end_data = k;
971 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
972 if (k > elf_brk)
973 elf_brk = k;
974 }
975
976 loc->elf_ex.e_entry += load_bias;
977 elf_bss += load_bias;
978 elf_brk += load_bias;
979 start_code += load_bias;
980 end_code += load_bias;
981 start_data += load_bias;
982 end_data += load_bias;
983
984 /* Calling set_brk effectively mmaps the pages that we need
985 * for the bss and break sections. We must do this before
986 * mapping in the interpreter, to make sure it doesn't wind
987 * up getting placed where the bss needs to go.
988 */
989 retval = set_brk(elf_bss, elf_brk);
990 if (retval)
991 goto out_free_dentry;
992 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
993 retval = -EFAULT; /* Nobody gets to see this, but.. */
994 goto out_free_dentry;
995 }
996
997 if (elf_interpreter) {
998 unsigned long interp_map_addr = 0;
999
1000 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1001 interpreter,
1002 &interp_map_addr,
1003 load_bias, interp_elf_phdata);
1004 if (!IS_ERR((void *)elf_entry)) {
1005 /*
1006 * load_elf_interp() returns relocation
1007 * adjustment
1008 */
1009 interp_load_addr = elf_entry;
1010 elf_entry += loc->interp_elf_ex.e_entry;
1011 }
1012 if (BAD_ADDR(elf_entry)) {
1013 retval = IS_ERR((void *)elf_entry) ?
1014 (int)elf_entry : -EINVAL;
1015 goto out_free_dentry;
1016 }
1017 reloc_func_desc = interp_load_addr;
1018
1019 allow_write_access(interpreter);
1020 fput(interpreter);
1021 kfree(elf_interpreter);
1022 } else {
1023 elf_entry = loc->elf_ex.e_entry;
1024 if (BAD_ADDR(elf_entry)) {
1025 retval = -EINVAL;
1026 goto out_free_dentry;
1027 }
1028 }
1029
1030 kfree(interp_elf_phdata);
1031 kfree(elf_phdata);
1032
1033 set_binfmt(&elf_format);
1034
1035 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1036 retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1037 if (retval < 0)
1038 goto out;
1039 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1040
1041 install_exec_creds(bprm);
1042 retval = create_elf_tables(bprm, &loc->elf_ex,
1043 load_addr, interp_load_addr);
1044 if (retval < 0)
1045 goto out;
1046 /* N.B. passed_fileno might not be initialized? */
1047 current->mm->end_code = end_code;
1048 current->mm->start_code = start_code;
1049 current->mm->start_data = start_data;
1050 current->mm->end_data = end_data;
1051 current->mm->start_stack = bprm->p;
1052
1053 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1054 current->mm->brk = current->mm->start_brk =
1055 arch_randomize_brk(current->mm);
1056 #ifdef compat_brk_randomized
1057 current->brk_randomized = 1;
1058 #endif
1059 }
1060
1061 if (current->personality & MMAP_PAGE_ZERO) {
1062 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1063 and some applications "depend" upon this behavior.
1064 Since we do not have the power to recompile these, we
1065 emulate the SVr4 behavior. Sigh. */
1066 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1067 MAP_FIXED | MAP_PRIVATE, 0);
1068 }
1069
1070 #ifdef ELF_PLAT_INIT
1071 /*
1072 * The ABI may specify that certain registers be set up in special
1073 * ways (on i386 %edx is the address of a DT_FINI function, for
1074 * example. In addition, it may also specify (eg, PowerPC64 ELF)
1075 * that the e_entry field is the address of the function descriptor
1076 * for the startup routine, rather than the address of the startup
1077 * routine itself. This macro performs whatever initialization to
1078 * the regs structure is required as well as any relocations to the
1079 * function descriptor entries when executing dynamically links apps.
1080 */
1081 ELF_PLAT_INIT(regs, reloc_func_desc);
1082 #endif
1083
1084 start_thread(regs, elf_entry, bprm->p);
1085 retval = 0;
1086 out:
1087 kfree(loc);
1088 out_ret:
1089 return retval;
1090
1091 /* error cleanup */
1092 out_free_dentry:
1093 kfree(interp_elf_phdata);
1094 allow_write_access(interpreter);
1095 if (interpreter)
1096 fput(interpreter);
1097 out_free_interp:
1098 kfree(elf_interpreter);
1099 out_free_ph:
1100 kfree(elf_phdata);
1101 goto out;
1102 }
1103
1104 #ifdef CONFIG_USELIB
1105 /* This is really simpleminded and specialized - we are loading an
1106 a.out library that is given an ELF header. */
1107 static int load_elf_library(struct file *file)
1108 {
1109 struct elf_phdr *elf_phdata;
1110 struct elf_phdr *eppnt;
1111 unsigned long elf_bss, bss, len;
1112 int retval, error, i, j;
1113 struct elfhdr elf_ex;
1114
1115 error = -ENOEXEC;
1116 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1117 if (retval != sizeof(elf_ex))
1118 goto out;
1119
1120 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1121 goto out;
1122
1123 /* First of all, some simple consistency checks */
1124 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1125 !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1126 goto out;
1127
1128 /* Now read in all of the header information */
1129
1130 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1131 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1132
1133 error = -ENOMEM;
1134 elf_phdata = kmalloc(j, GFP_KERNEL);
1135 if (!elf_phdata)
1136 goto out;
1137
1138 eppnt = elf_phdata;
1139 error = -ENOEXEC;
1140 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1141 if (retval != j)
1142 goto out_free_ph;
1143
1144 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1145 if ((eppnt + i)->p_type == PT_LOAD)
1146 j++;
1147 if (j != 1)
1148 goto out_free_ph;
1149
1150 while (eppnt->p_type != PT_LOAD)
1151 eppnt++;
1152
1153 /* Now use mmap to map the library into memory. */
1154 error = vm_mmap(file,
1155 ELF_PAGESTART(eppnt->p_vaddr),
1156 (eppnt->p_filesz +
1157 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1158 PROT_READ | PROT_WRITE | PROT_EXEC,
1159 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1160 (eppnt->p_offset -
1161 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1162 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1163 goto out_free_ph;
1164
1165 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1166 if (padzero(elf_bss)) {
1167 error = -EFAULT;
1168 goto out_free_ph;
1169 }
1170
1171 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1172 ELF_MIN_ALIGN - 1);
1173 bss = eppnt->p_memsz + eppnt->p_vaddr;
1174 if (bss > len)
1175 vm_brk(len, bss - len);
1176 error = 0;
1177
1178 out_free_ph:
1179 kfree(elf_phdata);
1180 out:
1181 return error;
1182 }
1183 #endif /* #ifdef CONFIG_USELIB */
1184
1185 #ifdef CONFIG_ELF_CORE
1186 /*
1187 * ELF core dumper
1188 *
1189 * Modelled on fs/exec.c:aout_core_dump()
1190 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1191 */
1192
1193 /*
1194 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1195 * that are useful for post-mortem analysis are included in every core dump.
1196 * In that way we ensure that the core dump is fully interpretable later
1197 * without matching up the same kernel and hardware config to see what PC values
1198 * meant. These special mappings include - vDSO, vsyscall, and other
1199 * architecture specific mappings
1200 */
1201 static bool always_dump_vma(struct vm_area_struct *vma)
1202 {
1203 /* Any vsyscall mappings? */
1204 if (vma == get_gate_vma(vma->vm_mm))
1205 return true;
1206
1207 /*
1208 * Assume that all vmas with a .name op should always be dumped.
1209 * If this changes, a new vm_ops field can easily be added.
1210 */
1211 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1212 return true;
1213
1214 /*
1215 * arch_vma_name() returns non-NULL for special architecture mappings,
1216 * such as vDSO sections.
1217 */
1218 if (arch_vma_name(vma))
1219 return true;
1220
1221 return false;
1222 }
1223
1224 /*
1225 * Decide what to dump of a segment, part, all or none.
1226 */
1227 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1228 unsigned long mm_flags)
1229 {
1230 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1231
1232 /* always dump the vdso and vsyscall sections */
1233 if (always_dump_vma(vma))
1234 goto whole;
1235
1236 if (vma->vm_flags & VM_DONTDUMP)
1237 return 0;
1238
1239 /* Hugetlb memory check */
1240 if (vma->vm_flags & VM_HUGETLB) {
1241 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1242 goto whole;
1243 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1244 goto whole;
1245 return 0;
1246 }
1247
1248 /* Do not dump I/O mapped devices or special mappings */
1249 if (vma->vm_flags & VM_IO)
1250 return 0;
1251
1252 /* By default, dump shared memory if mapped from an anonymous file. */
1253 if (vma->vm_flags & VM_SHARED) {
1254 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1255 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1256 goto whole;
1257 return 0;
1258 }
1259
1260 /* Dump segments that have been written to. */
1261 if (vma->anon_vma && FILTER(ANON_PRIVATE))
1262 goto whole;
1263 if (vma->vm_file == NULL)
1264 return 0;
1265
1266 if (FILTER(MAPPED_PRIVATE))
1267 goto whole;
1268
1269 /*
1270 * If this looks like the beginning of a DSO or executable mapping,
1271 * check for an ELF header. If we find one, dump the first page to
1272 * aid in determining what was mapped here.
1273 */
1274 if (FILTER(ELF_HEADERS) &&
1275 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1276 u32 __user *header = (u32 __user *) vma->vm_start;
1277 u32 word;
1278 mm_segment_t fs = get_fs();
1279 /*
1280 * Doing it this way gets the constant folded by GCC.
1281 */
1282 union {
1283 u32 cmp;
1284 char elfmag[SELFMAG];
1285 } magic;
1286 BUILD_BUG_ON(SELFMAG != sizeof word);
1287 magic.elfmag[EI_MAG0] = ELFMAG0;
1288 magic.elfmag[EI_MAG1] = ELFMAG1;
1289 magic.elfmag[EI_MAG2] = ELFMAG2;
1290 magic.elfmag[EI_MAG3] = ELFMAG3;
1291 /*
1292 * Switch to the user "segment" for get_user(),
1293 * then put back what elf_core_dump() had in place.
1294 */
1295 set_fs(USER_DS);
1296 if (unlikely(get_user(word, header)))
1297 word = 0;
1298 set_fs(fs);
1299 if (word == magic.cmp)
1300 return PAGE_SIZE;
1301 }
1302
1303 #undef FILTER
1304
1305 return 0;
1306
1307 whole:
1308 return vma->vm_end - vma->vm_start;
1309 }
1310
/*
 * An ELF note in memory: describes one note before it is sized by
 * notesize() and emitted by writenote().
 */
struct memelfnote
{
	const char *name;	/* NUL-terminated note name; measured with strlen() */
	int type;		/* note type, written out as n_type */
	unsigned int datasz;	/* length in bytes of the payload at @data */
	void *data;		/* note payload (descriptor) */
};
1319
1320 static int notesize(struct memelfnote *en)
1321 {
1322 int sz;
1323
1324 sz = sizeof(struct elf_note);
1325 sz += roundup(strlen(en->name) + 1, 4);
1326 sz += roundup(en->datasz, 4);
1327
1328 return sz;
1329 }
1330
1331 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1332 {
1333 struct elf_note en;
1334 en.n_namesz = strlen(men->name) + 1;
1335 en.n_descsz = men->datasz;
1336 en.n_type = men->type;
1337
1338 return dump_emit(cprm, &en, sizeof(en)) &&
1339 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1340 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1341 }
1342
1343 static void fill_elf_header(struct elfhdr *elf, int segs,
1344 u16 machine, u32 flags)
1345 {
1346 memset(elf, 0, sizeof(*elf));
1347
1348 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1349 elf->e_ident[EI_CLASS] = ELF_CLASS;
1350 elf->e_ident[EI_DATA] = ELF_DATA;
1351 elf->e_ident[EI_VERSION] = EV_CURRENT;
1352 elf->e_ident[EI_OSABI] = ELF_OSABI;
1353
1354 elf->e_type = ET_CORE;
1355 elf->e_machine = machine;
1356 elf->e_version = EV_CURRENT;
1357 elf->e_phoff = sizeof(struct elfhdr);
1358 elf->e_flags = flags;
1359 elf->e_ehsize = sizeof(struct elfhdr);
1360 elf->e_phentsize = sizeof(struct elf_phdr);
1361 elf->e_phnum = segs;
1362
1363 return;
1364 }
1365
1366 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1367 {
1368 phdr->p_type = PT_NOTE;
1369 phdr->p_offset = offset;
1370 phdr->p_vaddr = 0;
1371 phdr->p_paddr = 0;
1372 phdr->p_filesz = sz;
1373 phdr->p_memsz = 0;
1374 phdr->p_flags = 0;
1375 phdr->p_align = 0;
1376 return;
1377 }
1378
1379 static void fill_note(struct memelfnote *note, const char *name, int type,
1380 unsigned int sz, void *data)
1381 {
1382 note->name = name;
1383 note->type = type;
1384 note->datasz = sz;
1385 note->data = data;
1386 return;
1387 }
1388
1389 /*
1390 * fill up all the fields in prstatus from the given task struct, except
1391 * registers which need to be filled up separately.
1392 */
1393 static void fill_prstatus(struct elf_prstatus *prstatus,
1394 struct task_struct *p, long signr)
1395 {
1396 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1397 prstatus->pr_sigpend = p->pending.signal.sig[0];
1398 prstatus->pr_sighold = p->blocked.sig[0];
1399 rcu_read_lock();
1400 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1401 rcu_read_unlock();
1402 prstatus->pr_pid = task_pid_vnr(p);
1403 prstatus->pr_pgrp = task_pgrp_vnr(p);
1404 prstatus->pr_sid = task_session_vnr(p);
1405 if (thread_group_leader(p)) {
1406 struct task_cputime cputime;
1407
1408 /*
1409 * This is the record for the group leader. It shows the
1410 * group-wide total, not its individual thread total.
1411 */
1412 thread_group_cputime(p, &cputime);
1413 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1414 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1415 } else {
1416 cputime_t utime, stime;
1417
1418 task_cputime(p, &utime, &stime);
1419 cputime_to_timeval(utime, &prstatus->pr_utime);
1420 cputime_to_timeval(stime, &prstatus->pr_stime);
1421 }
1422 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1423 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1424 }
1425
1426 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1427 struct mm_struct *mm)
1428 {
1429 const struct cred *cred;
1430 unsigned int i, len;
1431
1432 /* first copy the parameters from user space */
1433 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1434
1435 len = mm->arg_end - mm->arg_start;
1436 if (len >= ELF_PRARGSZ)
1437 len = ELF_PRARGSZ-1;
1438 if (copy_from_user(&psinfo->pr_psargs,
1439 (const char __user *)mm->arg_start, len))
1440 return -EFAULT;
1441 for(i = 0; i < len; i++)
1442 if (psinfo->pr_psargs[i] == 0)
1443 psinfo->pr_psargs[i] = ' ';
1444 psinfo->pr_psargs[len] = 0;
1445
1446 rcu_read_lock();
1447 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1448 rcu_read_unlock();
1449 psinfo->pr_pid = task_pid_vnr(p);
1450 psinfo->pr_pgrp = task_pgrp_vnr(p);
1451 psinfo->pr_sid = task_session_vnr(p);
1452
1453 i = p->state ? ffz(~p->state) + 1 : 0;
1454 psinfo->pr_state = i;
1455 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1456 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1457 psinfo->pr_nice = task_nice(p);
1458 psinfo->pr_flag = p->flags;
1459 rcu_read_lock();
1460 cred = __task_cred(p);
1461 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1462 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1463 rcu_read_unlock();
1464 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1465
1466 return 0;
1467 }
1468
1469 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1470 {
1471 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1472 int i = 0;
1473 do
1474 i += 2;
1475 while (auxv[i - 2] != AT_NULL);
1476 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1477 }
1478
1479 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1480 const siginfo_t *siginfo)
1481 {
1482 mm_segment_t old_fs = get_fs();
1483 set_fs(KERNEL_DS);
1484 copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1485 set_fs(old_fs);
1486 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1487 }
1488
1489 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1490 /*
1491 * Format of NT_FILE note:
1492 *
1493 * long count -- how many files are mapped
1494 * long page_size -- units for file_ofs
1495 * array of [COUNT] elements of
1496 * long start
1497 * long end
1498 * long file_ofs
1499 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1500 */
1501 static int fill_files_note(struct memelfnote *note)
1502 {
1503 struct vm_area_struct *vma;
1504 unsigned count, size, names_ofs, remaining, n;
1505 user_long_t *data;
1506 user_long_t *start_end_ofs;
1507 char *name_base, *name_curpos;
1508
1509 /* *Estimated* file count and total data size needed */
1510 count = current->mm->map_count;
1511 size = count * 64;
1512
1513 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1514 alloc:
1515 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1516 return -EINVAL;
1517 size = round_up(size, PAGE_SIZE);
1518 data = vmalloc(size);
1519 if (!data)
1520 return -ENOMEM;
1521
1522 start_end_ofs = data + 2;
1523 name_base = name_curpos = ((char *)data) + names_ofs;
1524 remaining = size - names_ofs;
1525 count = 0;
1526 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1527 struct file *file;
1528 const char *filename;
1529
1530 file = vma->vm_file;
1531 if (!file)
1532 continue;
1533 filename = file_path(file, name_curpos, remaining);
1534 if (IS_ERR(filename)) {
1535 if (PTR_ERR(filename) == -ENAMETOOLONG) {
1536 vfree(data);
1537 size = size * 5 / 4;
1538 goto alloc;
1539 }
1540 continue;
1541 }
1542
1543 /* file_path() fills at the end, move name down */
1544 /* n = strlen(filename) + 1: */
1545 n = (name_curpos + remaining) - filename;
1546 remaining = filename - name_curpos;
1547 memmove(name_curpos, filename, n);
1548 name_curpos += n;
1549
1550 *start_end_ofs++ = vma->vm_start;
1551 *start_end_ofs++ = vma->vm_end;
1552 *start_end_ofs++ = vma->vm_pgoff;
1553 count++;
1554 }
1555
1556 /* Now we know exact count of files, can store it */
1557 data[0] = count;
1558 data[1] = PAGE_SIZE;
1559 /*
1560 * Count usually is less than current->mm->map_count,
1561 * we need to move filenames down.
1562 */
1563 n = current->mm->map_count - count;
1564 if (n != 0) {
1565 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1566 memmove(name_base - shift_bytes, name_base,
1567 name_curpos - name_base);
1568 name_curpos -= shift_bytes;
1569 }
1570
1571 size = name_curpos - (char *)data;
1572 fill_note(note, "CORE", NT_FILE, size, data);
1573 return 0;
1574 }
1575
1576 #ifdef CORE_DUMP_USE_REGSET
1577 #include <linux/regset.h>
1578
1579 struct elf_thread_core_info {
1580 struct elf_thread_core_info *next;
1581 struct task_struct *task;
1582 struct elf_prstatus prstatus;
1583 struct memelfnote notes[0];
1584 };
1585
/*
 * Everything gathered for the notes section of a core file (regset
 * variant): the per-thread list plus the process-wide notes.
 */
struct elf_note_info {
	struct elf_thread_core_info *thread;	/* per-thread list; fill_note_info() keeps the dumping task at the head */
	struct memelfnote psinfo;		/* NT_PRPSINFO; data is kmalloc()ed */
	struct memelfnote signote;		/* NT_SIGINFO; data points at csigdata */
	struct memelfnote auxv;			/* NT_AUXV */
	struct memelfnote files;		/* NT_FILE; data is vfree()d, unset if the note could not be built */
	user_siginfo_t csigdata;		/* storage backing the signote payload */
	size_t size;				/* running total of notesize() over all collected notes */
	int thread_notes;			/* number of note slots allocated per thread */
};
1596
1597 /*
1598 * When a regset has a writeback hook, we call it on each thread before
1599 * dumping user memory. On register window machines, this makes sure the
1600 * user memory backing the register data is up to date before we read it.
1601 */
1602 static void do_thread_regset_writeback(struct task_struct *task,
1603 const struct user_regset *regset)
1604 {
1605 if (regset->writeback)
1606 regset->writeback(task, regset, 1);
1607 }
1608
/*
 * Default helpers for laying out NT_PRSTATUS; each definition is used only
 * when the macro has not already been defined.
 */

/* Number of bytes of regset 0 data requested for the pr_reg area. */
#ifndef PR_REG_SIZE
#define PR_REG_SIZE(S) sizeof(S)
#endif

/* Payload size recorded for the NT_PRSTATUS note. */
#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S) sizeof(S)
#endif

/* Address of the pr_reg member that receives the regset 0 data. */
#ifndef PR_REG_PTR
#define PR_REG_PTR(S) (&((S)->pr_reg))
#endif

/* Record in the prstatus whether floating point state is present. */
#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#endif
1624
1625 static int fill_thread_core_info(struct elf_thread_core_info *t,
1626 const struct user_regset_view *view,
1627 long signr, size_t *total)
1628 {
1629 unsigned int i;
1630
1631 /*
1632 * NT_PRSTATUS is the one special case, because the regset data
1633 * goes into the pr_reg field inside the note contents, rather
1634 * than being the whole note contents. We fill the reset in here.
1635 * We assume that regset 0 is NT_PRSTATUS.
1636 */
1637 fill_prstatus(&t->prstatus, t->task, signr);
1638 (void) view->regsets[0].get(t->task, &view->regsets[0],
1639 0, PR_REG_SIZE(t->prstatus.pr_reg),
1640 PR_REG_PTR(&t->prstatus), NULL);
1641
1642 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1643 PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1644 *total += notesize(&t->notes[0]);
1645
1646 do_thread_regset_writeback(t->task, &view->regsets[0]);
1647
1648 /*
1649 * Each other regset might generate a note too. For each regset
1650 * that has no core_note_type or is inactive, we leave t->notes[i]
1651 * all zero and we'll know to skip writing it later.
1652 */
1653 for (i = 1; i < view->n; ++i) {
1654 const struct user_regset *regset = &view->regsets[i];
1655 do_thread_regset_writeback(t->task, regset);
1656 if (regset->core_note_type && regset->get &&
1657 (!regset->active || regset->active(t->task, regset))) {
1658 int ret;
1659 size_t size = regset->n * regset->size;
1660 void *data = kmalloc(size, GFP_KERNEL);
1661 if (unlikely(!data))
1662 return 0;
1663 ret = regset->get(t->task, regset,
1664 0, size, data, NULL);
1665 if (unlikely(ret))
1666 kfree(data);
1667 else {
1668 if (regset->core_note_type != NT_PRFPREG)
1669 fill_note(&t->notes[i], "LINUX",
1670 regset->core_note_type,
1671 size, data);
1672 else {
1673 SET_PR_FPVALID(&t->prstatus, 1);
1674 fill_note(&t->notes[i], "CORE",
1675 NT_PRFPREG, size, data);
1676 }
1677 *total += notesize(&t->notes[i]);
1678 }
1679 }
1680 }
1681
1682 return 1;
1683 }
1684
1685 static int fill_note_info(struct elfhdr *elf, int phdrs,
1686 struct elf_note_info *info,
1687 const siginfo_t *siginfo, struct pt_regs *regs)
1688 {
1689 struct task_struct *dump_task = current;
1690 const struct user_regset_view *view = task_user_regset_view(dump_task);
1691 struct elf_thread_core_info *t;
1692 struct elf_prpsinfo *psinfo;
1693 struct core_thread *ct;
1694 unsigned int i;
1695
1696 info->size = 0;
1697 info->thread = NULL;
1698
1699 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1700 if (psinfo == NULL) {
1701 info->psinfo.data = NULL; /* So we don't free this wrongly */
1702 return 0;
1703 }
1704
1705 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1706
1707 /*
1708 * Figure out how many notes we're going to need for each thread.
1709 */
1710 info->thread_notes = 0;
1711 for (i = 0; i < view->n; ++i)
1712 if (view->regsets[i].core_note_type != 0)
1713 ++info->thread_notes;
1714
1715 /*
1716 * Sanity check. We rely on regset 0 being in NT_PRSTATUS,
1717 * since it is our one special case.
1718 */
1719 if (unlikely(info->thread_notes == 0) ||
1720 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1721 WARN_ON(1);
1722 return 0;
1723 }
1724
1725 /*
1726 * Initialize the ELF file header.
1727 */
1728 fill_elf_header(elf, phdrs,
1729 view->e_machine, view->e_flags);
1730
1731 /*
1732 * Allocate a structure for each thread.
1733 */
1734 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1735 t = kzalloc(offsetof(struct elf_thread_core_info,
1736 notes[info->thread_notes]),
1737 GFP_KERNEL);
1738 if (unlikely(!t))
1739 return 0;
1740
1741 t->task = ct->task;
1742 if (ct->task == dump_task || !info->thread) {
1743 t->next = info->thread;
1744 info->thread = t;
1745 } else {
1746 /*
1747 * Make sure to keep the original task at
1748 * the head of the list.
1749 */
1750 t->next = info->thread->next;
1751 info->thread->next = t;
1752 }
1753 }
1754
1755 /*
1756 * Now fill in each thread's information.
1757 */
1758 for (t = info->thread; t != NULL; t = t->next)
1759 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1760 return 0;
1761
1762 /*
1763 * Fill in the two process-wide notes.
1764 */
1765 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1766 info->size += notesize(&info->psinfo);
1767
1768 fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1769 info->size += notesize(&info->signote);
1770
1771 fill_auxv_note(&info->auxv, current->mm);
1772 info->size += notesize(&info->auxv);
1773
1774 if (fill_files_note(&info->files) == 0)
1775 info->size += notesize(&info->files);
1776
1777 return 1;
1778 }
1779
1780 static size_t get_note_info_size(struct elf_note_info *info)
1781 {
1782 return info->size;
1783 }
1784
1785 /*
1786 * Write all the notes for each thread. When writing the first thread, the
1787 * process-wide notes are interleaved after the first thread-specific note.
1788 */
1789 static int write_note_info(struct elf_note_info *info,
1790 struct coredump_params *cprm)
1791 {
1792 bool first = true;
1793 struct elf_thread_core_info *t = info->thread;
1794
1795 do {
1796 int i;
1797
1798 if (!writenote(&t->notes[0], cprm))
1799 return 0;
1800
1801 if (first && !writenote(&info->psinfo, cprm))
1802 return 0;
1803 if (first && !writenote(&info->signote, cprm))
1804 return 0;
1805 if (first && !writenote(&info->auxv, cprm))
1806 return 0;
1807 if (first && info->files.data &&
1808 !writenote(&info->files, cprm))
1809 return 0;
1810
1811 for (i = 1; i < info->thread_notes; ++i)
1812 if (t->notes[i].data &&
1813 !writenote(&t->notes[i], cprm))
1814 return 0;
1815
1816 first = false;
1817 t = t->next;
1818 } while (t);
1819
1820 return 1;
1821 }
1822
1823 static void free_note_info(struct elf_note_info *info)
1824 {
1825 struct elf_thread_core_info *threads = info->thread;
1826 while (threads) {
1827 unsigned int i;
1828 struct elf_thread_core_info *t = threads;
1829 threads = t->next;
1830 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1831 for (i = 1; i < info->thread_notes; ++i)
1832 kfree(t->notes[i].data);
1833 kfree(t);
1834 }
1835 kfree(info->psinfo.data);
1836 vfree(info->files.data);
1837 }
1838
1839 #else
1840
/*
 * Here is the structure in which status of each thread is captured.
 * The payloads of the notes live inside the structure itself.
 */
struct elf_thread_status
{
	struct list_head list;		/* link in elf_note_info.thread_list */
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;	/* the thread being described */
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
#endif
	struct memelfnote notes[3];	/* prstatus, fpu and (optionally) xfpu notes */
	int num_notes;			/* count of notes filled in by elf_dump_thread_status() */
};
1854
1855 /*
1856 * In order to add the specific thread information for the elf file format,
1857 * we need to keep a linked list of every threads pr_status and then create
1858 * a single section for them in the final core file.
1859 */
1860 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1861 {
1862 int sz = 0;
1863 struct task_struct *p = t->thread;
1864 t->num_notes = 0;
1865
1866 fill_prstatus(&t->prstatus, p, signr);
1867 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1868
1869 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1870 &(t->prstatus));
1871 t->num_notes++;
1872 sz += notesize(&t->notes[0]);
1873
1874 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1875 &t->fpu))) {
1876 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1877 &(t->fpu));
1878 t->num_notes++;
1879 sz += notesize(&t->notes[1]);
1880 }
1881
1882 #ifdef ELF_CORE_COPY_XFPREGS
1883 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1884 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1885 sizeof(t->xfpu), &t->xfpu);
1886 t->num_notes++;
1887 sz += notesize(&t->notes[2]);
1888 }
1889 #endif
1890 return sz;
1891 }
1892
/*
 * Everything gathered for the notes section of a core file (variant used
 * without CORE_DUMP_USE_REGSET).
 */
struct elf_note_info {
	struct memelfnote *notes;	/* array of 8 notes for the dumping task and the process */
	struct memelfnote *notes_files;	/* the NT_FILE entry within notes, or NULL if there is none */
	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
	struct list_head thread_list;	/* elf_thread_status entries for the other threads */
	elf_fpregset_t *fpu;		/* NT_PRFPREG payload for the dumping task */
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu;		/* ELF_CORE_XFPREG_TYPE payload for the dumping task */
#endif
	user_siginfo_t csigdata;	/* storage backing the NT_SIGINFO payload */
	int thread_status_size;		/* total note size of the entries on thread_list */
	int numnote;			/* number of entries of notes in use */
};
1907
1908 static int elf_note_info_init(struct elf_note_info *info)
1909 {
1910 memset(info, 0, sizeof(*info));
1911 INIT_LIST_HEAD(&info->thread_list);
1912
1913 /* Allocate space for ELF notes */
1914 info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1915 if (!info->notes)
1916 return 0;
1917 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1918 if (!info->psinfo)
1919 return 0;
1920 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1921 if (!info->prstatus)
1922 return 0;
1923 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1924 if (!info->fpu)
1925 return 0;
1926 #ifdef ELF_CORE_COPY_XFPREGS
1927 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1928 if (!info->xfpu)
1929 return 0;
1930 #endif
1931 return 1;
1932 }
1933
1934 static int fill_note_info(struct elfhdr *elf, int phdrs,
1935 struct elf_note_info *info,
1936 const siginfo_t *siginfo, struct pt_regs *regs)
1937 {
1938 struct list_head *t;
1939 struct core_thread *ct;
1940 struct elf_thread_status *ets;
1941
1942 if (!elf_note_info_init(info))
1943 return 0;
1944
1945 for (ct = current->mm->core_state->dumper.next;
1946 ct; ct = ct->next) {
1947 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1948 if (!ets)
1949 return 0;
1950
1951 ets->thread = ct->task;
1952 list_add(&ets->list, &info->thread_list);
1953 }
1954
1955 list_for_each(t, &info->thread_list) {
1956 int sz;
1957
1958 ets = list_entry(t, struct elf_thread_status, list);
1959 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1960 info->thread_status_size += sz;
1961 }
1962 /* now collect the dump for the current */
1963 memset(info->prstatus, 0, sizeof(*info->prstatus));
1964 fill_prstatus(info->prstatus, current, siginfo->si_signo);
1965 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1966
1967 /* Set up header */
1968 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1969
1970 /*
1971 * Set up the notes in similar form to SVR4 core dumps made
1972 * with info from their /proc.
1973 */
1974
1975 fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1976 sizeof(*info->prstatus), info->prstatus);
1977 fill_psinfo(info->psinfo, current->group_leader, current->mm);
1978 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1979 sizeof(*info->psinfo), info->psinfo);
1980
1981 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1982 fill_auxv_note(info->notes + 3, current->mm);
1983 info->numnote = 4;
1984
1985 if (fill_files_note(info->notes + info->numnote) == 0) {
1986 info->notes_files = info->notes + info->numnote;
1987 info->numnote++;
1988 }
1989
1990 /* Try to dump the FPU. */
1991 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1992 info->fpu);
1993 if (info->prstatus->pr_fpvalid)
1994 fill_note(info->notes + info->numnote++,
1995 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1996 #ifdef ELF_CORE_COPY_XFPREGS
1997 if (elf_core_copy_task_xfpregs(current, info->xfpu))
1998 fill_note(info->notes + info->numnote++,
1999 "LINUX", ELF_CORE_XFPREG_TYPE,
2000 sizeof(*info->xfpu), info->xfpu);
2001 #endif
2002
2003 return 1;
2004 }
2005
2006 static size_t get_note_info_size(struct elf_note_info *info)
2007 {
2008 int sz = 0;
2009 int i;
2010
2011 for (i = 0; i < info->numnote; i++)
2012 sz += notesize(info->notes + i);
2013
2014 sz += info->thread_status_size;
2015
2016 return sz;
2017 }
2018
2019 static int write_note_info(struct elf_note_info *info,
2020 struct coredump_params *cprm)
2021 {
2022 int i;
2023 struct list_head *t;
2024
2025 for (i = 0; i < info->numnote; i++)
2026 if (!writenote(info->notes + i, cprm))
2027 return 0;
2028
2029 /* write out the thread status notes section */
2030 list_for_each(t, &info->thread_list) {
2031 struct elf_thread_status *tmp =
2032 list_entry(t, struct elf_thread_status, list);
2033
2034 for (i = 0; i < tmp->num_notes; i++)
2035 if (!writenote(&tmp->notes[i], cprm))
2036 return 0;
2037 }
2038
2039 return 1;
2040 }
2041
2042 static void free_note_info(struct elf_note_info *info)
2043 {
2044 while (!list_empty(&info->thread_list)) {
2045 struct list_head *tmp = info->thread_list.next;
2046 list_del(tmp);
2047 kfree(list_entry(tmp, struct elf_thread_status, list));
2048 }
2049
2050 /* Free data possibly allocated by fill_files_note(): */
2051 if (info->notes_files)
2052 vfree(info->notes_files->data);
2053
2054 kfree(info->prstatus);
2055 kfree(info->psinfo);
2056 kfree(info->notes);
2057 kfree(info->fpu);
2058 #ifdef ELF_CORE_COPY_XFPREGS
2059 kfree(info->xfpu);
2060 #endif
2061 }
2062
2063 #endif
2064
2065 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2066 struct vm_area_struct *gate_vma)
2067 {
2068 struct vm_area_struct *ret = tsk->mm->mmap;
2069
2070 if (ret)
2071 return ret;
2072 return gate_vma;
2073 }
2074 /*
2075 * Helper function for iterating across a vma list. It ensures that the caller
2076 * will visit `gate_vma' prior to terminating the search.
2077 */
2078 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2079 struct vm_area_struct *gate_vma)
2080 {
2081 struct vm_area_struct *ret;
2082
2083 ret = this_vma->vm_next;
2084 if (ret)
2085 return ret;
2086 if (this_vma == gate_vma)
2087 return NULL;
2088 return gate_vma;
2089 }
2090
2091 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2092 elf_addr_t e_shoff, int segs)
2093 {
2094 elf->e_shoff = e_shoff;
2095 elf->e_shentsize = sizeof(*shdr4extnum);
2096 elf->e_shnum = 1;
2097 elf->e_shstrndx = SHN_UNDEF;
2098
2099 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2100
2101 shdr4extnum->sh_type = SHT_NULL;
2102 shdr4extnum->sh_size = elf->e_shnum;
2103 shdr4extnum->sh_link = elf->e_shstrndx;
2104 shdr4extnum->sh_info = segs;
2105 }
2106
2107 /*
2108 * Actual dumper
2109 *
2110 * This is a two-pass process; first we find the offsets of the bits,
2111 * and then they are actually written out. If we run out of core limit
2112 * we just truncate.
2113 */
2114 static int elf_core_dump(struct coredump_params *cprm)
2115 {
2116 int has_dumped = 0;
2117 mm_segment_t fs;
2118 int segs, i;
2119 size_t vma_data_size = 0;
2120 struct vm_area_struct *vma, *gate_vma;
2121 struct elfhdr *elf = NULL;
2122 loff_t offset = 0, dataoff;
2123 struct elf_note_info info = { };
2124 struct elf_phdr *phdr4note = NULL;
2125 struct elf_shdr *shdr4extnum = NULL;
2126 Elf_Half e_phnum;
2127 elf_addr_t e_shoff;
2128 elf_addr_t *vma_filesz = NULL;
2129
2130 /*
2131 * We no longer stop all VM operations.
2132 *
2133 * This is because those proceses that could possibly change map_count
2134 * or the mmap / vma pages are now blocked in do_exit on current
2135 * finishing this core dump.
2136 *
2137 * Only ptrace can touch these memory addresses, but it doesn't change
2138 * the map_count or the pages allocated. So no possibility of crashing
2139 * exists while dumping the mm->vm_next areas to the core file.
2140 */
2141
2142 /* alloc memory for large data structures: too large to be on stack */
2143 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2144 if (!elf)
2145 goto out;
2146 /*
2147 * The number of segs are recored into ELF header as 16bit value.
2148 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2149 */
2150 segs = current->mm->map_count;
2151 segs += elf_core_extra_phdrs();
2152
2153 gate_vma = get_gate_vma(current->mm);
2154 if (gate_vma != NULL)
2155 segs++;
2156
2157 /* for notes section */
2158 segs++;
2159
2160 /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2161 * this, kernel supports extended numbering. Have a look at
2162 * include/linux/elf.h for further information. */
2163 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2164
2165 /*
2166 * Collect all the non-memory information about the process for the
2167 * notes. This also sets up the file header.
2168 */
2169 if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2170 goto cleanup;
2171
2172 has_dumped = 1;
2173
2174 fs = get_fs();
2175 set_fs(KERNEL_DS);
2176
2177 offset += sizeof(*elf); /* Elf header */
2178 offset += segs * sizeof(struct elf_phdr); /* Program headers */
2179
2180 /* Write notes phdr entry */
2181 {
2182 size_t sz = get_note_info_size(&info);
2183
2184 sz += elf_coredump_extra_notes_size();
2185
2186 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2187 if (!phdr4note)
2188 goto end_coredump;
2189
2190 fill_elf_note_phdr(phdr4note, sz, offset);
2191 offset += sz;
2192 }
2193
2194 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2195
2196 vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
2197 if (!vma_filesz)
2198 goto end_coredump;
2199
2200 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2201 vma = next_vma(vma, gate_vma)) {
2202 unsigned long dump_size;
2203
2204 dump_size = vma_dump_size(vma, cprm->mm_flags);
2205 vma_filesz[i++] = dump_size;
2206 vma_data_size += dump_size;
2207 }
2208
2209 offset += vma_data_size;
2210 offset += elf_core_extra_data_size();
2211 e_shoff = offset;
2212
2213 if (e_phnum == PN_XNUM) {
2214 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2215 if (!shdr4extnum)
2216 goto end_coredump;
2217 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2218 }
2219
2220 offset = dataoff;
2221
2222 if (!dump_emit(cprm, elf, sizeof(*elf)))
2223 goto end_coredump;
2224
2225 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2226 goto end_coredump;
2227
2228 /* Write program headers for segments dump */
2229 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2230 vma = next_vma(vma, gate_vma)) {
2231 struct elf_phdr phdr;
2232
2233 phdr.p_type = PT_LOAD;
2234 phdr.p_offset = offset;
2235 phdr.p_vaddr = vma->vm_start;
2236 phdr.p_paddr = 0;
2237 phdr.p_filesz = vma_filesz[i++];
2238 phdr.p_memsz = vma->vm_end - vma->vm_start;
2239 offset += phdr.p_filesz;
2240 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2241 if (vma->vm_flags & VM_WRITE)
2242 phdr.p_flags |= PF_W;
2243 if (vma->vm_flags & VM_EXEC)
2244 phdr.p_flags |= PF_X;
2245 phdr.p_align = ELF_EXEC_PAGESIZE;
2246
2247 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2248 goto end_coredump;
2249 }
2250
2251 if (!elf_core_write_extra_phdrs(cprm, offset))
2252 goto end_coredump;
2253
2254 /* write out the notes section */
2255 if (!write_note_info(&info, cprm))
2256 goto end_coredump;
2257
2258 if (elf_coredump_extra_notes_write(cprm))
2259 goto end_coredump;
2260
2261 /* Align to page */
2262 if (!dump_skip(cprm, dataoff - cprm->written))
2263 goto end_coredump;
2264
2265 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2266 vma = next_vma(vma, gate_vma)) {
2267 unsigned long addr;
2268 unsigned long end;
2269
2270 end = vma->vm_start + vma_filesz[i++];
2271
2272 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2273 struct page *page;
2274 int stop;
2275
2276 page = get_dump_page(addr);
2277 if (page) {
2278 void *kaddr = kmap(page);
2279 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2280 kunmap(page);
2281 page_cache_release(page);
2282 } else
2283 stop = !dump_skip(cprm, PAGE_SIZE);
2284 if (stop)
2285 goto end_coredump;
2286 }
2287 }
2288
2289 if (!elf_core_write_extra_data(cprm))
2290 goto end_coredump;
2291
2292 if (e_phnum == PN_XNUM) {
2293 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2294 goto end_coredump;
2295 }
2296
2297 end_coredump:
2298 set_fs(fs);
2299
2300 cleanup:
2301 free_note_info(&info);
2302 kfree(shdr4extnum);
2303 kfree(vma_filesz);
2304 kfree(phdr4note);
2305 kfree(elf);
2306 out:
2307 return has_dumped;
2308 }
2309
2310 #endif /* CONFIG_ELF_CORE */
2311
2312 static int __init init_elf_binfmt(void)
2313 {
2314 register_binfmt(&elf_format);
2315 return 0;
2316 }
2317
2318 static void __exit exit_elf_binfmt(void)
2319 {
2320 /* Remove the COFF and ELF loaders. */
2321 unregister_binfmt(&elf_format);
2322 }
2323
2324 core_initcall(init_elf_binfmt);
2325 module_exit(exit_elf_binfmt);
2326 MODULE_LICENSE("GPL");
This page took 0.091432 seconds and 5 git commands to generate.