/*-
 * Copyright (c) 2000 David O'Brien
 * Copyright (c) 1995-1996 Søren Schmidt
 * Copyright (c) 1996 Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_capsicum.h"
#include "opt_compat.h"
#include "opt_pax.h"
#include "opt_gzio.h"

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/exec.h>
#include <sys/fcntl.h>
#include <sys/gzio.h>
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mman.h>
#include <sys/namei.h>
#include <sys/pax.h>
#include <sys/pioctl.h>
#include <sys/proc.h>
#include <sys/procfs.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sf_buf.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/vnode.h>
#include <sys/syslog.h>
#include <sys/eventhandler.h>
#include <sys/user.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>

#include <machine/elf.h>
#include <machine/md_var.h>

#define ELF_NOTE_ROUNDSIZE	4
#define OLD_EI_BRAND	8

static int __elfN(check_header)(const Elf_Ehdr *hdr);
static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,
    const char *interp, int interp_name_len, int32_t *osrel);
static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
    u_long *entry, size_t pagesize);
static int __elfN(load_section)(struct image_params *imgp, vm_offset_t offset,
    caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
    size_t pagesize);
static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);
static boolean_t __elfN(freebsd_trans_osrel)(const Elf_Note *note,
    int32_t *osrel);
static boolean_t kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel);
static boolean_t __elfN(check_note)(struct image_params *imgp,
    Elf_Brandnote *checknote, int32_t *osrel);
static vm_prot_t __elfN(trans_prot)(Elf_Word);
static Elf_Word __elfN(untrans_prot)(vm_prot_t);

SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0,
    "");

#define	CORE_BUF_SIZE	(16 * 1024)

int __elfN(fallback_brand) = -1;
SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
    fallback_brand, CTLFLAG_RWTUN, &__elfN(fallback_brand), 0,
    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort");

static int elf_legacy_coredump = 0;
SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW,
    &elf_legacy_coredump, 0, "");

int __elfN(nxstack) =
#if defined(__amd64__) || defined(__powerpc64__) /* both 64 and 32 bit */
	1;
#else
	0;
#endif
SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
    nxstack, CTLFLAG_RW, &__elfN(nxstack), 0,
    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable non-executable stack");

static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];

#define	trunc_page_ps(va, ps)	((va) & ~(ps - 1))
#define	round_page_ps(va, ps)	(((va) + (ps - 1)) & ~(ps - 1))
#define	aligned(a, t)	(trunc_page_ps((u_long)(a), sizeof(t)) == (u_long)(a))

static const char FREEBSD_ABI_VENDOR[] = "FreeBSD";

Elf_Brandnote __elfN(freebsd_brandnote) = {
	.hdr.n_namesz	= sizeof(FREEBSD_ABI_VENDOR),
	.hdr.n_descsz	= sizeof(int32_t),
	.hdr.n_type	= 1,
	.vendor		= FREEBSD_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= __elfN(freebsd_trans_osrel)
};

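/*
 * Fetch the osrel value from the 32-bit descriptor that follows the
 * (padded) vendor name in a FreeBSD ABI note.
 */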
static boolean_t
__elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel)
{
	uintptr_t p;

	p = (uintptr_t)(note + 1);
	p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE);
	*osrel = *(const int32_t *)(p);

	return (TRUE);
}

static const char GNU_ABI_VENDOR[] = "GNU";
static int GNU_KFREEBSD_ABI_DESC = 3;

Elf_Brandnote __elfN(kfreebsd_brandnote) = {
	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
	.hdr.n_descsz	= 16,	/* XXX at least 16 */
	.hdr.n_type	= 1,
	.vendor		= GNU_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= kfreebsd_trans_osrel
};

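/*
 * Translate a GNU/kFreeBSD ABI note into an osrel value; the descriptor
 * carries the ABI tag followed by a major/minor/teeny version triple.
 */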
static boolean_t
kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel)
{
	const Elf32_Word *desc;
	uintptr_t p;

	p = (uintptr_t)(note + 1);
	p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE);

	desc = (const Elf32_Word *)p;
	if (desc[0] != GNU_KFREEBSD_ABI_DESC)
		return (FALSE);

	/*
	 * Debian GNU/kFreeBSD embed the earliest compatible kernel version
	 * (__FreeBSD_version: <major><two digit minor>Rxx) in the LSB way.
	 */
	*osrel = desc[1] * 100000 + desc[2] * 1000 + desc[3];

	return (TRUE);
}

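/*
 * Register a brand in the first free slot of elf_brand_list.  Returns -1
 * (with a console warning) if the table is already full.
 */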
int
__elfN(insert_brand_entry)(Elf_Brandinfo *entry)
{
	int i;

	for (i = 0; i < MAX_BRANDS; i++) {
		if (elf_brand_list[i] == NULL) {
			elf_brand_list[i] = entry;
			break;
		}
	}
	if (i == MAX_BRANDS) {
		printf("WARNING: %s: could not insert brandinfo entry: %p\n",
			__func__, entry);
		return (-1);
	}
	return (0);
}

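/*
 * Remove a previously registered brand from elf_brand_list.  Returns -1
 * if the entry was not found.
 */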
int
__elfN(remove_brand_entry)(Elf_Brandinfo *entry)
{
	int i;

	for (i = 0; i < MAX_BRANDS; i++) {
		if (elf_brand_list[i] == entry) {
			elf_brand_list[i] = NULL;
			break;
		}
	}
	if (i == MAX_BRANDS)
		return (-1);
	return (0);
}

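/*
 * Return TRUE if some process is still executing under the sysentvec
 * belonging to this brand entry.
 */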
int
__elfN(brand_inuse)(Elf_Brandinfo *entry)
{
	struct proc *p;
	int rval = FALSE;

	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_sysent == entry->sysvec) {
			rval = TRUE;
			break;
		}
	}
	sx_sunlock(&allproc_lock);

	return (rval);
}

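/*
 * Pick the Elf_Brandinfo that matches this image, trying the branding
 * methods enumerated in the comment below in order.
 */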
static Elf_Brandinfo *
__elfN(get_brandinfo)(struct image_params *imgp, const char *interp,
    int interp_name_len, int32_t *osrel)
{
	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
	Elf_Brandinfo *bi;
	boolean_t ret;
	int i;

	/*
	 * We support four types of branding -- (1) the ELF EI_OSABI field
	 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
	 * branding w/in the ELF header, (3) path of the `interp_path'
	 * field, and (4) the ".note.ABI-tag" ELF section.
	 */

	/* Look for an ".note.ABI-tag" ELF section */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi == NULL)
			continue;
		if (hdr->e_machine == bi->machine && (bi->flags &
		    (BI_BRAND_NOTE|BI_BRAND_NOTE_MANDATORY)) != 0) {
			ret = __elfN(check_note)(imgp, bi->brand_note, osrel);
			if (ret)
				return (bi);
		}
	}

	/* If the executable has a brand, search for it in the brand list. */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
			continue;
		if (hdr->e_machine == bi->machine &&
		    (hdr->e_ident[EI_OSABI] == bi->brand ||
		    strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
		    bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0))
			return (bi);
	}

	/* No known brand, see if the header is recognized by any brand */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY ||
		    bi->header_supported == NULL)
			continue;
		if (hdr->e_machine == bi->machine) {
			ret = bi->header_supported(imgp);
			if (ret)
				return (bi);
		}
	}

	/* Lacking a known brand, search for a recognized interpreter. */
	if (interp != NULL) {
		for (i = 0; i < MAX_BRANDS; i++) {
			bi = elf_brand_list[i];
			if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
				continue;
			if (hdr->e_machine == bi->machine &&
			    /* ELF image p_filesz includes terminating zero */
			    strlen(bi->interp_path) + 1 == interp_name_len &&
			    strncmp(interp, bi->interp_path, interp_name_len)
			    == 0)
				return (bi);
		}
	}

	/* Lacking a recognized interpreter, try the default brand */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
			continue;
		if (hdr->e_machine == bi->machine &&
		    __elfN(fallback_brand) == bi->brand)
			return (bi);
	}
	return (NULL);
}

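/*
 * Basic ELF header sanity checks: class, byte order, version and phdr
 * entry size must match this kernel, and at least one registered brand
 * must claim the machine type.
 */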
static int
__elfN(check_header)(const Elf_Ehdr *hdr)
{
	Elf_Brandinfo *bi;
	int i;

	if (!IS_ELF(*hdr) ||
	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
	    hdr->e_ident[EI_VERSION] != EV_CURRENT ||
	    hdr->e_phentsize != sizeof(Elf_Phdr) ||
	    hdr->e_version != ELF_TARG_VER)
		return (ENOEXEC);

	/*
	 * Make sure we have at least one brand for this machine.
	 */

	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi != NULL && bi->machine == hdr->e_machine)
			break;
	}
	if (i == MAX_BRANDS)
		return (ENOEXEC);

	return (0);
}

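/*
 * Map the sub-page range [start, end) by backing it with an anonymous
 * page and copying the corresponding bytes in from the file object.
 */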
static int
__elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
    vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t maxprot)
{
	struct sf_buf *sf;
	int error;
	vm_offset_t off;

	/*
	 * Create the page if it doesn't exist yet. Ignore errors.
	 */
	vm_map_lock(map);
	vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end),
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);

	/*
	 * Find the page from the underlying object.
	 */
	if (object) {
		sf = vm_imgact_map_page(object, offset);
		if (sf == NULL)
			return (KERN_FAILURE);
		off = offset - trunc_page(offset);
		error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
		    end - start);
		vm_imgact_unmap_page(sf);
		if (error) {
			return (KERN_FAILURE);
		}
	}

	return (KERN_SUCCESS);
}

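/*
 * Insert a mapping of the object for [start, end).  Partial pages at
 * either edge go through __elfN(map_partial)(); a file offset that is
 * not page aligned forces a copy instead of a shared mapping.
 */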
static int
__elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
    vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t maxprot,
    int cow)
{
	struct sf_buf *sf;
	vm_offset_t off;
	vm_size_t sz;
	int error, rv;

	if (start != trunc_page(start)) {
		rv = __elfN(map_partial)(map, object, offset, start,
		    round_page(start), prot, maxprot);
		if (rv)
			return (rv);
		offset += round_page(start) - start;
		start = round_page(start);
	}
	if (end != round_page(end)) {
		rv = __elfN(map_partial)(map, object, offset +
		    trunc_page(end) - start, trunc_page(end), end, prot,
		    maxprot);
		if (rv)
			return (rv);
		end = trunc_page(end);
	}
	if (end > start) {
		if (offset & PAGE_MASK) {
			/*
			 * The mapping is not page aligned. This means we have
			 * to copy the data. Sigh.
			 */
			rv = vm_map_find(map, NULL, 0, &start, end - start, 0,
			    VMFS_NO_SPACE, prot | VM_PROT_WRITE, maxprot, 0);
			if (rv)
				return (rv);
			if (object == NULL)
				return (KERN_SUCCESS);
			for (; start < end; start += sz) {
				sf = vm_imgact_map_page(object, offset);
				if (sf == NULL)
					return (KERN_FAILURE);
				off = offset - trunc_page(offset);
				sz = end - start;
				if (sz > PAGE_SIZE - off)
					sz = PAGE_SIZE - off;
				error = copyout((caddr_t)sf_buf_kva(sf) + off,
				    (caddr_t)start, sz);
				vm_imgact_unmap_page(sf);
				if (error) {
					return (KERN_FAILURE);
				}
				offset += sz;
			}
			rv = KERN_SUCCESS;
		} else {
			vm_object_reference(object);
			vm_map_lock(map);
			rv = vm_map_insert(map, object, offset, start, end,
			    prot, maxprot, cow);
			vm_map_unlock(map);
			if (rv != KERN_SUCCESS)
				vm_object_deallocate(object);
		}
		return (rv);
	} else {
		return (KERN_SUCCESS);
	}
}

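/*
 * Map a single PT_LOAD segment into the process: the file-backed part is
 * mapped (or copied) at the requested protection and any bss tail beyond
 * filsz is backed by anonymous memory.
 */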
static int
__elfN(load_section)(struct image_params *imgp, vm_offset_t offset,
    caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
    size_t pagesize)
{
	struct sf_buf *sf;
	size_t map_len;
	vm_map_t map;
	vm_object_t object;
	vm_offset_t map_addr;
	int error, rv, cow;
	size_t copy_len;
	vm_offset_t file_addr;

	/*
	 * It's necessary to fail if the filsz + offset taken from the
	 * header is greater than the actual file pager object's size.
	 * If we were to allow this, then the vm_map_find() below would
	 * walk right off the end of the file object and into the ether.
	 *
	 * While I'm here, might as well check for something else that
	 * is invalid: filsz cannot be greater than memsz.
	 */
	if ((off_t)filsz + offset > imgp->attr->va_size || filsz > memsz) {
		uprintf("elf_load_section: truncated ELF file\n");
		return (ENOEXEC);
	}

	object = imgp->object;
	map = &imgp->proc->p_vmspace->vm_map;
	map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize);
	file_addr = trunc_page_ps(offset, pagesize);

	/*
	 * We have two choices.  We can either clear the data in the last page
	 * of an oversized mapping, or we can start the anon mapping a page
	 * early and copy the initialized data into that first page.  We
	 * choose the second..
	 */
	if (memsz > filsz)
		map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr;
	else
		map_len = round_page_ps(offset + filsz, pagesize) - file_addr;

	if (map_len != 0) {
		/* cow flags: don't dump readonly sections in core */
		cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
		    (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);

		rv = __elfN(map_insert)(map,
				      object,
				      file_addr,	/* file offset */
				      map_addr,		/* virtual start */
				      map_addr + map_len,/* virtual end */
				      prot,
				      VM_PROT_ALL,
				      cow);
		if (rv != KERN_SUCCESS)
			return (EINVAL);

		/* we can stop now if we've covered it all */
		if (memsz == filsz) {
			return (0);
		}
	}


	/*
	 * We have to get the remaining bit of the file into the first part
	 * of the oversized map segment.  This is normally because the .data
	 * segment in the file is extended to provide bss.  It's a neat idea
	 * to try and save a page, but it's a pain in the behind to implement.
	 */
	copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize);
	map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize);
	map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) -
	    map_addr;

	/* This had damn well better be true! */
	if (map_len != 0) {
		rv = __elfN(map_insert)(map, NULL, 0, map_addr, map_addr +
		    map_len, VM_PROT_ALL, VM_PROT_ALL, 0);
		if (rv != KERN_SUCCESS) {
			return (EINVAL);
		}
	}

	if (copy_len != 0) {
		vm_offset_t off;

		sf = vm_imgact_map_page(object, offset + filsz);
		if (sf == NULL)
			return (EIO);

		/* send the page fragment to user space */
		off = trunc_page_ps(offset + filsz, pagesize) -
		    trunc_page(offset + filsz);
		error = copyout((caddr_t)sf_buf_kva(sf) + off,
		    (caddr_t)map_addr, copy_len);
		vm_imgact_unmap_page(sf);
		if (error) {
			return (error);
		}
	}

	/*
	 * set it to the specified protection.
	 * XXX had better undo the damage from pasting over the cracks here!
	 */
	vm_map_protect(map, trunc_page(map_addr), round_page(map_addr +
	    map_len), prot, FALSE);

	return (0);
}

/*
 * Load the file "file" into memory.  It may be either a shared object
 * or an executable.
 *
 * The "addr" reference parameter is in/out.  On entry, it specifies
 * the address where a shared object should be loaded.  If the file is
 * an executable, this value is ignored.  On exit, "addr" specifies
 * where the file was actually loaded.
 *
 * The "entry" reference parameter is out only.  On exit, it specifies
 * the entry point for the loaded file.
 */
static int
__elfN(load_file)(struct proc *p, const char *file, u_long *addr,
	u_long *entry, size_t pagesize)
{
	struct {
		struct nameidata nd;
		struct vattr attr;
		struct image_params image_params;
	} *tempdata;
	const Elf_Ehdr *hdr = NULL;
	const Elf_Phdr *phdr = NULL;
	struct nameidata *nd;
	struct vattr *attr;
	struct image_params *imgp;
	vm_prot_t prot;
	u_long rbase;
	u_long base_addr = 0;
	int error, i, numsegs;

#ifdef CAPABILITY_MODE
	/*
	 * XXXJA: This check can go away once we are sufficiently confident
	 * that the checks in namei() are correct.
	 */
	if (IN_CAPABILITY_MODE(curthread))
		return (ECAPMODE);
#endif

	tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK);
	nd = &tempdata->nd;
	attr = &tempdata->attr;
	imgp = &tempdata->image_params;

	/*
	 * Initialize part of the common data
	 */
	imgp->proc = p;
	imgp->attr = attr;
	imgp->firstpage = NULL;
	imgp->image_header = NULL;
	imgp->object = NULL;
	imgp->execlabel = NULL;

	NDINIT(nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_SYSSPACE, file, curthread);
	if ((error = namei(nd)) != 0) {
		nd->ni_vp = NULL;
		goto fail;
	}
	NDFREE(nd, NDF_ONLY_PNBUF);
	imgp->vp = nd->ni_vp;

	/*
	 * Check permissions, modes, uid, etc on the file, and "open" it.
	 */
	error = exec_check_permissions(imgp);
	if (error)
		goto fail;

	error = exec_map_first_page(imgp);
	if (error)
		goto fail;

	/*
	 * Also make certain that the interpreter stays the same, so set
	 * its VV_TEXT flag, too.
	 */
	VOP_SET_TEXT(nd->ni_vp);

	imgp->object = nd->ni_vp->v_object;

	hdr = (const Elf_Ehdr *)imgp->image_header;
	if ((error = __elfN(check_header)(hdr)) != 0)
		goto fail;
	if (hdr->e_type == ET_DYN)
		rbase = *addr;
	else if (hdr->e_type == ET_EXEC)
		rbase = 0;
	else {
		error = ENOEXEC;
		goto fail;
	}

	/* Only support headers that fit within first page for now */
	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff) {
		error = ENOEXEC;
		goto fail;
	}

	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr)) {
		error = ENOEXEC;
		goto fail;
	}

	for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_LOAD && phdr[i].p_memsz != 0) {
			/* Loadable segment */
			prot = __elfN(trans_prot)(phdr[i].p_flags);
			error = __elfN(load_section)(imgp, phdr[i].p_offset,
			    (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase,
			    phdr[i].p_memsz, phdr[i].p_filesz, prot, pagesize);
			if (error != 0)
				goto fail;
			/*
			 * Establish the base address if this is the
			 * first segment.
			 */
			if (numsegs == 0)
				base_addr = trunc_page(phdr[i].p_vaddr +
				    rbase);
			numsegs++;
		}
	}
	*addr = base_addr;
	*entry = (unsigned long)hdr->e_entry + rbase;

fail:
	if (imgp->firstpage)
		exec_unmap_first_page(imgp);

	if (nd->ni_vp)
		vput(nd->ni_vp);

	free(tempdata, M_TEMP);

	return (error);
}

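/*
 * ELF image activator: validate the headers, select a brand, build the
 * new vmspace and map the program segments described by the phdrs.
 */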
static int
__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
{
	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
	const Elf_Phdr *phdr;
	Elf_Auxargs *elf_auxargs;
	struct vmspace *vmspace;
	vm_prot_t prot;
	u_long text_size = 0, data_size = 0, total_size = 0;
	u_long text_addr = 0, data_addr = 0;
	u_long seg_size, seg_addr;
	u_long addr, baddr, et_dyn_addr, entry = 0, proghdr = 0;
	int32_t osrel = 0;
	int error = 0, i, n, interp_name_len = 0;
	const char *interp = NULL, *newinterp = NULL;
	Elf_Brandinfo *brand_info;
	char *path;
	struct sysentvec *sv;

	/*
	 * Do we have a valid ELF header ?
	 *
	 * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later
	 * if particular brand doesn't support it.
	 */
	if (__elfN(check_header)(hdr) != 0 ||
	    (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN))
		return (-1);

	/*
	 * From here on down, we return an errno, not -1, as we've
	 * detected an ELF file.
	 */

	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff) {
		/* Only support headers in first page for now */
		return (ENOEXEC);
	}
	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr))
		return (ENOEXEC);
	n = 0;
	baddr = 0;
	for (i = 0; i < hdr->e_phnum; i++) {
		switch (phdr[i].p_type) {
		case PT_LOAD:
			if (n == 0)
				baddr = phdr[i].p_vaddr;
			n++;
			break;
		case PT_INTERP:
			/* Path to interpreter */
			if (phdr[i].p_filesz > MAXPATHLEN ||
			    phdr[i].p_offset > PAGE_SIZE ||
			    phdr[i].p_filesz > PAGE_SIZE - phdr[i].p_offset)
				return (ENOEXEC);
			interp = imgp->image_header + phdr[i].p_offset;
			interp_name_len = phdr[i].p_filesz;
			break;
		case PT_GNU_STACK:
			if (__elfN(nxstack))
				imgp->stack_prot =
				    __elfN(trans_prot)(phdr[i].p_flags);
			imgp->stack_sz = phdr[i].p_memsz;
			break;
		}
	}

	brand_info = __elfN(get_brandinfo)(imgp, interp, interp_name_len,
	    &osrel);
	if (brand_info == NULL) {
		uprintf("ELF binary type \"%u\" not known.\n",
		    hdr->e_ident[EI_OSABI]);
		return (ENOEXEC);
	}
	if (hdr->e_type == ET_DYN) {
		if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0)
			return (ENOEXEC);
	}
	sv = brand_info->sysvec;
	if (interp != NULL && brand_info->interp_newpath != NULL)
		newinterp = brand_info->interp_newpath;

	/*
	 * Avoid a possible deadlock if the current address space is destroyed
	 * and that address space maps the locked vnode.  In the common case,
	 * the locked vnode's v_usecount is decremented but remains greater
	 * than zero.  Consequently, the vnode lock is not needed by vrele().
	 * However, in cases where the vnode lock is external, such as nullfs,
	 * v_usecount may become zero.
	 *
	 * The VV_TEXT flag prevents modifications to the executable while
	 * the vnode is unlocked.
	 */
	VOP_UNLOCK(imgp->vp, 0);

	error = exec_new_vmspace(imgp, sv);
	imgp->proc->p_sysent = sv;

	et_dyn_addr = 0;
	if (hdr->e_type == ET_DYN) {
		/*
		 * Honour the base load address from the dso if it is
		 * non-zero for some reason.
		 */
		if (baddr == 0) {
			et_dyn_addr = ET_DYN_LOAD_ADDR;
#ifdef PAX_ASLR
			pax_aslr_execbase(imgp->proc, &et_dyn_addr);
#endif
		}
	}

	vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
	if (error)
		return (error);

	for (i = 0; i < hdr->e_phnum; i++) {
		switch (phdr[i].p_type) {
		case PT_LOAD:	/* Loadable segment */
			if (phdr[i].p_memsz == 0)
				break;
			prot = __elfN(trans_prot)(phdr[i].p_flags);
			error = __elfN(load_section)(imgp, phdr[i].p_offset,
			    (caddr_t)(uintptr_t)phdr[i].p_vaddr + et_dyn_addr,
			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
			    sv->sv_pagesize);
			if (error != 0)
				return (error);

			/*
			 * If this segment contains the program headers,
			 * remember their virtual address for the AT_PHDR
			 * aux entry. Static binaries don't usually include
			 * a PT_PHDR entry.
			 */
			if (phdr[i].p_offset == 0 &&
			    hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize
				<= phdr[i].p_filesz)
				proghdr = phdr[i].p_vaddr + hdr->e_phoff +
				    et_dyn_addr;

			seg_addr = trunc_page(phdr[i].p_vaddr + et_dyn_addr);
			seg_size = round_page(phdr[i].p_memsz +
			    phdr[i].p_vaddr + et_dyn_addr - seg_addr);

			/*
			 * Make the largest executable segment the official
			 * text segment and all others data.
			 *
			 * Note that obreak() assumes that data_addr +
			 * data_size == end of data load area, and the ELF
			 * file format expects segments to be sorted by
			 * address.  If multiple data segments exist, the
			 * last one will be used.
			 */

			if (phdr[i].p_flags & PF_X && text_size < seg_size) {
				text_size = seg_size;
				text_addr = seg_addr;
			} else {
				data_size = seg_size;
				data_addr = seg_addr;
			}