elfextract: use libmd sha interfaces on unleashed
[unleashed-pkg5.git] / src / modules / elfextract.c
blob49beca50981df442af862abcf73e7880685e7c74
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
26 #include <libelf.h>
27 #include <gelf.h>
29 #include <sys/stat.h>
30 #include <sys/types.h>
31 #include <sys/uio.h>
32 #include <fcntl.h>
33 #include <unistd.h>
34 #include <stdlib.h>
35 #include <stdio.h>
36 #include <strings.h>
37 #include <string.h>
38 #include <netinet/in.h>
39 #include <inttypes.h>
40 #if (defined(__SVR4) && defined(__sun)) || defined(__unleashed__)
41 /* Solaris has built-in SHA-1 and SHA-2 library interfaces */
42 #include <sha1.h>
43 #include <sha2.h>
44 #else
46 * All others can use OpenSSL, but OpenSSL's method signatures
47 * are slightly different
49 #include <openssl/sha.h>
50 #define SHA1_CTX SHA_CTX
51 #define SHA1Update SHA1_Update
52 #define SHA1Init SHA1_Init
53 #define SHA1Final SHA1_Final
54 #endif
56 #include <liblist.h>
57 #include <elfextract.h>
/*
 * Translate an ELF object type (ET_*) into a short descriptive string.
 * Types that are not explicitly recognised are reported as "other".
 */
char *
pkg_string_from_type(int type)
{
	char *label = "other";

	if (type == ET_EXEC)
		label = "exe";
	else if (type == ET_DYN)
		label = "so";
	else if (type == ET_CORE)
		label = "core";
	else if (type == ET_REL)
		label = "rel";

	return (label);
}
/*
 * Translate an ELF machine value (EM_*) into an architecture family name.
 * All SPARC variants collapse to "sparc", all x86 variants (32- and 64-bit)
 * to "i386", and both PowerPC variants to "ppc".  Anything else is "other".
 */
char *
pkg_string_from_arch(int arch)
{
	char *family;

	switch (arch) {
	case EM_NONE:
		family = "none";
		break;
	case EM_SPARC:
	case EM_SPARC32PLUS:
	case EM_SPARCV9:
		family = "sparc";
		break;
	case EM_386:
#if defined(__SVR4) && defined(__sun)
	/* Solaris calls x86_64 "amd64", and recognizes 486 */
	case EM_486:
	case EM_AMD64:
#else
	case EM_X86_64:
#endif
		family = "i386";
		break;
	case EM_PPC:
	case EM_PPC64:
		family = "ppc";
		break;
	default:
		family = "other";
		break;
	}

	return (family);
}
/*
 * Translate an ELF data encoding (ELFDATA*) into "lsb" or "msb";
 * any other encoding is reported as "unknown".
 */
char *
pkg_string_from_data(int data)
{
	if (data == ELFDATA2LSB)
		return ("lsb");
	if (data == ELFDATA2MSB)
		return ("msb");

	return ("unknown");
}
/*
 * Translate an ELF OS/ABI identifier (ELFOSABI_*) into a short name.
 * Only the "none", Linux and Solaris ABIs are distinguished; everything
 * else is reported as "other".
 */
char *
pkg_string_from_osabi(int osabi)
{
	char *abi = "other";

	/* ELFOSABI_NONE also covers ELFOSABI_SYSV */
	if (osabi == ELFOSABI_NONE)
		abi = "none";
	else if (osabi == ELFOSABI_LINUX)
		abi = "linux";
	else if (osabi == ELFOSABI_SOLARIS)
		abi = "solaris";

	return (abi);
}
132 static char *
133 getident(int fd)
135 char *id = NULL;
137 if ((id = malloc(EI_NIDENT)) == NULL) {
138 (void) PyErr_NoMemory();
139 return (NULL);
142 if (lseek(fd, 0, SEEK_SET) == -1) {
143 PyErr_SetFromErrno(PyExc_IOError);
144 free(id);
145 return (NULL);
148 if (read(fd, id, EI_NIDENT) < 0) {
149 PyErr_SetFromErrno(PyExc_IOError);
150 free(id);
151 return (NULL);
154 return (id);
/*
 * Report whether fd begins with the ELF magic number.
 *
 * Returns 1 if it does, 0 if it does not, and -1 if the identification
 * bytes could not be obtained from getident().
 */
int
iself(int fd)
{
	char *magic = getident(fd);
	int matches;

	if (magic == NULL)
		return (-1);

	matches = (strncmp(magic, ELFMAG, strlen(ELFMAG)) == 0);
	free(magic);

	return (matches);
}
/*
 * Report whether the identification bytes of fd declare a 32-bit object.
 *
 * Returns 1 when the EI_CLASS byte is ELFCLASS32, 0 when it is anything
 * else, and -1 if the identification bytes could not be obtained from
 * getident().
 */
int
iself32(int fd)
{
	char *id = getident(fd);
	int is32;

	if (id == NULL)
		return (-1);

	is32 = (id[EI_CLASS] == ELFCLASS32);
	free(id);

	return (is32);
}
191 static GElf_Ehdr *
192 gethead(Elf *elf)
194 GElf_Ehdr *hdr;
196 if (!elf) {
197 PyErr_SetString(PyExc_ValueError,
198 "elf.so`gethead: argument 'elf' must not be NULL");
199 return (NULL);
202 if ((hdr = malloc(sizeof (GElf_Ehdr))) == NULL) {
203 (void) PyErr_NoMemory();
204 return (NULL);
207 if (gelf_getehdr(elf, hdr) == 0) {
208 PyErr_SetString(ElfError, elf_errmsg(-1));
209 free(hdr);
210 return (NULL);
213 return (hdr);
216 hdrinfo_t *
217 getheaderinfo(int fd)
219 Elf *elf;
220 GElf_Ehdr *hdr;
221 hdrinfo_t *hi;
223 if ((hi = malloc(sizeof (hdrinfo_t))) == NULL) {
224 (void) PyErr_NoMemory();
225 return (NULL);
228 if (elf_version(EV_CURRENT) == EV_NONE) {
229 PyErr_SetString(ElfError, elf_errmsg(-1));
230 free(hi);
231 return (NULL);
234 if (!(elf = elf_begin(fd, ELF_C_READ, NULL))) {
235 PyErr_SetString(ElfError, elf_errmsg(-1));
236 free(hi);
237 return (NULL);
240 if (!(hdr = gethead(elf))) {
241 (void) elf_end(elf);
242 free(hi);
243 return (NULL);
246 hi->type = hdr->e_type;
247 hi->bits = hdr->e_ident[EI_CLASS] == ELFCLASS32 ? 32 : 64;
248 hi->arch = hdr->e_machine;
249 hi->data = hdr->e_ident[EI_DATA];
250 hi->osabi = hdr->e_ident[EI_OSABI];
251 free(hdr);
253 (void) elf_end(elf);
255 return (hi);
/*
 * For ELF nontriviality: Need to turn an ELF object into a unique hash.
 *
 * From Eric Saxe's investigations, we see that the following sections can
 * generally be ignored:
 *
 *    .SUNW_signature, .comment, .SUNW_dof, .debug, .plt, .rela.bss,
 *    .rela.plt, .line, .note
 *
 * Conversely, the following sections are generally significant:
 *
 *    .rodata.str1.8, .rodata.str1.1, .rodata, .data1, .data, .text
 *
 * Accordingly, we hash every section except those in the ignorable group
 * (plus .compcom).  Names are compared exactly, so only a section whose
 * name is identical to one of the entries below is skipped.
 *
 * Returns 1 if the named section should contribute to the hash, 0 if not.
 */
static int
hashsection(char *name)
{
	static const char *const skipped[] = {
		".SUNW_signature",
		".comment",
		".SUNW_dof",
		".debug",
		".plt",
		".rela.bss",
		".rela.plt",
		".line",
		".note",
		".compcom"
	};
	size_t i;

	for (i = 0; i < sizeof (skipped) / sizeof (skipped[0]); i++) {
		if (strcmp(name, skipped[i]) == 0)
			return (0);
	}

	return (1);
}
293 * Reads a section in 64k increments, adding it to the hash.
295 static int
296 readhash(int fd, SHA1_CTX *shc, SHA256_CTX *shc2, off_t offset, off_t size,
297 int sha1, int sha256)
299 off_t n;
300 char hashbuf[64 * 1024];
301 ssize_t rbytes;
303 if (!size)
304 return (0);
306 if (lseek(fd, offset, SEEK_SET) == -1) {
307 PyErr_SetFromErrno(PyExc_IOError);
308 return (-1);
311 do {
312 n = MIN(size, sizeof (hashbuf));
313 if ((rbytes = read(fd, hashbuf, n)) == -1) {
314 PyErr_SetFromErrno(PyExc_IOError);
315 return (-1);
317 if (sha1 > 0) {
318 SHA1Update(shc, hashbuf, rbytes);
320 if (sha256 > 0) {
321 SHA256Update(shc2, hashbuf, rbytes);
323 size -= rbytes;
324 } while (size != 0);
326 return (0);
330 * getdynamic - returns a struct filled with the
331 * information we want from an ELF file. Returns NULL
332 * if it can't find everything (eg. not ELF file, wrong
333 * class of ELF file).
334 * If sha1 is > 0, we produce an SHA1 hash as part of the returned dictionary.
335 * If sha256 is > 0, we include an SHA2 256 hash in the returned dictionary.
337 dyninfo_t *
338 getdynamic(int fd, int sha1, int sha256)
340 Elf *elf = NULL;
341 Elf_Scn *scn = NULL;
342 GElf_Shdr shdr;
343 Elf_Data *data_dyn = NULL;
344 Elf_Data *data_verneed = NULL, *data_verdef = NULL;
345 GElf_Dyn gd;
347 char *name = NULL;
348 size_t sh_str = 0;
349 size_t vernum = 0, verdefnum = 0;
350 int t = 0, num_dyn = 0, dynstr = -1;
352 SHA1_CTX shc;
353 SHA256_CTX shc2;
354 dyninfo_t *dyn = NULL;
356 liblist_t *deps = NULL;
357 off_t rpath = 0, runpath = 0, def = 0;
359 /* Verneed */
360 int a = 0;
361 char *buf = NULL, *cp = NULL;
362 GElf_Verneed *ev = NULL;
363 GElf_Vernaux *ea = NULL;
364 liblist_t *vers = NULL;
366 GElf_Verdef *vd = NULL;
367 GElf_Verdaux *va = NULL;
368 liblist_t *verdef = NULL;
370 if (elf_version(EV_CURRENT) == EV_NONE) {
371 PyErr_SetString(ElfError, elf_errmsg(-1));
372 return (NULL);
375 if (!(elf = elf_begin(fd, ELF_C_READ, NULL))) {
376 PyErr_SetString(ElfError, elf_errmsg(-1));
377 goto bad;
380 if (!elf_getshstrndx(elf, &sh_str)) {
381 PyErr_SetString(ElfError, elf_errmsg(-1));
382 goto bad;
385 /* get useful sections */
386 if (sha1 > 0) {
387 SHA1Init(&shc);
389 if (sha256 > 0) {
390 SHA256Init(&shc2);
392 while ((scn = elf_nextscn(elf, scn))) {
393 if (gelf_getshdr(scn, &shdr) != &shdr) {
394 PyErr_SetString(ElfError, elf_errmsg(-1));
395 goto bad;
398 if (!(name = elf_strptr(elf, sh_str, shdr.sh_name))) {
399 PyErr_SetString(ElfError, elf_errmsg(-1));
400 goto bad;
403 if (hashsection(name) && (sha1 > 0 || sha256 > 0)) {
404 if (shdr.sh_type == SHT_NOBITS) {
406 * We can't just push shdr.sh_size into
407 * SHA1Update(), as its raw bytes will be
408 * different on x86 than they are on sparc.
409 * Convert to network byte-order first.
411 uint64_t n = shdr.sh_size;
412 uint64_t mask = 0xffffffff00000000ULL;
413 uint32_t top = htonl((uint32_t)((n & mask) >> 32));
414 uint32_t bot = htonl((uint32_t)n);
415 if (sha1 > 0) {
416 SHA1Update(&shc, &top, sizeof (top));
417 SHA1Update(&shc, &bot, sizeof (bot));
419 if (sha256 > 0) {
420 SHA256Update(&shc2, &top, sizeof (top));
421 SHA256Update(&shc2, &bot, sizeof (bot));
423 } else {
424 int hash;
425 hash = readhash(fd, &shc, &shc2, shdr.sh_offset,
426 shdr.sh_size, sha1, sha256);
428 if (hash == -1)
429 goto bad;
433 switch (shdr.sh_type) {
434 case SHT_DYNAMIC:
435 if (!(data_dyn = elf_getdata(scn, NULL))) {
436 PyErr_SetString(ElfError, elf_errmsg(-1));
437 goto bad;
440 num_dyn = shdr.sh_size / shdr.sh_entsize;
441 dynstr = shdr.sh_link;
442 break;
444 #ifdef SHT_SUNW_verdef
445 case SHT_SUNW_verdef:
446 #else
447 case SHT_GNU_verdef:
448 #endif
449 if (!(data_verdef = elf_getdata(scn, NULL))) {
450 PyErr_SetString(ElfError, elf_errmsg(-1));
451 goto bad;
454 verdefnum = shdr.sh_info;
455 break;
457 #ifdef SHT_SUNW_verneed
458 case SHT_SUNW_verneed:
459 #else
460 case SHT_GNU_verneed:
461 #endif
462 if (!(data_verneed = elf_getdata(scn, NULL))) {
463 PyErr_SetString(ElfError, elf_errmsg(-1));
464 goto bad;
467 vernum = shdr.sh_info;
468 break;
472 /* Dynamic but no string table? */
473 if (data_dyn && dynstr < 0) {
474 PyErr_SetString(ElfError,
475 "bad elf: didn't find the dynamic duo");
476 goto bad;
479 /* Parse dynamic section */
480 if (!(deps = liblist_alloc()))
481 goto bad;
483 for (t = 0; t < num_dyn; t++) {
484 if (gelf_getdyn(data_dyn, t, &gd) == NULL) {
485 PyErr_SetString(ElfError, elf_errmsg(-1));
486 goto bad;
489 switch (gd.d_tag) {
490 case DT_NEEDED:
491 case DT_FILTER:
492 case DT_SUNW_FILTER:
493 if (liblist_add(deps, gd.d_un.d_val) == NULL)
494 goto bad;
495 break;
496 case DT_RPATH:
497 rpath = gd.d_un.d_val;
498 break;
499 case DT_RUNPATH:
500 runpath = gd.d_un.d_val;
501 break;
502 case DT_POSFLAG_1:
503 if (gd.d_un.d_val & DF_P1_DEFERRED) {
504 t++;
509 /* Runpath supercedes rpath, but use rpath if no runpath */
510 if (!runpath)
511 runpath = rpath;
514 * Finally, get version information for each item in
515 * our dependency list. This part is a little messier,
516 * as it seems that libelf / gelf do not implement this.
518 if (!(vers = liblist_alloc()))
519 goto bad;
521 if (vernum > 0 && data_verneed) {
522 buf = data_verneed->d_buf;
523 cp = buf;
526 for (t = 0; t < vernum; t++) {
527 liblist_t *veraux = NULL;
528 if (ev)
529 cp += ev->vn_next;
530 ev = (GElf_Verneed*)cp;
532 if (!(veraux = liblist_alloc()))
533 goto bad;
535 buf = cp;
537 cp += ev->vn_aux;
539 ea = NULL;
540 for (a = 0; a < ev->vn_cnt; a++) {
541 if (ea)
542 cp += ea->vna_next;
543 ea = (GElf_Vernaux*)cp;
544 if (liblist_add(veraux, ea->vna_name) == NULL)
545 goto bad;
548 if (liblist_add(vers, ev->vn_file) == NULL)
549 goto bad;
550 vers->tail->verlist = veraux;
552 cp = buf;
555 /* Consolidate version and dependency information */
556 if (liblist_foreach(deps, setver_liblist_cb, vers, NULL) == -1)
557 goto bad;
558 liblist_free(vers);
559 vers = NULL;
562 * Now, figure out what versions we provide.
565 if (!(verdef = liblist_alloc()))
566 goto bad;
568 if (verdefnum > 0 && data_verdef) {
569 buf = data_verdef->d_buf;
570 cp = buf;
573 for (t = 0; t < verdefnum; t++) {
574 if (vd)
575 cp += vd->vd_next;
576 vd = (GElf_Verdef*)cp;
578 buf = cp;
579 cp += vd->vd_aux;
581 va = NULL;
582 for (a = 0; a < vd->vd_cnt; a++) {
583 if (va)
584 cp += va->vda_next;
585 va = (GElf_Verdaux*)cp;
586 /* first one is name, rest are versions */
587 if (!def)
588 def = va->vda_name;
589 else if (liblist_add(verdef, va->vda_name) == NULL)
590 goto bad;
593 cp = buf;
596 if ((dyn = malloc(sizeof (dyninfo_t))) == NULL) {
597 (void) PyErr_NoMemory();
598 goto bad;
601 dyn->runpath = runpath;
602 dyn->dynstr = dynstr;
603 dyn->elf = elf;
604 dyn->deps = deps;
605 dyn->def = def;
606 dyn->vers = verdef;
607 if (sha1 > 0) {
608 SHA1Final(dyn->hash, &shc);
610 if (sha256 > 0) {
611 SHA256Final(dyn->hash256, &shc2);
613 return (dyn);
615 bad:
616 if (deps)
617 liblist_free(deps);
618 if (verdef)
619 liblist_free(verdef);
620 if (vers)
621 liblist_free(vers);
622 if (elf)
623 (void) elf_end(elf);
624 return (NULL);
627 void
628 dyninfo_free(dyninfo_t *dyn)
630 liblist_free(dyn->deps);
631 liblist_free(dyn->vers);
632 (void) elf_end(dyn->elf);
633 free(dyn);