NEWS: Add advisories.
[glibc.git] / scripts / glibcelf.py
blob5f3813f326cccb8b7bf9a2e99f7d998bfa605c86
1 #!/usr/bin/python3
2 # ELF support functionality for Python.
3 # Copyright (C) 2022-2024 Free Software Foundation, Inc.
4 # This file is part of the GNU C Library.
6 # The GNU C Library is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU Lesser General Public
8 # License as published by the Free Software Foundation; either
9 # version 2.1 of the License, or (at your option) any later version.
11 # The GNU C Library is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # Lesser General Public License for more details.
16 # You should have received a copy of the GNU Lesser General Public
17 # License along with the GNU C Library; if not, see
18 # <https://www.gnu.org/licenses/>.
20 """Basic ELF parser.
22 Use Image.readfile(path) to read an ELF file into memory and begin
23 parsing it.
25 """
27 import collections
28 import functools
29 import os
30 import struct
32 import glibcpp
class _MetaNamedValue(type):
    """Metaclass that sets up the lookup tables of _NamedValue subclasses."""

    @classmethod
    def __prepare__(metacls, cls, bases, **kwds):
        """Return the initial namespace of a class that is being created.

        Every class starts out with its own empty by_value and by_name
        dictionaries and with prefix set to None.  _int_based is the
        type int if one of the bases derives from int (needed for
        types like Shn), and False otherwise.

        """
        int_based = int if any(issubclass(b, int) for b in bases) else False
        return {
            'by_value': {},
            'by_name': {},
            'prefix': None,
            '_int_based': int_based,
        }

    def __contains__(self, other):
        """Return True if other is a key of the by_value table."""
        return other in self.by_value
class _NamedValue(metaclass=_MetaNamedValue):
    """Typed, named integer constants.

    Constants have the following instance attributes:

    name: The full name of the constant (e.g., "PT_NULL"), or None for
      unnamed values.
    short_name: The name of the constant without the prefix ("NULL").
      Only set on named constants.
    value: The integer value of the constant.

    The following class attributes are available:

    by_value: A dict mapping integers to constants.
    by_name: A dict mapping strings to constants.
    prefix: A string that is removed from the start of short names, or None.

    """

    def __new__(cls, arg0, arg1=None):
        """Instance creation.

        For the one-argument form, the argument must be a string, an
        int, or an instance of this class.  Strings are looked up via
        by_name.  Values are looked up via by_value; if value lookup
        fails, a new unnamed instance is returned.  Instances of this
        class are returned as-is.

        The two-argument form expects the name (a string) and the
        value (an integer).  A new instance is created in this case.
        The instance is not registered in the by_value/by_name
        dictionaries (but the caller can do that).

        Raises ValueError for an unknown name or for arguments of an
        unsupported type.

        """

        typ0 = type(arg0)
        if arg1 is None:
            # Test the object itself, not its type: a type object is
            # never an instance of cls.
            if isinstance(arg0, cls):
                # Re-use the existing object.
                return arg0
            if typ0 is int:
                try:
                    return cls.by_value[arg0]
                except KeyError:
                    # Create a new object of the requested value.
                    if cls._int_based:
                        result = int.__new__(cls, arg0)
                    else:
                        result = object.__new__(cls)
                    result.value = arg0
                    result.name = None
                    return result
            if typ0 is str:
                try:
                    return cls.by_name[arg0]
                except KeyError:
                    raise ValueError('unknown {} constant: {!r}'.format(
                        cls.__name__, arg0))
            # Do not fall off the end: an implicit None result would
            # make the constructor call evaluate to None.
            raise ValueError(
                'type {} of argument {!r} should be str, int or {}'.format(
                    typ0.__name__, arg0, cls.__name__))

        # Types for the two-argument form are rigid.
        if typ0 is not str:
            raise ValueError('type {} of name {!r} should be str'.format(
                typ0.__name__, arg0))
        if type(arg1) is not int:
            raise ValueError('type {} of value {!r} should be int'.format(
                type(arg1).__name__, arg1))
        # Create a new named constant.
        if cls._int_based:
            result = int.__new__(cls, arg1)
        else:
            result = object.__new__(cls)
        result.value = arg1
        result.name = arg0
        # Set up the short_name attribute.
        prefix = cls.prefix
        if prefix and arg0.startswith(prefix):
            result.short_name = arg0[len(prefix):]
        else:
            result.short_name = arg0
        return result

    def __str__(self):
        """Return the name if there is one, else the decimal value."""
        name = self.name
        if name:
            return name
        return str(self.value)

    def __repr__(self):
        """Return the name if there is one, else ClassName(value)."""
        name = self.name
        if name:
            return name
        return '{}({})'.format(self.__class__.__name__, self.value)

    def __setattr__(self, name, value):
        """Set an attribute, refusing to overwrite the critical ones."""
        # Prevent modification of the critical attributes once they
        # have been set.
        if name in ('name', 'value', 'short_name') and hasattr(self, name):
            raise AttributeError('can\'t set attribute {}'.format(name))
        object.__setattr__(self, name, value)
@functools.total_ordering
class _TypedConstant(_NamedValue):
    """Base class for integer-valued optionally named constants.

    This type is not an integer type.  Comparisons only succeed
    against instances of the same class (or a subclass of it); the
    remaining ordering operators are derived by functools.total_ordering
    from __eq__ and __lt__.

    """

    def __eq__(self, other):
        """Return True if other has a compatible type and the same value."""
        return isinstance(other, self.__class__) and self.value == other.value

    def __lt__(self, other):
        """Return True if other has a compatible type and a larger value."""
        # Strictly less: with "<=" a constant would compare less than
        # itself and the derived operators would become inconsistent.
        return isinstance(other, self.__class__) and self.value < other.value

    def __hash__(self):
        """Hash by value, consistent with __eq__."""
        return hash(self.value)
class _IntConstant(_NamedValue, int):
    """Base class for integer-like optionally named constants.

    Because int is one of the bases, instances compare equal to the
    integer of the same value, and can be used in integer arithmetic.

    """
class _FlagConstant(_TypedConstant, int):
    """Base class for flag constants.

    _TypedConstant comes first in the bases, so its comparison and
    hashing methods take precedence over those of int.

    """
def _parse_elf_h():
    """Read ../elf/elf.h and return a dict with the constants in it.

    The header is located relative to the directory of this file.
    Tokenizer and macro errors are printed to standard output and
    turned into an IOError.

    """

    script_dir = os.path.dirname(os.path.realpath(__file__))
    path = os.path.join(script_dir, '..', 'elf', 'elf.h')

    class TokenizerReporter:
        """Report tokenizer errors to standard output."""

        def __init__(self):
            self.errors = 0

        def error(self, token, message):
            self.errors += 1
            print('{}:{}:{}: error: {}'.format(
                path, token.line, token.column, message))

    class MacroReporter:
        """Report macro errors to standard output."""

        def __init__(self):
            self.errors = 0

        def error(self, line, message):
            self.errors += 1
            print('{}:{}: error: {}'.format(path, line, message))

        def note(self, line, message):
            print('{}:{}: note: {}'.format(path, line, message))

    # First stage: split the header text into tokens.
    token_reporter = TokenizerReporter()
    with open(path) as inp:
        text = inp.read()
    tokens = glibcpp.tokenize_c(text, token_reporter)
    if token_reporter.errors:
        raise IOError('parse error in elf.h')

    # Second stage: collect the macro definitions and evaluate them.
    macro_reporter = MacroReporter()
    definitions = glibcpp.macro_definitions(tokens)
    result = glibcpp.macro_eval(definitions, macro_reporter)
    if macro_reporter.errors:
        raise IOError('parse error in elf.h')

    return result

# The constants from elf.h, keyed by macro name.  Parsed once at
# import time; the parser itself is not needed afterwards.
_elf_h = _parse_elf_h()
del _parse_elf_h
# Names from _elf_h that have already been assigned to some class.
_elf_h_processed = set()
def _register_elf_h(cls, prefix=None, skip=(), ranges=False, parent=None):
    """Turn the elf.h constants with a given name prefix into cls instances.

    cls: the class that receives the constants.  Each constant is added
      to cls.by_value and cls.by_name and set as a class attribute.
    prefix: the name prefix to match.  Defaults to cls.prefix.
    skip: names that must not be registered.  The name prefix + 'NUM'
      is always skipped.
    ranges: if true, the LOOS/HIOS/LOPROC/HIPROC names are not
      registered as constants; their values are stored as the
      cls.os_range and cls.proc_range pairs instead.
    parent: optional class through which the names are made available
      as well (via parent.by_name and as attributes).  cls inherits the
      prefix of the parent if it does not have one of its own.

    Names that an earlier call has already registered are ignored.
    Raises ValueError if no prefix is available, if two names map to
    the same value, or if no name matched the prefix.

    """
    prefix = prefix or cls.prefix
    if not prefix:
        raise ValueError('missing prefix for {}'.format(cls.__name__))
    by_value = cls.by_value
    by_name = cls.by_name

    ignored = set(skip)
    ignored.add(prefix + 'NUM')
    if ranges:
        lo_os, hi_os = prefix + 'LOOS', prefix + 'HIOS'
        lo_proc, hi_proc = prefix + 'LOPROC', prefix + 'HIPROC'
        ignored.update((lo_os, hi_os, lo_proc, hi_proc))
        cls.os_range = (_elf_h[lo_os], _elf_h[hi_os])
        cls.proc_range = (_elf_h[lo_proc], _elf_h[hi_proc])

    # Inherit the prefix from the parent if not set.
    if parent and cls.prefix is None and parent.prefix is not None:
        cls.prefix = parent.prefix

    matched = False
    for name, value in _elf_h.items():
        if name in ignored or name in _elf_h_processed:
            continue
        if not name.startswith(prefix):
            continue
        matched = True
        _elf_h_processed.add(name)
        if value in by_value:
            raise ValueError('duplicate value {}: {}, {}'.format(
                value, name, by_value[value]))
        constant = cls(name, value)
        by_value[value] = constant
        by_name[name] = constant
        setattr(cls, name, constant)
        if parent:
            # Make the symbolic name available through the parent as well.
            parent.by_name[name] = constant
            setattr(parent, name, constant)

    if not matched:
        raise ValueError('nothing matched prefix {!r}'.format(prefix))
class ElfClass(_TypedConstant):
    """ELF word size.  Type of EI_CLASS values."""

# The class has no prefix of its own, so short names keep the full
# ELFCLASS* spelling.
_register_elf_h(ElfClass, prefix='ELFCLASS')
class ElfData(_TypedConstant):
    """ELF endianness.  Type of EI_DATA values."""

# The class has no prefix of its own, so short names keep the full
# ELFDATA* spelling.
_register_elf_h(ElfData, prefix='ELFDATA')
class Machine(_TypedConstant):
    """ELF machine type.  Type of values in Ehdr.e_machine field."""
    prefix = 'EM_'

# NOTE(review): EM_ARC_A5 is presumably skipped because its value
# duplicates another EM_* constant, which registration would reject.
_register_elf_h(Machine, skip=('EM_ARC_A5',))
class Et(_TypedConstant):
    """ELF file type.  Type of ET_* values and the Ehdr.e_type field."""
    prefix = 'ET_'

# The LOOS/HIOS/LOPROC/HIPROC bounds end up in Et.os_range and
# Et.proc_range instead of becoming constants.
_register_elf_h(Et, ranges=True)
class Shn(_IntConstant):
    """ELF reserved section indices."""
    prefix = 'SHN_'

class ShnMIPS(Shn):
    """Supplemental SHN_* constants for EM_MIPS."""

class ShnPARISC(Shn):
    """Supplemental SHN_* constants for EM_PARISC."""

# The machine-specific prefixes must be registered first: names that
# have been processed are ignored by the generic SHN_ registration.
_register_elf_h(ShnMIPS, prefix='SHN_MIPS_', parent=Shn)
_register_elf_h(ShnPARISC, prefix='SHN_PARISC_', parent=Shn)
_register_elf_h(Shn, ranges=True,
                skip=('SHN_LORESERVE', 'SHN_HIRESERVE'))
class Sht(_TypedConstant):
    """ELF section types.  Type of SHT_* values."""
    prefix = 'SHT_'

class ShtALPHA(Sht):
    """Supplemental SHT_* constants for EM_ALPHA."""

class ShtARC(Sht):
    """Supplemental SHT_* constants for EM_ARC."""

class ShtARM(Sht):
    """Supplemental SHT_* constants for EM_ARM."""

class ShtCSKY(Sht):
    """Supplemental SHT_* constants for EM_CSKY."""

class ShtIA_64(Sht):
    """Supplemental SHT_* constants for EM_IA_64."""

class ShtMIPS(Sht):
    """Supplemental SHT_* constants for EM_MIPS."""

class ShtPARISC(Sht):
    """Supplemental SHT_* constants for EM_PARISC."""

class ShtRISCV(Sht):
    """Supplemental SHT_* constants for EM_RISCV."""

# The machine-specific prefixes must be registered first: names that
# have been processed are ignored by the generic SHT_ registration.
for _cls, _prefix in (
        (ShtALPHA, 'SHT_ALPHA_'),
        (ShtARC, 'SHT_ARC_'),
        (ShtARM, 'SHT_ARM_'),
        (ShtCSKY, 'SHT_CSKY_'),
        (ShtIA_64, 'SHT_IA_64_'),
        (ShtMIPS, 'SHT_MIPS_'),
        (ShtPARISC, 'SHT_PARISC_'),
        (ShtRISCV, 'SHT_RISCV_'),
):
    _register_elf_h(_cls, prefix=_prefix, parent=Sht)
del _cls, _prefix
_register_elf_h(Sht, ranges=True,
                skip=('SHT_LOSUNW', 'SHT_HISUNW',
                      'SHT_LOUSER', 'SHT_HIUSER'))
class Pf(_FlagConstant):
    """Program header flags.  Type of Phdr.p_flags values."""
    prefix = 'PF_'

class PfARM(Pf):
    """Supplemental PF_* flags for EM_ARM."""

class PfHP(Pf):
    """Supplemental PF_* flags for HP-UX."""

class PfIA_64(Pf):
    """Supplemental PF_* flags for EM_IA_64."""

class PfMIPS(Pf):
    """Supplemental PF_* flags for EM_MIPS."""

class PfPARISC(Pf):
    """Supplemental PF_* flags for EM_PARISC."""

# The specific prefixes must be registered first: names that have
# been processed are ignored by the generic PF_ registration.
for _cls, _prefix in (
        (PfARM, 'PF_ARM_'),
        (PfHP, 'PF_HP_'),
        (PfIA_64, 'PF_IA_64_'),
        (PfMIPS, 'PF_MIPS_'),
        (PfPARISC, 'PF_PARISC_'),
):
    _register_elf_h(_cls, prefix=_prefix, parent=Pf)
del _cls, _prefix
_register_elf_h(Pf, skip=('PF_MASKOS', 'PF_MASKPROC'))
class Shf(_FlagConstant):
    """Section flags.  Type of Shdr.sh_flags values."""
    prefix = 'SHF_'

class ShfALPHA(Shf):
    """Supplemental SHF_* constants for EM_ALPHA."""

class ShfARM(Shf):
    """Supplemental SHF_* constants for EM_ARM."""

class ShfIA_64(Shf):
    """Supplemental SHF_* constants for EM_IA_64."""

class ShfMIPS(Shf):
    """Supplemental SHF_* constants for EM_MIPS."""

class ShfPARISC(Shf):
    """Supplemental SHF_* constants for EM_PARISC."""

# The machine-specific prefixes must be registered first: names that
# have been processed are ignored by the generic SHF_ registration.
for _cls, _prefix in (
        (ShfALPHA, 'SHF_ALPHA_'),
        (ShfARM, 'SHF_ARM_'),
        (ShfIA_64, 'SHF_IA_64_'),
        (ShfMIPS, 'SHF_MIPS_'),
        (ShfPARISC, 'SHF_PARISC_'),
):
    _register_elf_h(_cls, prefix=_prefix, parent=Shf)
del _cls, _prefix
_register_elf_h(Shf, skip=('SHF_MASKOS', 'SHF_MASKPROC'))
class Stb(_TypedConstant):
    """ELF symbol binding type."""
    prefix = 'STB_'

# The LOOS/HIOS/LOPROC/HIPROC bounds end up in Stb.os_range and
# Stb.proc_range instead of becoming constants.
_register_elf_h(Stb, ranges=True)
class Stt(_TypedConstant):
    """ELF symbol type."""
    prefix = 'STT_'

# The supplemental classes derive from Stt (not from the section type
# class Sht), so that their constants are instances of Stt and compare
# with other symbol types.
class SttARM(Stt):
    """Supplemental STT_* constants for EM_ARM."""

class SttPARISC(Stt):
    """Supplemental STT_* constants for EM_PARISC."""

class SttSPARC(Stt):
    """Supplemental STT_* constants for EM_SPARC."""
    # Plain integer placeholder; the STT_SPARC_ registration below
    # replaces it if elf.h defines a constant of this name.
    STT_SPARC_REGISTER = 13

class SttX86_64(Stt):
    """Supplemental STT_* constants for EM_X86_64."""

# The machine-specific prefixes must be registered first: names that
# have been processed are ignored by the generic STT_ registration.
# SttX86_64 is not registered.
_register_elf_h(SttARM, prefix='STT_ARM_', parent=Stt)
_register_elf_h(SttPARISC, prefix='STT_PARISC_', parent=Stt)
_register_elf_h(SttSPARC, prefix='STT_SPARC_', parent=Stt)
_register_elf_h(Stt, ranges=True)
class Pt(_TypedConstant):
    """ELF program header types.  Type of Phdr.p_type."""
    prefix = 'PT_'

class PtAARCH64(Pt):
    """Supplemental PT_* constants for EM_AARCH64."""

class PtARM(Pt):
    """Supplemental PT_* constants for EM_ARM."""

class PtHP(Pt):
    """Supplemental PT_* constants for HP-UX."""

class PtIA_64(Pt):
    """Supplemental PT_* constants for EM_IA_64."""

class PtMIPS(Pt):
    """Supplemental PT_* constants for EM_MIPS."""

class PtPARISC(Pt):
    """Supplemental PT_* constants for EM_PARISC."""

class PtRISCV(Pt):
    """Supplemental PT_* constants for EM_RISCV."""

# The specific prefixes must be registered first: names that have
# been processed are ignored by the generic PT_ registration.
for _cls, _prefix in (
        (PtAARCH64, 'PT_AARCH64_'),
        (PtARM, 'PT_ARM_'),
        (PtHP, 'PT_HP_'),
        (PtIA_64, 'PT_IA_64_'),
        (PtMIPS, 'PT_MIPS_'),
        (PtPARISC, 'PT_PARISC_'),
        (PtRISCV, 'PT_RISCV_'),
):
    _register_elf_h(_cls, prefix=_prefix, parent=Pt)
del _cls, _prefix
_register_elf_h(Pt, skip=('PT_LOSUNW', 'PT_HISUNW'), ranges=True)
class Dt(_TypedConstant):
    """ELF dynamic segment tags.  Type of Dyn.d_val."""
    prefix = 'DT_'

class DtAARCH64(Dt):
    """Supplemental DT_* constants for EM_AARCH64."""

class DtALPHA(Dt):
    """Supplemental DT_* constants for EM_ALPHA."""

class DtALTERA_NIOS2(Dt):
    """Supplemental DT_* constants for EM_ALTERA_NIOS2."""

class DtIA_64(Dt):
    """Supplemental DT_* constants for EM_IA_64."""

class DtMIPS(Dt):
    """Supplemental DT_* constants for EM_MIPS."""

class DtPPC(Dt):
    """Supplemental DT_* constants for EM_PPC."""

class DtPPC64(Dt):
    """Supplemental DT_* constants for EM_PPC64."""

class DtRISCV(Dt):
    """Supplemental DT_* constants for EM_RISCV."""

class DtSPARC(Dt):
    """Supplemental DT_* constants for EM_SPARC."""

class DtX86_64(Dt):
    """Supplemental DT_* constants for EM_X86_64."""

# Names that are not registered as constants by any of the calls below.
_dt_skip = (
    'DT_ENCODING', 'DT_PROCNUM',
    'DT_ADDRRNGLO', 'DT_ADDRRNGHI', 'DT_ADDRNUM',
    'DT_VALRNGLO', 'DT_VALRNGHI', 'DT_VALNUM',
    'DT_VERSIONTAGNUM', 'DT_EXTRANUM',
    'DT_AARCH64_NUM',
    'DT_ALPHA_NUM',
    'DT_IA_64_NUM',
    'DT_MIPS_NUM',
    'DT_PPC_NUM',
    'DT_PPC64_NUM',
    'DT_SPARC_NUM',
    'DT_X86_64_NUM',
)
# The machine-specific prefixes must be registered first: names that
# have been processed are ignored by the generic DT_ registration.
for _cls, _prefix in (
        (DtAARCH64, 'DT_AARCH64_'),
        (DtALPHA, 'DT_ALPHA_'),
        (DtALTERA_NIOS2, 'DT_NIOS2_'),
        (DtIA_64, 'DT_IA_64_'),
        (DtMIPS, 'DT_MIPS_'),
        (DtPPC, 'DT_PPC_'),
        (DtPPC64, 'DT_PPC64_'),
        (DtRISCV, 'DT_RISCV_'),
        (DtSPARC, 'DT_SPARC_'),
        (DtX86_64, 'DT_X86_64_'),
):
    _register_elf_h(_cls, prefix=_prefix, skip=_dt_skip, parent=Dt)
del _cls, _prefix
_register_elf_h(Dt, skip=_dt_skip, ranges=True)
del _dt_skip

# Constant extraction is complete.
del _register_elf_h
del _elf_h
class StInfo:
    """ELF symbol binding and type.  Type of the Sym.st_info field."""

    def __init__(self, arg0, arg1=None):
        """Create the object from a raw value or from a bind/type pair.

        With a single int argument, the upper bits (arg0 >> 4) select
        the binding and the lowest four bits select the type.
        Otherwise, arg0 is passed to Stb and arg1 is passed to Stt.

        """
        if isinstance(arg0, int) and arg1 is None:
            self.bind = Stb(arg0 >> 4)
            self.type = Stt(arg0 & 15)
        else:
            self.bind = Stb(arg0)
            self.type = Stt(arg1)

    def value(self):
        """Returns the raw value for the bind/type combination."""
        # The value attribute of a constant is a plain integer, not a
        # method, so it must not be called.
        return (self.bind.value << 4) | self.type.value
# Type in an ELF file.  Used for deserialization.
_Layout = collections.namedtuple('_Layout', 'unpack size')

def _define_layouts(baseclass: type, layout32: str, layout64: str,
                    types=None, fields32=None):
    """Assign variants dict to baseclass.

    The variants dict is stored as baseclass.layouts.  It is indexed
    by (ElfClass, ElfData) pairs, and its values are _Layout instances
    (an unpack function that turns a blob into a baseclass instance,
    and the size of the blob in bytes).

    baseclass: a named tuple class.
    layout32, layout64: struct format strings for ELFCLASS32 and
      ELFCLASS64, without the byte order prefix.
    types: optional dict that maps field names to callables which
      convert the raw unpacked value of the field.
    fields32: optional sequence with the field order of the 32-bit
      layout, for types where it differs from baseclass._fields.

    Raises ValueError if a format does not match the field count, if
    types names an unknown field, or if fields32 is not a permutation
    of the fields.

    """
    # Check that the struct formats yield the right number of components.
    for layout in (layout32, layout64):
        s = struct.Struct(layout)
        example = s.unpack(b' ' * s.size)
        if len(example) != len(baseclass._fields):
            raise ValueError('{!r} yields wrong field count: {} != {}'.format(
                s.format, len(example), len(baseclass._fields)))

    # Check that field names in types are correct.
    if types is None:
        types = {}
    for n in types:
        if n not in baseclass._fields:
            raise ValueError('{} does not have field {!r}'.format(
                baseclass.__name__, n))

    if fields32 is not None \
       and set(fields32) != set(baseclass._fields):
        raise ValueError('{!r} is not a permutation of the fields {!r}'.format(
            fields32, baseclass._fields))

    # Names that are already taken in the generated functions.
    used_names = {baseclass.__name__}
    used_names.update(baseclass._fields)
    used_names.update(cls.__name__ for cls in types.values())

    def unique_name(name):
        """Find a name that is not used for a class or field name."""
        candidate = name
        n = 0
        while candidate in used_names:
            n += 1
            candidate = '{}{}'.format(name, n)
        used_names.add(candidate)
        return candidate

    blob_name = unique_name('blob')
    struct_unpack_name = unique_name('struct_unpack')

    indent = ' ' * 4
    field_names = ', '.join(baseclass._fields)

    layouts = {}
    for (bits, elfclass, layout, fields) in (
            (32, ElfClass.ELFCLASS32, layout32, fields32),
            (64, ElfClass.ELFCLASS64, layout64, None),
    ):
        for (elfdata, structprefix, funcsuffix) in (
                (ElfData.ELFDATA2LSB, '<', 'LE'),
                (ElfData.ELFDATA2MSB, '>', 'BE'),
        ):
            # Globals of the generated function.
            env = {
                baseclass.__name__: baseclass,
                struct_unpack_name: struct.unpack,
            }

            # Add the type converters.
            for cls in types.values():
                env[cls.__name__] = cls

            funcname = ''.join(
                ('unpack_', baseclass.__name__, str(bits), funcsuffix))

            code = '''
def {funcname}({blob_name}):
'''.format(funcname=funcname, blob_name=blob_name)

            full_layout = structprefix + layout
            unpack_call = '{}({!r}, {})'.format(
                struct_unpack_name, full_layout, blob_name)
            if not types and fields is None:
                # No conversion and no reordering: pass the unpacked
                # components straight to the named tuple constructor.
                code += '{}return {}(*{})\n'.format(
                    indent, baseclass.__name__, unpack_call)
            else:
                # Destructuring tuple assignment.
                if fields is None:
                    code += '{}{} = {}\n'.format(
                        indent, field_names, unpack_call)
                else:
                    # Use custom field order.
                    code += '{}{} = {}\n'.format(
                        indent, ', '.join(fields), unpack_call)

                # Perform the type conversions.
                for n in baseclass._fields:
                    if n in types:
                        code += '{}{} = {}({})\n'.format(
                            indent, n, types[n].__name__, n)
                # Create the named tuple.
                code += '{}return {}({})\n'.format(
                    indent, baseclass.__name__, field_names)

            # The code is built from the constants of this module only.
            exec(code, env)
            # Compute the size with the byte order prefix, so that it
            # matches the format used by the unpack function (standard
            # sizes, no native alignment padding).
            layouts[(elfclass, elfdata)] = _Layout(
                env[funcname], struct.calcsize(full_layout))
    baseclass.layouts = layouts
# Corresponds to EI_* indices into Elf*_Ehdr.e_ident.
class Ident(collections.namedtuple('Ident',
    'ei_mag ei_class ei_data ei_version ei_osabi ei_abiversion ei_pad')):
    """The identification bytes at the start of an ELF file."""

    def __new__(cls, *args):
        """Construct an object from a blob or its constituent fields."""
        if len(args) == 1:
            return cls.unpack(args[0])
        # Use super() rather than cls.__base__: for a subclass,
        # cls.__base__ would be Ident itself and recurse endlessly.
        return super().__new__(cls, *args)

    @staticmethod
    def unpack(blob: memoryview) -> 'Ident':
        """Parse raw data into a tuple.

        The blob must be exactly size bytes long.  The ei_class and
        ei_data components are converted to ElfClass and ElfData.

        """
        ei_mag, ei_class, ei_data, ei_version, ei_osabi, ei_abiversion, \
            ei_pad = struct.unpack('4s5B7s', blob)
        return Ident(ei_mag, ElfClass(ei_class), ElfData(ei_data),
                     ei_version, ei_osabi, ei_abiversion, ei_pad)

    # Number of bytes consumed by unpack.
    size = 16
# Corresponds to Elf32_Ehdr and Elf64_Ehdr.
Ehdr = collections.namedtuple('Ehdr', (
    'e_ident', 'e_type', 'e_machine', 'e_version', 'e_entry',
    'e_phoff', 'e_shoff', 'e_flags', 'e_ehsize', 'e_phentsize',
    'e_phnum', 'e_shentsize', 'e_shnum', 'e_shstrndx',
))
_define_layouts(
    Ehdr,
    layout32='16s2H5I6H',
    layout64='16s2HI3QI6H',
    types={
        'e_ident': Ident,
        'e_machine': Machine,
        'e_type': Et,
        'e_shstrndx': Shn,
    })
# Corresponds to Elf32_Phdr and Elf64_Phdr.  Order follows the latter.
Phdr = collections.namedtuple('Phdr', (
    'p_type', 'p_flags', 'p_offset', 'p_vaddr',
    'p_paddr', 'p_filesz', 'p_memsz', 'p_align',
))
_define_layouts(
    Phdr,
    layout32='8I',
    # The 32-bit variant stores p_flags after p_memsz.
    fields32=('p_type', 'p_offset', 'p_vaddr', 'p_paddr',
              'p_filesz', 'p_memsz', 'p_flags', 'p_align'),
    layout64='2I6Q',
    types={'p_type': Pt, 'p_flags': Pf})
# Corresponds to Elf32_Shdr and Elf64_Shdr.
class Shdr(collections.namedtuple('Shdr', (
        'sh_name', 'sh_type', 'sh_flags', 'sh_addr', 'sh_offset',
        'sh_size', 'sh_link', 'sh_info', 'sh_addralign', 'sh_entsize',
))):
    """ELF section header."""

    def resolve(self, strtab: 'StringTable') -> 'Shdr':
        """Resolve sh_name using a string table.

        Returns a new object of the same class whose first field is
        the byte string found in strtab at the original sh_name.

        """
        name_index, *remaining = self
        return type(self)(strtab.get(name_index), *remaining)

_define_layouts(
    Shdr,
    layout32='10I',
    layout64='2I4Q2I2Q',
    types={'sh_type': Sht, 'sh_flags': Shf, 'sh_link': Shn})
# Corresponds to Elf32_Dyn and Elf64_Dyn.  The d_un union is not
# modeled as a nested object, and there is no d_ptr field (its
# representation in Python would be identical to d_val).
Dyn = collections.namedtuple('Dyn', ('d_tag', 'd_val'))
_define_layouts(
    Dyn,
    layout32='2i',
    layout64='2q',
    types={'d_tag': Dt})
# Corresponds to Elf32_Sym and Elf64_Sym.
class Sym(collections.namedtuple('Sym', (
        'st_name', 'st_info', 'st_other', 'st_shndx', 'st_value', 'st_size',
))):
    """ELF symbol table entry.  Field order follows the 64-bit layout."""

    def resolve(self, strtab: 'StringTable') -> 'Sym':
        """Resolve st_name using a string table.

        Returns a new object of the same class whose first field is
        the byte string found in strtab at the original st_name.

        """
        name_index, *remaining = self
        return type(self)(strtab.get(name_index), *remaining)

_define_layouts(
    Sym,
    layout32='3I2BH',
    layout64='I2BH2Q',
    # The 32-bit variant stores st_value and st_size before st_info.
    fields32=('st_name', 'st_value', 'st_size', 'st_info',
              'st_other', 'st_shndx'),
    types={'st_shndx': Shn, 'st_info': StInfo})
# Corresponds to Elf32_Rel and Elf64_Rel.
Rel = collections.namedtuple('Rel', ('r_offset', 'r_info'))
_define_layouts(Rel, layout32='2I', layout64='2Q')
# Corresponds to Elf32_Rela and Elf64_Rela.
Rela = collections.namedtuple('Rela', ('r_offset', 'r_info', 'r_addend'))
_define_layouts(Rela, layout32='3I', layout64='3Q')
class StringTable:
    """ELF string table."""

    def __init__(self, blob):
        """Create a new string table backed by the data in the blob.

        blob: a memoryview-like object

        """
        self.blob = blob

    def get(self, index) -> bytes:
        """Returns the null-terminated byte string at the index.

        The terminating null byte is not part of the result.  Running
        past the end of the blob without finding a null byte results
        in the IndexError raised by the subscript.

        """
        data = self.blob
        end = index
        # Advance to the terminator.
        while data[end] != 0:
            end += 1
        return bytes(data[index:end])
class Image:
    """ELF image parser."""

    def __init__(self, image):
        """Create an ELF image from binary image data.

        image: a memoryview-like object that supports efficient range
          subscripting.

        The attributes Ehdr, Phdr, Shdr, Dyn, Sym, Rel, Rela are set
        to the layouts matching the class and data encoding of the
        image, or to None if there is no layout for the combination.
        In the latter case, ehdr is None as well.

        """
        self.image = image
        ident = self.read(Ident, 0)
        classdata = (ident.ei_class, ident.ei_data)
        # Set self.Ehdr etc. to the subtypes with the right parsers.
        for typ in (Ehdr, Phdr, Shdr, Dyn, Sym, Rel, Rela):
            setattr(self, typ.__name__, typ.layouts.get(classdata, None))

        if self.Ehdr is not None:
            self.ehdr = self.read(self.Ehdr, 0)
            self._shdr_num = self._compute_shdr_num()
        else:
            self.ehdr = None
            self._shdr_num = 0

        # Caches: section headers by index, string tables by index.
        self._section = {}
        self._stringtab = {}

        if self._shdr_num > 0:
            self._shdr_strtab = self._find_shdr_strtab()
        else:
            self._shdr_strtab = None

    @staticmethod
    def readfile(path: str) -> 'Image':
        """Reads the ELF file at the specified path."""
        with open(path, 'rb') as inp:
            return Image(memoryview(inp.read()))

    def _compute_shdr_num(self) -> int:
        """Computes the actual number of section headers."""
        shnum = self.ehdr.e_shnum
        if shnum == 0:
            if self.ehdr.e_shoff == 0 or self.ehdr.e_shentsize == 0:
                # No section headers.
                return 0
            # Otherwise the extension mechanism is used (which may be
            # needed because e_shnum is just 16 bits).
            return self.read(self.Shdr, self.ehdr.e_shoff).sh_size
        return shnum

    def _find_shdr_strtab(self) -> StringTable:
        """Finds the section header string table (maybe via extensions)."""
        shstrndx = self.ehdr.e_shstrndx
        if shstrndx == Shn.SHN_XINDEX:
            # The real index is in the sh_link field of the first
            # section header.
            shstrndx = self.read(self.Shdr, self.ehdr.e_shoff).sh_link
        return self._find_stringtab(shstrndx)

    def read(self, typ: type, offset: int):
        """Reads an object at a specific offset.

        The type must provide unpack and size attributes, like Ident
        or the layouts created by _define_layouts.

        """
        return typ.unpack(self.image[offset: offset + typ.size])

    def phdrs(self) -> Phdr:
        """Generator iterating over the program headers.

        Raises ValueError if e_phentsize does not match the size of
        the Phdr layout.

        """
        if self.ehdr is None:
            return
        size = self.ehdr.e_phentsize
        if size != self.Phdr.size:
            raise ValueError('Unexpected Phdr size in ELF header: {} != {}'
                             .format(size, self.Phdr.size))

        offset = self.ehdr.e_phoff
        for _ in range(self.ehdr.e_phnum):
            yield self.read(self.Phdr, offset)
            offset += size

    def shdrs(self, resolve: bool=True) -> Shdr:
        """Generator iterating over the section headers.

        If resolve, section names are automatically translated
        using the section header string table.

        Raises ValueError if e_shentsize does not match the size of
        the Shdr layout.

        """
        if self._shdr_num == 0:
            return

        size = self.ehdr.e_shentsize
        if size != self.Shdr.size:
            raise ValueError('Unexpected Shdr size in ELF header: {} != {}'
                             .format(size, self.Shdr.size))

        offset = self.ehdr.e_shoff
        for _ in range(self._shdr_num):
            shdr = self.read(self.Shdr, offset)
            if resolve:
                shdr = shdr.resolve(self._shdr_strtab)
            yield shdr
            offset += size

    def dynamic(self) -> Dyn:
        """Generator iterating over the dynamic segment.

        Raises ValueError if the segment size is not a multiple of the
        Dyn entry size.

        """
        for phdr in self.phdrs():
            if phdr.p_type != Pt.PT_DYNAMIC:
                continue
            # Pick the first dynamic segment, like the loader.
            if phdr.p_filesz == 0:
                # Probably separated debuginfo.
                return
            offset = phdr.p_offset
            end = offset + phdr.p_memsz
            size = self.Dyn.size
            while True:
                next_offset = offset + size
                if next_offset > end:
                    raise ValueError(
                        'Dynamic segment size {} is not a multiple of Dyn size {}'.format(
                            phdr.p_memsz, size))
                yield self.read(self.Dyn, offset)
                if next_offset == end:
                    return
                offset = next_offset

    def syms(self, shdr: Shdr, resolve: bool=True) -> Sym:
        """A generator iterating over a symbol table.

        If resolve, symbol names are automatically translated using
        the string table for the symbol table.

        Raises ValueError if the entry size or the table size is
        inconsistent with the Sym layout.

        """
        assert shdr.sh_type == Sht.SHT_SYMTAB
        size = shdr.sh_entsize
        if size != self.Sym.size:
            raise ValueError('Invalid symbol table entry size {}'.format(size))
        offset = shdr.sh_offset
        end = shdr.sh_offset + shdr.sh_size
        if resolve:
            strtab = self._find_stringtab(shdr.sh_link)
        while offset < end:
            sym = self.read(self.Sym, offset)
            if resolve:
                sym = sym.resolve(strtab)
            yield sym
            offset += size
        if offset != end:
            raise ValueError('Symbol table is not a multiple of entry size')

    def lookup_string(self, strtab_index: int, strtab_offset: int) -> bytes:
        """Looks up a string in a string table identified by its link index."""
        try:
            strtab = self._stringtab[strtab_index]
        except KeyError:
            strtab = self._find_stringtab(strtab_index)
        return strtab.get(strtab_offset)

    def find_section(self, shndx: Shn) -> Shdr:
        """Returns the section header for the indexed section.

        The section name is not resolved.  Raises ValueError for
        reserved and for out-of-range section indices.

        """
        try:
            return self._section[shndx]
        except KeyError:
            pass
        if shndx in Shn:
            raise ValueError('Reserved section index {}'.format(shndx))
        idx = shndx.value
        # Valid indices are 0 .. _shdr_num - 1, as in _find_stringtab.
        if idx < 0 or idx >= self._shdr_num:
            raise ValueError('Section index {} out of range [0, {})'.format(
                idx, self._shdr_num))
        shdr = self.read(
            self.Shdr, self.ehdr.e_shoff + idx * self.Shdr.size)
        self._section[shndx] = shdr
        return shdr

    def _find_stringtab(self, sh_link: int) -> StringTable:
        """Returns the string table stored in the indexed section.

        Tables are cached by section index.  Raises ValueError if the
        index is out of range or the section is not a string table.

        """
        try:
            # Return the cached table itself, not the whole cache.
            return self._stringtab[sh_link]
        except KeyError:
            pass
        if sh_link < 0 or sh_link >= self._shdr_num:
            raise ValueError('Section index {} out of range [0, {})'.format(
                sh_link, self._shdr_num))
        shdr = self.read(
            self.Shdr, self.ehdr.e_shoff + sh_link * self.Shdr.size)
        if shdr.sh_type != Sht.SHT_STRTAB:
            raise ValueError(
                'Section {} is not a string table: {}'.format(
                    sh_link, shdr.sh_type))
        strtab = StringTable(
            self.image[shdr.sh_offset:shdr.sh_offset + shdr.sh_size])
        # This could retain essentially arbitrary amounts of data,
        # but caching string tables seems important for performance.
        self._stringtab[sh_link] = strtab
        return strtab
def elf_hash(s):
    """Computes the ELF hash of the string.

    s may be a str or a bytes-like object: elements that are not of
    type int are converted with ord.  The result fits into 32 bits.

    """
    result = 0
    for item in s:
        code = item if type(item) is int else ord(item)
        result = ((result << 4) + code) & 0xffffffff
        # Fold the top four bits back into the hash and clear them.
        high = result & 0xf0000000
        result ^= high >> 24
        result &= ~high
    return result
def gnu_hash(s):
    """Computes the GNU hash of the string.

    s may be a str or a bytes-like object: elements that are not of
    type int are converted with ord.  The result fits into 32 bits.

    """
    value = 5381
    for item in s:
        code = item if type(item) is int else ord(item)
        value = (value * 33 + code) & 0xffffffff
    return value
# Export every module-level name that starts with an upper-case letter.
__all__ = [ident for ident in dir() if ident[0].isupper()]