AArch64: Improve strrchr
[glibc.git] / scripts / glibcelf.py
blob6f48eee1297ac9b05f0b12b04b7c47d9fba28b4e
1 #!/usr/bin/python3
2 # ELF support functionality for Python.
3 # Copyright (C) 2022-2023 Free Software Foundation, Inc.
4 # This file is part of the GNU C Library.
6 # The GNU C Library is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU Lesser General Public
8 # License as published by the Free Software Foundation; either
9 # version 2.1 of the License, or (at your option) any later version.
11 # The GNU C Library is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # Lesser General Public License for more details.
16 # You should have received a copy of the GNU Lesser General Public
17 # License along with the GNU C Library; if not, see
18 # <https://www.gnu.org/licenses/>.
20 """Basic ELF parser.
22 Use Image.readfile(path) to read an ELF file into memory and begin
23 parsing it.
25 """
27 import collections
28 import functools
29 import os
30 import struct
32 import glibcpp
class _MetaNamedValue(type):
    """Metaclass that prepares the namespace of _NamedValue subclasses."""

    @classmethod
    def __prepare__(metacls, cls, bases, **kwds):
        # Record whether one of the bases derives from int.  Needed for
        # types like Shn.  The marker is either False or the int type.
        derives_from_int = any(issubclass(base, int) for base in bases)
        namespace = {
            'by_value': {},
            'by_name': {},
            'prefix': None,
            '_int_based': int if derives_from_int else False,
        }
        return namespace

    def __contains__(self, other):
        # "value in SomeConstantClass" checks the registered values.
        registry = self.by_value
        return other in registry
class _NamedValue(metaclass=_MetaNamedValue):
    """Typed, named integer constants.

    Constants have the following instance attributes:

    name: The full name of the constant (e.g., "PT_NULL"), or None for
      unnamed constants.
    short_name: The name of the constant without the prefix ("NULL").
      Only set on named constants.
    value: The integer value of the constant.

    The following class attributes are available:

    by_value: A dict mapping integers to constants.
    by_name: A dict mapping strings to constants.
    prefix: A string that is removed from the start of short names, or None.

    """

    def __new__(cls, arg0, arg1=None):
        """Instance creation.

        For the one-argument form, the argument must be a string, an
        int, or an instance of this class.  Strings are looked up via
        by_name.  Values are looked up via by_value; if value lookup
        fails, a new unnamed instance is returned.  Instances of this
        class are returned as-is.

        The two-argument form expects the name (a string) and the
        value (an integer).  A new instance is created in this case.
        The instance is not registered in the by_value/by_name
        dictionaries (but the caller can do that).

        Raises ValueError for unknown names and for arguments of an
        unsupported type.

        """

        typ0 = type(arg0)
        if arg1 is None:
            # The check must be applied to the object itself; the type
            # object typ0 is never an instance of cls.
            if isinstance(arg0, cls):
                # Re-use the existing object.
                return arg0
            if typ0 is int:
                by_value = cls.by_value
                try:
                    return by_value[arg0]
                except KeyError:
                    # Create a new object of the requested value.
                    if cls._int_based:
                        result = int.__new__(cls, arg0)
                    else:
                        result = object.__new__(cls)
                    result.value = arg0
                    result.name = None
                    return result
            if typ0 is str:
                by_name = cls.by_name
                try:
                    return by_name[arg0]
                except KeyError:
                    raise ValueError('unknown {} constant: {!r}'.format(
                        cls.__name__, arg0))
            # Do not fall off the end: that would make the constructor
            # call evaluate to None.
            raise ValueError(
                'type {} of argument {!r} should be str, int or {}'.format(
                    typ0.__name__, arg0, cls.__name__))
        else:
            # Types for the two-argument form are rigid.
            if typ0 is not str:
                raise ValueError('type {} of name {!r} should be str'.format(
                    typ0.__name__, arg0))
            if type(arg1) is not int:
                raise ValueError('type {} of value {!r} should be int'.format(
                    type(arg1).__name__, arg1))
            # Create a new named constant.
            if cls._int_based:
                result = int.__new__(cls, arg1)
            else:
                result = object.__new__(cls)
            result.value = arg1
            result.name = arg0
            # Set up the short_name attribute.
            prefix = cls.prefix
            if prefix and arg0.startswith(prefix):
                result.short_name = arg0[len(prefix):]
            else:
                result.short_name = arg0
            return result

    def __str__(self):
        name = self.name
        if name:
            return name
        else:
            return str(self.value)

    def __repr__(self):
        name = self.name
        if name:
            return name
        else:
            return '{}({})'.format(self.__class__.__name__, self.value)

    def __setattr__(self, name, value):
        # Prevent modification of the critical attributes once they
        # have been set.
        if name in ('name', 'value', 'short_name') and hasattr(self, name):
            raise AttributeError('can\'t set attribute {}'.format(name))
        object.__setattr__(self, name, value)
@functools.total_ordering
class _TypedConstant(_NamedValue):
    """Base class for integer-valued optionally named constants.

    This type is not an integer type.

    Constants are compared by value, but only against instances of
    their own class (or of a subclass); __eq__ and __lt__ evaluate to
    False for any other object.

    """

    def __eq__(self, other):
        return isinstance(other, self.__class__) and self.value == other.value

    def __lt__(self, other):
        # Must be a strict comparison: functools.total_ordering derives
        # the remaining operators from __lt__ and __eq__.
        return isinstance(other, self.__class__) and self.value < other.value

    def __hash__(self):
        return hash(self.value)
class _IntConstant(_NamedValue, int):
    """Base class for integer-like optionally named constants.

    Instances compare equal to the integer of the same value, and can
    be used in integer arithmetic.

    """
    pass
class _FlagConstant(_TypedConstant, int):
    """Base class for flag constants.

    Combines _TypedConstant with int.  _TypedConstant comes first in
    the base class list, so its comparison and hashing methods take
    precedence over those of int.

    """
    pass
def _parse_elf_h():
    """Read ../elf/elf.h and return a dict with the constants in it.

    The header is located relative to the directory of this script.
    Tokenizer and macro errors are printed to standard output, and an
    IOError is raised if any were reported.

    """

    script_dir = os.path.dirname(os.path.realpath(__file__))
    path = os.path.join(script_dir, '..', 'elf', 'elf.h')

    class TokenizerReporter:
        """Report tokenizer errors to standard output."""

        def __init__(self):
            self.errors = 0

        def error(self, token, message):
            self.errors += 1
            location = (path, token.line, token.column)
            print('{}:{}:{}: error: {}'.format(*location, message))

    class MacroReporter:
        """Report macro errors to standard output."""

        def __init__(self):
            self.errors = 0

        def error(self, line, message):
            self.errors += 1
            print('{}:{}: error: {}'.format(path, line, message))

        def note(self, line, message):
            print('{}:{}: note: {}'.format(path, line, message))

    # First stage: split the header into tokens.
    tokenizer_reporter = TokenizerReporter()
    with open(path) as inp:
        tokens = glibcpp.tokenize_c(inp.read(), tokenizer_reporter)
    if tokenizer_reporter.errors:
        raise IOError('parse error in elf.h')

    # Second stage: collect the macro definitions and evaluate them.
    macro_reporter = MacroReporter()
    definitions = glibcpp.macro_definitions(tokens)
    constants = glibcpp.macro_eval(definitions, macro_reporter)
    if macro_reporter.errors:
        raise IOError('parse error in elf.h')

    return constants
# Constants extracted from elf.h, keyed by macro name.  The parser
# function is only needed once, so it is removed right away.
_elf_h = _parse_elf_h()
del _parse_elf_h
# Names of the elf.h constants that _register_elf_h has already
# assigned to a class.
_elf_h_processed = set()
def _register_elf_h(cls, prefix=None, skip=(), ranges=False, parent=None):
    """Register the elf.h constants matching a prefix with cls.

    cls: the constant class that receives the constants
    prefix: name prefix to match; defaults to cls.prefix
    skip: names that must not be registered (prefix + 'NUM' is always
      skipped)
    ranges: if true, the LOOS/HIOS/LOPROC/HIPROC constants are not
      registered, but stored in cls.os_range and cls.proc_range
    parent: optional class through which the names are made available
      as well, and from which cls inherits an unset prefix

    Constants claimed by an earlier call are not registered again.
    Raises ValueError if there is no prefix, if a value is already
    registered with cls, or if nothing matched the prefix.

    """
    effective_prefix = prefix or cls.prefix
    if not effective_prefix:
        raise ValueError('missing prefix for {}'.format(cls.__name__))
    values = cls.by_value
    names = cls.by_name

    ignored = set(skip) | {effective_prefix + 'NUM'}
    if ranges:
        lo_os, hi_os, lo_proc, hi_proc = (
            effective_prefix + suffix
            for suffix in ('LOOS', 'HIOS', 'LOPROC', 'HIPROC'))
        ignored.update((lo_os, hi_os, lo_proc, hi_proc))
        cls.os_range = (_elf_h[lo_os], _elf_h[hi_os])
        cls.proc_range = (_elf_h[lo_proc], _elf_h[hi_proc])

    # Inherit the prefix from the parent if not set.
    if parent and cls.prefix is None and parent.prefix is not None:
        cls.prefix = parent.prefix

    matched = False
    for name, value in _elf_h.items():
        if not name.startswith(effective_prefix):
            continue
        if name in ignored or name in _elf_h_processed:
            continue
        matched = True
        _elf_h_processed.add(name)
        if value in values:
            raise ValueError('duplicate value {}: {}, {}'.format(
                value, name, values[value]))
        constant = cls(name, value)
        values[value] = constant
        names[name] = constant
        setattr(cls, name, constant)
        if parent:
            # Make the symbolic name available through the parent as well.
            parent.by_name[name] = constant
            setattr(parent, name, constant)

    if not matched:
        raise ValueError('nothing matched prefix {!r}'.format(
            effective_prefix))
class ElfClass(_TypedConstant):
    """ELF word size.  Type of EI_CLASS values."""
# The class does not set a prefix attribute, so the prefix is passed
# explicitly.
_register_elf_h(ElfClass, prefix='ELFCLASS')
class ElfData(_TypedConstant):
    """ELF endianness.  Type of EI_DATA values."""
# The class does not set a prefix attribute, so the prefix is passed
# explicitly.
_register_elf_h(ElfData, prefix='ELFDATA')
class Machine(_TypedConstant):
    """ELF machine type.  Type of values in Ehdr.e_machine field."""
    prefix = 'EM_'
# NOTE(review): EM_ARC_A5 is presumably skipped because its value
# duplicates another EM_* constant, which _register_elf_h rejects --
# confirm against elf.h.
_register_elf_h(Machine, skip=('EM_ARC_A5',))
class Et(_TypedConstant):
    """ELF file type.  Type of ET_* values and the Ehdr.e_type field."""
    prefix = 'ET_'
# ranges=True stores the OS and processor specific value ranges in
# Et.os_range and Et.proc_range instead of registering their bounds.
_register_elf_h(Et, ranges=True)
class Shn(_IntConstant):
    """ELF reserved section indices."""
    prefix = 'SHN_'
class ShnMIPS(Shn):
    """Supplemental SHN_* constants for EM_MIPS."""
class ShnPARISC(Shn):
    """Supplemental SHN_* constants for EM_PARISC."""
# The supplemental classes are registered first: constants claimed by
# them are skipped when the generic SHN_ prefix is processed.
_register_elf_h(ShnMIPS, prefix='SHN_MIPS_', parent=Shn)
_register_elf_h(ShnPARISC, prefix='SHN_PARISC_', parent=Shn)
_register_elf_h(Shn, skip='SHN_LORESERVE SHN_HIRESERVE'.split(), ranges=True)
class Sht(_TypedConstant):
    """ELF section types.  Type of SHT_* values."""
    prefix = 'SHT_'
class ShtALPHA(Sht):
    """Supplemental SHT_* constants for EM_ALPHA."""
class ShtARC(Sht):
    """Supplemental SHT_* constants for EM_ARC."""
class ShtARM(Sht):
    """Supplemental SHT_* constants for EM_ARM."""
class ShtCSKY(Sht):
    """Supplemental SHT_* constants for EM_CSKY."""
class ShtIA_64(Sht):
    """Supplemental SHT_* constants for EM_IA_64."""
class ShtMIPS(Sht):
    """Supplemental SHT_* constants for EM_MIPS."""
class ShtPARISC(Sht):
    """Supplemental SHT_* constants for EM_PARISC."""
class ShtRISCV(Sht):
    """Supplemental SHT_* constants for EM_RISCV."""
# The supplemental classes are registered first: constants claimed by
# them are skipped when the generic SHT_ prefix is processed.
_register_elf_h(ShtALPHA, prefix='SHT_ALPHA_', parent=Sht)
_register_elf_h(ShtARC, prefix='SHT_ARC_', parent=Sht)
_register_elf_h(ShtARM, prefix='SHT_ARM_', parent=Sht)
_register_elf_h(ShtCSKY, prefix='SHT_CSKY_', parent=Sht)
_register_elf_h(ShtIA_64, prefix='SHT_IA_64_', parent=Sht)
_register_elf_h(ShtMIPS, prefix='SHT_MIPS_', parent=Sht)
_register_elf_h(ShtPARISC, prefix='SHT_PARISC_', parent=Sht)
_register_elf_h(ShtRISCV, prefix='SHT_RISCV_', parent=Sht)
_register_elf_h(Sht, ranges=True,
                skip='SHT_LOSUNW SHT_HISUNW SHT_LOUSER SHT_HIUSER'.split())
class Pf(_FlagConstant):
    """Program header flags.  Type of Phdr.p_flags values."""
    prefix = 'PF_'
class PfARM(Pf):
    """Supplemental PF_* flags for EM_ARM."""
class PfHP(Pf):
    """Supplemental PF_* flags for HP-UX."""
class PfIA_64(Pf):
    """Supplemental PF_* flags for EM_IA_64."""
class PfMIPS(Pf):
    """Supplemental PF_* flags for EM_MIPS."""
class PfPARISC(Pf):
    """Supplemental PF_* flags for EM_PARISC."""
# The supplemental classes are registered first: constants claimed by
# them are skipped when the generic PF_ prefix is processed.
_register_elf_h(PfARM, prefix='PF_ARM_', parent=Pf)
_register_elf_h(PfHP, prefix='PF_HP_', parent=Pf)
_register_elf_h(PfIA_64, prefix='PF_IA_64_', parent=Pf)
_register_elf_h(PfMIPS, prefix='PF_MIPS_', parent=Pf)
_register_elf_h(PfPARISC, prefix='PF_PARISC_', parent=Pf)
_register_elf_h(Pf, skip='PF_MASKOS PF_MASKPROC'.split())
class Shf(_FlagConstant):
    """Section flags.  Type of Shdr.sh_flags values."""
    prefix = 'SHF_'
class ShfALPHA(Shf):
    """Supplemental SHF_* constants for EM_ALPHA."""
class ShfARM(Shf):
    """Supplemental SHF_* constants for EM_ARM."""
class ShfIA_64(Shf):
    """Supplemental SHF_* constants for EM_IA_64."""
class ShfMIPS(Shf):
    """Supplemental SHF_* constants for EM_MIPS."""
class ShfPARISC(Shf):
    """Supplemental SHF_* constants for EM_PARISC."""
# The supplemental classes are registered first: constants claimed by
# them are skipped when the generic SHF_ prefix is processed.
_register_elf_h(ShfALPHA, prefix='SHF_ALPHA_', parent=Shf)
_register_elf_h(ShfARM, prefix='SHF_ARM_', parent=Shf)
_register_elf_h(ShfIA_64, prefix='SHF_IA_64_', parent=Shf)
_register_elf_h(ShfMIPS, prefix='SHF_MIPS_', parent=Shf)
_register_elf_h(ShfPARISC, prefix='SHF_PARISC_', parent=Shf)
_register_elf_h(Shf, skip='SHF_MASKOS SHF_MASKPROC'.split())
class Stb(_TypedConstant):
    """ELF symbol binding type."""
    prefix = 'STB_'
# ranges=True stores the OS and processor specific value ranges in
# Stb.os_range and Stb.proc_range instead of registering their bounds.
_register_elf_h(Stb, ranges=True)
class Stt(_TypedConstant):
    """ELF symbol type."""
    prefix = 'STT_'
# The supplemental classes derive from Stt (not from the section type
# class Sht), like the supplemental classes of the other constant
# families derive from their generic class.
class SttARM(Stt):
    """Supplemental STT_* constants for EM_ARM."""
class SttPARISC(Stt):
    """Supplemental STT_* constants for EM_PARISC."""
class SttSPARC(Stt):
    """Supplemental STT_* constants for EM_SPARC."""
    # Placeholder; replaced by the constant object if the registration
    # below finds STT_SPARC_REGISTER in elf.h.
    STT_SPARC_REGISTER = 13
class SttX86_64(Stt):
    """Supplemental STT_* constants for EM_X86_64."""
# The supplemental classes are registered first: constants claimed by
# them are skipped when the generic STT_ prefix is processed.
_register_elf_h(SttARM, prefix='STT_ARM_', parent=Stt)
_register_elf_h(SttPARISC, prefix='STT_PARISC_', parent=Stt)
_register_elf_h(SttSPARC, prefix='STT_SPARC_', parent=Stt)
_register_elf_h(Stt, ranges=True)
class Pt(_TypedConstant):
    """ELF program header types.  Type of Phdr.p_type."""
    prefix = 'PT_'
class PtAARCH64(Pt):
    """Supplemental PT_* constants for EM_AARCH64."""
class PtARM(Pt):
    """Supplemental PT_* constants for EM_ARM."""
class PtHP(Pt):
    """Supplemental PT_* constants for HP-UX."""
class PtIA_64(Pt):
    """Supplemental PT_* constants for EM_IA_64."""
class PtMIPS(Pt):
    """Supplemental PT_* constants for EM_MIPS."""
class PtPARISC(Pt):
    """Supplemental PT_* constants for EM_PARISC."""
class PtRISCV(Pt):
    """Supplemental PT_* constants for EM_RISCV."""
# The supplemental classes are registered first: constants claimed by
# them are skipped when the generic PT_ prefix is processed.
_register_elf_h(PtAARCH64, prefix='PT_AARCH64_', parent=Pt)
_register_elf_h(PtARM, prefix='PT_ARM_', parent=Pt)
_register_elf_h(PtHP, prefix='PT_HP_', parent=Pt)
_register_elf_h(PtIA_64, prefix='PT_IA_64_', parent=Pt)
_register_elf_h(PtMIPS, prefix='PT_MIPS_', parent=Pt)
_register_elf_h(PtPARISC, prefix='PT_PARISC_', parent=Pt)
_register_elf_h(PtRISCV, prefix='PT_RISCV_', parent=Pt)
_register_elf_h(Pt, skip='PT_LOSUNW PT_HISUNW'.split(), ranges=True)
class Dt(_TypedConstant):
    """ELF dynamic segment tags.  Type of Dyn.d_val."""
    prefix = 'DT_'
class DtAARCH64(Dt):
    """Supplemental DT_* constants for EM_AARCH64."""
class DtALPHA(Dt):
    """Supplemental DT_* constants for EM_ALPHA."""
class DtALTERA_NIOS2(Dt):
    """Supplemental DT_* constants for EM_ALTERA_NIOS2."""
class DtIA_64(Dt):
    """Supplemental DT_* constants for EM_IA_64."""
class DtMIPS(Dt):
    """Supplemental DT_* constants for EM_MIPS."""
class DtPPC(Dt):
    """Supplemental DT_* constants for EM_PPC."""
class DtPPC64(Dt):
    """Supplemental DT_* constants for EM_PPC64."""
class DtRISCV(Dt):
    """Supplemental DT_* constants for EM_RISCV."""
class DtSPARC(Dt):
    """Supplemental DT_* constants for EM_SPARC."""
# DT_* names that are not registered as constants.  The same list is
# passed to every registration call below.
_dt_skip = '''
DT_ENCODING DT_PROCNUM
DT_ADDRRNGLO DT_ADDRRNGHI DT_ADDRNUM
DT_VALRNGLO DT_VALRNGHI DT_VALNUM
DT_VERSIONTAGNUM DT_EXTRANUM
DT_AARCH64_NUM
DT_ALPHA_NUM
DT_IA_64_NUM
DT_MIPS_NUM
DT_PPC_NUM
DT_PPC64_NUM
DT_SPARC_NUM
'''.strip().split()
# The supplemental classes are registered first: constants claimed by
# them are skipped when the generic DT_ prefix is processed.
_register_elf_h(DtAARCH64, prefix='DT_AARCH64_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtALPHA, prefix='DT_ALPHA_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtALTERA_NIOS2, prefix='DT_NIOS2_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtIA_64, prefix='DT_IA_64_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtMIPS, prefix='DT_MIPS_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtPPC, prefix='DT_PPC_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtPPC64, prefix='DT_PPC64_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtRISCV, prefix='DT_RISCV_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtSPARC, prefix='DT_SPARC_', skip=_dt_skip, parent=Dt)
_register_elf_h(Dt, skip=_dt_skip, ranges=True)
del _dt_skip
# Constant extraction is complete.  The registration helper and the
# parsed elf.h constants are no longer needed at module level.
del _register_elf_h
del _elf_h
class StInfo:
    """ELF symbol binding and type.  Type of the Sym.st_info field.

    Instance attributes:

    bind: The symbol binding, converted with Stb.
    type: The symbol type, converted with Stt.

    """
    def __init__(self, arg0, arg1=None):
        """Create the object from a raw value or from its components.

        With a single int argument, the argument is the raw st_info
        value: the binding is taken from bits 4 and up, and the type
        from the low 4 bits.  Otherwise, arg0 is the binding and arg1
        is the type.

        """
        if isinstance(arg0, int) and arg1 is None:
            self.bind = Stb(arg0 >> 4)
            self.type = Stt(arg0 & 15)
        else:
            self.bind = Stb(arg0)
            self.type = Stt(arg1)

    def value(self):
        """Returns the raw value for the bind/type combination."""
        # The value attribute of a constant is a plain integer, not a
        # method, so it must not be called.
        return (self.bind.value << 4) | self.type.value
# Layout of a type in an ELF file.  Used for deserialization.
# unpack: function that converts a blob into an instance of the type
# size: number of bytes to pass to unpack
_Layout = collections.namedtuple('_Layout', 'unpack size')
def _define_layouts(baseclass: type, layout32: str, layout64: str,
                    types=None, fields32=None):
    """Assign variants dict to baseclass.

    The variants dict is stored in baseclass.layouts.  It is indexed
    by (ElfClass, ElfData) pairs, and its values are _Layout instances.

    baseclass: a named tuple class
    layout32, layout64: struct format strings (without byte order
      prefix) for the 32-bit and 64-bit variants
    types: optional dict mapping field names to converter classes that
      are applied to the unpacked values
    fields32: optional field order of the 32-bit variant, if it differs
      from the order of the fields in baseclass

    Raises ValueError if the arguments do not match the fields of
    baseclass.

    """
    struct32 = struct.Struct(layout32)
    struct64 = struct.Struct(layout64)

    # Check that the struct formats yield the right number of components.
    for s in (struct32, struct64):
        example = s.unpack(b' ' * s.size)
        if len(example) != len(baseclass._fields):
            raise ValueError('{!r} yields wrong field count: {} != {}'.format(
                s.format, len(example), len(baseclass._fields)))

    # Check that field names in types are correct.
    if types is None:
        types = {}
    for n in types:
        if n not in baseclass._fields:
            raise ValueError('{} does not have field {!r}'.format(
                baseclass.__name__, n))

    if fields32 is not None \
       and set(fields32) != set(baseclass._fields):
        raise ValueError('{!r} is not a permutation of the fields {!r}'.format(
            fields32, baseclass._fields))

    # Names that are already taken in the generated functions.
    used_names = ({baseclass.__name__}
                  | set(baseclass._fields)
                  | {cls.__name__ for cls in types.values()})

    def unique_name(name):
        """Find a name that is not used for a class or field name."""
        candidate = name
        n = 0
        while candidate in used_names:
            n += 1
            candidate = '{}{}'.format(name, n)
        used_names.add(candidate)
        return candidate

    blob_name = unique_name('blob')
    struct_unpack_name = unique_name('struct_unpack')

    indent = ' ' * 4
    field_names = ', '.join(baseclass._fields)

    layouts = {}
    for (bits, elfclass, layout, fields) in (
            (32, ElfClass.ELFCLASS32, layout32, fields32),
            (64, ElfClass.ELFCLASS64, layout64, None),
    ):
        for (elfdata, structprefix, funcsuffix) in (
                (ElfData.ELFDATA2LSB, '<', 'LE'),
                (ElfData.ELFDATA2MSB, '>', 'BE'),
        ):
            env = {
                baseclass.__name__: baseclass,
                struct_unpack_name: struct.unpack,
            }

            # Add the type converters.
            for cls in types.values():
                env[cls.__name__] = cls

            funcname = ''.join(
                ('unpack_', baseclass.__name__, str(bits), funcsuffix))

            # Format with explicit byte order (and standard sizes).
            fmt = structprefix + layout
            unpack_call = '{}({!r}, {})'.format(
                struct_unpack_name, fmt, blob_name)

            lines = ['def {}({}):'.format(funcname, blob_name)]
            if not types and fields is None:
                # No conversion or reordering is needed: pass the
                # unpacked components on as positional arguments.
                lines.append('{}return {}(*{})'.format(
                    indent, baseclass.__name__, unpack_call))
            else:
                # Destructuring tuple assignment, using the custom
                # field order if there is one.
                if fields is None:
                    targets = field_names
                else:
                    targets = ', '.join(fields)
                lines.append('{}{} = {}'.format(indent, targets, unpack_call))

                # Perform the type conversions.
                for n in baseclass._fields:
                    if n in types:
                        lines.append('{}{} = {}({})'.format(
                            indent, n, types[n].__name__, n))
                # Create the named tuple.
                lines.append('{}return {}({})'.format(
                    indent, baseclass.__name__, field_names))

            # The code is assembled from the arguments of this function
            # only, never from data read from ELF files.
            exec('\n'.join(lines) + '\n', env)
            # Compute the size from the same format that is used for
            # unpacking, so that native alignment rules cannot make the
            # two disagree.
            layouts[(elfclass, elfdata)] = _Layout(
                env[funcname], struct.calcsize(fmt))
    baseclass.layouts = layouts
# Corresponds to EI_* indices into Elf*_Ehdr.e_ident.
class Ident(collections.namedtuple('Ident',
    'ei_mag ei_class ei_data ei_version ei_osabi ei_abiversion ei_pad')):
    """Identification bytes at the start of an ELF file."""

    def __new__(cls, *args):
        """Construct an object from a blob or its constituent fields."""
        if len(args) != 1:
            return cls.__base__.__new__(cls, *args)
        # A single argument is the raw data.
        (blob,) = args
        return cls.unpack(blob)

    @staticmethod
    def unpack(blob: memoryview) -> 'Ident':
        """Parse raw data into a tuple."""
        magic, word_size, byte_order, *remaining = struct.unpack(
            '4s5B7s', blob)
        return Ident(magic, ElfClass(word_size), ElfData(byte_order),
                     *remaining)

    # Number of bytes consumed by unpack.
    size = 16
# Corresponds to Elf32_Ehdr and Elf64_Ehdr.
Ehdr = collections.namedtuple('Ehdr',
   'e_ident e_type e_machine e_version e_entry e_phoff e_shoff e_flags'
    + ' e_ehsize e_phentsize e_phnum e_shentsize e_shnum e_shstrndx')
# The types dict names the converters that are applied to the raw
# values of the listed fields after unpacking.
_define_layouts(Ehdr,
                layout32='16s2H5I6H',
                layout64='16s2HI3QI6H',
                types=dict(e_ident=Ident,
                           e_machine=Machine,
                           e_type=Et,
                           e_shstrndx=Shn))
# Corresponds to Elf32_Phdr and Elf64_Phdr.  Order follows the latter.
Phdr = collections.namedtuple('Phdr',
    'p_type p_flags p_offset p_vaddr p_paddr p_filesz p_memsz p_align')
# fields32 gives the order of the fields in the 32-bit variant, which
# differs from the order of the tuple fields.
_define_layouts(Phdr,
                layout32='8I',
                fields32=('p_type', 'p_offset', 'p_vaddr', 'p_paddr',
                          'p_filesz', 'p_memsz', 'p_flags', 'p_align'),
                layout64='2I6Q',
                types=dict(p_type=Pt, p_flags=Pf))
# Corresponds to Elf32_Shdr and Elf64_Shdr.
class Shdr(collections.namedtuple('Shdr',
    'sh_name sh_type sh_flags sh_addr sh_offset sh_size sh_link sh_info'
    + ' sh_addralign sh_entsize')):
    def resolve(self, strtab: 'StringTable') -> 'Shdr':
        """Return a copy with sh_name looked up in the string table."""
        name_index, *remaining = self
        return type(self)(strtab.get(name_index), *remaining)
_define_layouts(Shdr, layout32='10I', layout64='2I4Q2I2Q',
                types={'sh_type': Sht, 'sh_flags': Shf, 'sh_link': Shn})
# Corresponds to Elf32_Dyn and Elf64_Dyn.  The nesting through the
# d_un union is skipped, and d_ptr is missing (its representation in
# Python would be identical to d_val).
Dyn = collections.namedtuple('Dyn', 'd_tag d_val')
# Both fields are unpacked as signed integers ('i' and 'q' formats).
_define_layouts(Dyn,
                layout32='2i',
                layout64='2q',
                types=dict(d_tag=Dt))
# Corresponds to Elf32_Sym and Elf64_Sym.
class Sym(collections.namedtuple('Sym',
    'st_name st_info st_other st_shndx st_value st_size')):
    def resolve(self, strtab: 'StringTable') -> 'Sym':
        """Return a copy with st_name looked up in the string table."""
        name_index, *remaining = self
        return type(self)(strtab.get(name_index), *remaining)
# The tuple fields follow the 64-bit order; fields32 gives the order of
# the 32-bit variant.
_define_layouts(Sym, layout32='3I2BH', layout64='I2BH2Q',
                fields32=('st_name', 'st_value', 'st_size', 'st_info',
                          'st_other', 'st_shndx'),
                types={'st_shndx': Shn, 'st_info': StInfo})
# Corresponds to Elf32_Rel and Elf64_Rel.  No type converters are
# applied; both fields are plain integers.
Rel = collections.namedtuple('Rel', 'r_offset r_info')
_define_layouts(Rel,
                layout32='2I',
                layout64='2Q')
# Corresponds to Elf32_Rela and Elf64_Rela.
# NOTE(review): r_addend is unpacked with unsigned formats ('I'/'Q')
# here -- confirm whether the addend should be read as signed.
Rela = collections.namedtuple('Rela', 'r_offset r_info r_addend')
_define_layouts(Rela,
                layout32='3I',
                layout64='3Q')
class StringTable:
    """ELF string table."""
    def __init__(self, blob):
        """Create a new string table backed by the data in the blob.

        blob: a memoryview-like object

        """
        self.blob = blob

    def get(self, index) -> bytes:
        """Returns the null-terminated byte string at the index."""
        data = self.blob
        # Advance to the terminating null byte.
        stop = index
        while data[stop] != 0:
            stop += 1
        return bytes(data[index:stop])
class Image:
    """ELF image parser."""
    def __init__(self, image):
        """Create an ELF image from binary image data.

        image: a memoryview-like object that supports efficient range
          subscripting.

        """
        self.image = image
        ident = self.read(Ident, 0)
        classdata = (ident.ei_class, ident.ei_data)
        # Set self.Ehdr etc. to the subtypes with the right parsers.
        # They are None if the class/data combination is not supported.
        for typ in (Ehdr, Phdr, Shdr, Dyn, Sym, Rel, Rela):
            setattr(self, typ.__name__, typ.layouts.get(classdata, None))

        if self.Ehdr is not None:
            self.ehdr = self.read(self.Ehdr, 0)
            self._shdr_num = self._compute_shdr_num()
        else:
            self.ehdr = None
            self._shdr_num = 0

        # Caches for find_section and _find_stringtab, indexed by
        # section index.
        self._section = {}
        self._stringtab = {}

        if self._shdr_num > 0:
            self._shdr_strtab = self._find_shdr_strtab()
        else:
            self._shdr_strtab = None

    @staticmethod
    def readfile(path: str) -> 'Image':
        """Reads the ELF file at the specified path."""
        with open(path, 'rb') as inp:
            return Image(memoryview(inp.read()))

    def _compute_shdr_num(self) -> int:
        """Computes the actual number of section headers."""
        shnum = self.ehdr.e_shnum
        if shnum == 0:
            if self.ehdr.e_shoff == 0 or self.ehdr.e_shentsize == 0:
                # No section headers.
                return 0
            # Otherwise the extension mechanism is used (which may be
            # needed because e_shnum is just 16 bits).
            return self.read(self.Shdr, self.ehdr.e_shoff).sh_size
        return shnum

    def _find_shdr_strtab(self) -> 'StringTable':
        """Finds the section header string table (maybe via extensions)."""
        shstrndx = self.ehdr.e_shstrndx
        if shstrndx == Shn.SHN_XINDEX:
            shstrndx = self.read(self.Shdr, self.ehdr.e_shoff).sh_link
        return self._find_stringtab(shstrndx)

    def read(self, typ: type, offset: int):
        """Reads an object at a specific offset.

        The type must provide unpack and size attributes, such as
        Ident or the _Layout instances created by _define_layouts.

        """
        return typ.unpack(self.image[offset: offset + typ.size])

    def phdrs(self) -> 'Phdr':
        """Generator iterating over the program headers."""
        if self.ehdr is None:
            return
        size = self.ehdr.e_phentsize
        if size != self.Phdr.size:
            raise ValueError('Unexpected Phdr size in ELF header: {} != {}'
                             .format(size, self.Phdr.size))

        offset = self.ehdr.e_phoff
        for _ in range(self.ehdr.e_phnum):
            yield self.read(self.Phdr, offset)
            offset += size

    def shdrs(self, resolve: bool=True) -> 'Shdr':
        """Generator iterating over the section headers.

        If resolve, section names are automatically translated
        using the section header string table.

        """
        if self._shdr_num == 0:
            return

        size = self.ehdr.e_shentsize
        if size != self.Shdr.size:
            raise ValueError('Unexpected Shdr size in ELF header: {} != {}'
                             .format(size, self.Shdr.size))

        offset = self.ehdr.e_shoff
        for _ in range(self._shdr_num):
            shdr = self.read(self.Shdr, offset)
            if resolve:
                shdr = shdr.resolve(self._shdr_strtab)
            yield shdr
            offset += size

    def dynamic(self) -> 'Dyn':
        """Generator iterating over the dynamic segment."""
        for phdr in self.phdrs():
            if phdr.p_type == Pt.PT_DYNAMIC:
                # Pick the first dynamic segment, like the loader.
                if phdr.p_filesz == 0:
                    # Probably separated debuginfo.
                    return
                offset = phdr.p_offset
                end = offset + phdr.p_memsz
                size = self.Dyn.size
                while True:
                    next_offset = offset + size
                    if next_offset > end:
                        raise ValueError(
                            'Dynamic segment size {} is not a multiple of Dyn size {}'.format(
                                phdr.p_memsz, size))
                    yield self.read(self.Dyn, offset)
                    if next_offset == end:
                        return
                    offset = next_offset

    def syms(self, shdr: 'Shdr', resolve: bool=True) -> 'Sym':
        """A generator iterating over a symbol table.

        If resolve, symbol names are automatically translated using
        the string table for the symbol table.

        """
        assert shdr.sh_type == Sht.SHT_SYMTAB
        size = shdr.sh_entsize
        if size != self.Sym.size:
            raise ValueError('Invalid symbol table entry size {}'.format(size))
        offset = shdr.sh_offset
        end = shdr.sh_offset + shdr.sh_size
        if resolve:
            strtab = self._find_stringtab(shdr.sh_link)
        while offset < end:
            sym = self.read(self.Sym, offset)
            if resolve:
                sym = sym.resolve(strtab)
            yield sym
            offset += size
        if offset != end:
            raise ValueError('Symbol table is not a multiple of entry size')

    def lookup_string(self, strtab_index: int, strtab_offset: int) -> bytes:
        """Looks up a string in a string table identified by its link index."""
        # _find_stringtab consults the cache before reading the table.
        return self._find_stringtab(strtab_index).get(strtab_offset)

    def find_section(self, shndx: 'Shn') -> 'Shdr':
        """Returns the section header for the indexed section.

        The section name is not resolved.

        Raises ValueError for reserved and out-of-range indices.

        """
        try:
            return self._section[shndx]
        except KeyError:
            pass
        if shndx in Shn:
            raise ValueError('Reserved section index {}'.format(shndx))
        idx = shndx.value
        # Valid indices are 0 .. _shdr_num - 1.
        if idx < 0 or idx >= self._shdr_num:
            raise ValueError('Section index {} out of range [0, {})'.format(
                idx, self._shdr_num))
        shdr = self.read(
            self.Shdr, self.ehdr.e_shoff + idx * self.Shdr.size)
        self._section[shndx] = shdr
        return shdr

    def _find_stringtab(self, sh_link: int) -> 'StringTable':
        """Returns the string table in the section with index sh_link.

        Raises ValueError if the index is out of range or the section
        is not a string table.

        """
        try:
            # Return the cached table itself, not the cache dict.
            return self._stringtab[sh_link]
        except KeyError:
            pass
        if sh_link < 0 or sh_link >= self._shdr_num:
            raise ValueError('Section index {} out of range [0, {})'.format(
                sh_link, self._shdr_num))
        shdr = self.read(
            self.Shdr, self.ehdr.e_shoff + sh_link * self.Shdr.size)
        if shdr.sh_type != Sht.SHT_STRTAB:
            raise ValueError(
                'Section {} is not a string table: {}'.format(
                    sh_link, shdr.sh_type))
        strtab = StringTable(
            self.image[shdr.sh_offset:shdr.sh_offset + shdr.sh_size])
        # This could retain essentially arbitrary amounts of data,
        # but caching string tables seems important for performance.
        self._stringtab[sh_link] = strtab
        return strtab
def elf_hash(s):
    """Computes the ELF hash of the string.

    The elements of s may be integers (as when iterating over bytes)
    or single characters.

    """
    result = 0
    for item in s:
        code = item if type(item) is int else ord(item)
        result = ((result << 4) + code) & 0xffffffff
        # Fold the top four bits back into the value and clear them.
        high = result & 0xf0000000
        result = (result ^ (high >> 24)) & ~high
    return result
def gnu_hash(s):
    """Computes the GNU hash of the string.

    The elements of s may be integers (as when iterating over bytes)
    or single characters.

    """
    value = 5381
    for item in s:
        code = item if type(item) is int else ord(item)
        value = (33 * value + code) & 0xffffffff
    return value
# Export the names that start with an uppercase letter.
__all__ = [name for name in dir() if name[0].isupper()]