Use '%z' instead of '%Z' on printf functions
[glibc.git] / scripts / glibcelf.py
blob420cb21943b28bba2681e1e125649cfa4e4474d0
1 #!/usr/bin/python3
2 # ELF support functionality for Python.
3 # Copyright (C) 2022 Free Software Foundation, Inc.
4 # This file is part of the GNU C Library.
6 # The GNU C Library is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU Lesser General Public
8 # License as published by the Free Software Foundation; either
9 # version 2.1 of the License, or (at your option) any later version.
11 # The GNU C Library is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # Lesser General Public License for more details.
16 # You should have received a copy of the GNU Lesser General Public
17 # License along with the GNU C Library; if not, see
18 # <https://www.gnu.org/licenses/>.
20 """Basic ELF parser.
22 Use Image.readfile(path) to read an ELF file into memory and begin
23 parsing it.
25 """
27 import collections
28 import functools
29 import os
30 import struct
32 import glibcpp
34 class _MetaNamedValue(type):
35 """Used to set up _NamedValue subclasses."""
37 @classmethod
38 def __prepare__(metacls, cls, bases, **kwds):
39 # Indicates an int-based class. Needed for types like Shn.
40 int_based = False
41 for base in bases:
42 if issubclass(base, int):
43 int_based = int
44 break
45 return dict(by_value={},
46 by_name={},
47 prefix=None,
48 _int_based=int_based)
50 def __contains__(self, other):
51 return other in self.by_value
53 class _NamedValue(metaclass=_MetaNamedValue):
54 """Typed, named integer constants.
56 Constants have the following instance attributes:
58 name: The full name of the constant (e.g., "PT_NULL").
59 short_name: The name with of the constant without the prefix ("NULL").
60 value: The integer value of the constant.
62 The following class attributes are available:
64 by_value: A dict mapping integers to constants.
65 by_name: A dict mapping strings to constants.
66 prefix: A string that is removed from the start of short names, or None.
68 """
70 def __new__(cls, arg0, arg1=None):
71 """Instance creation.
73 For the one-argument form, the argument must be a string, an
74 int, or an instance of this class. Strings are looked up via
75 by_name. Values are looked up via by_value; if value lookup
76 fails, a new unnamed instance is returned. Instances of this
77 class a re returned as-is.
79 The two-argument form expects the name (a string) and the
80 value (an integer). A new instance is created in this case.
81 The instance is not registered in the by_value/by_name
82 dictionaries (but the caller can do that).
84 """
86 typ0 = type(arg0)
87 if arg1 is None:
88 if isinstance(typ0, cls):
89 # Re-use the existing object.
90 return arg0
91 if typ0 is int:
92 by_value = cls.by_value
93 try:
94 return by_value[arg0]
95 except KeyError:
96 # Create a new object of the requested value.
97 if cls._int_based:
98 result = int.__new__(cls, arg0)
99 else:
100 result = object.__new__(cls)
101 result.value = arg0
102 result.name = None
103 return result
104 if typ0 is str:
105 by_name = cls.by_name
106 try:
107 return by_name[arg0]
108 except KeyError:
109 raise ValueError('unknown {} constant: {!r}'.format(
110 cls.__name__, arg0))
111 else:
112 # Types for the two-argument form are rigid.
113 if typ0 is not str and typ0 is not None:
114 raise ValueError('type {} of name {!r} should be str'.format(
115 typ0.__name__, arg0))
116 if type(arg1) is not int:
117 raise ValueError('type {} of value {!r} should be int'.format(
118 type(arg1).__name__, arg1))
119 # Create a new named constants.
120 if cls._int_based:
121 result = int.__new__(cls, arg1)
122 else:
123 result = object.__new__(cls)
124 result.value = arg1
125 result.name = arg0
126 # Set up the short_name attribute.
127 prefix = cls.prefix
128 if prefix and arg0.startswith(prefix):
129 result.short_name = arg0[len(prefix):]
130 else:
131 result.short_name = arg0
132 return result
134 def __str__(self):
135 name = self.name
136 if name:
137 return name
138 else:
139 return str(self.value)
141 def __repr__(self):
142 name = self.name
143 if name:
144 return name
145 else:
146 return '{}({})'.format(self.__class__.__name__, self.value)
148 def __setattr__(self, name, value):
149 # Prevent modification of the critical attributes once they
150 # have been set.
151 if name in ('name', 'value', 'short_name') and hasattr(self, name):
152 raise AttributeError('can\'t set attribute {}'.format(name))
153 object.__setattr__(self, name, value)
@functools.total_ordering
class _TypedConstant(_NamedValue):
    """Base class for integer-valued optionally named constants.

    This type is not an integer type.  Instances compare equal only to
    instances of the same class (or a subclass of it) that carry the
    same value, and are ordered by value.  The remaining comparison
    operators are derived by functools.total_ordering.

    """

    def __eq__(self, other):
        return isinstance(other, self.__class__) and self.value == other.value

    def __lt__(self, other):
        # Must be a strict comparison: total_ordering derives >= as
        # "not <", so a non-strict test here makes a >= a false.
        return isinstance(other, self.__class__) and self.value < other.value

    def __hash__(self):
        # Defined explicitly because overriding __eq__ disables the
        # inherited __hash__.
        return hash(self.value)
class _IntConstant(_NamedValue, int):
    """Base class for integer-like optionally named constants.

    Instances compare equal to the integer of the same value, and can
    be used in integer arithmetic.

    Because int is among the bases, the metaclass marks the class as
    int-based and instances are created through int.__new__.

    """
    pass
class _FlagConstant(_TypedConstant, int):
    """Base class for flag constants (used by Pf and Shf).

    Combines the typed comparison of _TypedConstant with an int base.

    """
    pass
def _parse_elf_h():
    """Read ../elf/elf.h and return a dict with the constants in it.

    The header is located relative to this script.  It is tokenized
    with glibcpp.tokenize_c and its macro definitions are evaluated
    with glibcpp.macro_eval; the result of macro_eval is returned.

    Raises IOError if either reporter recorded an error.  Diagnostics
    are printed to standard output.

    """

    path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                        '..', 'elf', 'elf.h')

    class TokenizerReporter:
        """Report tokenizer errors to standard output."""

        def __init__(self):
            self.errors = 0

        def error(self, token, message):
            self.errors += 1
            print('{}:{}:{}: error: {}'.format(
                path, token.line, token.column, message))

    reporter = TokenizerReporter()
    with open(path) as inp:
        tokens = glibcpp.tokenize_c(inp.read(), reporter)
    if reporter.errors:
        raise IOError('parse error in elf.h')

    class MacroReporter:
        """Report macro errors to standard output."""

        def __init__(self):
            self.errors = 0

        def error(self, line, message):
            # The counter lives on the instance; a bare "errors += 1"
            # raises UnboundLocalError instead of recording the error.
            self.errors += 1
            print('{}:{}: error: {}'.format(path, line, message))

        def note(self, line, message):
            print('{}:{}: note: {}'.format(path, line, message))

    reporter = MacroReporter()
    result = glibcpp.macro_eval(glibcpp.macro_definitions(tokens), reporter)
    if reporter.errors:
        raise IOError('parse error in elf.h')

    return result
# Constants parsed from elf.h at import time.  The parser is only needed
# once, so it is removed from the module namespace afterwards.
_elf_h = _parse_elf_h()
del _parse_elf_h
# Names from _elf_h that have already been assigned to a constant class
# by _register_elf_h.
_elf_h_processed = set()
def _register_elf_h(cls, prefix=None, skip=(), ranges=False, parent=None):
    """Register the elf.h constants starting with a prefix on cls.

    cls: the constant class that receives the constants.
    prefix: name prefix to match; defaults to cls.prefix.
    skip: names that must not be registered.  prefix + 'NUM' is always
      skipped.
    ranges: if true, the LOOS/HIOS/LOPROC/HIPROC names are skipped and
      stored as the cls.os_range and cls.proc_range pairs instead.
    parent: optional class through which the names are made available
      as well (attribute and by_name entry).

    Names already registered by an earlier call are ignored.  Raises
    ValueError if no prefix is available, if two names map to the same
    value within cls, or if no new name matched the prefix.

    """
    prefix = prefix or cls.prefix
    if not prefix:
        raise ValueError('missing prefix for {}'.format(cls.__name__))
    values = cls.by_value
    names = cls.by_name
    seen = _elf_h_processed

    excluded = set(skip)
    excluded.add(prefix + 'NUM')
    if ranges:
        for suffix in ('LOOS', 'HIOS', 'LOPROC', 'HIPROC'):
            excluded.add(prefix + suffix)
        cls.os_range = (_elf_h[prefix + 'LOOS'], _elf_h[prefix + 'HIOS'])
        cls.proc_range = (_elf_h[prefix + 'LOPROC'], _elf_h[prefix + 'HIPROC'])

    # Inherit the prefix from the parent if not set.
    if parent and cls.prefix is None and parent.prefix is not None:
        cls.prefix = parent.prefix

    seen_before = len(seen)
    for macro, number in _elf_h.items():
        if macro in excluded or macro in seen:
            continue
        if not macro.startswith(prefix):
            continue
        seen.add(macro)
        if number in values:
            raise ValueError('duplicate value {}: {}, {}'.format(
                number, macro, values[number]))
        constant = cls(macro, number)
        values[number] = constant
        names[macro] = constant
        setattr(cls, macro, constant)
        if parent:
            # Make the symbolic name available through the parent as well.
            parent.by_name[macro] = constant
            setattr(parent, macro, constant)

    # The shared set only grows when this call registered something.
    if len(seen) == seen_before:
        raise ValueError('nothing matched prefix {!r}'.format(prefix))
class ElfClass(_TypedConstant):
    """ELF word size.  Type of EI_CLASS values."""
# Register the elf.h names starting with ELFCLASS (the class has no
# prefix of its own, so it is passed explicitly).
_register_elf_h(ElfClass, prefix='ELFCLASS')
class ElfData(_TypedConstant):
    """ELF endianness.  Type of EI_DATA values."""
# Register the elf.h names starting with ELFDATA.
_register_elf_h(ElfData, prefix='ELFDATA')
class Machine(_TypedConstant):
    """ELF machine type.  Type of values in Ehdr.e_machine field."""
    prefix = 'EM_'
# EM_ARC_A5 is excluded from registration.
# NOTE(review): presumably it duplicates the value of another EM_*
# name, which _register_elf_h would reject; confirm against elf.h.
_register_elf_h(Machine, skip=('EM_ARC_A5',))
class Et(_TypedConstant):
    """ELF file type.  Type of ET_* values and the Ehdr.e_type field."""
    prefix = 'ET_'
# ranges=True stores the ET_LOOS/ET_HIOS and ET_LOPROC/ET_HIPROC bounds
# as Et.os_range and Et.proc_range instead of registering them.
_register_elf_h(Et, ranges=True)
class Shn(_IntConstant):
    """ELF reserved section indices."""
    prefix = 'SHN_'
class ShnMIPS(Shn):
    """Supplemental SHN_* constants for EM_MIPS."""
class ShnPARISC(Shn):
    """Supplemental SHN_* constants for EM_PARISC."""
# The machine-specific prefixes are registered first: names that have
# been processed once are ignored by later calls, so the final call for
# Shn only picks up the remaining generic SHN_* names.
_register_elf_h(ShnMIPS, prefix='SHN_MIPS_', parent=Shn)
_register_elf_h(ShnPARISC, prefix='SHN_PARISC_', parent=Shn)
_register_elf_h(Shn, skip='SHN_LORESERVE SHN_HIRESERVE'.split(), ranges=True)
class Sht(_TypedConstant):
    """ELF section types.  Type of SHT_* values."""
    prefix = 'SHT_'
class ShtALPHA(Sht):
    """Supplemental SHT_* constants for EM_ALPHA."""
class ShtARM(Sht):
    """Supplemental SHT_* constants for EM_ARM."""
class ShtCSKY(Sht):
    """Supplemental SHT_* constants for EM_CSKY."""
class ShtIA_64(Sht):
    """Supplemental SHT_* constants for EM_IA_64."""
class ShtMIPS(Sht):
    """Supplemental SHT_* constants for EM_MIPS."""
class ShtPARISC(Sht):
    """Supplemental SHT_* constants for EM_PARISC."""
class ShtRISCV(Sht):
    """Supplemental SHT_* constants for EM_RISCV."""
# Machine-specific prefixes first; the final call for Sht registers the
# generic SHT_* names that are left over.
_register_elf_h(ShtALPHA, prefix='SHT_ALPHA_', parent=Sht)
_register_elf_h(ShtARM, prefix='SHT_ARM_', parent=Sht)
_register_elf_h(ShtCSKY, prefix='SHT_CSKY_', parent=Sht)
_register_elf_h(ShtIA_64, prefix='SHT_IA_64_', parent=Sht)
_register_elf_h(ShtMIPS, prefix='SHT_MIPS_', parent=Sht)
_register_elf_h(ShtPARISC, prefix='SHT_PARISC_', parent=Sht)
_register_elf_h(ShtRISCV, prefix='SHT_RISCV_', parent=Sht)
_register_elf_h(Sht, ranges=True,
                skip='SHT_LOSUNW SHT_HISUNW SHT_LOUSER SHT_HIUSER'.split())
class Pf(_FlagConstant):
    """Program header flags.  Type of Phdr.p_flags values."""
    prefix = 'PF_'
class PfARM(Pf):
    """Supplemental PF_* flags for EM_ARM."""
class PfHP(Pf):
    """Supplemental PF_* flags for HP-UX."""
class PfIA_64(Pf):
    """Supplemental PF_* flags for EM_IA_64."""
class PfMIPS(Pf):
    """Supplemental PF_* flags for EM_MIPS."""
class PfPARISC(Pf):
    """Supplemental PF_* flags for EM_PARISC."""
# Machine-specific prefixes first; the final call for Pf registers the
# generic PF_* names, except the two mask names.
_register_elf_h(PfARM, prefix='PF_ARM_', parent=Pf)
_register_elf_h(PfHP, prefix='PF_HP_', parent=Pf)
_register_elf_h(PfIA_64, prefix='PF_IA_64_', parent=Pf)
_register_elf_h(PfMIPS, prefix='PF_MIPS_', parent=Pf)
_register_elf_h(PfPARISC, prefix='PF_PARISC_', parent=Pf)
_register_elf_h(Pf, skip='PF_MASKOS PF_MASKPROC'.split())
class Shf(_FlagConstant):
    """Section flags.  Type of Shdr.sh_flags values."""
    prefix = 'SHF_'
class ShfALPHA(Shf):
    """Supplemental SHF_* constants for EM_ALPHA."""
class ShfARM(Shf):
    """Supplemental SHF_* constants for EM_ARM."""
class ShfIA_64(Shf):
    """Supplemental SHF_* constants for EM_IA_64."""
class ShfMIPS(Shf):
    """Supplemental SHF_* constants for EM_MIPS."""
class ShfPARISC(Shf):
    """Supplemental SHF_* constants for EM_PARISC."""
# Machine-specific prefixes first; the final call for Shf registers the
# generic SHF_* names, except the two mask names.
_register_elf_h(ShfALPHA, prefix='SHF_ALPHA_', parent=Shf)
_register_elf_h(ShfARM, prefix='SHF_ARM_', parent=Shf)
_register_elf_h(ShfIA_64, prefix='SHF_IA_64_', parent=Shf)
_register_elf_h(ShfMIPS, prefix='SHF_MIPS_', parent=Shf)
_register_elf_h(ShfPARISC, prefix='SHF_PARISC_', parent=Shf)
_register_elf_h(Shf, skip='SHF_MASKOS SHF_MASKPROC'.split())
class Stb(_TypedConstant):
    """ELF symbol binding type."""
    prefix = 'STB_'
# ranges=True stores the STB_LOOS/STB_HIOS and STB_LOPROC/STB_HIPROC
# bounds as Stb.os_range and Stb.proc_range.
_register_elf_h(Stb, ranges=True)
class Stt(_TypedConstant):
    """ELF symbol type."""
    prefix = 'STT_'
# The supplemental classes derive from Stt (not Sht), so that their
# constants are Stt instances and compare equal to Stt values.
class SttARM(Stt):
    """Supplemental STT_* constants for EM_ARM."""
class SttPARISC(Stt):
    """Supplemental STT_* constants for EM_PARISC."""
class SttSPARC(Stt):
    """Supplemental STT_* constants for EM_SPARC."""
    # Placeholder; replaced by the registered constant of the same
    # name if elf.h defines STT_SPARC_REGISTER.
    STT_SPARC_REGISTER = 13
class SttX86_64(Stt):
    """Supplemental STT_* constants for EM_X86_64."""
# Machine-specific prefixes first; the final call for Stt registers the
# generic STT_* names that are left over.  No constants are registered
# for SttX86_64 here.
_register_elf_h(SttARM, prefix='STT_ARM_', parent=Stt)
_register_elf_h(SttPARISC, prefix='STT_PARISC_', parent=Stt)
_register_elf_h(SttSPARC, prefix='STT_SPARC_', parent=Stt)
_register_elf_h(Stt, ranges=True)
class Pt(_TypedConstant):
    """ELF program header types.  Type of Phdr.p_type."""
    prefix = 'PT_'
class PtAARCH64(Pt):
    """Supplemental PT_* constants for EM_AARCH64."""
class PtARM(Pt):
    """Supplemental PT_* constants for EM_ARM."""
class PtHP(Pt):
    """Supplemental PT_* constants for HP-UX."""
class PtIA_64(Pt):
    """Supplemental PT_* constants for EM_IA_64."""
class PtMIPS(Pt):
    """Supplemental PT_* constants for EM_MIPS."""
class PtPARISC(Pt):
    """Supplemental PT_* constants for EM_PARISC."""
class PtRISCV(Pt):
    """Supplemental PT_* constants for EM_RISCV."""
# Machine-specific prefixes first; the final call for Pt registers the
# generic PT_* names that are left over.
_register_elf_h(PtAARCH64, prefix='PT_AARCH64_', parent=Pt)
_register_elf_h(PtARM, prefix='PT_ARM_', parent=Pt)
_register_elf_h(PtHP, prefix='PT_HP_', parent=Pt)
_register_elf_h(PtIA_64, prefix='PT_IA_64_', parent=Pt)
_register_elf_h(PtMIPS, prefix='PT_MIPS_', parent=Pt)
_register_elf_h(PtPARISC, prefix='PT_PARISC_', parent=Pt)
_register_elf_h(PtRISCV, prefix='PT_RISCV_', parent=Pt)
_register_elf_h(Pt, skip='PT_LOSUNW PT_HISUNW'.split(), ranges=True)
class Dt(_TypedConstant):
    """ELF dynamic segment tags.  Type of Dyn.d_tag."""
    prefix = 'DT_'
class DtAARCH64(Dt):
    """Supplemental DT_* constants for EM_AARCH64."""
class DtALPHA(Dt):
    """Supplemental DT_* constants for EM_ALPHA."""
class DtALTERA_NIOS2(Dt):
    """Supplemental DT_* constants for EM_ALTERA_NIOS2."""
class DtIA_64(Dt):
    """Supplemental DT_* constants for EM_IA_64."""
class DtMIPS(Dt):
    """Supplemental DT_* constants for EM_MIPS."""
class DtPPC(Dt):
    """Supplemental DT_* constants for EM_PPC."""
class DtPPC64(Dt):
    """Supplemental DT_* constants for EM_PPC64."""
class DtRISCV(Dt):
    """Supplemental DT_* constants for EM_RISCV."""
class DtSPARC(Dt):
    """Supplemental DT_* constants for EM_SPARC."""
# Names that are never registered as Dt constants; the same list is
# passed to every registration call below.
_dt_skip = '''
DT_ENCODING DT_PROCNUM
DT_ADDRRNGLO DT_ADDRRNGHI DT_ADDRNUM
DT_VALRNGLO DT_VALRNGHI DT_VALNUM
DT_VERSIONTAGNUM DT_EXTRANUM
DT_AARCH64_NUM
DT_ALPHA_NUM
DT_IA_64_NUM
DT_MIPS_NUM
DT_PPC_NUM
DT_PPC64_NUM
DT_SPARC_NUM
'''.strip().split()
# Machine-specific prefixes first; the final call for Dt registers the
# generic DT_* names that are left over.
_register_elf_h(DtAARCH64, prefix='DT_AARCH64_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtALPHA, prefix='DT_ALPHA_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtALTERA_NIOS2, prefix='DT_NIOS2_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtIA_64, prefix='DT_IA_64_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtMIPS, prefix='DT_MIPS_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtPPC, prefix='DT_PPC_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtPPC64, prefix='DT_PPC64_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtRISCV, prefix='DT_RISCV_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtSPARC, prefix='DT_SPARC_', skip=_dt_skip, parent=Dt)
_register_elf_h(Dt, skip=_dt_skip, ranges=True)
del _dt_skip
# Constant extraction is complete.  The registration helper and the
# parsed elf.h dictionary are removed from the module namespace.
del _register_elf_h
del _elf_h
class StInfo:
    """ELF symbol binding and type.  Type of the Sym.st_info field.

    Instances have two attributes:

    bind: the symbol binding, an Stb constant.
    type: the symbol type, an Stt constant.

    """

    def __init__(self, arg0, arg1=None):
        """Create the object from a raw value or from a bind/type pair.

        With a single int argument, the upper bits (value >> 4) are the
        binding and the low four bits are the type.  Otherwise arg0 is
        converted with Stb and arg1 with Stt.

        """
        if isinstance(arg0, int) and arg1 is None:
            self.bind = Stb(arg0 >> 4)
            self.type = Stt(arg0 & 15)
        else:
            self.bind = Stb(arg0)
            self.type = Stt(arg1)

    def value(self):
        """Returns the raw value for the bind/type combination."""
        # The value of a constant is a plain attribute, not a method.
        return (self.bind.value << 4) | self.type.value
# Layout of a type in an ELF file.  Used for deserialization: unpack is
# a function that converts a blob into the Python object, and size is
# the number of bytes to pass to it.
_Layout = collections.namedtuple('_Layout', 'unpack size')
def _define_layouts(baseclass: type, layout32: str, layout64: str,
                    types=None, fields32=None):
    """Assign variants dict to baseclass.

    The variants dict is indexed by (ElfClass, ElfData) pairs, and its
    values are _Layout instances.  It is stored as baseclass.layouts.

    baseclass: a named tuple class (its _fields attribute is used).
    layout32, layout64: struct format strings without the byte order
      prefix, for the 32-bit and 64-bit variants.
    types: optional dict mapping field names to converter callables
      that are applied to the unpacked field values.
    fields32: optional field order of the 32-bit variant, if it differs
      from the order of baseclass._fields.

    Raises ValueError if a format yields the wrong number of fields, if
    types names an unknown field, or if fields32 is not a permutation
    of the fields.

    """
    struct32 = struct.Struct(layout32)
    struct64 = struct.Struct(layout64)

    # Check that the struct formats yield the right number of components.
    for s in (struct32, struct64):
        example = s.unpack(b' ' * s.size)
        if len(example) != len(baseclass._fields):
            raise ValueError('{!r} yields wrong field count: {} != {}'.format(
                s.format, len(example), len(baseclass._fields)))

    # Check that field names in types are correct.
    if types is None:
        types = ()
    for n in types:
        if n not in baseclass._fields:
            raise ValueError('{} does not have field {!r}'.format(
                baseclass.__name__, n))

    if fields32 is not None \
       and set(fields32) != set(baseclass._fields):
        raise ValueError('{!r} is not a permutation of the fields {!r}'.format(
            fields32, baseclass._fields))

    # The default argument is evaluated once, so used_names acts as
    # state shared by all calls of unique_name within this invocation.
    def unique_name(name, used_names = (set((baseclass.__name__,))
                                        | set(baseclass._fields)
                                        | {n.__name__
                                           for n in (types or {}).values()})):
        """Find a name that is not used for a class or field name."""
        candidate = name
        n = 0
        while candidate in used_names:
            n += 1
            candidate = '{}{}'.format(name, n)
        used_names.add(candidate)
        return candidate

    # Names for use in the generated code that cannot clash with the
    # class, field and converter names.
    blob_name = unique_name('blob')
    struct_unpack_name = unique_name('struct_unpack')
    comps_name = unique_name('comps')

    layouts = {}
    for (bits, elfclass, layout, fields) in (
            (32, ElfClass.ELFCLASS32, layout32, fields32),
            (64, ElfClass.ELFCLASS64, layout64, None),
    ):
        for (elfdata, structprefix, funcsuffix) in (
                (ElfData.ELFDATA2LSB, '<', 'LE'),
                (ElfData.ELFDATA2MSB, '>', 'BE'),
        ):
            # Globals for the generated function.
            env = {
                baseclass.__name__: baseclass,
                struct_unpack_name: struct.unpack,
            }

            # Add the type converters.
            if types:
                for cls in types.values():
                    env[cls.__name__] = cls

            funcname = ''.join(
                ('unpack_', baseclass.__name__, str(bits), funcsuffix))

            # Source code of the unpack function, built line by line.
            code = '''
def {funcname}({blob_name}):
'''.format(funcname=funcname, blob_name=blob_name)

            indent = ' ' * 4
            unpack_call = '{}({!r}, {})'.format(
                struct_unpack_name, structprefix + layout, blob_name)
            field_names = ', '.join(baseclass._fields)
            # types was replaced by () above if it was None, so this
            # condition is never true and the else branch is always used.
            if types is None and fields is None:
                code += '{}return {}({})\n'.format(
                    indent, baseclass.__name__, unpack_call)
            else:
                # Destructuring tuple assignment.
                if fields is None:
                    code += '{}{} = {}\n'.format(
                        indent, field_names, unpack_call)
                else:
                    # Use custom field order.
                    code += '{}{} = {}\n'.format(
                        indent, ', '.join(fields), unpack_call)

                # Perform the type conversions.
                for n in baseclass._fields:
                    if n in types:
                        code += '{}{} = {}({})\n'.format(
                            indent, n, types[n].__name__, n)
                # Create the named tuple.
                code += '{}return {}({})\n'.format(
                    indent, baseclass.__name__, field_names)

            # Defines the function in env under funcname.
            exec(code, env)
            layouts[(elfclass, elfdata)] = _Layout(
                env[funcname], struct.calcsize(layout))
    baseclass.layouts = layouts
# Corresponds to EI_* indices into Elf*_Ehdr.e_ident.
class Ident(collections.namedtuple('Ident',
    'ei_mag ei_class ei_data ei_version ei_osabi ei_abiversion ei_pad')):
    """The identification bytes at the start of an ELF file."""

    # Number of bytes consumed by unpack.
    size = 16

    def __new__(cls, *args):
        """Build the object from one blob or from its individual fields."""
        if len(args) != 1:
            return cls.__base__.__new__(cls, *args)
        return cls.unpack(args[0])

    @staticmethod
    def unpack(blob: memoryview) -> 'Ident':
        """Parse raw data into a tuple.

        The class and data encoding bytes are converted to ElfClass
        and ElfData constants; the other fields are kept as unpacked.

        """
        raw = struct.unpack('4s5B7s', blob)
        magic, word_size, byte_order = raw[:3]
        return Ident(magic, ElfClass(word_size), ElfData(byte_order),
                     *raw[3:])
# Corresponds to Elf32_Ehdr and Elf64_Ehdr.  The e_ident, e_machine,
# e_type and e_shstrndx fields are converted to Ident, Machine, Et and
# Shn objects when unpacking.
Ehdr = collections.namedtuple('Ehdr',
   'e_ident e_type e_machine e_version e_entry e_phoff e_shoff e_flags'
    + ' e_ehsize e_phentsize e_phnum e_shentsize e_shnum e_shstrndx')
_define_layouts(Ehdr,
                layout32='16s2H5I6H',
                layout64='16s2HI3QI6H',
                types=dict(e_ident=Ident,
                           e_machine=Machine,
                           e_type=Et,
                           e_shstrndx=Shn))
# Corresponds to Elf32_Phdr and Elf64_Phdr.  Order follows the latter;
# fields32 gives the order in which the fields are unpacked for the
# 32-bit layout.
Phdr = collections.namedtuple('Phdr',
   'p_type p_flags p_offset p_vaddr p_paddr p_filesz p_memsz p_align')
_define_layouts(Phdr,
                layout32='8I',
                fields32=('p_type', 'p_offset', 'p_vaddr', 'p_paddr',
                          'p_filesz', 'p_memsz', 'p_flags', 'p_align'),
                layout64='2I6Q',
                types=dict(p_type=Pt, p_flags=Pf))
# Corresponds to Elf32_Shdr and Elf64_Shdr.
class Shdr(collections.namedtuple('Shdr',
    'sh_name sh_type sh_flags sh_addr sh_offset sh_size sh_link sh_info'
    + ' sh_addralign sh_entsize')):
    """ELF section header."""

    def resolve(self, strtab: 'StringTable') -> 'Shdr':
        """Return a copy with sh_name looked up in the string table."""
        name_offset, *others = self
        return type(self)(strtab.get(name_offset), *others)
# Attach the unpackers to Shdr.  sh_type, sh_flags and sh_link are
# converted to Sht, Shf and Shn objects when unpacking.
_define_layouts(Shdr,
                layout32='10I',
                layout64='2I4Q2I2Q',
                types=dict(sh_type=Sht,
                           sh_flags=Shf,
                           sh_link=Shn))
# Corresponds to Elf32_Dyn and Elf64_Dyn.  The nesting through the
# d_un union is skipped, and d_ptr is missing (its representation in
# Python would be identical to d_val).  Both fields are unpacked as
# signed integers, and d_tag is converted to a Dt object.
Dyn = collections.namedtuple('Dyn', 'd_tag d_val')
_define_layouts(Dyn,
                layout32='2i',
                layout64='2q',
                types=dict(d_tag=Dt))
# Corresponds to Elf32_Sym and Elf64_Sym.
class Sym(collections.namedtuple('Sym',
    'st_name st_info st_other st_shndx st_value st_size')):
    """ELF symbol table entry."""

    def resolve(self, strtab: 'StringTable') -> 'Sym':
        """Return a copy with st_name looked up in the string table."""
        name_offset, *others = self
        return type(self)(strtab.get(name_offset), *others)
# Attach the unpackers to Sym.  The tuple follows the field order given
# in the Sym definition; fields32 gives the order in which the fields
# are unpacked for the 32-bit layout.  st_shndx and st_info are
# converted to Shn and StInfo objects.
_define_layouts(Sym,
                layout32='3I2BH',
                layout64='I2BH2Q',
                fields32=('st_name', 'st_value', 'st_size', 'st_info',
                          'st_other', 'st_shndx'),
                types=dict(st_shndx=Shn,
                           st_info=StInfo))
# Corresponds to Elf32_Rel and Elf64_Rel.  Both fields are unpacked as
# unsigned integers without further conversion.
Rel = collections.namedtuple('Rel', 'r_offset r_info')
_define_layouts(Rel,
                layout32='2I',
                layout64='2Q')
# Corresponds to Elf32_Rela and Elf64_Rela.  r_offset and r_info are
# unsigned, but r_addend is a signed quantity (Elf32_Sword and
# Elf64_Sxword), so it is unpacked with a signed format code.
Rela = collections.namedtuple('Rela', 'r_offset r_info r_addend')
_define_layouts(Rela,
                layout32='2Ii',
                layout64='2Qq')
class StringTable:
    """ELF string table."""

    def __init__(self, blob):
        """Wrap the data in the blob as a string table.

        blob: a memoryview-like object

        """
        self.blob = blob

    def get(self, index) -> bytes:
        """Returns the null-terminated byte string at the index.

        The terminating null byte is not part of the result.

        """
        data = self.blob
        stop = index
        # Scan forward to the terminator.
        while data[stop] != 0:
            stop += 1
        return bytes(data[index:stop])
class Image:
    """ELF image parser."""

    def __init__(self, image):
        """Create an ELF image from binary image data.

        image: a memoryview-like object that supports efficient range
        subscripting.

        """
        self.image = image
        ident = self.read(Ident, 0)
        classdata = (ident.ei_class, ident.ei_data)
        # Set self.Ehdr etc. to the subtypes with the right parsers.
        # The attribute is None if there is no layout for classdata.
        for typ in (Ehdr, Phdr, Shdr, Dyn, Sym, Rel, Rela):
            setattr(self, typ.__name__, typ.layouts.get(classdata, None))

        if self.Ehdr is not None:
            self.ehdr = self.read(self.Ehdr, 0)
            self._shdr_num = self._compute_shdr_num()
        else:
            self.ehdr = None
            self._shdr_num = 0

        # Caches: section headers by index, string tables by index.
        self._section = {}
        self._stringtab = {}

        if self._shdr_num > 0:
            self._shdr_strtab = self._find_shdr_strtab()
        else:
            self._shdr_strtab = None

    @staticmethod
    def readfile(path: str) -> 'Image':
        """Reads the ELF file at the specified path."""
        with open(path, 'rb') as inp:
            return Image(memoryview(inp.read()))

    def _compute_shdr_num(self) -> int:
        """Computes the actual number of section headers."""
        shnum = self.ehdr.e_shnum
        if shnum == 0:
            if self.ehdr.e_shoff == 0 or self.ehdr.e_shentsize == 0:
                # No section headers.
                return 0
            # Otherwise the extension mechanism is used (which may be
            # needed because e_shnum is just 16 bits).
            return self.read(self.Shdr, self.ehdr.e_shoff).sh_size
        return shnum

    def _find_shdr_strtab(self) -> 'StringTable':
        """Finds the section header string table (maybe via extensions)."""
        shstrndx = self.ehdr.e_shstrndx
        if shstrndx == Shn.SHN_XINDEX:
            shstrndx = self.read(self.Shdr, self.ehdr.e_shoff).sh_link
        return self._find_stringtab(shstrndx)

    def read(self, typ: type, offset:int ):
        """Reads an object at a specific offset.

        The type must provide an unpack function and a size attribute,
        as the _Layout objects created by _define_layouts (and the
        Ident class) do.

        """
        return typ.unpack(self.image[offset: offset + typ.size])

    def phdrs(self) -> 'Phdr':
        """Generator iterating over the program headers.

        Raises ValueError if e_phentsize does not match the Phdr size.

        """
        if self.ehdr is None:
            return
        size = self.ehdr.e_phentsize
        if size != self.Phdr.size:
            raise ValueError('Unexpected Phdr size in ELF header: {} != {}'
                             .format(size, self.Phdr.size))

        offset = self.ehdr.e_phoff
        for _ in range(self.ehdr.e_phnum):
            yield self.read(self.Phdr, offset)
            offset += size

    def shdrs(self, resolve: bool=True) -> 'Shdr':
        """Generator iterating over the section headers.

        If resolve, section names are automatically translated
        using the section header string table.

        Raises ValueError if e_shentsize does not match the Shdr size.

        """
        if self._shdr_num == 0:
            return

        size = self.ehdr.e_shentsize
        if size != self.Shdr.size:
            raise ValueError('Unexpected Shdr size in ELF header: {} != {}'
                             .format(size, self.Shdr.size))

        offset = self.ehdr.e_shoff
        for _ in range(self._shdr_num):
            shdr = self.read(self.Shdr, offset)
            if resolve:
                shdr = shdr.resolve(self._shdr_strtab)
            yield shdr
            offset += size

    def dynamic(self) -> 'Dyn':
        """Generator iterating over the dynamic segment.

        Raises ValueError if the segment size is not a multiple of the
        Dyn size.

        """
        for phdr in self.phdrs():
            if phdr.p_type == Pt.PT_DYNAMIC:
                # Pick the first dynamic segment, like the loader.
                if phdr.p_filesz == 0:
                    # Probably separated debuginfo.
                    return
                offset = phdr.p_offset
                end = offset + phdr.p_memsz
                size = self.Dyn.size
                while True:
                    next_offset = offset + size
                    if next_offset > end:
                        raise ValueError(
                            'Dynamic segment size {} is not a multiple of Dyn size {}'.format(
                                phdr.p_memsz, size))
                    yield self.read(self.Dyn, offset)
                    if next_offset == end:
                        return
                    offset = next_offset

    def syms(self, shdr: 'Shdr', resolve: bool=True) -> 'Sym':
        """A generator iterating over a symbol table.

        If resolve, symbol names are automatically translated using
        the string table for the symbol table.

        Raises ValueError if the entry size does not match the Sym
        size, or if the table size is not a multiple of it.

        """
        assert shdr.sh_type == Sht.SHT_SYMTAB
        size = shdr.sh_entsize
        if size != self.Sym.size:
            raise ValueError('Invalid symbol table entry size {}'.format(size))
        offset = shdr.sh_offset
        end = shdr.sh_offset + shdr.sh_size
        if resolve:
            strtab = self._find_stringtab(shdr.sh_link)
        while offset < end:
            sym = self.read(self.Sym, offset)
            if resolve:
                sym = sym.resolve(strtab)
            yield sym
            offset += size
        if offset != end:
            raise ValueError('Symbol table is not a multiple of entry size')

    def lookup_string(self, strtab_index: int, strtab_offset: int) -> bytes:
        """Looks up a string in a string table identified by its link index."""
        # _find_stringtab consults the cache first.
        return self._find_stringtab(strtab_index).get(strtab_offset)

    def find_section(self, shndx: 'Shn') -> 'Shdr':
        """Returns the section header for the indexed section.

        The section name is not resolved.  Raises ValueError for
        reserved section indices and for indices outside the section
        header table.

        """
        try:
            return self._section[shndx]
        except KeyError:
            pass
        if shndx in Shn:
            raise ValueError('Reserved section index {}'.format(shndx))
        idx = shndx.value
        # Valid indices are 0 .. _shdr_num - 1.
        if idx < 0 or idx >= self._shdr_num:
            raise ValueError('Section index {} out of range [0, {})'.format(
                idx, self._shdr_num))
        shdr = self.read(
            self.Shdr, self.ehdr.e_shoff + idx * self.Shdr.size)
        self._section[shndx] = shdr
        return shdr

    def _find_stringtab(self, sh_link: int) -> 'StringTable':
        """Returns the string table in the section with index sh_link.

        Results are cached.  Raises ValueError if the index is out of
        range or the section is not a string table.

        """
        try:
            # Return the cached table itself, not the cache dictionary.
            return self._stringtab[sh_link]
        except KeyError:
            pass
        if sh_link < 0 or sh_link >= self._shdr_num:
            raise ValueError('Section index {} out of range [0, {})'.format(
                sh_link, self._shdr_num))
        shdr = self.read(
            self.Shdr, self.ehdr.e_shoff + sh_link * self.Shdr.size)
        if shdr.sh_type != Sht.SHT_STRTAB:
            raise ValueError(
                'Section {} is not a string table: {}'.format(
                    sh_link, shdr.sh_type))
        strtab = StringTable(
            self.image[shdr.sh_offset:shdr.sh_offset + shdr.sh_size])
        # This could retain essentially arbitrary amounts of data,
        # but caching string tables seems important for performance.
        self._stringtab[sh_link] = strtab
        return strtab
def elf_hash(s):
    """Computes the ELF hash of the string.

    s may be a str or a bytes-like object; elements that are not
    plain ints are converted with ord.

    """
    result = 0
    for item in s:
        code = item if type(item) is int else ord(item)
        result = ((result << 4) + code) & 0xffffffff
        # Fold the top four bits back in and clear them.
        high = result & 0xf0000000
        result ^= high >> 24
        result &= ~high
    return result
def gnu_hash(s):
    """Computes the GNU hash of the string.

    s may be a str or a bytes-like object; elements that are not
    plain ints are converted with ord.

    """
    digest = 5381
    for item in s:
        code = item if type(item) is int else ord(item)
        # Keep the running value within 32 bits.
        digest = (digest * 33 + code) & 0xffffffff
    return digest
# Export every module-level name that starts with an upper-case letter;
# helpers with a leading underscore and the lower-case hash functions
# are not included.
__all__ = [name for name in dir() if name[0].isupper()]