.
[idi.git] / file / map.py
blobeaa209dfbc38e104f0f1706d7b06c33809962fb0
1 from bisect import bisect
2 from struct import unpack
3 from array import array
class map:
    """Object map.

    This is the core idi data structure: it maps the file and slices it
    into many "segments" of various types.

    Note that the segments may overlap. Also note that this is TOTALLY
    different from an executable format's notion of "segments". Our
    segments are actually very tiny, typically spanning one data item or
    a single instruction.
    """

    # Public attributes:
    # file (class file.file.file)
    # segments ([class segment]) (ordered by offset)

    def __init__(self, file):
        self.file = file
        self.segments = []

    def new_segment(self, seg):
        """Add new segment to the map, keeping self.segments ordered.

        The list is kept sorted by each segment's physofs. A segment whose
        physofs equals that of existing segments is placed after them.
        """
        segments = self.segments
        # Optimize for common case - appending: the new segment starts at
        # or after the last one already known.
        if not segments or segments[-1].physofs <= seg.physofs:
            segments.append(seg)
            return
        # Bisect over the offsets, not over the segment objects themselves;
        # segments and integers are not meaningfully comparable.
        offsets = [known.physofs for known in segments]
        segments.insert(bisect(offsets, seg.physofs), seg)
class segment:
    """Object map segment.

    One segment in the file; that is, a continuous part of the file that
    is either code or data. There can be also zero-sized comment-only
    segments.
    """

    # Public attributes:
    # map (class map)
    # memofs (int, offset in memory)
    # physofs (int, offset in file)
    # len (int)
    # comment (string, can be None)
    # name (string)
    #     Contains user-visible name of segment type; use __class__ for
    #     programmatic segment type inspection instead.
    name = 'Generic'

    def __init__(self, map, memofs, physofs, len, comment = None):
        # Owning map and the two offsets of this segment.
        self.map, self.memofs, self.physofs = map, memofs, physofs
        # Size and the optional comment.
        self.len, self.comment = len, comment
55 # XXX: I don't quite like how comments are done now.
class seg_code (segment):
    """Map segment - code instruction.

    A code segment holds a single instruction.

    TODO: This object is very rudimentary now, it will probably get
    much richer over time - flow information for jump instructions,
    register information, etc.
    """

    # Public attributes:
    # txt (string)
    name = 'Code'

    def __init__(self, map, memofs, physofs, len, comment, txt):
        # len is length of the instruction's binary representation in octets
        # comment is optional comment, usually added by the user
        segment.__init__(self, map = map, memofs = memofs,
                         physofs = physofs, len = len, comment = comment)
        # txt is the textual representation of the instruction
        # (TODO: This should be richer - registers identified, etc.)
        self.txt = txt
# Data format identifiers; seg_data uses its format attribute as an
# index into the _fmts formatter table.
# TODO: More FMT support
FMT_HEX = 0  # cellsize 0: whole segment in hex
#FMT_DEC=1 # cellsize 0: N/A
#FMT_OCT=2 # cellsize 0: whole segment in hex
#FMT_FLOAT=3 # cellsize 0: N/A
#FMT_ASCII=4 # cellsize 0: whole segment in quoted ascii
#FMT_ZASCII=4 # cellsize!0: up to \0
#FMT_PASCII=5 # cellsize!0: first char is len
# ...
# NOTE(review): in the disabled entries above, FMT_ASCII and FMT_ZASCII
# share the value 4, and FMT_OCT is described as "in hex" - confirm the
# intent before enabling any of them.
89 # Cell formatters:
91 def _fmt_1hex(num):
92 return "%02x" % num
94 def _fmt_hex(data):
95 return reduce(lambda a, b: a + _fmt_1hex(b) + ' ', array("B", data), '')
# Formatter table: seg_data.string() looks up _fmts[format] to get the
# function that turns raw data into its textual form.
_fmts = [_fmt_hex]
class seg_data (segment):
    """Map segment - data.

    A data segment consists of one or more cells of given size, formatted
    in given way.
    """

    # Public attributes:
    # format (int: FMT_*)
    # cellsize (int; 0 is fmt specific)
    # TODO: endianity
    format = FMT_HEX
    cellsize = 0

    name = 'Data'

    def string(self, data):
        """Convert data to text, assuming it is the contents of this segment.

        Returns a list of strings, one per cell. With a positive cellsize
        the data is cut into consecutive cellsize-long slices (the last one
        may be shorter) and each slice is formatted separately; otherwise
        the whole data is formatted as a single cell.
        """
        formatter = _fmts[self.format]
        if self.cellsize > 0:
            # Apply the formatter to every cell. The previous filter() call
            # only used the formatter as a predicate and handed back the
            # raw cells instead of their formatted text.
            return [formatter(data[start:start + self.cellsize])
                    for start in range(0, len(data), self.cellsize)]
        return [formatter(data)]
class seg_comment (segment):
    """Map segment - comment.

    A comment that is not bound with any code/data.
    """

    name = 'Comment'