We need to avoid decoding errors during the parsing of patches and
[git-dm.git] / gitlog.py
blobc4af6d487ffe2f3057c5ff0d26e826626c67619a
2 # Stuff for dealing with the git log output.
4 # Someday this will be the only version of grabpatch, honest.
6 import re, datetime
7 from email.utils import parsedate
8 from patterns import patterns
9 import database
13 # Input file handling. Someday it would be good to make this smarter
14 # so that it handles running git with the right options and such.
16 # Someday.
SavedLine = ''

def getline(input):
    """Return the next line of git log output, or None at end of input.

    A line pushed back with SaveLine() is handed out first, exactly as it
    was saved, and the push-back slot is cleared.  Otherwise one line is
    read from *input* with readline() and returned with trailing
    whitespace (including the newline) stripped.

    *input* may be a binary stream, in which case the line is decoded
    here, or a text stream, whose lines are already str and are used
    as they are.
    """
    global SavedLine
    if SavedLine:
        line, SavedLine = SavedLine, ''
        return line
    raw = input.readline()
    if not raw:
        return None
    #
    # In theory everything coming out of git is utf8.  In practice,
    # there's some funky stuff in the kernel repo.  So...fall back
    # to latin1 if a utf8 decode fails; that doesn't work for everything
    # but it's about as good as we can do.  A text-mode stream has
    # already done its own decoding, so leave str lines alone.
    #
    if isinstance(raw, bytes):
        try:
            raw = raw.decode('utf8')
        except UnicodeDecodeError:
            raw = raw.decode('latin1')
    return raw.rstrip()
def SaveLine(line):
    """Stash *line* in the module-level SavedLine slot.

    getline() hands out a non-empty saved line before reading any
    further input, so this acts as a one-line push-back.
    """
    global SavedLine
    SavedLine = line
#
# A simple state machine based on where we are in the patch.  The
# states are numbered in the order the sections show up:
#
#   S_HEADER     the header, which is the first stuff we get
#   S_DESC       the single-line description
#   S_CHANGELOG  the full changelog
#   S_TAGS       the tag section
#   S_NUMSTAT    the numstat section
#   S_DONE       nothing more to read for this patch
#
(S_HEADER, S_DESC, S_CHANGELOG,
 S_TAGS, S_NUMSTAT, S_DONE) = range(6)
70 # The functions to handle each of these states.
def get_header(patch, line, input):
    """Handle one line of the commit header (state S_HEADER).

    An author line sets patch.email and patch.author; a date line sets
    patch.date.  The blank line that ends the header moves the parser on
    to S_DESC; if no author has been stored by then, a warning is printed
    and a placeholder hacker is looked up instead.

    *input* is not used here; it is accepted so that every state handler
    can be called the same way.  Returns the next parser state.
    """
    if line == '':
        if patch.author == '':
            print('Funky auth line in', patch.commit)
            patch.author = database.LookupStoreHacker('Unknown',
                                                      'unknown@hacker.net')
        return S_DESC
    m = patterns['author'].match(line)
    if m:
        patch.email = database.RemapEmail(m.group(2))
        patch.author = database.LookupStoreHacker(m.group(1), patch.email)
        return S_HEADER
    m = patterns['date'].match(line)
    if m:
        #
        # parsedate() returns None when it cannot make sense of the
        # string.  Complain and leave the date alone in that case rather
        # than blowing up on the subscripts below.
        #
        dt = parsedate(m.group(2))
        if dt is None:
            print('Funky date line in', patch.commit)
        else:
            patch.date = datetime.date(dt[0], dt[1], dt[2])
    return S_HEADER
def get_desc(patch, line, input):
    """Collect the description (state S_DESC) into patch.desc.

    *line* is the first description line.  Further lines are pulled from
    *input* with getline() until a blank line or end of input, and are
    concatenated onto the first with no separator.  An empty *line*
    means there is no description at all; that is reported and
    patch.desc is left untouched.

    Always returns S_CHANGELOG.
    """
    if not line:
        print('Missing desc in', patch.commit)
        return S_CHANGELOG
    pieces = [line]
    while True:
        more = getline(input)
        if not more:
            break
        pieces.append(more)
    patch.desc = ''.join(pieces)
    return S_CHANGELOG
#
# Matches an indented "<something>-by:", "cc:" or "fixes:" line, in any
# letter case.
#
tagline = re.compile(r'^\s+(([-a-z]+-by)|cc|fixes):.*$', re.I)

def get_changelog(patch, line, input):
    """Handle one line of the changelog (state S_CHANGELOG).

    Changelog text is gathered in patch.templog and moved over to
    patch.changelog whenever a blank line is seen.  A commit line ends
    the patch: it is pushed back with SaveLine() and S_DONE is returned.
    A tag line appends whatever is pending in patch.templog to
    patch.changelog and hands the line to get_tag(), whose state is
    returned.  Anything else, blank lines included, is appended to
    patch.templog with a newline and the state stays S_CHANGELOG.
    """
    if not line and patch.templog:
        patch.changelog += patch.templog
        patch.templog = ''

    # No changelog at all - usually a Linus tag
    if patterns['commit'].match(line):
        SaveLine(line)
        return S_DONE

    if tagline.match(line):
        pending = patch.templog
        if pending:
            patch.changelog += pending
        return get_tag(patch, line, input)

    patch.templog += line + '\n'
    return S_CHANGELOG
def get_tag(patch, line, input):
    """Handle one line of the tag section (state S_TAGS).

    Blank lines are skipped.  A commit line is pushed back with
    SaveLine() and ends the patch (S_DONE).  A numstat line is handed to
    get_numstat(), whose state is returned.  Any other line is recorded
    in patch.taglines; a signed-off-by line also adds its second match
    group to patch.signoffs, while an acked-by, reviewed-by or tested-by
    line bumps patch.othertags instead.
    """
    # Some people put blank lines in the middle of tags.
    if not line:
        return S_TAGS

    # A new commit line says we've gone too far.
    if patterns['commit'].match(line):
        SaveLine(line)
        return S_DONE

    # Check for a numstat line
    if patterns['numstat'].match(line):
        return get_numstat(patch, line, input)

    # Look for interesting tags; failing that, look for other tags
    # indicating that somebody at least looked at the patch.
    signoff = patterns['signed-off-by'].match(line)
    if signoff:
        patch.signoffs.append(signoff.group(2))
    elif any(patterns[name].match(line)
             for name in ('acked-by', 'reviewed-by', 'tested-by')):
        patch.othertags += 1

    patch.taglines.append(line)
    return S_TAGS
def get_numstat(patch, line, input):
    """Handle one line of the numstat section (state S_NUMSTAT).

    A numstat line is recorded with patch.addfile() and the state stays
    S_NUMSTAT.  The first line that is not a numstat line ends the patch
    (S_DONE); if that line is itself a commit line it is pushed back
    with SaveLine() first, as the other handlers do, so that it is not
    lost.

    *input* is not used here; it is accepted so that every state handler
    can be called the same way.
    """
    m = patterns['numstat'].match(line)
    if not m:
        if patterns['commit'].match(line):
            SaveLine(line)
        return S_DONE
    #
    # Binary files just have "-" in the line fields.  In this case, set
    # the counts to zero so that we at least track that the file was
    # touched.  Only the conversions are guarded, so that an error
    # raised inside addfile() cannot cause the file to be added twice.
    #
    try:
        added, removed = int(m.group(1)), int(m.group(2))
    except ValueError:
        added = removed = 0
    patch.addfile(added, removed, m.group(3))
    return S_NUMSTAT
#
# The handler for each state, indexed by state number: grabbers[S_HEADER]
# is get_header, and so on up to S_NUMSTAT.  S_DONE has no handler.
#
grabbers = [
    get_header,
    get_desc,
    get_changelog,
    get_tag,
    get_numstat,
]
173 # A variant on the gitdm patch class.
class patch:
    """What has been collected about one commit from the git log output."""

    def __init__(self, commit):
        """Start an empty record for the commit identified by *commit*."""
        self.commit = commit
        # Description and changelog text; templog holds changelog text
        # that has not yet been moved into changelog.
        self.desc = ''
        self.changelog = ''
        self.templog = ''
        self.author = ''
        # email and date are only filled in when the header contains
        # matching author and date lines; give them defaults so that the
        # attributes always exist.
        self.email = ''
        self.date = None
        # Signed-off-by entries, and a count of other review-type tags.
        self.signoffs = [ ]
        self.othertags = 0
        # Running line totals and the list of files touched.
        self.added = self.removed = 0
        self.files = [ ]
        # The lines of the tag section as they were read.
        self.taglines = [ ]

    def addfile(self, added, removed, file):
        """Record *file*, adding *added* and *removed* to the totals."""
        self.added += added
        self.removed += removed
        self.files.append(file)
#
# Picks a version tag out of a "(tag: vX.Y)" decoration on the commit line.
#
tag = re.compile(r'\(tag: (v[.\d]+)\)')

def grabpatch(input):
    """Read one commit from *input* and return it as a patch object.

    Returns None when *input* is exhausted, when the next line is not a
    commit line (reported as 'noncommit'), or when the input ends before
    the numstat section has been reached (reported as 'Ran out of
    patch').  Running out of input while in the numstat section is the
    normal end of the last commit, and the patch is returned.
    """
    #
    # If it's not a patch something is screwy.
    #
    line = getline(input)
    if line is None:
        return None
    m = patterns['commit'].match(line)
    if not m:
        print('noncommit', line)
        return None
    p = patch(m.group(1))
    state = S_HEADER
    #
    # Look for a tag on this line.  BUG fails with two tags
    #
    # The tag belongs to this commit, so it is stored on the instance;
    # storing it on the patch class would make every patch object report
    # the tag of whichever commit was parsed last.
    #
    m = tag.search(line)
    p.tag = m.group(1) if m else None
    #
    # Crank through the patch.
    #
    while state != S_DONE:
        line = getline(input)
        if line is None:
            if state != S_NUMSTAT:
                print('Ran out of patch', state)
                return None
            return p
        state = grabbers[state](p, line, input)
    return p