less false integrations
[fast-export.git] / p4-fast-export.py
blob a45068d362df70de0d3ebe45a6111ac7696dee31
#!/usr/bin/python
#
# p4-fast-export.py
#
# Author: Simon Hausmann <hausmann@kde.org>
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
#
# TODO:
#       - support integrations (at least p4i)
#       - support p4 submit (hah!)
#       - emulate p4's delete behavior: if a directory becomes empty, delete it
#         and continue with the parent directory until a non-empty one is found.
import os, string, sys, time, os.path
import marshal, popen2, getopt, sha
import subprocess
from sets import Set;
# Module-wide state shared by the option parser, the helper functions and the
# import loop further down.

cacheDebug = False                  # --cache-debug: cache p4 output under /tmp/p4cache
silent = False                      # --silent: suppress progress output
knownBranches = Set()               # branch paths relative to globalPrefix
createdBranches = Set()             # branches the import loop already looked up a parent for
committedChanges = Set()            # p4 change numbers already written to git-fast-import
branch = "refs/heads/master"        # git ref that receives the import

# Depot path remembered from a previous import (empty if there was none).
# previousDepotPath keeps the raw command output; it is only ever tested for
# emptiness.
globalPrefix = previousDepotPath = os.popen("git-repo-config --get p4.depotpath").read()
detectBranches = False              # --detect-branches
changesFile = ""                    # --changesfile: file listing the changes to import

# Drop the newline the command prints after the value, but never a character
# that belongs to the path itself.
if globalPrefix.endswith("\n"):
    globalPrefix = globalPrefix[:-1]
# Command line options.  Everything that is not an option ends up in args and
# is interpreted as the depot path further down.
try:
    opts, args = getopt.getopt(sys.argv[1:], "",
                               [ "branch=", "detect-branches", "changesfile=", "silent",
                                 "known-branches=", "cache-debug" ])
except getopt.GetoptError:
    print("fixme, syntax error")
    sys.exit(1)

for o, a in opts:
    if o == "--branch":
        branch = "refs/heads/" + a
    elif o == "--detect-branches":
        detectBranches = True
    elif o == "--changesfile":
        changesFile = a
    elif o == "--silent":
        silent = True
    elif o == "--known-branches":
        # One branch path per line.  A dedicated loop variable is used so the
        # module-level "branch" (the target ref) is not overwritten by the
        # last line of the file.
        knownBranchesFile = open(a)
        try:
            for knownBranchLine in knownBranchesFile.readlines():
                # Strip only the line terminator; a final line without one
                # must keep its last character.
                knownBranches.add(knownBranchLine.rstrip("\n"))
        finally:
            knownBranchesFile.close()
    elif o == "--cache-debug":
        cacheDebug = True
# Decide which depot path to import: the one stored by a previous run (no
# positional argument) or the single positional argument.
if len(args) == 0 and len(globalPrefix) != 0:
    if not silent:
        print("[using previously specified depot path %s]" % globalPrefix)
elif len(args) != 1:
    # Every example line names the program; the original left the "%s"
    # placeholders of the examples unformatted.
    print("usage: %s //depot/path[@revRange]" % sys.argv[0])
    print("\n example:")
    print(" %s //depot/my/project/ -- to import the current head" % sys.argv[0])
    print(" %s //depot/my/project/@all -- to import everything" % sys.argv[0])
    print(" %s //depot/my/project/@1,6 -- to import only from revision 1 to 6" % sys.argv[0])
    print("")
    print(" (a ... is not needed in the path p4 specification, it's added implicitly)")
    print("")
    sys.exit(1)
else:
    # NOTE(review): the stored path is normalised further down (revision
    # range removed, trailing "/" added) while args[0] is compared raw here,
    # so an equivalent but differently spelled path is rejected — confirm
    # whether that is intended.
    if len(globalPrefix) != 0 and globalPrefix != args[0]:
        print("previous import used depot path %s and now %s was specified. this doesn't work!" % (globalPrefix, args[0]))
        sys.exit(1)
    globalPrefix = args[0]
changeRange = ""        # "@from,to" range handed to "p4 changes", or ""
revision = ""           # single "@rev" / "#rev" for a one-shot import, or ""
users = {}              # p4 user name -> "Full Name <email>"
initialParent = ""      # git commit the first imported change is based on
lastChange = 0          # number of the last change written to git-fast-import
initialTag = ""         # p4/<change> tag of the previous import, if any

# Split a trailing "@..." or "#..." specifier off the depot path.
atIdx = globalPrefix.find("@")
hashIdx = globalPrefix.find("#")
if atIdx != -1:
    changeRange = globalPrefix[atIdx:]
    if changeRange == "@all":
        # import the complete history
        changeRange = ""
    elif changeRange.find(",") == -1:
        # a single revision, not a range
        revision = changeRange
        changeRange = ""
    globalPrefix = globalPrefix[0:atIdx]
elif hashIdx != -1:
    revision = globalPrefix[hashIdx:]
    globalPrefix = globalPrefix[0:hashIdx]
elif len(previousDepotPath) == 0:
    # no specifier and no previous import: take a snapshot of the head
    revision = "#head"

# Normalise the path to "//depot/some/dir/".
if globalPrefix.endswith("..."):
    globalPrefix = globalPrefix[:-3]

if not globalPrefix.endswith("/"):
    globalPrefix += "/"
def p4File(depotPath):
    """Return the contents of a depot file as printed by "p4 print -q".

    depotPath is the file spec handed to p4; callers in this file pass
    "<path>#<rev>".

    When the module-level cacheDebug flag is set, the data is read from and
    stored to /tmp/p4cache/data-<sha1 of depotPath>, so repeated debugging
    runs do not have to ask the server again.  An IOError while writing the
    cache file is not caught.
    """
    cacheKey = "/tmp/p4cache/data-" + sha.new(depotPath).hexdigest()

    if cacheDebug:
        try:
            cacheFile = open(cacheKey, "rb")
            try:
                return cacheFile.read()
            finally:
                cacheFile.close()
        except IOError:
            # not cached yet (or unreadable): fall through and ask p4
            pass

    # An argument list instead of a shell command line: the path reaches p4
    # unmodified even if it contains quotes, "$" or backticks.
    p4Process = subprocess.Popen(["p4", "print", "-q", depotPath], stdout=subprocess.PIPE)
    data = p4Process.communicate()[0]

    if cacheDebug:
        cacheFile = open(cacheKey, "wb")
        try:
            cacheFile.write(data)
        finally:
            cacheFile.close()

    return data
def p4CmdList(cmd):
    """Run "p4 -G <cmd>" and return the marshalled records as a list.

    cmd is inserted into a shell command line as-is, so callers quote any
    arguments themselves.  Records are read with marshal.load() until the
    end of the stream.

    When the module-level cacheDebug flag is set, the records are read from
    and stored to /tmp/p4cache/cmd-<sha1 of the full command line>.
    """
    fullCmd = "p4 -G %s" % cmd
    cacheKey = "/tmp/p4cache/cmd-" + sha.new(fullCmd).hexdigest()

    pipe = None
    cached = False
    if cacheDebug:
        try:
            pipe = open(cacheKey, "rb")
            cached = True
        except IOError:
            # nothing cached for this command yet
            pass
    if pipe is None:
        pipe = os.popen(fullCmd, "rb")

    result = []
    try:
        try:
            while True:
                result.append(marshal.load(pipe))
        except EOFError:
            # regular end of the record stream
            pass
    finally:
        pipe.close()

    if cacheDebug and not cached:
        cacheFile = open(cacheKey, "wb")
        try:
            for entry in result:
                marshal.dump(entry, cacheFile)
        finally:
            cacheFile.close()

    return result
def p4Cmd(cmd):
    """Run a p4 command and merge all records it returns into one dict.

    Records are merged in output order with dict.update(), so for a key that
    occurs in several records the last one wins.
    """
    result = {}
    for entry in p4CmdList(cmd):
        result.update(entry)
    return result
def extractFilesFromCommit(commit):
    """Turn the numbered file keys of a change record into a list of dicts.

    commit is a dict with the keys depotFile<N>, rev<N>, action<N> and
    type<N> for N = 0, 1, 2, ...; reading stops at the first N without a
    depotFile<N> key.  Files outside the module-level globalPrefix are
    skipped.

    Returns a list of dicts with the keys "path", "rev", "action", "type".
    """
    files = []
    fnum = 0
    while ("depotFile%s" % fnum) in commit:
        path = commit["depotFile%s" % fnum]
        if path.startswith(globalPrefix):
            files.append({
                "path": path,
                "rev": commit["rev%s" % fnum],
                "action": commit["action%s" % fnum],
                "type": commit["type%s" % fnum],
            })
        fnum = fnum + 1
    return files
def isSubPathOf(first, second):
    """Return True if path first equals second or lies below it.

    A plain prefix match is not enough: "ab" starts with "a" but is not
    below it, so the character following the prefix has to be a "/".
    """
    if not first.startswith(second):
        return False
    # first is strictly longer than second whenever they differ here, so the
    # index below is always valid.
    return first == second or first[len(second)] == "/"
def branchesForCommit(files):
    """Return the set of branches (paths relative to globalPrefix) touched.

    files is a list of file dicts as produced by extractFilesFromCommit().
    Branches found here that are not below an already known branch are also
    added to the module-level knownBranches set.

    This heuristic needs more testing.
    """
    branches = Set()

    for fileInfo in files:
        relativePath = fileInfo["path"][len(globalPrefix):]
        # strip off the filename
        # NOTE(review): for a file directly below globalPrefix rfind()
        # returns -1 and this cuts off the last character of the file name
        # instead — confirm how such files should be handled.
        relativePath = relativePath[0:relativePath.rfind("/")]

        knownBranch = False
        for candidate in branches:
            # equal to or below a branch already collected for this change
            if isSubPathOf(relativePath, candidate):
                knownBranch = True
                break
            # a collected branch lies below this directory: drop it, the
            # directory itself is added further down
            if isSubPathOf(candidate, relativePath):
                branches.remove(candidate)
                break

        if knownBranch:
            continue

        # NOTE(review): indentation was lost in the copy this was restored
        # from; the "break" is assumed to belong to the sub-path test (stop
        # at the first known branch that contains the path) — confirm
        # against the upstream file.
        for candidate in knownBranches:
            if isSubPathOf(relativePath, candidate):
                if len(branches) == 0:
                    relativePath = candidate
                else:
                    knownBranch = True
                break

        if knownBranch:
            continue

        branches.add(relativePath)
        knownBranches.add(relativePath)

    return branches
def findBranchParent(branchPrefix, files):
    """Return the known branch that the branch at branchPrefix came from.

    Looks at every file of the change below branchPrefix whose action is
    "integrate" or "branch", asks "p4 filelog" where it was integrated from
    and returns the first entry of the module-level knownBranches set that
    contains the source file.  Returns "" if no such branch is found.

    Prints a message and exits with status 1 when the filelog output does
    not have the expected form.
    """
    for fileInfo in files:
        path = fileInfo["path"]
        if not path.startswith(branchPrefix):
            continue
        action = fileInfo["action"]
        if action != "integrate" and action != "branch":
            continue
        depotPath = path + "#" + fileInfo["rev"]

        log = p4CmdList("filelog \"%s\"" % depotPath)
        if len(log) != 1:
            print("eek! I got confused by the filelog of %s" % depotPath)
            sys.exit(1)

        log = log[0]
        if log["action0"] != action:
            print("eek! wrong action in filelog for %s : found %s, expected %s" % (depotPath, log["action0"], action))
            sys.exit(1)

        branchAction = log["how0,0"]
        if not branchAction.endswith(" from"):
            continue # ignore for branching

        source = log["file0,0"]
        if source.startswith(branchPrefix):
            # integrated within the branch itself
            continue
        if not source.startswith(globalPrefix):
            # Known branches are relative to globalPrefix; cutting
            # len(globalPrefix) characters off an unrelated path would give a
            # meaningless name that could only match a branch by accident.
            continue

        lastSourceRev = log["erev0,0"]

        # the result is only checked for its shape
        sourceLog = p4CmdList("filelog -m 1 \"%s%s\"" % (source, lastSourceRev))
        if len(sourceLog) != 1:
            print("eek! I got confused by the source filelog of %s%s" % (source, lastSourceRev))
            sys.exit(1)

        relPath = source[len(globalPrefix):]
        # strip off the filename
        relPath = relPath[0:relPath.rfind("/")]

        for candidate in knownBranches:
            if isSubPathOf(relPath, candidate):
                return candidate

    return ""
def commit(details, files, branch, branchPrefix, parent, merged = ""):
    """Write one commit to the git-fast-import stream (gitStream).

    details      -- dict with the keys "time", "user", "desc" and "change"
    files        -- list of file dicts ("path", "rev", "action", "type");
                    entries outside branchPrefix are skipped
    branch       -- git ref to commit to
    branchPrefix -- depot path prefix that is stripped from the file paths
    parent       -- ref/commit for the "from" line, "" for none
    merged       -- ref/commit for the "merge" line, "" for none

    Records the change number in committedChanges and lastChange.
    """
    global lastChange

    epoch = details["time"]
    author = details["user"]

    gitStream.write("commit %s\n" % branch)
    committedChanges.add(int(details["change"]))
    if author in users:
        committer = "%s %s %s" % (users[author], epoch, tz)
    else:
        # no p4 user record: make up an address so the line stays well-formed
        committer = "%s <a@b> %s %s" % (author, epoch, tz)

    gitStream.write("committer %s\n" % committer)

    # The exact-byte-count form of "data" is used for the message: the
    # delimited "<<EOT" form ends early when the description itself contains
    # a line consisting of the delimiter.
    message = "%s\n[ imported from %s; change %s ]\n" % (details["desc"], branchPrefix, details["change"])
    gitStream.write("data %s\n" % len(message))
    gitStream.write(message)
    gitStream.write("\n")

    if len(parent) > 0:
        gitStream.write("from %s\n" % parent)

    if len(merged) > 0:
        gitStream.write("merge %s\n" % merged)

    for fileInfo in files:
        path = fileInfo["path"]
        if not path.startswith(branchPrefix):
            continue
        depotPath = path + "#" + fileInfo["rev"]
        relPath = path[len(branchPrefix):]

        if fileInfo["action"] == "delete":
            gitStream.write("D %s\n" % relPath)
            continue

        # file types starting with "x" are imported as executable
        mode = 644
        if fileInfo["type"].startswith("x"):
            mode = 755

        data = p4File(depotPath)

        gitStream.write("M %s inline %s\n" % (mode, relPath))
        gitStream.write("data %s\n" % len(data))
        gitStream.write(data)
        gitStream.write("\n")

    gitStream.write("\n")

    lastChange = int(details["change"])
def extractFilesInCommitToBranch(files, branchPrefix):
    """Return the entries of files whose "path" starts with branchPrefix.

    The result is a new list; order is preserved and the file dicts
    themselves are not copied.
    """
    return [fileInfo for fileInfo in files if fileInfo["path"].startswith(branchPrefix)]
def findBranchSourceHeuristic(files, branch, branchPrefix):
    """Guess from which other known branch the given files were integrated.

    For every file whose action is "integrate" or "branch", "p4 filelog" is
    asked for the integration source.  The first entry of the module-level
    knownBranches set that contains the source file and differs from branch
    is returned; "" if there is none.

    Prints a message and exits with status 1 when the filelog output does
    not have the expected form.
    """
    for fileInfo in files:
        action = fileInfo["action"]
        if action != "integrate" and action != "branch":
            continue
        depotPath = fileInfo["path"] + "#" + fileInfo["rev"]

        log = p4CmdList("filelog \"%s\"" % depotPath)
        if len(log) != 1:
            print("eek! I got confused by the filelog of %s" % depotPath)
            sys.exit(1)

        log = log[0]
        if log["action0"] != action:
            print("eek! wrong action in filelog for %s : found %s, expected %s" % (depotPath, log["action0"], action))
            sys.exit(1)

        branchAction = log["how0,0"]
        if not branchAction.endswith(" from"):
            continue # ignore for branching

        source = log["file0,0"]
        if source.startswith(branchPrefix):
            # integrated within the branch itself
            continue
        if not source.startswith(globalPrefix):
            # Known branches are relative to globalPrefix; cutting
            # len(globalPrefix) characters off an unrelated path would give a
            # meaningless name that could only match a branch by accident.
            continue

        lastSourceRev = log["erev0,0"]

        # the result is only checked for its shape
        sourceLog = p4CmdList("filelog -m 1 \"%s%s\"" % (source, lastSourceRev))
        if len(sourceLog) != 1:
            print("eek! I got confused by the source filelog of %s%s" % (source, lastSourceRev))
            sys.exit(1)

        relPath = source[len(globalPrefix):]
        # strip off the filename
        relPath = relPath[0:relPath.rfind("/")]

        for candidate in knownBranches:
            if isSubPathOf(relPath, candidate) and candidate != branch:
                return candidate

    return ""
def changeIsBranchMerge(sourceBranch, destinationBranch, change):
    """Check whether change looks like a full merge of one branch into another.

    sourceBranch / destinationBranch are branch paths relative to
    globalPrefix, change is the change number as an int.

    Every non-deleted file of the source branch (as of change) is checked
    against the records of "p4 integrated".  Returns False, after printing a
    message, as soon as one file has more than one integration record into
    the destination branch but none that qualifies; True otherwise.
    """
    sourceFiles = {}
    # the depot paths are quoted like in the other p4 invocations of this file
    for fileInfo in p4CmdList("files \"%s...@%s\"" % (globalPrefix + sourceBranch + "/", change)):
        if fileInfo["action"] == "delete":
            continue
        sourceFiles[fileInfo["depotFile"]] = fileInfo

    destinationFiles = {}
    for fileInfo in p4CmdList("files \"%s...@%s\"" % (globalPrefix + destinationBranch + "/", change)):
        destinationFiles[fileInfo["depotFile"]] = fileInfo

    for fileName in sourceFiles:
        integrations = []
        deleted = False
        integrationCount = 0
        for integration in p4CmdList("integrated \"%s\"" % fileName):
            toFile = integration["fromFile"] # yes, it's true, it's fromFile
            if toFile not in destinationFiles:
                continue
            destFile = destinationFiles[toFile]
            if destFile["action"] == "delete":
                # the counterpart in the destination branch is gone
                deleted = True
                break
            integrationCount += 1
            if integration["how"] == "branch from":
                continue

            integrationChange = int(integration["change"])
            if integrationChange == change:
                integrations.append(integration)
                continue
            if integrationChange > change:
                continue

            destRev = int(destFile["rev"])

            # revisions are given as "#<n>" or "#none"
            startRev = integration["startFromRev"][1:]
            if startRev == "none":
                startRev = 0
            else:
                startRev = int(startRev)

            endRev = integration["endFromRev"][1:]
            if endRev == "none":
                endRev = 0
            else:
                endRev = int(endRev)

            initialBranch = (destRev == 1 and integration["how"] != "branch into")
            inRange = (destRev >= startRev and destRev <= endRev)
            newer = (destRev > startRev and destRev > endRev)

            if initialBranch or inRange or newer:
                integrations.append(integration)

        if deleted:
            continue

        if len(integrations) == 0 and integrationCount > 1:
            print("file %s was not integrated from %s into %s" % (fileName, sourceBranch, destinationBranch))
            return False

    return True
def getUserMap():
    """Return a dict mapping p4 user names to "Full Name <email>" strings.

    Built from the records of "p4 users"; records without a "User" key are
    ignored.
    """
    userMap = {}

    for record in p4CmdList("users"):
        if "User" not in record:
            continue
        userMap[record["User"]] = record["FullName"] + " <" + record["Email"] + ">"
    return userMap
users = getUserMap()

# Without an explicit change range, try to continue after the last imported
# change: look for a "p4/<change>" tag describing the tip of the target branch
# and import everything after that change.
if len(changeRange) == 0:
    try:
        sout, sin, serr = popen2.popen3("git-name-rev --tags `git-rev-parse %s`" % branch)
        try:
            output = sout.read()
        finally:
            sout.close()
            sin.close()
            serr.close()
        if output.endswith("\n"):
            output = output[:-1]
        # index() raises ValueError when the tip carries no p4 tag
        tagIdx = output.index(" tags/p4/")
        # the change number ends at a "^" suffix, if there is one
        caretIdx = output.find("^")
        endPos = len(output)
        if caretIdx != -1:
            endPos = caretIdx
        # 9 == len(" tags/p4/")
        rev = int(output[tagIdx + 9 : endPos]) + 1
        changeRange = "@%s,#head" % rev
        initialParent = os.popen("git-rev-parse %s" % branch).read()[:-1]
        initialTag = "p4/%s" % (int(rev) - 1)
    except (ValueError, IOError, OSError):
        # best effort: no usable tag (or git not usable) means there is
        # nothing to continue from
        pass
# Timezone of the committer lines in the form git-fast-import expects:
# sign, two digits of hours, two digits of minutes (e.g. "+0100", "-0530").
# time.timezone is the non-DST offset in seconds *west* of UTC, hence the
# negation.
tzsign = "+"
if -time.timezone < 0:
    tzsign = "-"
tz = "%s%02d%02d" % (tzsign, abs(time.timezone) // 3600, abs(time.timezone) % 3600 // 60)
# Everything below is fed to git-fast-import through gitStream.
gitOutput, gitStream, gitError = popen2.popen3("git-fast-import")

if len(revision) > 0:
    # Snapshot import: a single commit with the state of the depot path at
    # the given revision.
    print("Doing initial import of %s from revision %s" % (globalPrefix, revision))

    details = { "user" : "git perforce import user", "time" : int(time.time()) }
    details["desc"] = "Initial import of %s from the state at revision %s" % (globalPrefix, revision)
    details["change"] = revision
    newestRevision = 0

    # Build a record shaped like a change description (depotFile<N>, rev<N>,
    # ...) so it can go through extractFilesFromCommit() and commit().
    fileCnt = 0
    for info in p4CmdList("files %s...%s" % (globalPrefix, revision)):
        change = int(info["change"])
        if change > newestRevision:
            newestRevision = change

        if info["action"] == "delete":
            continue

        for prop in [ "depotFile", "rev", "action", "type" ]:
            details["%s%s" % (prop, fileCnt)] = info[prop]

        fileCnt = fileCnt + 1

    # the snapshot is recorded under the newest change it contains
    details["change"] = newestRevision

    try:
        # commit() requires the parent argument; it was missing here and the
        # resulting TypeError was hidden by a catch-all handler.
        commit(details, extractFilesFromCommit(details), branch, globalPrefix, initialParent)
    except IOError:
        print(gitError.read())
        sys.exit(1)

else:
    changes = []

    if len(changesFile) > 0:
        # one change number per line; duplicates are dropped
        output = open(changesFile).readlines()
        changeSet = Set()
        for line in output:
            changeSet.add(int(line))

        for change in changeSet:
            changes.append(change)

        changes.sort()
    else:
        output = os.popen("p4 changes %s...%s" % (globalPrefix, changeRange)).readlines()

        for line in output:
            # the change number is the second space-separated field
            changeNum = line.split(" ")[1]
            changes.append(changeNum)

        # the list is reversed so it is processed in the opposite order of
        # the "p4 changes" output
        changes.reverse()

    if len(changes) == 0:
        if not silent:
            print("no changes to import!")
        sys.exit(1)

    cnt = 1
    for change in changes:
        description = p4Cmd("describe %s" % change)

        if not silent:
            sys.stdout.write("\rimporting revision %s (%s%%)" % (change, cnt * 100 // len(changes)))
            sys.stdout.flush()
        cnt = cnt + 1

        try:
            files = extractFilesFromCommit(description)
            if detectBranches:
                # one git commit per branch touched by the change
                for branch in branchesForCommit(files):
                    knownBranches.add(branch)
                    branchPrefix = globalPrefix + branch + "/"

                    filesForCommit = extractFilesInCommitToBranch(files, branchPrefix)

                    merged = ""
                    parent = ""
                    ########### remove cnt!!!
                    if branch not in createdBranches and cnt > 2:
                        createdBranches.add(branch)
                        parent = findBranchParent(branchPrefix, files)
                        if parent == branch:
                            parent = ""

                    if len(parent) == 0:
                        merged = findBranchSourceHeuristic(filesForCommit, branch, branchPrefix)
                        if len(merged) > 0:
                            print("change %s could be a merge from %s into %s" % (description["change"], merged, branch))
                            if not changeIsBranchMerge(merged, branch, int(description["change"])):
                                merged = ""

                    branch = "refs/heads/" + branch
                    if len(parent) > 0:
                        parent = "refs/heads/" + parent
                    if len(merged) > 0:
                        merged = "refs/heads/" + merged
                    # commit() itself skips files outside branchPrefix
                    commit(description, files, branch, branchPrefix, parent, merged)
            else:
                # filesForCommit only exists in the branch-detection path;
                # here the complete file list of the change is committed.
                commit(description, files, branch, globalPrefix, initialParent)
                # only the first commit of this run gets an explicit parent
                initialParent = ""
        except IOError:
            print(gitError.read())
            sys.exit(1)

    if not silent:
        # finish the "\r" progress line
        print("")
# Tag the last imported change as p4/<change>; a later run looks for this tag
# to find out where to continue.
gitStream.write("reset refs/tags/p4/%s\n" % lastChange)
gitStream.write("from %s\n\n" % branch)

gitStream.close()
gitOutput.close()
gitError.close()

# Remember the depot path for the next run.  The path is quoted so that a
# path containing spaces is stored as one value.
os.popen("git-repo-config p4.depotpath \"%s\"" % globalPrefix).read()
if len(initialTag) > 0:
    # the tag of the previous import is superseded by the one written above
    os.popen("git tag -d %s" % initialTag).read()

sys.exit(0)