3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
9 # Copyright (c) 2012 Dominik Riebeling
11 # All files in this archive are subject to the GNU General Public License.
12 # See the file COPYING in the source tree root for full license agreement.
14 # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
15 # KIND, either express or implied.
'''Scrape files from a git repository.

This module provides functions to get a subset of files from a git repository.
The files to retrieve can be specified, and the git tree to work on can be
specified. That way arbitrary trees can be retrieved (for example, only a
subset of the files in a given tree).

Retrieved files can be packaged into a bzip2 compressed tarball or stored in a
given folder for processing afterwards.

Calls git commands directly for maximum compatibility.
'''
40 '''Get dict matching refs to hashes from repository pointed to by repo.
41 @param repo Path to repository root.
42 @return Dict matching hashes to each ref.
44 print "Getting list of refs"
45 output
= subprocess
.Popen(["git", "show-ref"], stdout
=subprocess
.PIPE
,
46 stderr
=subprocess
.PIPE
, cwd
=repo
)
47 cmdout
= output
.communicate()
50 if len(cmdout
[1]) > 0:
51 print "An error occured!\n"
56 regex
= re
.findall(r
'([a-f0-9]+)\s+(\S+)', line
)
58 # ref is the key, hash its value.
def get_lstree(repo, start, filterlist=None):
    '''Get recursive list of tree objects for a given tree.

    Runs "git ls-tree -r" inside repo and collects the hash of every listed
    entry whose path is selected by filterlist.

    @param repo Path to repository root.
    @param start Hash identifying the tree.
    @param filterlist List of paths to retrieve object hashes for. An entry is
                      selected when its path starts with one of these paths.
                      An empty list (or None) will retrieve all paths.
    @return Dict mapping filename to blob hash.
    '''
    # None replaces a shared mutable [] default; both mean "no filtering".
    prefixes = tuple(filterlist or ())
    output = subprocess.Popen(["git", "ls-tree", "-r", start],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo)
    cmdout = output.communicate()
    objects = {}

    # Anything git wrote to stderr is treated as a failure.
    if len(cmdout[1]) > 0:
        print("An error occured!\n")
        print(cmdout[1])
        # NOTE(review): the value returned on this path is not visible in the
        # reviewed excerpt; an empty dict is assumed -- confirm with callers.
        return objects

    for line in cmdout[0].split('\n'):
        # Each match has four fields; index 2 is the hash and index 3 the
        # path (which the pattern cuts off at the first whitespace).
        regex = re.findall(r'([0-9]+)\s+([a-z]+)\s+([0-9a-f]+)\s+(\S+)', line)
        for rf in regex:
            # filter: skip paths that do not start with a requested prefix.
            if prefixes and not rf[3].startswith(prefixes):
                continue
            # If two files have the same content they have the same hash, so
            # the filename has to be used as key.
            if rf[3] in objects:
                print("FATAL: key already exists in dict!")
                # NOTE(review): assumed to abort with an empty result --
                # confirm against the original behaviour.
                return {}
            objects[rf[3]] = rf[2]
    return objects
def get_object(repo, blob, destfile):
    '''Get an identified object from the repository.

    Runs "git cat-file -p" for blob inside repo and writes the command's
    output to destfile, creating the destination folder when it is missing.

    @param repo Path to repository root.
    @param blob hash for blob to retrieve.
    @param destfile filename for blob output.
    @return True if file was successfully written, False on error.
    '''
    output = subprocess.Popen(["git", "cat-file", "-p", blob],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo)
    cmdout = output.communicate()
    # Anything git wrote to stderr is treated as a failure.
    if len(cmdout[1]) > 0:
        print("An error occured!\n")
        print(cmdout[1])
        return False
    # make sure output path exists. A destfile without a directory part
    # yields '' here, which os.makedirs() rejects, so skip it in that case.
    destdir = os.path.dirname(destfile)
    if destdir and not os.path.exists(destdir):
        os.makedirs(destdir)
    # Write the blob in one call; the context manager closes the file even
    # when the write fails.
    with open(destfile, 'wb') as f:
        f.write(cmdout[0])
    return True
def describe_treehash(repo, treehash):
    '''Retrieve output of git-describe for a given hash.

    Runs "git describe" for treehash inside repo.

    @param repo Path to repository root.
    @param treehash Hash identifying the tree / commit to describe.
    @return Description string (command output with trailing whitespace
            removed).
    '''
    output = subprocess.Popen(["git", "describe", treehash],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo)
    cmdout = output.communicate()
    # Anything git wrote to stderr is treated as a failure.
    if len(cmdout[1]) > 0:
        print("An error occured!\n")
        print(cmdout[1])
        # NOTE(review): the value returned on this path is not visible in the
        # reviewed excerpt; None is assumed -- confirm with callers.
        return None
    return cmdout[0].rstrip()
def scrape_files(repo, treehash, filelist, dest=""):
    '''Scrape list of files from repository.

    Looks up the objects for filelist with get_lstree() and writes each one
    below dest with get_object().

    @param repo Path to repository root.
    @param treehash Hash identifying the tree.
    @param filelist List of files to get from repository.
    @param dest Destination path for files. Files will get retrieved with full
                path from the repository, and the folder structure will get
                created below dest as necessary.
    @return Destination path.
    '''
    print("Scraping files from repository")

    # NOTE(review): the condition guarding mkdtemp() is not visible in the
    # reviewed excerpt; "no destination given" is assumed from the default.
    if dest == "":
        dest = tempfile.mkdtemp()
    treeobjects = get_lstree(repo, treehash, filelist)
    # Keys are repository paths, values the hashes to fetch.
    for path, blob in treeobjects.items():
        get_object(repo, blob, os.path.join(dest, path))

    return dest
162 def archive_files(repo
, treehash
, filelist
, basename
, tmpfolder
="",
164 '''Archive list of files into tarball.
165 @param repo Path to repository root.
166 @param treehash Hash identifying the tree.
167 @param filelist List of files to archive. All files in the archive if left
169 @param basename Basename (including path) of output file. Will get used as
170 basename inside of the archive as well (i.e. no tarbomb).
171 @param tmpfolder Folder to put intermediate files in. If no folder is given
172 a temporary one will get used.
173 @param archive Type of archive to create. Supported values are "tbz" and
174 "7z". The latter requires the 7z binary available in the
176 @return Output filename.
181 tmpfolder
= tempfile
.mkdtemp()
184 workfolder
= scrape_files(repo
, treehash
, filelist
,
185 os
.path
.join(tmpfolder
, basename
))
188 print "Archiving files from repository"
190 outfile
= basename
+ ".7z"
191 output
= subprocess
.Popen(["7z", "a",
192 os
.path
.join(os
.getcwd(), basename
+ ".7z"), basename
],
193 cwd
=tmpfolder
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
)
196 outfile
= basename
+ ".tar.bz2"
197 tf
= tarfile
.open(outfile
, "w:bz2")
198 tf
.add(workfolder
, basename
)
200 if tmpfolder
!= workfolder
:
201 shutil
.rmtree(workfolder
)
203 shutil
.rmtree(tmpfolder
)