3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
9 # Copyright (c) 2012 Dominik Riebeling
11 # All files in this archive are subject to the GNU General Public License.
12 # See the file COPYING in the source tree root for full license agreement.
14 # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
15 # KIND, either express or implied.
'''Scrape files from a git repository.

This module provides functions to get a subset of files from a git repository.
The files to retrieve can be specified, and the git tree to work on can be
specified. That way arbitrary trees can be retrieved (like a subset of files
for a given tag).

Retrieved files can be packaged into a bzip2 compressed tarball or stored in a
given folder for processing afterwards.

Calls git commands directly for maximum compatibility.
40 '''Get dict matching refs to hashes from repository pointed to by repo.
41 @param repo Path to repository root.
42 @return Dict matching hashes to each ref.
44 print("Getting list of refs")
45 output
= subprocess
.Popen(["git", "show-ref", "--abbrev"],
46 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
, cwd
=repo
)
47 cmdout
= output
.communicate()
50 if len(cmdout
[1]) > 0:
51 print("An error occured!\n")
56 regex
= re
.findall(b
'([a-f0-9]+)\s+(\S+)', line
)
58 # ref is the key, hash its value.
59 refs
[r
[1].decode()] = r
[0].decode()
def get_lstree(repo, start, filterlist=None):
    '''Get recursive list of tree objects for a given tree.
    @param repo Path to repository root.
    @param start Hash identifying the tree.
    @param filterlist List of path prefixes to retrieve object hashes for.
                      An empty list (or None) will retrieve all paths.
    @return Dict mapping filename to blob hash (both as bytes). The dict is
            empty if git could not be started or reported an error.
    '''
    objects = {}
    # Paths are handled as bytes throughout, so encode the filter prefixes
    # once instead of decoding every path for every comparison.
    prefixes = tuple(p if isinstance(p, bytes) else p.encode()
                     for p in (filterlist or ()))

    try:
        # -z: NUL-terminated records with verbatim (unquoted) path names, so
        # names containing spaces or non-ASCII characters survive intact.
        output = subprocess.Popen(["git", "ls-tree", "-r", "-z", start],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo)
        cmdout = output.communicate()
    except OSError as err:
        # git binary missing or repo path not usable as working directory.
        print("An error occured!\n")
        print(err)
        return objects

    if len(cmdout[1]) > 0:
        print("An error occured!\n")
        print(cmdout[1].decode(errors='replace'))
        return objects

    for record in cmdout[0].split(b'\0'):
        # Record layout: "<mode> <type> <hash>\t<path>"
        meta, tab, path = record.partition(b'\t')
        fields = meta.split()
        if not tab or len(fields) != 3:
            continue
        if prefixes and not path.startswith(prefixes):
            continue
        # If two files have the same content they have the same hash, so
        # the filename has to be used as key.
        if path in objects:
            print("FATAL: key already exists in dict!")
            return {}
        objects[path] = fields[2]

    return objects
def get_file_timestamp(repo, tree, filename):
    '''Get timestamp for a file.
    @param repo Path to repository root.
    @param tree Hash of tree to use.
    @param filename Filename in tree
    @return Timestamp as string (git "%ai" author date), or an empty string
            if git produced no output or could not be started.
    '''
    try:
        # "--" keeps git from guessing whether filename is a revision or a
        # path, which otherwise fails with "ambiguous argument" for some names.
        output = subprocess.Popen(
            ["git", "log", "--format=%ai", "-n", "1", tree, "--", filename],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo)
        cmdout = output.communicate()
    except OSError as err:
        # git binary missing or repo path not usable as working directory.
        print("An error occured!\n")
        print(err)
        return ""

    if len(cmdout[1]) > 0:
        # Report the problem like the other helpers do; stdout is still
        # returned below (it is empty when git failed).
        print("An error occured!\n")
        print(cmdout[1].decode(errors='replace'))

    return cmdout[0].decode().rstrip()
def get_object(repo, blob, destfile):
    '''Get an identified object from the repository.
    @param repo Path to repository root.
    @param blob hash for blob to retrieve.
    @param destfile filename for blob output.
    @return True if file was successfully written, False on error.
    '''
    try:
        output = subprocess.Popen(["git", "cat-file", "-p", blob],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo)
        cmdout = output.communicate()
    except OSError as err:
        # git binary missing or repo path not usable as working directory.
        print("An error occured!\n")
        print(err)
        return False

    if len(cmdout[1]) > 0:
        print("An error occured!\n")
        print(cmdout[1].decode(errors='replace'))
        return False

    # make sure output path exists; a bare filename has no directory part
    # and os.makedirs('') would raise.
    destdir = os.path.dirname(destfile)
    if destdir and not os.path.exists(destdir):
        os.makedirs(destdir)

    # Context manager guarantees the handle is closed even if write() fails.
    with open(destfile, 'wb') as f:
        f.write(cmdout[0])
    return True
def describe_treehash(repo, treehash):
    '''Retrieve output of git-describe for a given hash.
    @param repo Path to repository root.
    @param treehash Hash identifying the tree / commit to describe.
    @return Description string. Empty if git could not be started or
            reported an error.
    '''
    try:
        output = subprocess.Popen(["git", "describe", treehash],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo)
        cmdout = output.communicate()
    except OSError as err:
        # git binary missing or repo path not usable as working directory.
        print("An error occured!\n")
        print(err)
        return ""

    if len(cmdout[1]) > 0:
        print("An error occured!\n")
        print(cmdout[1].decode(errors='replace'))
        return ""

    # Decode so callers get text, matching the documented return type and
    # what get_file_timestamp() returns.
    return cmdout[0].decode().rstrip()
def scrape_files(repo, treehash, filelist, dest="", timestamp_files=None):
    '''Scrape list of files from repository.
    @param repo Path to repository root.
    @param treehash Hash identifying the tree.
    @param filelist List of files to get from repository.
    @param dest Destination path for files. Files will get retrieved with full
                path from the repository, and the folder structure will get
                created below dest as necessary. If empty, a new temporary
                folder is created and used.
    @param timestamp_files List of files (path prefixes) to also get the last
                           modified date for.
                           WARNING: this is SLOW!
    @return Destination path, filename:timestamp dict.
    '''
    print("Scraping files from repository")

    if not dest:
        dest = tempfile.mkdtemp()
    # Tree paths from get_lstree() are bytes, so everything joined or
    # compared against them has to be bytes as well.
    destroot = dest if isinstance(dest, bytes) else dest.encode()
    prefixes = tuple(p if isinstance(p, bytes) else p.encode()
                     for p in (timestamp_files or ()))

    treeobjects = get_lstree(repo, treehash, filelist)
    timestamps = {}
    for obj in treeobjects:
        get_object(repo, treeobjects[obj], os.path.join(destroot, obj))
        # One lookup per file, even if several prefixes match: querying the
        # timestamp spawns a git process each time.
        if prefixes and obj.startswith(prefixes):
            timestamps[obj] = get_file_timestamp(repo, treehash, obj)

    return [dest, timestamps]
183 def archive_files(repo
, treehash
, filelist
, basename
, tmpfolder
="",
185 '''Archive list of files into tarball.
186 @param repo Path to repository root.
187 @param treehash Hash identifying the tree.
188 @param filelist List of files to archive. All files in the archive if left
190 @param basename Basename (including path) of output file. Will get used as
191 basename inside of the archive as well (i.e. no tarbomb).
192 @param tmpfolder Folder to put intermediate files in. If no folder is given
193 a temporary one will get used.
194 @param archive Type of archive to create. Supported values are "tbz" and
195 "7z". The latter requires the 7z binary available in the
197 @return Output filename.
202 tmpfolder
= tempfile
.mkdtemp()
205 workfolder
= scrape_files(repo
, treehash
, filelist
,
206 os
.path
.join(tmpfolder
, basename
))[0]
209 print("Archiving files from repository")
211 outfile
= basename
+ ".7z"
212 output
= subprocess
.Popen(["7z", "a",
213 os
.path
.join(os
.getcwd(), basename
+ ".7z"), basename
],
214 cwd
=tmpfolder
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
)
216 elif archive
== "tbz":
217 outfile
= basename
+ ".tar.bz2"
218 tf
= tarfile
.open(outfile
, "w:bz2")
219 tf
.add(workfolder
, basename
)
222 print("Files not archived")
223 if tmpfolder
!= workfolder
:
224 shutil
.rmtree(workfolder
)
226 shutil
.rmtree(tmpfolder
)