about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Robin H. Johnson <robbat2@gentoo.org> 2016-12-01 17:00:55 -0800
committer: Robin H. Johnson <robbat2@gentoo.org> 2016-12-01 17:00:55 -0800
commit: 05834af144a9c5dbb3afe3783a8e8affd126728c (patch)
tree: aa6c87c0c257172119aa6a670370d51569d7acbb /egenchangelog2.py
parent: Extra git tree of what gets generated. (diff)
download: mastermirror-scripts-05834af144a9c5dbb3afe3783a8e8affd126728c.tar.gz
mastermirror-scripts-05834af144a9c5dbb3afe3783a8e8affd126728c.tar.bz2
mastermirror-scripts-05834af144a9c5dbb3afe3783a8e8affd126728c.zip
echangelog2: Start to PEP8 strict pythonify.
Signed-off-by: Robin H. Johnson <robbat2@gentoo.org>
Diffstat (limited to 'egenchangelog2.py')
-rwxr-xr-x egenchangelog2.py 169
1 files changed, 124 insertions, 45 deletions
diff --git a/egenchangelog2.py b/egenchangelog2.py
index 434ff51..5ce453a 100755
--- a/egenchangelog2.py
+++ b/egenchangelog2.py
@@ -78,32 +78,52 @@ git_log_command = ['git',
#git_log_command += ['-n', '100']
-
-
-_commitsdatasize = None
-def get_commits(last_commit='HEAD', first_commit=None):
- reuse_file = False
+# TODO: convert this into a stream
+def get_commits_buffer(last_commit='HEAD', first_commit=None, opts=argparse.Namespace()):
buf = None
- # TODO: include first/last commit in command.
- if not reuse_file:
- buf = subprocess.check_output(git_log_command).decode('utf-8')
- print('Command', git_log_command)
- with open('workfile', 'w') as f:
- f.write(buf)
- f.close()
+ if opts.read_git_log_data is not None:
+ buf = opts.read_git_log_data.read().decode('utf-8')
+ opts.read_git_log_data.close()
else:
- with open('workfile', 'rb') as f:
- buf = f.read().decode('utf-8')
- f.close()
- global _commitsdatasize
- _commitsdatasize = len(buf)
- regex = re.compile(r'\x01\x01COMMIT\x01', flags=re.MULTILINE)
- pos = 0
- for commit in regex.finditer(buf):
- span = commit.span()
- yield buf[pos:span[0]]
- pos = span[1]
- yield buf[pos:]
+ # TODO: include first/last commit in command.
+ if opts.debug > 0:
+ #cmdstr = ' '.join(["'{}'".format(s) for s in git_log_command])
+ cmdstr = ' '.join(["'"+s+"'" for s in git_log_command])
+ print('Command', cmdstr, flush=True)
+ buf = subprocess.check_output(git_log_command).decode('utf-8')
+
+ if opts.write_git_log_data is not None:
+ opts.write_git_log_data.write(buf.encode('utf-8'))
+ opts.write_git_log_data.close()
+
+ return buf
+
+class CommitGenerator(object):
+ def __init__(self, buf, opts=argparse.Namespace()):
+ self.buf = buf
+ self.regex = re.compile(r'\x01\x01COMMIT\x01', flags=re.MULTILINE)
+ self.buflen = len(self.buf)
+ self.pos = 0
+ self.regex_iter = self.regex.finditer(self.buf)
+
+ def __iter__(self):
+ return self
+ def __next__(self):
+ return self.next()
+
+ def next(self):
+ result = None
+ try:
+ commit = next(self.regex_iter)
+ span = commit.span()
+ result = self.buf[self.pos:span[0]]
+ self.pos = span[1]
+ except StopIteration:
+ if self.pos >= self.buflen:
+ raise StopIteration
+ result = self.buf[self.pos:]
+ self.pos = self.buflen
+ return result
def get_commit_fields(buf):
regex = re.compile(r'\x02\x01([-A-Z0-9a-z._]+)\x01', flags=re.MULTILINE)
@@ -452,27 +472,77 @@ def starmap_func_write_package_changelog(cp, cp_commits):
def main():
parallel = True
- if parallel:
+
+ parser = argparse.ArgumentParser(description='Generate ChangeLog for repo/gentoo.git (or other similarly structured trees)')
+ parser.add_argument('--destdir', metavar='DIR', type=str, required=True, help='Destination directory for output')
+ parser.add_argument('--jobs', metavar='N', type=str, default='max', help='Number of parallel workers to use, "max" -> all cpus [%(default)s]')
+ parser.add_argument('--progress', action='count', default=0, help='Display progress output (number of commits & packages processed)')
+ parser.add_argument('--verbose', action='count', default=0, help='Display verbose output')
+ parser.add_argument('--debug', action='count', default=0, help='Display debug output')
+
+ parser.add_argument('--write-git-log-data', type=argparse.FileType('wb'), help='Write git-log data to specified file')
+ parser.add_argument('--read-git-log-data', type=argparse.FileType('rb'), help='Read git-log data from specified file (INSTEAD of running git-log)')
+
+ g1 = parser.add_mutually_exclusive_group()
+ g1.add_argument('--delete-git-log-data', dest='delete_git_log_data', action='store_true', help='Delete git-log data file after completion (default yes if not reading)')
+ g1.add_argument('--no-delete-git-log-data', dest='delete_git_log_data', action='store_false', help='Do not delete git-log data file after completion')
+
+ parser.add_argument('--max-count', type=int, default=None, help=argparse.SUPPRESS) # Max number of commits to request (only for debugging)
+
+ opts = parser.parse_args()
+
+ if opts.jobs == 'max':
+ opts.jobs = multiprocessing.cpu_count()
+ elif re.match(r'^\d+$', opts.jobs) and int(opts.jobs) > 0:
+ opts.jobs = int(opts.jobs)
+ else:
+ ArgumentParser.error('Jobs must be "max" or a positive integer.')
+
+ # if --delete-git-log-data is not set
+ # set to true if --read-git-log-data is NOT set
+ if opts.delete_git_log_data is None:
+ opts.delete_git_log_data = (opts.read_git_log_data is None)
+
+ if opts.write_git_log_data is None:
+ tmpopts = { 'prefix':'egenchangelog2-git-log-', 'suffix': '.bin' }
+ if opts.delete_git_log_data is True:
+ opts.write_git_log_data = tempfile.SpooledTemporaryFile(max_size=(128 * 2 << 20), **tmpopts)
+ else:
+ opts.write_git_log_data = tempfile.NamedTemporaryFile(delete=False, **tmpopts)
+
+ if opts.jobs > 1:
# WARNING: all input functions to these MUST be pickable!
# Lambdas or local functions will NOT work.
- p = multiprocessing.Pool() # use all available CPUs
- imap_ordered = p.imap
- imap_unordered = p.imap_unordered
- starmap = p.starmap
+ opts.p = multiprocessing.Pool(processes=opts.jobs)
+ opts.imap_ordered = opts.p.imap
+ opts.imap_unordered = opts.p.imap_unordered
+ opts.starmap = opts.p.starmap
else:
- p = None
- imap_ordered = map
- imap_unordered = map
- starmap = itertools.starmap
+ opts.p = None
+ opts.imap_ordered = map
+ opts.imap_unordered = map
+ opts.starmap = itertools.starmap
+
+ print(opts)
+
+ if opts.progress > 0 or opts.verbose > 0:
+ print("Querying git-log.", flush=True)
+
+ buf = get_commits_buffer(opts=opts)
- print("Querying git-log.", flush=True)
- commits1 = get_commits()
- global _commitsdatasize
- print("git-log gave us {} bytes".format(_commitsdatasize), flush=True)
+ if opts.progress > 0 or opts.verbose > 0:
+ print("git-log gave us {} bytes".format(len(buf)), flush=True)
+
+
+ if opts.progress > 0 or opts.verbose > 0:
+ print("Evaluating commits:", flush=True)
+ commits1 = CommitGenerator(buf, opts=opts)
+ #for n,c in enumerate(commits1):
+ # print(n, c)
+ #return
- print("Evaluating commits:", flush=True)
slicer = itertools.chain(commits1) # Force lazy eval
- commits2 = imap_ordered(process_git_commit_text_to_struct, slicer)
+ commits2 = opts.imap_ordered(process_git_commit_text_to_struct, slicer)
changed_packages = dict()
for n, c in enumerate(commits2):
# Skip bad output (should NOT happen)
@@ -486,21 +556,30 @@ def main():
#if c['H'] in EXCLUDE_COMMITS:
# continue
- print(n, 'H={}'.format(c['H']))
+ if opts.progress > 0:
+ print(n, 'H={}'.format(c['H']))
+
# TODO: Stash commits to SQLite
for cp in c['FILES'].keys():
if cp not in changed_packages:
changed_packages[cp] = []
changed_packages[cp].append(c)
- print("Done commits. Saw {} packages.".format(len(changed_packages)), flush=True)
+ if opts.progress > 0 or opts.verbose > 0:
+ print("Done {} commits. Saw {} total packages.".format(n, len(changed_packages)), flush=True)
- print("Writing ChangeLogs:", flush=True)
- commits3 = starmap(starmap_func_write_package_changelog, changed_packages.items())
+ if opts.progress > 0 or opts.verbose > 0:
+ print("Writing ChangeLogs:", flush=True)
+
+ commits3 = opts.starmap(starmap_func_write_package_changelog, changed_packages.items())
# This must be done because otherwise only lazy evaluation happens.
for n,cp in enumerate(commits3):
- print(n, cp)
- print("Done writing ChangeLogs.", flush=True)
+ if opts.progress > 0:
+ print(n, cp)
+
+ if opts.progress > 0 or opts.verbose > 0:
+ print("Done writing ChangeLogs.", flush=True)
+
if __name__ == '__main__':
main()