# Reconstructed from the post-patch side of commit
# 05834af144a9c5dbb3afe3783a8e8affd126728c
# ("echangelog2: Start to PEP8 strict pythonify", Robin H. Johnson,
# 2016-12-01) against egenchangelog2.py.
#
# The names git_log_command, process_git_commit_text_to_struct and
# starmap_func_write_package_changelog are defined in parts of
# egenchangelog2.py that the patch does not show.
import argparse
import itertools
import multiprocessing
import re
import subprocess
import tempfile


# TODO: convert this into a stream
def get_commits_buffer(last_commit='HEAD', first_commit=None, opts=None):
    """Return the complete git-log output as one decoded string.

    The data is read from ``opts.read_git_log_data`` when that attribute is
    set; otherwise ``git_log_command`` is executed.  When
    ``opts.write_git_log_data`` is set, a UTF-8 copy of the data is written
    to it.  Both file objects are closed before returning.

    Args:
        last_commit: Not used yet (see the TODO in the body).
        first_commit: Not used yet (see the TODO in the body).
        opts: Option namespace.  The attributes ``read_git_log_data``,
            ``write_git_log_data`` (binary file objects or None) and
            ``debug`` (int) are consulted; missing attributes are treated
            as None / 0.

    Returns:
        The git-log data decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: if the git command fails.
        UnicodeDecodeError: if the data is not valid UTF-8.
    """
    # A Namespace() default would be one shared mutable object, and an empty
    # one has none of the attributes read below, so resolve it per call.
    if opts is None:
        opts = argparse.Namespace()
    source = getattr(opts, 'read_git_log_data', None)
    sink = getattr(opts, 'write_git_log_data', None)
    debug = getattr(opts, 'debug', 0)

    if source is not None:
        try:
            buf = source.read().decode('utf-8')
        finally:
            source.close()
    else:
        # TODO: include first/last commit in command.
        if debug > 0:
            cmdstr = ' '.join("'" + s + "'" for s in git_log_command)
            print('Command', cmdstr, flush=True)
        buf = subprocess.check_output(git_log_command).decode('utf-8')

    if sink is not None:
        # The handle is closed right after the copy is written; callers
        # cannot read the data back through this same object.
        try:
            sink.write(buf.encode('utf-8'))
        finally:
            sink.close()

    return buf


class CommitGenerator(object):
    """Iterator over the commit records contained in a git-log buffer.

    Records are separated by the marker ``\\x01\\x01COMMIT\\x01``
    (presumably emitted by the format string in git_log_command -- confirm
    against the full file).  Text before the first marker is yielded as the
    first record, even when empty; an empty remainder after the last marker
    is not yielded.
    """

    def __init__(self, buf, opts=None):
        """Prepare iteration over ``buf``; ``opts`` is accepted but unused."""
        self.buf = buf
        self.regex = re.compile(r'\x01\x01COMMIT\x01', flags=re.MULTILINE)
        self.buflen = len(self.buf)
        self.pos = 0  # offset of the first character not yet yielded
        self.regex_iter = self.regex.finditer(self.buf)

    def __iter__(self):
        return self

    def __next__(self):
        return self.next()

    def next(self):
        """Return the next record or raise StopIteration when exhausted."""
        marker = next(self.regex_iter, None)
        if marker is not None:
            start, end = marker.span()
            record = self.buf[self.pos:start]
            self.pos = end
            return record

        # No marker left: hand out the remaining tail exactly once.
        if self.pos >= self.buflen:
            raise StopIteration
        record = self.buf[self.pos:]
        self.pos = self.buflen
        return record


def _build_arg_parser():
    """Return the command-line parser for the ChangeLog generator."""
    parser = argparse.ArgumentParser(description='Generate ChangeLog for repo/gentoo.git (or other similarly structured trees)')
    parser.add_argument('--destdir', metavar='DIR', type=str, required=True, help='Destination directory for output')
    parser.add_argument('--jobs', metavar='N', type=str, default='max', help='Number of parallel workers to use, "max" -> all cpus [%(default)s]')
    parser.add_argument('--progress', action='count', default=0, help='Display progress output (number of commits & packages processed)')
    parser.add_argument('--verbose', action='count', default=0, help='Display verbose output')
    parser.add_argument('--debug', action='count', default=0, help='Display debug output')

    parser.add_argument('--write-git-log-data', type=argparse.FileType('wb'), help='Write git-log data to specified file')
    parser.add_argument('--read-git-log-data', type=argparse.FileType('rb'), help='Read git-log data from specified file (INSTEAD of running git-log)')

    # default=None on both flags: without it the store_true default (False)
    # is what ends up in the namespace, and main() could never tell
    # "not given" apart from "--no-delete-git-log-data".
    g1 = parser.add_mutually_exclusive_group()
    g1.add_argument('--delete-git-log-data', dest='delete_git_log_data', action='store_true', default=None, help='Delete git-log data file after completion (default yes if not reading)')
    g1.add_argument('--no-delete-git-log-data', dest='delete_git_log_data', action='store_false', default=None, help='Do not delete git-log data file after completion')

    # Max number of commits to request (only for debugging)
    parser.add_argument('--max-count', type=int, default=None, help=argparse.SUPPRESS)
    return parser


def _generate_changelogs(opts):
    """Run the pipeline: query git-log, group commits by package, write logs.

    Expects ``opts.imap_ordered`` and ``opts.starmap`` to have been set up
    by main().
    """
    chatty = opts.progress > 0 or opts.verbose > 0

    if chatty:
        print("Querying git-log.", flush=True)

    buf = get_commits_buffer(opts=opts)

    if chatty:
        print("git-log gave us {} bytes".format(len(buf)), flush=True)
        print("Evaluating commits:", flush=True)

    commits1 = CommitGenerator(buf, opts=opts)
    slicer = itertools.chain(commits1)  # Force lazy eval
    commits2 = opts.imap_ordered(process_git_commit_text_to_struct, slicer)

    changed_packages = dict()
    commit_count = 0  # stays 0 (instead of being unbound) for empty input
    for n, c in enumerate(commits2):
        commit_count = n + 1
        # Skip bad output (should NOT happen).
        # NOTE(review): the original check sits in lines the source patch
        # elides between two hunks; "falsy result" is an assumption --
        # confirm against the full file.
        if not c:
            continue

        if opts.progress > 0:
            print(n, 'H={}'.format(c['H']))

        # TODO: Stash commits to SQLite
        for cp in c['FILES']:
            changed_packages.setdefault(cp, []).append(c)

    if chatty:
        print("Done {} commits. Saw {} total packages.".format(commit_count, len(changed_packages)), flush=True)
        print("Writing ChangeLogs:", flush=True)

    commits3 = opts.starmap(starmap_func_write_package_changelog, changed_packages.items())
    # This must be done because otherwise only lazy evaluation happens.
    for n, cp in enumerate(commits3):
        if opts.progress > 0:
            print(n, cp)

    if chatty:
        print("Done writing ChangeLogs.", flush=True)


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list to parse; None means ``sys.argv[1:]``.

    Invalid options end the program through ``parser.error`` (SystemExit
    with status 2).
    """
    parser = _build_arg_parser()
    opts = parser.parse_args(argv)

    if opts.jobs == 'max':
        opts.jobs = multiprocessing.cpu_count()
    elif re.match(r'^\d+$', opts.jobs) and int(opts.jobs) > 0:
        opts.jobs = int(opts.jobs)
    else:
        # error() must be called on the parser instance.
        parser.error('Jobs must be "max" or a positive integer.')

    # if --delete-git-log-data is not set
    # set to true if --read-git-log-data is NOT set
    if opts.delete_git_log_data is None:
        opts.delete_git_log_data = (opts.read_git_log_data is None)

    if opts.write_git_log_data is None:
        tmpopts = {'prefix': 'egenchangelog2-git-log-', 'suffix': '.bin'}
        if opts.delete_git_log_data:
            # Up to 256 MiB is kept in memory before spilling to disk.
            opts.write_git_log_data = tempfile.SpooledTemporaryFile(max_size=((128 * 2) << 20), **tmpopts)
        else:
            opts.write_git_log_data = tempfile.NamedTemporaryFile(delete=False, **tmpopts)

    if opts.jobs > 1:
        # WARNING: all input functions to these MUST be picklable!
        # Lambdas or local functions will NOT work.
        opts.p = multiprocessing.Pool(processes=opts.jobs)
        opts.imap_ordered = opts.p.imap
        opts.imap_unordered = opts.p.imap_unordered
        opts.starmap = opts.p.starmap
    else:
        opts.p = None
        opts.imap_ordered = map
        opts.imap_unordered = map
        opts.starmap = itertools.starmap

    if opts.debug > 0:
        print(opts)

    try:
        _generate_changelogs(opts)
    finally:
        if opts.p is not None:
            # On success every result has already been consumed; on failure
            # the remaining work is pointless.  Either way stop the workers
            # instead of leaving the pool to interpreter shutdown.
            opts.p.terminate()
            opts.p.join()


if __name__ == '__main__':
    main()