#!/usr/bin/env python
"""Write an INDEX file describing a distribution archive and its members.

For every archive given on the command line an index file named
``<basename>.<mtime>.INDEX`` is created.  It always contains a header with
the archive's name, size, mtime and MD5/SHA1 digests; if the archive can be
read as a tar or zip file, one record per regular member follows.

The code deliberately sticks to syntax accepted by both Python 2 and
Python 3 (single-argument ``print(...)``, ``sys.exc_info()``).
"""

import datetime
import getopt
import os
import sys
import tarfile
import time
import zipfile

try:
    import hashlib
    md5_cons = hashlib.md5
    sha1_cons = hashlib.sha1
except ImportError:
    # Interpreters that predate hashlib: use the legacy digest modules.
    import md5
    import sha
    md5_cons = md5.new
    sha1_cons = sha.new


def hash_file(fileobj, *hashobjects):
    """Feed a stream to several hash objects without loading it whole.

    The stream is consumed in 1 MiB chunks, so memory use is independent
    of the stream size.

    Parameters:
        fileobj      -- object with a ``read(size)`` method returning bytes
        hashobjects  -- any number of hash objects (``update``/``hexdigest``)

    Returns a tuple with the hex digest of each hash object, in the order
    the objects were passed.
    """
    chunk_size = 1024 * 1024
    data = fileobj.read(chunk_size)
    while data:
        for ho in hashobjects:
            ho.update(data)
        data = fileobj.read(chunk_size)
    return tuple([ho.hexdigest() for ho in hashobjects])


class ArchiveInfo:
    """Collect metadata about one archive file and write it as an index."""

    def __init__(self, filename):
        """Record name, size and mtime of *filename*.

        Raises OSError if the file cannot be stat'ed.
        """
        self.filename = filename
        self.filesize = os.path.getsize(filename)  # this can throw an exception
        self.mtime = os.path.getmtime(filename)
        self.basename = os.path.basename(filename)

    def write_info(self, outfiledir):
        """Create the index file for this archive inside *outfiledir*.

        The header is always written.  Member records are produced by
        trying the tar reader first and the zip reader second; if neither
        can read the archive the index contains the header only.

        Returns the path of the index file that was written.
        Raises IOError/OSError if the index or the archive cannot be opened.
        """
        outfilename = "%s.%s.INDEX" % (self.basename, self.mtime)
        if outfiledir:
            outfilename = os.path.join(outfiledir, outfilename)
        self.out_fd = open(outfilename, "w")
        try:
            self.write_header()
            header_end = self.out_fd.tell()
            if not self._write_members(self.write_tar_content, header_end):
                self._write_members(self.write_zip_content, header_end)
        finally:
            self.out_fd.close()
        return outfilename

    def _write_members(self, writer, rollback_pos):
        """Run *writer*; on failure discard whatever it wrote.

        Returns True if *writer* completed, False if it raised.
        """
        try:
            writer()
        except Exception:
            # Best effort by design: an unreadable or damaged archive must
            # not abort indexing.  Truncate so a partial listing written
            # before the failure cannot survive underneath later output.
            self.out_fd.seek(rollback_pos)
            self.out_fd.truncate()
            return False
        return True

    def write_header(self):
        """Write the ``File-dist-*`` header lines for the archive itself."""
        self.out_fd.write("File-dist-name: %s\n" % (self.basename))
        self.out_fd.write("File-dist-size: %s\n" % (self.filesize))
        self.out_fd.write("File-dist-isdistfile: 1\n")
        # Package-level fields (File-dist-origin, -cpv, -cat, -pn, -pv, -pr,
        # -pf) are not emitted: this class carries no such attributes.
        self.out_fd.write("File-dist-mtime: %s\n" % (self.mtime))
        # Binary mode: digests must be computed over the raw bytes.
        distfile = open(self.filename, "rb")
        try:
            (md5, sha1) = hash_file(distfile, md5_cons(), sha1_cons())
        finally:
            distfile.close()
        self.out_fd.write("File-dist-md5: %s\n" % (md5))
        self.out_fd.write("File-dist-sha1: %s\n" % (sha1))

    def write_tar_content(self):
        """Write one record per regular file found in the tar archive.

        Raises whatever ``tarfile`` raises when the archive is not a
        readable tar file.
        """
        tar = tarfile.open(self.filename, 'r')
        try:
            filecount = 0
            for file_info in tar:
                if not file_info.isfile():
                    continue
                file_stream = tar.extractfile(file_info)
                if file_stream is None:
                    print("Filestream empty on %s" % (file_info.name))
                    continue
                self.out_fd.write("File-%05d-name: %s\n" % (filecount, file_info.name))
                self.out_fd.write("File-%05d-size: %s\n" % (filecount, file_info.size))
                self.out_fd.write("File-%05d-mtime: %s\n" % (filecount, file_info.mtime))
                (md5, sha1) = hash_file(file_stream, md5_cons(), sha1_cons())
                self.out_fd.write("File-%05d-md5: %s\n" % (filecount, md5))
                self.out_fd.write("File-%05d-sha1: %s\n" % (filecount, sha1))
                filecount += 1
        finally:
            tar.close()

    def write_zip_content(self):
        """Write one record per file entry found in the zip archive.

        Raises whatever ``zipfile`` raises when the archive is not a
        readable zip file.
        """
        archive = zipfile.ZipFile(self.filename, 'r')
        try:
            filecount = 0
            for name in archive.namelist():
                file_info = archive.getinfo(name)
                if os.path.basename(file_info.filename) == "":
                    # this is a directory
                    continue
                # Each member is read into memory as a whole.
                file_content = archive.read(name)
                # Zip stores naive local timestamps.  time.mktime() yields
                # the same local-time epoch value as the glibc-only
                # strftime("%s"), but works on every platform.
                file_time = datetime.datetime(*file_info.date_time)
                mtime = int(time.mktime(file_time.timetuple()))
                self.out_fd.write("File-%05d-name: %s\n" % (filecount, file_info.filename))
                self.out_fd.write("File-%05d-size: %s\n" % (filecount, file_info.file_size))
                self.out_fd.write("File-%05d-mtime: %s\n" % (filecount, mtime))
                md5 = md5_cons(file_content).hexdigest()
                sha1 = sha1_cons(file_content).hexdigest()
                self.out_fd.write("File-%05d-md5: %s\n" % (filecount, md5))
                self.out_fd.write("File-%05d-sha1: %s\n" % (filecount, sha1))
                filecount += 1
        finally:
            archive.close()


def main():
    """Parse the command line and index every file named on it."""
    try:
        # '-u' takes an argument but is not used anywhere; it stays in the
        # option string so existing invocations that pass it keep working.
        optlist, files = getopt.getopt(sys.argv[1:], 'P:hu:')
    except getopt.GetoptError:
        usage(sys.argv[0])
        sys.exit(2)

    outdir = "."
    for opt, arg in optlist:
        if opt == '-h':
            usage(sys.argv[0])
            sys.exit(0)
        if opt == '-P':
            outdir = arg

    if len(files) == 0:
        print("Please specify a filename.")
        return

    for infilename in files:
        try:
            ArchiveInfo(infilename).write_info(outdir)
        except EnvironmentError:
            # Covers IOError and OSError.  Fetched via sys.exc_info() so
            # the same source parses on Python 2 and Python 3.
            err = sys.exc_info()[1]
            print("%s  could not be opened: %s" % (infilename, str(err)))


def usage(programname):
    """ Print usage information """
    print("Usage: %s [-h] [-P path] file [file ..]" % (programname))
    print('''
This script opens the file(s) specified, and writes the index to the directory
specified by -P.

Parameters:
    -h          Display this help
    -P path     Directory to create the index file in (default: .)
    file        Path to a file to index, must be present
''')


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print('\n ! Exiting.')