summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Buchholz <rbu@gentoo.org>2008-03-25 02:08:45 +0100
committerRobert Buchholz <rbu@gentoo.org>2008-03-25 02:08:45 +0100
commit170fba697c75ebef0013615fa52d932f7cf31fc2 (patch)
tree41968d937e7aaf1ba6c88c2f5e9a2841b026f032
parentChange to mtime.INDEX (diff)
downloaddistindex-170fba697c75ebef0013615fa52d932f7cf31fc2.tar.gz
distindex-170fba697c75ebef0013615fa52d932f7cf31fc2.tar.bz2
distindex-170fba697c75ebef0013615fa52d932f7cf31fc2.zip
Ebuild indexing script.
It interfaces with portage and generates an index of all ebuild->distfile correlations, including ebuild data (such as mtime and checksums), as well as the distfile URIs.
-rwxr-xr-xebuild-indexer.py141
1 files changed, 141 insertions, 0 deletions
diff --git a/ebuild-indexer.py b/ebuild-indexer.py
new file mode 100755
index 0000000..3a8e2dc
--- /dev/null
+++ b/ebuild-indexer.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python2.4
+
+import os
+import re
+import portage
+import portage_dep, portage_util, portage_versions
+
+# Bind the dependency helpers used below.  Each one is looked up in its
+# split-out module first (portage_dep / portage_util); if the attribute is
+# missing there, the top-level portage module is used instead.
+try:
+    use_reduce = portage_dep.use_reduce
+except AttributeError:
+    use_reduce = portage.use_reduce
+
+try:
+    paren_reduce = portage_dep.paren_reduce
+except AttributeError:
+    paren_reduce = portage.paren_reduce
+
+try:
+    flatten = portage_util.flatten
+except AttributeError:
+    flatten = portage.flatten
+
+import md5
+import sha
+# Constructors for the two digests recorded for every ebuild.
+md5_cons = md5.new
+sha1_cons = sha.new
+
+# Root of the portage tree that gets indexed.
+porttree = "/usr/portage"
+
+# Directory that receives the index files.  Create it when it is missing;
+# exit with status 2 when the name is already taken by a non-directory.
+outdir = "ebuildindex"
+if os.path.exists(outdir):
+    if not os.path.isdir(outdir):
+        import sys
+        print "%s exists, but is not a directory." % (outdir)
+        sys.exit(2)
+else:
+    os.mkdir(outdir)
+
+
+def _read_ebuild_stats(path):
+    """ Return (filesize, mtime, md5, sha1, header) for the ebuild at path.
+
+    header is the first $Header$ or $Id$ keyword found in the file, or ""
+    when there is none.  Any error while reading propagates to the caller;
+    the file is closed in either case.
+    """
+    filesize = os.path.getsize(path)
+    mtime = os.path.getmtime(path)
+
+    ebuild = open(path)
+    # try/finally nested by hand: Python 2.4 cannot combine it with except.
+    try:
+        # hash it
+        (md5sum, sha1sum) = hash_file(ebuild, md5_cons(), sha1_cons())
+
+        # Hashing consumed the stream, so rewind before scanning the lines.
+        ebuild.seek(0)
+
+        # Find $Header$ or $Id$ line
+        matcher = re.compile(r"(\$(Header|Id):.*\$)")
+        header = ""
+        for line in ebuild:
+            m = matcher.search(line)
+            if m:
+                header = m.group(1)
+                break
+    finally:
+        ebuild.close()
+
+    return (filesize, mtime, md5sum, sha1sum, header)
+
+
+def write_ebuild_stats(fd_out, cpv, dbapi):
+    """ Write the "Ebuild-*" lines describing one ebuild to fd_out.
+
+    fd_out -- writable file object that receives the index lines
+    cpv    -- string identifying the ebuild; written verbatim as Ebuild-PF
+              and split with portage_versions.catpkgsplit()
+    dbapi  -- object whose findname(cpv) yields the path of the ebuild file
+
+    The file statistics are best effort: when the ebuild cannot be located
+    or read, 0 is written for mtime, size, md5 and sha1 and the header is
+    left empty.  The PN/PV/PR/CATEGORY/P/PVR lines are only written when
+    cpv splits into exactly four parts.
+    """
+    try:
+        # find out the actual path to the ebuild
+        stats = _read_ebuild_stats(dbapi.findname(cpv))
+    except (KeyboardInterrupt, SystemExit):
+        # Before Python 2.5 these derive from Exception; never swallow them.
+        raise
+    except Exception:
+        stats = (0, 0, 0, 0, "")
+    (filesize, mtime, md5sum, sha1sum, header) = stats
+
+    fd_out.write("Ebuild-PF: %s\n" % (cpv))
+    fd_out.write("Ebuild-mtime: %s\n" % (mtime))
+    fd_out.write("Ebuild-size: %s\n" % (filesize))
+    fd_out.write("Ebuild-md5: %s\n" % (md5sum))
+    fd_out.write("Ebuild-sha1: %s\n" % (sha1sum))
+    fd_out.write("Ebuild-header: %s\n" % (header))
+
+    # Reconstruct portage's CPV-related variables
+    cpvlist = portage_versions.catpkgsplit(cpv)
+    if cpvlist is not None and len(cpvlist) == 4:
+        (cat, pac, ver, rev) = cpvlist
+
+        # PVR is the version plus the revision, without the package name;
+        # an implicit "r0" revision is not part of it.
+        if rev == "r0":
+            pvr = ver
+        else:
+            pvr = "%s-%s" % (ver, rev)
+
+        fd_out.write("Ebuild-PN: %s\n" % (pac))
+        fd_out.write("Ebuild-PV: %s\n" % (ver))
+        fd_out.write("Ebuild-PR: %s\n" % (rev))
+        fd_out.write("Ebuild-CATEGORY: %s\n" % (cat))
+        fd_out.write("Ebuild-P: %s-%s\n" % (pac, ver))
+        fd_out.write("Ebuild-PVR: %s\n" % (pvr))
+
+
+def hash_file(fileobj, *hashobjects):
+    """ RAM efficient hashing implementation for stream-based file objects.
+
+    Reads fileobj in chunks of 1 MiB until read() returns nothing, feeding
+    each chunk to every given hash object, so the file is never held in
+    memory as a whole.
+
+    fileobj     -- object with a read(size) method
+    hashobjects -- any number of objects providing update() and hexdigest()
+
+    Returns a tuple holding the hexdigest of each hash object, in the order
+    they were passed.  The digests are taken before returning, so updating
+    a hash object afterwards does not alter the result.
+    """
+    chunksize = 1024 * 1024
+    while True:
+        data = fileobj.read(chunksize)
+        if not data:
+            break
+        for ho in hashobjects:
+            ho.update(data)
+
+    return tuple([ho.hexdigest() for ho in hashobjects])
+
+
+
+# Start from portage's global settings object and override the tree
+# locations on it.
+# NOTE(review): unlock() is presumably needed before keys can be assigned --
+# confirm against the portage config API.
+config = portage.settings
+config.unlock()
+# Empty PORTDIR_OVERLAY and point PORTDIR at porttree.
+config["PORTDIR_OVERLAY"] = ''
+config["PORTDIR"] = porttree
+
+# Tree database handle for porttree, built with the adjusted settings.
+dbapi = portage.portdbapi(porttree, mysettings=config)
+
+
+# Write one "<outdir>/<cpv with '/' replaced by '+'>.DIST" file per ebuild:
+# first the ebuild's own statistics, then a numbered entry for every
+# distfile named in its SRC_URI.
+for cp in dbapi.cp_all():
+    for cpv in dbapi.cp_list(cp):
+        try:
+            uris, = dbapi.aux_get(cpv, ("SRC_URI",))
+            uris = use_reduce(paren_reduce(uris), matchall=1)
+            uris = flatten(uris)
+        except (KeyboardInterrupt, SystemExit):
+            # Before Python 2.5 these derive from Exception; let them abort
+            # the run instead of being reported as a per-ebuild error.
+            raise
+        except Exception, e:
+            print "Error with %s: %s" % (cpv, str(e))
+            continue
+
+        indexfile = "%s/%s.DIST" % (outdir, cpv.replace("/", "+"))
+        fd_out = open(indexfile, "w")
+        # Close the index file even if writing it fails part way through.
+        try:
+            write_ebuild_stats(fd_out, cpv, dbapi)
+
+            # Maps each distfile name to the number it got on first sight.
+            filenums = {}
+            for uri in uris:
+                filename = os.path.basename(uri)
+                if filename not in filenums:
+                    # First URI for this file: assign the next free number
+                    # and emit the name line once.
+                    filenums[filename] = len(filenums)
+                    fd_out.write("Distfile-%05d-name: %s\n"
+                                 % (filenums[filename], filename))
+                # in case we have multiple uri's for one filename,
+                # they all share the number of the first one
+                fd_out.write("Distfile-%05d-uri: %s\n"
+                             % (filenums[filename], uri))
+        finally:
+            fd_out.close()