Index: src/bsfix/xml-rewrite-2.py =================================================================== --- src/bsfix/xml-rewrite-2.py (revision 3421) +++ src/bsfix/xml-rewrite-2.py (working copy) @@ -3,15 +3,17 @@ # Copyright 2004-2006 Gentoo Foundation # Distributed under the terms of the GNU General Public Licence v2 -# $Header: /var/cvsroot/gentoo-src/javatoolkit/src/bsfix/xml-rewrite.py,v 1.6 2005/07/19 10:35:18 axxo Exp $ -# Author: Saleem Abdulrasool <compnerd@compnerd.org> +# Authors: +# Saleem Abdulrasool <compnerd@compnerd.org> +# Petteri Räty <betelgeuse@gentoo.org> # Maintainer: Gentoo Java Herd <java@gentoo.org> # Python based XML modifier # ChangeLog # Petteri Räty <betelgeuse@gentoo.org # December 06, 2006 - Changed to use xml.parsers.expat and basically rewrote the whole file +# December 29, 2006 - Added a SAX based implementation to handle entities etc ( test on dev-java/skinlf ) # Saleem A. <compnerd@compnerd.org> # December 23, 2004 - Initial Write # December 24, 2004 - Added usage information @@ -26,6 +28,11 @@ __version__ = "$Revision: 1.7 $"[11:-2] class DomRewriter: + """ + The old DOM rewriter is still around for index based stuff. It can + be used for all the complex stuff but portage needed features should + be in StreamRewriterBase subclasses as they are much faster. + """ from xml.dom import NotFoundErr def __init__(self, modifyElems, attributes, values=None, index=None): @@ -61,40 +68,32 @@ def write(self,stream): stream.write(self.document.toxml()) -class ExpatRewriter: +class StreamRewriterBase: def __init__(self, elems, attributes, values, index): self.buffer = StringIO.StringIO() - self.p = self.buffer.write + self.__write = self.buffer.write self.elems = elems self.attributes = attributes self.values = values - def process(self, in_stream): - from xml.parsers.expat import ParserCreate - parser = ParserCreate() + def p(self,str): + self.__write(str.encode('utf8')) - parser.StartElementHandler = self.start_element - parser.EndElementHandler = self.end_element - parser.CharacterDataHandler = self.char_data - parser.ParseFile(in_stream) - self.p('\n') - def write(self, out_stream): - out_stream.write(self.buffer.getvalue()) - self.buffer.close() - self.buffer = StringIO.StringIO() - self.p = self.buffer.write + value = self.buffer.getvalue() + out_stream.write(value) + self.buffer.truncate(0) def write_attr(self,a,v): - self.buffer.write('%s=%s ' % (a,quoteattr(v))) + self.p(u'%s=%s ' % (a,quoteattr(v, {u'©':'©'}))) def start_element(self, name, attrs): - self.p('<%s ' % name) + self.p(u'<%s ' % name) match = ( name in self.elems ) - for a,v in attrs.iteritems(): + for a,v in attrs: if not ( match and a in self.attributes ): self.write_attr(a,v) @@ -102,14 +101,50 @@ for i, attr in enumerate(self.attributes): self.write_attr(attr, self.values[i]) - self.p('>') + self.p(u'>') +class ExpatRewriter(StreamRewriterBase): + """ + The only problem with this Expat based implementation is that it does not + handle entities doctypes etc properly so for example dev-java/skinlf fails. + """ + def process(self, in_stream): + from xml.parsers.expat import ParserCreate + parser = ParserCreate() + + parser.StartElementHandler = self.start_element + parser.EndElementHandler = self.end_element + parser.CharacterDataHandler = self.char_data + parser.ParseFile(in_stream) + self.p(u'\n') + + def start_element(self, name, attrs): + StreamRewriterBase(self, name, attrs.iteritems()) + def end_element(self,name): - self.p('</%s>' % name) + self.p(u'</%s>' % name) def char_data(self,data): self.p(escape(data)) +from xml.sax.saxutils import XMLGenerator +class SaxRewriter(XMLGenerator, StreamRewriterBase): + """ + Using Sax gives us the support for writing back doctypes and all easily + and is only marginally slower than expat as it is just a tight layer over it + """ + def __init__(self, elems, attributes, values, index): + StreamRewriterBase.__init__(self, elems, attributes, values, index) + XMLGenerator.__init__(self, self.buffer, 'UTF-8') + + def process(self, in_stream): + from xml.sax import parse + parse(in_stream, self) + self.p(u'\n') + + def startElement(self, name, attrs): + self.start_element(name, attrs.items()) + if __name__ == '__main__': usage = "XML Rewrite Python Module Version " + __version__ + "\n" usage += "Copyright 2004 Gentoo Foundation\n" @@ -166,8 +201,6 @@ error("You must give value for every attribute you are changing.") # End Invalid Arguments Check - import codecs - def get_rewriter(options): if options.index or options.doDelete: # java-ant-2.eclass does not use these options so we can optimize the ExpatWriter @@ -175,23 +208,32 @@ rewriter = DomRewriter(options.elements, options.attributes, options.values, options.index) print "Using DOM to rewrite the build.xml files" else: - rewriter = ExpatRewriter(options.elements, options.attributes, options.values, options.index) - print "Using Expat to rewrite the build.xml files" + rewriter = SaxRewriter(options.elements, options.attributes, options.values, options.index) + print "Using Sax to rewrite the build.xml files" return rewriter rewriter = get_rewriter(options) if options.files: + import os for file in options.files: print "Rewriting %s" % file - f = open(file,"r") + # First parse the file into memory + # Tricks with cwd are needed for relative includes of other xml files to build.xml files + cwd = os.getcwd() + dirname = os.path.dirname(file) + if dirname != '': # for file = build.xml comes out as '' + os.chdir(os.path.dirname(file)) + f = open(os.path.basename(file),"r") rewriter.process(f) + os.chdir(cwd) f.close() + # Then write it back to the file f = open(file, "w") - rewriter.write(codecs.getwriter('utf-8')(f)) + rewriter.write(f) f.close() else: rewriter.process(sys.stdin) - rewriter.write(codecs.getwriter('utf-8')(sys.stdout)) + rewriter.write(sys.stdout)