Index: src/bsfix/xml-rewrite-2.py
===================================================================
--- src/bsfix/xml-rewrite-2.py	(revision 3421)
+++ src/bsfix/xml-rewrite-2.py	(working copy)
@@ -3,15 +3,17 @@
 
 # Copyright 2004-2006 Gentoo Foundation
 # Distributed under the terms of the GNU General Public Licence v2
-# $Header: /var/cvsroot/gentoo-src/javatoolkit/src/bsfix/xml-rewrite.py,v 1.6 2005/07/19 10:35:18 axxo Exp $
 
-# Author: Saleem Abdulrasool <compnerd@compnerd.org>
+# Authors: 
+#	Saleem Abdulrasool <compnerd@compnerd.org>
+#	Petteri Räty <betelgeuse@gentoo.org>
 # Maintainer: Gentoo Java Herd <java@gentoo.org>
 # Python based XML modifier
 
 # ChangeLog
 # Petteri Räty <betelgeuse@gentoo.org
 #	   December 06, 2006 - Changed to use xml.parsers.expat and basically rewrote the whole file
+#	   December 29, 2006 - Added a SAX based implementation to handle entities etc ( test on dev-java/skinlf )
 # Saleem A. <compnerd@compnerd.org>
 #	   December 23, 2004 - Initial Write
 #	   December 24, 2004 - Added usage information
@@ -26,6 +28,11 @@
 __version__ = "$Revision: 1.7 $"[11:-2]
 
 class DomRewriter:
+	"""
+	The old DOM rewriter is still around for index based stuff. It can
+	be used for all the complex stuff but portage needed features should
+	be in StreamRewriterBase subclasses as they are much faster.
+	"""
 	from xml.dom import NotFoundErr
 
 	def __init__(self, modifyElems, attributes, values=None, index=None):
@@ -61,40 +68,32 @@
 	def write(self,stream):
 		stream.write(self.document.toxml())
 
-class ExpatRewriter:
+class StreamRewriterBase:
 
 	def __init__(self, elems, attributes, values, index):
 		self.buffer = StringIO.StringIO()
-		self.p = self.buffer.write
+		self.__write = self.buffer.write
 		self.elems = elems
 		self.attributes = attributes
 		self.values = values
 
-	def process(self, in_stream):
-		from xml.parsers.expat import ParserCreate
-		parser = ParserCreate()
+	def p(self,str):
+		self.__write(str.encode('utf8'))
 
-		parser.StartElementHandler = self.start_element
-		parser.EndElementHandler = self.end_element
-		parser.CharacterDataHandler = self.char_data
-		parser.ParseFile(in_stream)
-		self.p('\n')
-
 	def write(self, out_stream):
-		out_stream.write(self.buffer.getvalue())
-		self.buffer.close()
-		self.buffer = StringIO.StringIO()
-		self.p = self.buffer.write
+		value = self.buffer.getvalue()
+		out_stream.write(value)
+		self.buffer.truncate(0)
 
 	def write_attr(self,a,v):
-		self.buffer.write('%s=%s ' % (a,quoteattr(v)))
+		self.p(u'%s=%s ' % (a,quoteattr(v, {u'©':'&#169;'})))
 
 	def start_element(self, name, attrs):
-		self.p('<%s ' % name)
+		self.p(u'<%s ' % name)
 
 		match = ( name in self.elems )
 		
-		for a,v in attrs.iteritems():
+		for a,v in attrs:
 			if not ( match and a in self.attributes ):
 				self.write_attr(a,v)
 		
@@ -102,14 +101,50 @@
 			for i, attr in enumerate(self.attributes):
 				self.write_attr(attr, self.values[i])
 
-		self.p('>')
+		self.p(u'>')
 
+class ExpatRewriter(StreamRewriterBase):
+	"""
+	The only problem with this Expat based implementation is that it does not
+	handle entities doctypes etc properly so for example dev-java/skinlf fails.
+	"""
+	def process(self, in_stream):
+		from xml.parsers.expat import ParserCreate
+		parser = ParserCreate()
+
+		parser.StartElementHandler = self.start_element
+		parser.EndElementHandler = self.end_element
+		parser.CharacterDataHandler = self.char_data
+		parser.ParseFile(in_stream)
+		self.p(u'\n')
+	
+	def start_element(self, name, attrs):
+		StreamRewriterBase(self, name, attrs.iteritems())
+
 	def end_element(self,name):
-		self.p('</%s>' % name)
+		self.p(u'</%s>' % name)
 
 	def char_data(self,data):
 		self.p(escape(data))
 
+from xml.sax.saxutils import XMLGenerator
+class SaxRewriter(XMLGenerator, StreamRewriterBase):
+	"""
+	Using Sax gives us the support for writing back doctypes and all easily
+	and is only marginally slower than expat as it is just a tight layer over it
+	"""
+	def __init__(self, elems, attributes, values, index):
+		StreamRewriterBase.__init__(self, elems, attributes, values, index)
+		XMLGenerator.__init__(self, self.buffer, 'UTF-8')
+
+	def process(self, in_stream):
+		from xml.sax import parse
+		parse(in_stream, self)
+		self.p(u'\n')
+
+	def startElement(self, name, attrs):
+		self.start_element(name, attrs.items())
+
 if __name__ == '__main__':
 	usage = "XML Rewrite Python Module Version " + __version__ + "\n"
 	usage += "Copyright 2004 Gentoo Foundation\n"
@@ -166,8 +201,6 @@
 		error("You must give value for every attribute you are changing.")
 	# End Invalid Arguments Check
 	
-	import codecs
-	
 	def get_rewriter(options):
 		if options.index or options.doDelete:
 			# java-ant-2.eclass does not use these options so we can optimize the ExpatWriter 
@@ -175,23 +208,32 @@
 			rewriter = DomRewriter(options.elements, options.attributes, options.values, options.index)
 			print "Using DOM to rewrite the build.xml files"
 		else:
-			rewriter = ExpatRewriter(options.elements, options.attributes, options.values, options.index)
-			print "Using Expat to rewrite the build.xml files"
+			rewriter = SaxRewriter(options.elements, options.attributes, options.values, options.index)
+			print "Using Sax to rewrite the build.xml files"
 
 		return rewriter
 	
 	rewriter = get_rewriter(options)
 
 	if options.files:
+		import os
 		for file in options.files:
 			print "Rewriting %s" % file
-			f = open(file,"r")
+			# First parse the file into memory
+			# Tricks with cwd are needed for relative includes of other xml files to build.xml files
+			cwd = os.getcwd()
+			dirname = os.path.dirname(file)
+			if dirname != '': # for file = build.xml comes out as ''
+				os.chdir(os.path.dirname(file))
+			f = open(os.path.basename(file),"r")
 			rewriter.process(f)
+			os.chdir(cwd)
 			f.close()		
+			# Then write it back to the file
 			f = open(file, "w")
-			rewriter.write(codecs.getwriter('utf-8')(f))
+			rewriter.write(f)
 			f.close()
 	else:
 		rewriter.process(sys.stdin)
-		rewriter.write(codecs.getwriter('utf-8')(sys.stdout))
+		rewriter.write(sys.stdout)