summary | refs | log | tree | commit | diff
path: root/textproc/itstool
diff options
context:
space:
mode:
Diffstat (limited to 'textproc/itstool')
-rw-r--r-- textproc/itstool/Makefile 28
-rw-r--r-- textproc/itstool/distinfo 4
-rw-r--r-- textproc/itstool/files/patch-PR18 88
-rw-r--r-- textproc/itstool/files/patch-itstool.in 52
-rw-r--r-- textproc/itstool/files/patch-py-lxml 1490
5 files changed, 1507 insertions, 155 deletions
diff --git a/textproc/itstool/Makefile b/textproc/itstool/Makefile
index ed5fbd914450..eaae15ca3c38 100644
--- a/textproc/itstool/Makefile
+++ b/textproc/itstool/Makefile
@@ -1,28 +1,28 @@
PORTNAME= itstool
PORTVERSION= 2.0.7
-PORTREVISION= 2
+PORTREVISION= 3
CATEGORIES= textproc
-MASTER_SITES= http://files.itstool.org/itstool/
+MASTER_SITES= https://files.itstool.org/itstool/
-MAINTAINER= kwm@FreeBSD.org
-COMMENT= Make XML documents translatable through po files
-WWW= https://itstool.org/
+PATCH_SITES= https://github.com/itstool/itstool/commit/
+PATCHFILES= 32c7d07664dc37765100285d1202d488cd6a27e8.patch:-p1
-LICENSE= GPLv3
+MAINTAINER= sunpoet@FreeBSD.org
+COMMENT= Translate XML with PO files using W3C Internationalization Tag Set rules
+WWW= https://itstool.org/ \
+ https://github.com/itstool/itstool
-BUILD_DEPENDS= ${PYTHON_PKGNAMEPREFIX}libxml2-python>0:textproc/libxml2-python@${PY_FLAVOR}
-RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}libxml2-python>0:textproc/libxml2-python@${PY_FLAVOR}
+LICENSE= GPLv3
+LICENSE_FILE= ${WRKSRC}/COPYING.GPL3
-USES= python tar:bzip2
-GNU_CONFIGURE= yes
-GNU_CONFIGURE_MANPREFIX=${PREFIX}/share
+RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}lxml5>=0:devel/py-lxml5@${PY_FLAVOR}
-CONFLICTS_INSTALL= itstool22
+USES= autoreconf python tar:bzip2
+GNU_CONFIGURE= yes
NO_ARCH= yes
post-patch:
- @${REINPLACE_CMD} -e 's|@PYTHON@|${PYTHON_CMD}|g' \
- ${WRKSRC}/itstool.in
+ @${REINPLACE_CMD} -e 's|@PYTHON@|${PYTHON_CMD}|g' ${WRKSRC}/itstool.in
.include <bsd.port.mk>
diff --git a/textproc/itstool/distinfo b/textproc/itstool/distinfo
index 6f95f612faf6..f9d5bbff5dfa 100644
--- a/textproc/itstool/distinfo
+++ b/textproc/itstool/distinfo
@@ -1,3 +1,5 @@
-TIMESTAMP = 1632582980
+TIMESTAMP = 1756517770
SHA256 (itstool-2.0.7.tar.bz2) = 6b9a7cd29a12bb95598f5750e8763cee78836a1a207f85b74d8b3275b27e87ca
SIZE (itstool-2.0.7.tar.bz2) = 104648
+SHA256 (32c7d07664dc37765100285d1202d488cd6a27e8.patch) = 4e64a2e884f9d4cbc493732fcbde9f1d5bed534f9a66330bbcc1cbeb54808c1e
+SIZE (32c7d07664dc37765100285d1202d488cd6a27e8.patch) = 3095
diff --git a/textproc/itstool/files/patch-PR18 b/textproc/itstool/files/patch-PR18
deleted file mode 100644
index b4cafecdb0b1..000000000000
--- a/textproc/itstool/files/patch-PR18
+++ /dev/null
@@ -1,88 +0,0 @@
-# https://github.com/itstool/itstool/pull/18
-# https://github.com/itstool/itstool/issues/17
-
-From 98d04cdabf1721cb541ecd234c975f13fde4fa41 Mon Sep 17 00:00:00 2001
-From: Guido Trentalancia <guido@trentalancia.com>
-Date: Wed, 1 Nov 2017 18:20:36 +0100
-Subject: [PATCH 1/2] Revert "Be more careful about libxml2 memory management"
-
-This reverts commit 9b84c007a73e8275ca45762f1bfa3ab7c3a852e2.
----
- itstool.in | 13 ++-----------
- 1 file changed, 2 insertions(+), 11 deletions(-)
-
-diff --git itstool.in itstool.in
-index a16eba9..c1d0585 100755
---- itstool.in
-+++ itstool.in
-@@ -477,7 +477,6 @@ class Document (object):
- if load_dtd:
- ctxt.loadSubset(1)
- if keep_entities:
-- ctxt.loadSubset(1)
- ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD)
- ctxt.replaceEntities(0)
- else:
-@@ -1044,7 +1043,6 @@ class Document (object):
- if self._load_dtd:
- ctxt.loadSubset(1)
- if self._keep_entities:
-- ctxt.loadSubset(1)
- ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD)
- ctxt.replaceEntities(0)
- else:
-@@ -1071,9 +1069,7 @@ class Document (object):
- ph_node = msg.get_placeholder(child.name).node
- if self.has_child_elements(ph_node):
- self.merge_translations(translations, None, ph_node, strict=strict)
-- newnode = ph_node.copyNode(1)
-- newnode.setTreeDoc(self._doc)
-- child.replaceNode(newnode)
-+ child.replaceNode(ph_node)
- else:
- repl = self.get_translated(ph_node, translations, strict=strict, lang=lang)
- child.replaceNode(repl)
-@@ -1088,15 +1084,10 @@ class Document (object):
- (lang + ' ') if lang is not None else '',
- msgstr.encode('utf-8')))
- self._xml_err = ''
-- ctxt.doc().freeDoc()
- return node
- retnode = node.copyNode(2)
-- retnode.setTreeDoc(self._doc)
- for child in xml_child_iter(trnode):
-- newnode = child.copyNode(1)
-- newnode.setTreeDoc(self._doc)
-- retnode.addChild(newnode)
--
-+ retnode.addChild(child.copyNode(1))
- ctxt.doc().freeDoc()
- return retnode
-
-
-From 1549b6d12eb2f35e5c7f1b1856c21768e92ba794 Mon Sep 17 00:00:00 2001
-From: Guido Trentalancia <guido@trentalancia.com>
-Date: Wed, 1 Nov 2017 18:23:44 +0100
-Subject: [PATCH 2/2] Fix a segmentation fault bug introduced with version
- 2.0.4.
-
-https://github.com/itstool/itstool/issues/17
-
-This fix seems a lot easier than the previous reverted commit.
----
- itstool.in | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git itstool.in itstool.in
-index c1d0585..e492e95 100755
---- itstool.in
-+++ itstool.in
-@@ -1048,7 +1048,7 @@ class Document (object):
- else:
- ctxt.replaceEntities(1)
- ctxt.parseDocument()
-- trnode = ctxt.doc().getRootElement()
-+ trnode = ctxt.doc().getRootElement().copyNode(1)
- try:
- self._check_errors()
- except libxml2.parserError:
diff --git a/textproc/itstool/files/patch-itstool.in b/textproc/itstool/files/patch-itstool.in
deleted file mode 100644
index ea6ed79c3781..000000000000
--- a/textproc/itstool/files/patch-itstool.in
+++ /dev/null
@@ -1,52 +0,0 @@
-# Workaround https://github.com/itstool/itstool/issues/25
-# Obtained from Fedora
-
---- itstool.in.orig 2018-08-21 15:27:24 UTC
-+++ itstool.in
-@@ -44,9 +44,22 @@ if PY3:
- else:
- return str(s)
- ustr_type = str
-+ def pr_str(s):
-+ """Return a string that can be safely print()ed"""
-+ # Since print works on both bytes and unicode, just return the argument
-+ return s
- else:
- string_types = basestring,
- ustr = ustr_type = unicode
-+ def pr_str(s):
-+ """Return a string that can be safely print()ed"""
-+ if isinstance(s, str):
-+ # Since print works on str, just return the argument
-+ return s
-+ else:
-+ # print may not work on unicode if the output encoding cannot be
-+ # detected, so just encode with UTF-8
-+ return unicode.encode(s, 'utf-8')
-
- NS_ITS = 'http://www.w3.org/2005/11/its'
- NS_ITST = 'http://itstool.org/extensions/'
-@@ -1060,9 +1073,9 @@ class Document (object):
- if strict:
- raise
- else:
-- sys.stderr.write('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
-+ sys.stderr.write(pr_str('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
- (lang + ' ') if lang is not None else '',
-- msgstr.encode('utf-8')))
-+ msgstr)))
- self._xml_err = ''
- return node
- def scan_node(node):
-@@ -1087,9 +1100,9 @@ class Document (object):
- if strict:
- raise
- else:
-- sys.stderr.write('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
-+ sys.stderr.write(pr_str('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
- (lang + ' ') if lang is not None else '',
-- msgstr.encode('utf-8')))
-+ msgstr)))
- self._xml_err = ''
- ctxt.doc().freeDoc()
- return node
diff --git a/textproc/itstool/files/patch-py-lxml b/textproc/itstool/files/patch-py-lxml
new file mode 100644
index 000000000000..897eaf6349d7
--- /dev/null
+++ b/textproc/itstool/files/patch-py-lxml
@@ -0,0 +1,1490 @@
+Obtained from: https://github.com/itstool/itstool/pull/57
+
+--- configure.ac.orig 2021-09-25 15:09:48 UTC
++++ configure.ac
+@@ -12,7 +12,7 @@ AM_PATH_PYTHON([2.6])
+
+ AM_PATH_PYTHON([2.6])
+
+-py_module=libxml2
++py_module=lxml
+ AC_MSG_CHECKING(for python module $py_module)
+ echo "import $py_module" | $PYTHON - &>/dev/null
+ if test $? -ne 0; then
+--- itstool.in.orig 2025-08-30 01:59:59 UTC
++++ itstool.in
+@@ -24,7 +24,8 @@ import hashlib
+
+ import gettext
+ import hashlib
+-import libxml2
++from copy import deepcopy
++from lxml import etree
+ import optparse
+ import os
+ import os.path
+@@ -190,7 +191,7 @@ class Placeholder (object):
+ class Placeholder (object):
+ def __init__ (self, node):
+ self.node = node
+- self.name = ustr(node.name, 'utf-8')
++ self.name = ustr(xml_localname(node), 'utf-8')
+
+
+ class Message (object):
+@@ -243,32 +244,30 @@ class Message (object):
+ def add_start_tag (self, node):
+ if len(self._message) == 0 or not(isinstance(self._message[-1], string_types)):
+ self._message.append('')
+- if node.ns() is not None and node.ns().name is not None:
+- self._message[-1] += ('<%s:%s' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8')))
+- else:
+- self._message[-1] += ('<%s' % ustr(node.name, 'utf-8'))
+- for prop in xml_attr_iter(node):
+- name = prop.name
+- if prop.ns() is not None:
+- name = prop.ns().name + ':' + name
+- atval = prop.content
++ self._message[-1] += ('<%s' % ustr(xml_qname(node), 'utf-8'))
++ for name, atval in node.items():
++ qname = etree.QName(name)
++ if qname.namespace is not None:
++ # lxml doesn't expose the prefix of attributes, so we use
++ # an XPath expression to get the attribute's prefixed name.
++ # This is horribly inefficient.
++ expr = 'name(@*[local-name()="%s" and namespace-uri()="%s"])' % (
++ qname.localname, qname.namespace)
++ name = node.xpath(expr)
+ if not isinstance(atval, ustr_type):
+ atval = ustr(atval, 'utf-8')
+ atval = atval.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
+ self._message += " %s=\"%s\"" % (name, atval)
+- if node.children is not None:
++ if len(node) > 0 or node.text:
+ self._message[-1] += '>'
+ else:
+ self._message[-1] += '/>'
+
+ def add_end_tag (self, node):
+- if node.children is not None:
++ if len(node) > 0 or node.text:
+ if len(self._message) == 0 or not(isinstance(self._message[-1], string_types)):
+ self._message.append('')
+- if node.ns() is not None and node.ns().name is not None:
+- self._message[-1] += ('</%s:%s>' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8')))
+- else:
+- self._message[-1] += ('</%s>' % ustr(node.name, 'utf-8'))
++ self._message[-1] += ('</%s>' % ustr(xml_qname(node), 'utf-8'))
+
+ def is_empty (self):
+ return self._empty
+@@ -379,69 +378,86 @@ class Message (object):
+ return ret
+
+
+-def xml_child_iter (node):
+- child = node.children
+- while child is not None:
+- yield child
+- child = child.next
++def xml_localname (node):
++ return etree.QName(node.tag).localname
+
+-def xml_attr_iter (node):
+- attr = node.get_properties()
+- while attr is not None:
+- yield attr
+- attr = attr.next
++def xml_qname (node):
++ qname = etree.QName(node.tag).localname
++ if node.prefix is not None:
++ qname = node.prefix + ':' + qname
++ return qname
+
+-def xml_is_ns_name (node, ns, name):
+- if node.type != 'element':
+- return False
+- return node.name == name and node.ns() is not None and node.ns().content == ns
++def xml_content (node):
++ if isinstance(node, string_types):
++ return node
++ if isinstance(node, XMLAttr):
++ return node.parent.get(node.tag)
++ return etree.tostring(node, method='text', encoding='unicode')
+
++def xml_delete_node (node):
++ parent = node.getparent()
++ prev = node.getprevious()
++ tail = node.tail
++ if parent is not None:
++ parent.remove(node)
++ if prev is not None:
++ if prev.tail is None or re.fullmatch(r'\s+', prev.tail):
++ prev.tail = tail
++ else:
++ prev.tail += tail
++ elif parent is not None:
++ if parent.text is None or re.fullmatch(r'\s+', parent.text):
++ parent.text = tail
++ else:
++ parent.text += tail
++
+ def xml_get_node_path(node):
+ # The built-in nodePath() method only does numeric indexes
+ # when necessary for disambiguation. For various reasons,
+ # we prefer always using indexes.
+- name = node.name
+- if node.ns() is not None and node.ns().name is not None:
+- name = node.ns().name + ':' + name
+- if node.type == 'attribute':
++ name = xml_qname(node)
++ if isinstance(node, XMLAttr):
+ name = '@' + name
+ name = '/' + name
+- if node.type == 'element' and node.parent.type == 'element':
++ if node.getparent() is not None:
+ count = 1
+- prev = node.previousElementSibling()
++ prev = node.getprevious()
+ while prev is not None:
+- if prev.name == node.name:
+- if prev.ns() is None:
+- if node.ns() is None:
+- count += 1
+- else:
+- if node.ns() is not None:
+- if prev.ns().name == node.ns().name:
+- count += 1
+- prev = prev.previousElementSibling()
++ if prev.tag == node.tag:
++ count += 1
++ prev = prev.getprevious()
+ name = '%s[%i]' % (name, count)
+- if node.parent.type == 'element':
+- name = xml_get_node_path(node.parent) + name
++ name = xml_get_node_path(node.getparent()) + name
+ return name
+
+-def xml_error_catcher(doc, error):
+- doc._xml_err += " %s" % error
+
+-def fix_node_ns (node, nsdefs):
+- childnsdefs = nsdefs.copy()
+- nsdef = node.nsDefs()
+- while nsdef is not None:
+- nextnsdef = nsdef.next
+- if nsdef.name in nsdefs and nsdefs[nsdef.name] == nsdef.content:
+- node.removeNsDef(nsdef.content)
+- else:
+- childnsdefs[nsdef.name] = nsdef.content
+- nsdef = nextnsdef
+- for child in xml_child_iter(node):
+- if child.type == 'element':
+- fix_node_ns(child, childnsdefs)
++# lxml doesn't support attribute nodes, so we have to emulate them.
++class XMLAttr (object):
++ def __init__(self, element, tag):
++ self.parent = element
++ self.tag = tag
++ self.attrib = {}
++ self.sourceline = element.sourceline
+
++ def __repr__(self):
++ return '%s@%s' % (repr(self.parent), self.tag)
+
++ def __eq__(self, other):
++ return other and self.parent == other.parent and self.tag == other.tag
++
++ def __ne__(self, other):
++ return not self.__eq__(other)
++
++ def __hash__(self):
++ return hash(repr(self))
++
++ def getparent(self):
++ return self.parent
++
++ def get(self, default=None):
++ return default
++
++
+ class LocNote (object):
+ def __init__(self, locnote=None, locnoteref=None, locnotetype=None, space=False):
+ self.locnote = locnote
+@@ -464,82 +480,51 @@ class Document (object):
+
+ class Document (object):
+ def __init__ (self, filename, messages, load_dtd=False, keep_entities=False):
+- self._xml_err = ''
+- libxml2.registerErrorHandler(xml_error_catcher, self)
+- try:
+- ctxt = libxml2.createFileParserCtxt(filename)
+- except:
+- sys.stderr.write('Error: cannot open XML file %s\n' % filename)
+- sys.exit(1)
+- ctxt.lineNumbers(1)
+ self._load_dtd = load_dtd
+ self._keep_entities = keep_entities
+- if load_dtd:
+- ctxt.loadSubset(1)
+- if keep_entities:
+- ctxt.loadSubset(1)
+- ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD)
+- ctxt.replaceEntities(0)
+- else:
+- ctxt.replaceEntities(1)
+- ctxt.parseDocument()
++ parser = etree.XMLParser(load_dtd = load_dtd or keep_entities,
++ resolve_entities = not(keep_entities))
++ doc = etree.parse(filename, parser)
++ doc.xinclude()
+ self._filename = filename
+- self._doc = ctxt.doc()
++ self._doc = doc
+ self._localrules = []
+- def pre_process (node):
+- for child in xml_child_iter(node):
+- if xml_is_ns_name(child, 'http://www.w3.org/2001/XInclude', 'include'):
+- if child.nsProp('parse', None) == 'text':
+- child.xincludeProcessTree()
+- elif xml_is_ns_name(child, NS_ITS, 'rules'):
+- if child.hasNsProp('href', NS_XLINK):
+- href = child.nsProp('href', NS_XLINK)
+- fileref = os.path.join(os.path.dirname(filename), href)
+- if not os.path.exists(fileref):
+- if opts.itspath is not None:
+- for pathdir in opts.itspath:
+- fileref = os.path.join(pathdir, href)
+- if os.path.exists(fileref):
+- break
+- if not os.path.exists(fileref):
+- sys.stderr.write('Error: Could not locate ITS file %s\n' % href)
+- sys.exit(1)
+- hctxt = libxml2.createFileParserCtxt(fileref)
+- hctxt.replaceEntities(1)
+- hctxt.parseDocument()
+- root = hctxt.doc().getRootElement()
+- version = None
+- if root.hasNsProp('version', None):
+- version = root.nsProp('version', None)
+- else:
+- sys.stderr.write('Warning: ITS file %s missing version attribute\n' %
+- os.path.basename(href))
+- if version is not None and version not in ('1.0', '2.0'):
+- sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' %
+- (os.path.basename(href), root.nsProp('version', None)))
+- else:
+- self._localrules.append(root)
++ for child in doc.iter():
++ if child.tag == '{' + NS_ITS + '}rules':
++ href = child.get('{' + NS_XLINK + '}href')
++ if href is not None:
++ fileref = os.path.join(os.path.dirname(filename), href)
++ if not os.path.exists(fileref):
++ if opts.itspath is not None:
++ for pathdir in opts.itspath:
++ fileref = os.path.join(pathdir, href)
++ if os.path.exists(fileref):
++ break
++ if not os.path.exists(fileref):
++ sys.stderr.write('Error: Could not locate ITS file %s\n' % href)
++ sys.exit(1)
++ root = etree.parse(fileref).getroot()
+ version = None
+- if child.hasNsProp('version', None):
+- version = child.nsProp('version', None)
++ version = root.get('version')
++ if version is None:
++ sys.stderr.write('Warning: ITS file %s missing version attribute\n' %
++ os.path.basename(href))
++ elif version not in ('1.0', '2.0'):
++ sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' %
++ (os.path.basename(href), root.get('version')))
+ else:
+- root = child.doc.getRootElement()
+- if root.hasNsProp('version', NS_ITS):
+- version = root.nsProp('version', NS_ITS)
+- else:
+- sys.stderr.write('Warning: Local ITS rules missing version attribute\n')
+- if version is not None and version not in ('1.0', '2.0'):
+- sys.stderr.write('Warning: Skipping local ITS rules with unknown version %s\n' %
+- version)
+- else:
+- self._localrules.append(child)
+- pre_process(child)
+- pre_process(self._doc)
+- try:
+- self._check_errors()
+- except libxml2.parserError as e:
+- sys.stderr.write('Error: Could not parse document:\n%s\n' % ustr(e))
+- sys.exit(1)
++ self._localrules.append(root)
++ version = child.get('version')
++ if version is None:
++ root = child.getroottree()
++ version = root.get('{' + NS_ITS + '}version')
++ if version is None:
++ sys.stderr.write('Warning: Local ITS rules missing version attribute\n')
++ elif version not in ('1.0', '2.0'):
++ sys.stderr.write('Warning: Skipping local ITS rules with unknown version %s\n' %
++ version)
++ else:
++ self._localrules.append(child)
+ self._msgs = messages
+ self._its_translate_nodes = {}
+ self._its_within_text_nodes = {}
+@@ -556,13 +541,6 @@ class Document (object):
+
+ self._clear_cache()
+
+- def __del__ (self):
+- self._doc.freeDoc()
+-
+- def _check_errors(self):
+- if self._xml_err:
+- raise libxml2.parserError(self._xml_err)
+-
+ def _clear_cache(self):
+ self._its_translate_nodes_cache = {}
+ self._its_locale_filters_cache = {}
+@@ -570,123 +548,107 @@ class Document (object):
+
+ def get_its_params(self, rules):
+ params = {}
+- for child in xml_child_iter(rules):
+- if xml_is_ns_name(child, NS_ITS, 'param'):
+- params[child.nsProp('name', None)] = child.getContent()
++ for child in rules.iterchildren():
++ if child.tag == '{' + NS_ITS + '}param':
++ params[child.get('name')] = xml_content(child)
+ return params
+
+- def register_its_params(self, xpath, params, userparams={}):
+- for param in params:
+- if param in userparams:
+- xpath.xpathRegisterVariable(name, None, userparams[param])
++ def register_its_params(self, var, params, userparams={}):
++ for name in params:
++ if name in userparams:
++ var[name] = userparams[name]
+ else:
+- xpath.xpathRegisterVariable(name, None, params[param])
++ var[name] = params[name]
+
+ def apply_its_rule(self, rule, xpath):
+ self._clear_cache()
+- if rule.type != 'element':
+- return
+- if xml_is_ns_name(rule, NS_ITS, 'translateRule'):
+- if rule.nsProp('selector', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
+- self._its_translate_nodes[node] = rule.nsProp('translate', None)
+- elif xml_is_ns_name(rule, NS_ITS, 'withinTextRule'):
+- if rule.nsProp('selector', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
+- self._its_within_text_nodes[node] = rule.nsProp('withinText', None)
+- elif xml_is_ns_name(rule, NS_ITST, 'preserveSpaceRule'):
+- if rule.nsProp('selector', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
+- val = rule.nsProp('preserveSpace', None)
++ if rule.tag == '{' + NS_ITS + '}translateRule':
++ sel = rule.get('selector')
++ if sel is not None:
++ for node in self._try_xpath_eval(xpath, sel):
++ self._its_translate_nodes[node] = rule.get('translate')
++ elif rule.tag == '{' + NS_ITS + '}withinTextRule':
++ sel = rule.get('selector')
++ if sel is not None:
++ for node in self._try_xpath_eval(xpath, sel):
++ self._its_within_text_nodes[node] = rule.get('withinText')
++ elif rule.tag == '{' + NS_ITST + '}preserveSpaceRule':
++ sel = rule.get('selector')
++ if sel is not None:
++ for node in self._try_xpath_eval(xpath, sel):
++ val = rule.get('preserveSpace')
+ if val == 'yes':
+ self._its_preserve_space_nodes[node] = 'preserve'
+- elif xml_is_ns_name(rule, NS_ITS, 'preserveSpaceRule'):
+- if rule.nsProp('selector', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
+- self._its_preserve_space_nodes[node] = rule.nsProp('space', None)
+- elif xml_is_ns_name(rule, NS_ITS, 'localeFilterRule'):
+- if rule.nsProp('selector', None) is not None:
+- if rule.hasNsProp('localeFilterList', None):
+- lst = rule.nsProp('localeFilterList', None)
+- else:
+- lst = '*'
+- if rule.hasNsProp('localeFilterType', None):
+- typ = rule.nsProp('localeFilterType', None)
+- else:
+- typ = 'include'
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
++ elif rule.tag == '{' + NS_ITS + '}preserveSpaceRule':
++ sel = rule.get('selector')
++ if sel is not None:
++ for node in self._try_xpath_eval(xpath, sel):
++ self._its_preserve_space_nodes[node] = rule.get('space')
++ elif rule.tag == '{' + NS_ITS + '}localeFilterRule':
++ sel = rule.get('selector')
++ if sel is not None:
++ lst = rule.get('localeFilterList', '*')
++ typ = rule.get('localeFilterType', 'include')
++ for node in self._try_xpath_eval(xpath, sel):
+ self._its_locale_filters[node] = (lst, typ)
+- elif xml_is_ns_name(rule, NS_ITST, 'dropRule'):
+- if rule.nsProp('selector', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
+- self._itst_drop_nodes[node] = rule.nsProp('drop', None)
+- elif xml_is_ns_name(rule, NS_ITS, 'idValueRule'):
+- sel = rule.nsProp('selector', None)
+- idv = rule.nsProp('idValue', None)
++ elif rule.tag == '{' + NS_ITST + '}dropRule':
++ sel = rule.get('selector')
++ if sel is not None:
++ for node in self._try_xpath_eval(xpath, sel):
++ self._itst_drop_nodes[node] = rule.get('drop')
++ elif rule.tag == '{' + NS_ITS + '}idValueRule':
++ sel = rule.get('selector')
++ idv = rule.get('idValue')
+ if sel is not None and idv is not None:
+ for node in self._try_xpath_eval(xpath, sel):
+- try:
+- oldnode = xpath.contextNode()
+- except:
+- oldnode = None
+- xpath.setContextNode(node)
+- idvalue = self._try_xpath_eval(xpath, idv)
++ idvalue = self._try_xpath_eval(xpath, idv, node=node)
+ if isinstance(idvalue, string_types):
+ self._its_id_values[node] = idvalue
+ else:
+ for val in idvalue:
+- self._its_id_values[node] = val.content
++ self._its_id_values[node] = xml_content(val)
+ break
+- xpath.setContextNode(oldnode)
+ pass
+- elif xml_is_ns_name(rule, NS_ITST, 'contextRule'):
+- if rule.nsProp('selector', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
+- if rule.hasNsProp('context', None):
+- self._itst_contexts[node] = rule.nsProp('context', None)
+- elif rule.hasNsProp('contextPointer', None):
+- try:
+- oldnode = xpath.contextNode()
+- except:
+- oldnode = None
+- xpath.setContextNode(node)
+- ctxt = self._try_xpath_eval(xpath, rule.nsProp('contextPointer', None))
++ elif rule.tag == '{' + NS_ITST + '}contextRule':
++ sel = rule.get('selector')
++ if sel is not None:
++ for node in self._try_xpath_eval(xpath, sel):
++ ctxt = rule.get('context')
++ cp = rule.get('contextPointer')
++ if ctxt is not None:
++ self._itst_contexts[node] = ctxt
++ elif cp is not None:
++ ctxt = self._try_xpath_eval(xpath, cp, node=node)
+ if isinstance(ctxt, string_types):
+ self._itst_contexts[node] = ctxt
+ else:
+ for ctxt in ctxt:
+- self._itst_contexts[node] = ctxt.content
++ self._itst_contexts[node] = xml_content(ctxt)
+ break
+- xpath.setContextNode(oldnode)
+- elif xml_is_ns_name(rule, NS_ITS, 'locNoteRule'):
++ elif rule.tag == '{' + NS_ITS + '}locNoteRule':
+ locnote = None
+- notetype = rule.nsProp('locNoteType', None)
+- for child in xml_child_iter(rule):
+- if xml_is_ns_name(child, NS_ITS, 'locNote'):
+- locnote = LocNote(locnote=child.content, locnotetype=notetype)
+- break
++ notetype = rule.get('locNoteType')
++ for child in rule.iterchildren('{' + NS_ITS + '}locNote'):
++ locnote = LocNote(locnote=xml_content(child), locnotetype=notetype)
++ break
+ if locnote is None:
+- if rule.hasNsProp('locNoteRef', None):
+- locnote = LocNote(locnoteref=rule.nsProp('locNoteRef', None), locnotetype=notetype)
+- if rule.nsProp('selector', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
++ if 'locNoteRef' in rule.attrib:
++ locnote = LocNote(locnoteref=rule.get('locNoteRef'), locnotetype=notetype)
++ sel = rule.get('selector')
++ if sel is not None:
++ for node in self._try_xpath_eval(xpath, sel):
+ if locnote is not None:
+ self._its_loc_notes.setdefault(node, []).append(locnote)
+ else:
+- if rule.hasNsProp('locNotePointer', None):
+- sel = rule.nsProp('locNotePointer', None)
++ if 'locNotePointer' in rule.attrib:
++ sel = rule.get('locNotePointer')
+ ref = False
+- elif rule.hasNsProp('locNoteRefPointer', None):
+- sel = rule.nsProp('locNoteRefPointer', None)
++ elif 'locNoteRefPointer' in rule.attrib:
++ sel = rule.get('locNoteRefPointer')
+ ref = True
+ else:
+ continue
+- try:
+- oldnode = xpath.contextNode()
+- except:
+- oldnode = None
+- xpath.setContextNode(node)
+- note = self._try_xpath_eval(xpath, sel)
++ note = self._try_xpath_eval(xpath, sel, node=node)
+ if isinstance(note, string_types):
+ if ref:
+ nodenote = LocNote(locnoteref=note, locnotetype=notetype)
+@@ -695,55 +657,56 @@ class Document (object):
+ self._its_loc_notes.setdefault(node, []).append(nodenote)
+ else:
+ for note in note:
++ text = xml_content(note)
+ if ref:
+- nodenote = LocNote(locnoteref=note.content, locnotetype=notetype)
++ nodenote = LocNote(locnoteref=text, locnotetype=notetype)
+ else:
+- nodenote = LocNote(locnote=note.content, locnotetype=notetype,
++ nodenote = LocNote(locnote=text, locnotetype=notetype,
+ space=self.get_preserve_space(note))
+ self._its_loc_notes.setdefault(node, []).append(nodenote)
+ break
+- xpath.setContextNode(oldnode)
+- elif xml_is_ns_name(rule, NS_ITS, 'langRule'):
+- if rule.nsProp('selector', None) is not None and rule.nsProp('langPointer', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
+- try:
+- oldnode = xpath.contextNode()
+- except:
+- oldnode = None
+- xpath.setContextNode(node)
+- res = self._try_xpath_eval(xpath, rule.nsProp('langPointer', None))
++ elif rule.tag == '{' + NS_ITS + '}langRule':
++ sel = rule.get('selector')
++ lp = rule.get('langPointer')
++ if sel is not None and lp is not None:
++ for node in self._try_xpath_eval(xpath, sel):
++ res = self._try_xpath_eval(xpath, lp, node=node)
+ if len(res) > 0:
+- self._its_lang[node] = res[0].content
++ self._its_lang[node] = xml_content(res[0])
+ # We need to construct language attributes, not just read
+ # language information. Technically, langPointer could be
+ # any XPath expression. But if it looks like an attribute
+ # accessor, just use the attribute name.
+- if rule.nsProp('langPointer', None)[0] == '@':
+- self._itst_lang_attr[node] = rule.nsProp('langPointer', None)[1:]
+- xpath.setContextNode(oldnode)
+- elif xml_is_ns_name(rule, NS_ITST, 'credits'):
+- if rule.nsProp('appendTo', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('appendTo', None)):
++ # TODO: This should probably be skipped if langPointer
++ # equals '@xml:lang' which is the default.
++ if lp[0] == '@':
++ name = lp[1:]
++ if ':' in name:
++ prefix, lname = name.split(':', 2)
++ nsuri = node.nsmap.get(prefix)
++ if nsuri is None:
++ name = lname
++ else:
++ name = '{' + nsuri + '}' + lname
++ self._itst_lang_attr[node] = name
++ elif rule.tag == '{' + NS_ITST + '}credits':
++ sel = rule.get('appendTo')
++ if sel is not None:
++ for node in self._try_xpath_eval(xpath, sel):
+ self._itst_credits = (node, rule)
+ break
+- elif (xml_is_ns_name(rule, NS_ITS, 'externalResourceRefRule') or
+- xml_is_ns_name(rule, NS_ITST, 'externalRefRule')):
+- sel = rule.nsProp('selector', None)
+- if xml_is_ns_name(rule, NS_ITS, 'externalResourceRefRule'):
+- ptr = rule.nsProp('externalResourceRefPointer', None)
++ elif (rule.tag == '{' + NS_ITS + '}externalResourceRefRule' or
++ rule.tag == '{' + NS_ITST + '}externalRefRule'):
++ sel = rule.get('selector')
++ if rule.tag == '{' + NS_ITS + '}externalResourceRefRule':
++ ptr = rule.get('externalResourceRefPointer')
+ else:
+- ptr = rule.nsProp('refPointer', None)
++ ptr = rule.get('refPointer')
+ if sel is not None and ptr is not None:
+ for node in self._try_xpath_eval(xpath, sel):
+- try:
+- oldnode = xpath.contextNode()
+- except:
+- oldnode = None
+- xpath.setContextNode(node)
+- res = self._try_xpath_eval(xpath, ptr)
++ res = self._try_xpath_eval(xpath, ptr, node=node)
+ if len(res) > 0:
+- self._its_externals[node] = res[0].content
+- xpath.setContextNode(oldnode)
++ self._its_externals[node] = xml_content(res[0])
+
+ def apply_its_rules(self, builtins, userparams={}):
+ self._clear_cache()
+@@ -773,94 +736,59 @@ class Document (object):
+
+ def apply_its_file(self, filename, userparams={}):
+ self._clear_cache()
+- doc = libxml2.parseFile(filename)
+- root = doc.getRootElement()
+- if not xml_is_ns_name(root, NS_ITS, 'rules'):
++ parser = etree.XMLParser(resolve_entities = False)
++ root = etree.parse(filename, parser).getroot()
++ if root.tag != '{' + NS_ITS + '}rules':
+ return
+- version = None
+- if root.hasNsProp('version', None):
+- version = root.nsProp('version', None)
+- else:
++ version = root.get('version')
++ if version is None:
+ sys.stderr.write('Warning: ITS file %s missing version attribute\n' %
+ os.path.basename(filename))
+- if version is not None and version not in ('1.0', '2.0'):
++ elif version not in ('1.0', '2.0'):
+ sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' %
+- (os.path.basename(filename), root.nsProp('version', None)))
++ (os.path.basename(filename), root.get('version')))
+ return
+ matched = True
+- for match in xml_child_iter(root):
+- if xml_is_ns_name(match, NS_ITST, 'match'):
++ for match in root.iterchildren():
++ if match.tag == '{' + NS_ITST + '}match':
+ matched = False
+- xpath = self._doc.xpathNewContext()
+- par = match
+- nss = {}
+- while par is not None:
+- nsdef = par.nsDefs()
+- while nsdef is not None:
+- if nsdef.name is not None:
+- if nsdef.name not in nss:
+- nss[nsdef.name] = nsdef.content
+- xpath.xpathRegisterNs(nsdef.name, nsdef.content)
+- nsdef = nsdef.next
+- par = par.parent
+- if match.hasNsProp('selector', None):
+- if len(self._try_xpath_eval(xpath, match.nsProp('selector', None))) > 0:
++ sel = match.get('selector')
++ if sel is not None:
++ ns = { k: v for k, v in match.nsmap.items() if k is not None }
++ xpath = (ns, {})
++ if len(self._try_xpath_eval(xpath, sel)) > 0:
+ matched = True
+ break
+ if matched == False:
+ return
++ ns = { k: v for k, v in match.nsmap.items() if k is not None }
++ var = {}
+ params = self.get_its_params(root)
+- for rule in xml_child_iter(root):
+- xpath = self._doc.xpathNewContext()
+- par = match
+- nss = {}
+- while par is not None:
+- nsdef = par.nsDefs()
+- while nsdef is not None:
+- if nsdef.name is not None:
+- if nsdef.name not in nss:
+- nss[nsdef.name] = nsdef.content
+- xpath.xpathRegisterNs(nsdef.name, nsdef.content)
+- nsdef = nsdef.next
+- par = par.parent
+- self.register_its_params(xpath, params, userparams=userparams)
++ self.register_its_params(var, params, userparams=userparams)
++ xpath = (ns, var)
++ for rule in root.iterchildren():
+ self.apply_its_rule(rule, xpath)
+
+ def apply_local_its_rules(self, userparams={}):
+ self._clear_cache()
+ for rules in self._localrules:
+- def reg_ns(xpath, node):
+- if node.parent is not None:
+- reg_ns(xpath, node.parent)
+- nsdef = node.nsDefs()
+- while nsdef is not None:
+- if nsdef.name is not None:
+- xpath.xpathRegisterNs(nsdef.name, nsdef.content)
+- nsdef = nsdef.next
+- xpath = self._doc.xpathNewContext()
+- reg_ns(xpath, rules)
++ var = {}
+ params = self.get_its_params(rules)
+- self.register_its_params(xpath, params, userparams=userparams)
+- for rule in xml_child_iter(rules):
+- if rule.type != 'element':
+- continue
+- if rule.nsDefs() is not None:
+- rule_xpath = self._doc.xpathNewContext()
+- reg_ns(rule_xpath, rule)
+- self.register_its_params(rule_xpath, params, userparams=userparams)
+- else:
+- rule_xpath = xpath
++ self.register_its_params(var, params, userparams=userparams)
++ for rule in rules.iterchildren():
++ ns = { k: v for k, v in rule.nsmap.items() if k is not None }
++ rule_xpath = (ns, var)
+ self.apply_its_rule(rule, rule_xpath)
+
+ def _append_credits(self, parent, node, trdata):
+- if xml_is_ns_name(node, NS_ITST, 'for-each'):
+- select = node.nsProp('select', None)
++ if node.tag == '{' + NS_ITST + '}for-each':
++ select = node.get('select')
+ if select == 'years':
+ for year in trdata[2].split(','):
+- for child in xml_child_iter(node):
++ for child in node.iterchildren():
+ self._append_credits(parent, child, trdata + (year.strip(),))
+- elif xml_is_ns_name(node, NS_ITST, 'value-of'):
+- select = node.nsProp('select', None)
++ elif node.tag == '{' + NS_ITST + '}value-of':
++ select = node.get('select')
+ val = None
+ if select == 'name':
+ val = trdata[0]
+@@ -873,11 +801,20 @@ class Document (object):
+ if val is not None:
+ if not PY3:
+ val = val.encode('utf-8')
+- parent.addContent(val)
++ if len(parent):
++ if parent[-1].tail:
++ parent[-1].tail += val
++ else:
++ parent[-1].tail = val
++ else:
++ if parent.text:
++ parent.text += val
++ else:
++ parent.text = val
+ else:
+- newnode = node.copyNode(2)
+- parent.addChild(newnode)
+- for child in xml_child_iter(node):
++ newnode = parent.makeelement(node.tag, node.attrib)
++ parent.append(newnode)
++ for child in node.iterchildren():
+ self._append_credits(newnode, child, trdata)
+
+ def merge_credits(self, translations, language, node):
+@@ -895,7 +832,7 @@ class Document (object):
+ if not match:
+ continue
+ trdata = match.groups()
+- for node in xml_child_iter(self._itst_credits[1]):
++ for node in self._itst_credits[1].iterchildren():
+ self._append_credits(self._itst_credits[0], node, trdata)
+
+ def join_translations(self, translations, node=None, strict=False):
+@@ -903,29 +840,30 @@ class Document (object):
+ if node is None:
+ is_root = True
+ self.generate_messages(comments=False)
+- node = self._doc.getRootElement()
+- if node is None or node.type != 'element':
++ node = self._doc.getroot()
++ if node is None:
+ return
+ if self.get_itst_drop(node) == 'yes':
+- prev = node.prev
+- node.unlinkNode()
+- node.freeNode()
+- if prev is not None and prev.isBlankNode():
+- prev.unlinkNode()
+- prev.freeNode()
++ xml_delete_node(node)
+ return
+ msg = self._msgs.get_message_by_node(node)
+ if msg is None:
+- self.translate_attrs(node, node)
+- children = [child for child in xml_child_iter(node)]
+- for child in children:
++ #self.translate_attrs(node, node)
++ for child in node.iterchildren():
+ self.join_translations(translations, node=child, strict=strict)
+ else:
+- prevnode = None
+- if node.prev is not None and node.prev.type == 'text':
+- prevtext = node.prev.content
+- if re.sub(r'\s+', '', prevtext) == '':
+- prevnode = node.prev
++ prevtext = None
++ prev = node.getprevious()
++ if prev is None:
++ parent = node.getparent()
++ if parent is not None:
++ prevtext = parent.text
++ else:
++ prevtext = prev.tail
++ if prevtext is not None:
++ if not re.fullmatch(r'\s+', prevtext):
++ prevtext = None
++ i = 0
+ for lang in sorted(list(translations.keys()), reverse=True):
+ locale = self.get_its_locale_filter(node)
+ lmatch = match_locale_list(locale[0], lang)
+@@ -933,24 +871,25 @@ class Document (object):
+ continue
+ newnode = self.get_translated(node, translations[lang], strict=strict, lang=lang)
+ if newnode != node:
+- newnode.setProp('xml:lang', lang)
+- node.addNextSibling(newnode)
+- if prevnode is not None:
+- node.addNextSibling(prevnode.copyNode(0))
+- if is_root:
+- # Because of the way we create nodes and rewrite the document,
+- # we end up with lots of redundant namespace definitions. We
+- # kill them off in one fell swoop at the end.
+- fix_node_ns(node, {})
+- self._check_errors()
++ newnode.set('{' + NS_XML + '}lang', lang)
++ node.addnext(newnode)
++ if i == 0:
++ # Move tail to first new node
++ newnode.tail = node.tail
++ if prevtext is not None:
++ node.tail = prevtext
++ else:
++ if prevtext is not None:
++ newnode.tail = prevtext
++ i += 1
+
+ def merge_translations(self, translations, language, node=None, strict=False):
+ is_root = False
+ if node is None:
+ is_root = True
+ self.generate_messages(comments=False)
+- node = self._doc.getRootElement()
+- if node is None or node.type != 'element':
++ node = self._doc.getroot()
++ if node is None:
+ return
+ drop = False
+ locale = self.get_its_locale_filter(node)
+@@ -962,26 +901,23 @@ class Document (object):
+ if match_locale_list(locale[0], language):
+ drop = True
+ if self.get_itst_drop(node) == 'yes' or drop:
+- prev = node.prev
+- node.unlinkNode()
+- node.freeNode()
+- if prev is not None and prev.isBlankNode():
+- prev.unlinkNode()
+- prev.freeNode()
++ xml_delete_node(node)
+ return
+ if is_root:
+ self.merge_credits(translations, language, node)
+ msg = self._msgs.get_message_by_node(node)
+ if msg is None:
+ self.translate_attrs(node, node)
+- children = [child for child in xml_child_iter(node)]
+- for child in children:
++ for child in node.iterchildren():
+ self.merge_translations(translations, language, node=child, strict=strict)
+ else:
+ newnode = self.get_translated(node, translations, strict=strict, lang=language)
+ if newnode != node:
+ self.translate_attrs(node, newnode)
+- node.replaceNode(newnode)
++ newnode.tail = node.tail
++ parent = node.getparent()
++ if parent is not None:
++ parent.replace(node, newnode)
+ if is_root:
+ # Apply language attributes to untranslated nodes. We don't do
+ # this before processing, because then these attributes would
+@@ -998,31 +934,27 @@ class Document (object):
+ origlang = self._its_lang.get(lcpar)
+ if origlang is not None:
+ break
+- lcpar = lcpar.parent
++ lcpar = lcpar.getparent()
+ if origlang is not None:
+- lcnode.setProp(attr, origlang)
++ lcnode.set(attr, origlang)
+ # And then set the language attribute on the root node.
+ if language is not None:
+ attr = self._itst_lang_attr.get(node)
+ if attr is not None:
+- node.setProp(attr, language)
+- # Because of the way we create nodes and rewrite the document,
+- # we end up with lots of redundant namespace definitions. We
+- # kill them off in one fell swoop at the end.
+- fix_node_ns(node, {})
+- self._check_errors()
++ node.set(attr, language)
+
+ def translate_attrs(self, oldnode, newnode):
+- trans_attrs = [attr for attr in xml_attr_iter(oldnode) if self._its_translate_nodes.get(attr, 'no') == 'yes']
+- for attr in trans_attrs:
+- srccontent = attr.get_content()
++ for attrname, srccontent in oldnode.items():
++ attr = XMLAttr(oldnode, attrname)
++ if self._its_translate_nodes.get(attr, 'no') != 'yes':
++ continue
+ if not PY3:
+ srccontent = srccontent.decode('utf-8')
+ newcontent = translations.ugettext(srccontent)
+ if newcontent:
+ if not PY3:
+ newcontent = newcontent.encode('utf-8')
+- newnode.setProp(attr.name, newcontent)
++ newnode.set(attrname, newcontent)
+
+ def get_translated (self, node, translations, strict=False, lang=None):
+ msg = self._msgs.get_message_by_node(node)
+@@ -1037,106 +969,90 @@ class Document (object):
+ trans = translations.ugettext(msgstr)
+ if trans is None:
+ return node
+- nss = {}
+- def reg_ns(node, nss):
+- if node.parent is not None:
+- reg_ns(node.parent, nss)
+- nsdef = node.nsDefs()
+- while nsdef is not None:
+- nss[nsdef.name] = nsdef.content
+- nsdef = nsdef.next
+- reg_ns(node, nss)
+- nss['_'] = NS_BLANK
+- try:
+- blurb = node.doc.intSubset().serialize('utf-8')
+- except Exception:
+- blurb = ''
+- blurb += '<' + ustr(node.name, 'utf-8')
+- for nsname in list(nss.keys()):
++ blurb = ''
++ doc = node.getroottree()
++ if doc.docinfo.internalDTD:
++ # This is an ugly hack to serialize the DTD. We copy the
++ # document, replace the document element, serialize the
++ # document and remove the last line which contains the
++ # document element, leaving only the DTD.
++ copy = deepcopy(doc)
++ root = copy.getroot()
++ newroot = root.makeelement(root.tag)
++ copy._setroot(newroot)
++ blurb = re.sub('.*$', '', etree.tostring(copy, encoding='unicode'))
++ localname = ustr(xml_localname(node), 'utf-8')
++ blurb += '<' + localname
++ blurb += ' xmlns:_="%s"' % NS_BLANK
++ for nsname, nsuri in node.nsmap.items():
+ if nsname is None:
+- blurb += ' xmlns="%s"' % nss[nsname]
++ blurb += ' xmlns="%s"' % nsuri
+ else:
+- blurb += ' xmlns:%s="%s"' % (nsname, nss[nsname])
+- blurb += '>%s</%s>' % (trans, ustr(node.name, 'utf-8'))
+- if not PY3:
+- blurb = blurb.encode('utf-8')
+- ctxt = libxml2.createDocParserCtxt(blurb)
+- if self._load_dtd:
+- ctxt.loadSubset(1)
+- if self._keep_entities:
+- ctxt.loadSubset(1)
+- ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD)
+- ctxt.replaceEntities(0)
+- else:
+- ctxt.replaceEntities(1)
+- ctxt.parseDocument()
+- trnode = ctxt.doc().getRootElement()
++ blurb += ' xmlns:%s="%s"' % (nsname, nsuri)
++ blurb += '>%s</%s>' % (trans, localname)
++ parser = etree.XMLParser(load_dtd = self._load_dtd or self._keep_entities,
++ resolve_entities = not(self._keep_entities))
+ try:
+- self._check_errors()
+- except libxml2.parserError:
++ trnode = etree.fromstring(blurb, parser)
++ except:
+ if strict:
+ raise
+ else:
+ sys.stderr.write('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
+- (lang + ' ') if lang is not None else '',
+- msgstr.encode('utf-8')))
+- self._xml_err = ''
++ (lang + ' ') if lang is not None else '',
++ msgstr.encode('utf-8')))
+ return node
+- def scan_node(node):
+- children = [child for child in xml_child_iter(node)]
+- for child in children:
+- if child.type != 'element':
++ try:
++ for child in trnode.iterdescendants():
++ if isinstance(child, (etree._Entity, etree._Comment, etree._ProcessingInstruction)):
+ continue
+- if child.ns() is not None and child.ns().content == NS_BLANK:
+- ph_node = msg.get_placeholder(child.name).node
+- if self.has_child_elements(ph_node):
++ qname = etree.QName(child.tag)
++ if qname.namespace == NS_BLANK:
++ ph = msg.get_placeholder(qname.localname)
++ if ph is None:
++ sys.stderr.write('Warning: Could not find placeholder %s\n' % (
++ qname.localname))
++ continue
++ ph_node = ph.node
++ if len(ph_node):
+ self.merge_translations(translations, None, ph_node, strict=strict)
+- newnode = ph_node.copyNode(1)
+- newnode.setTreeDoc(self._doc)
+- child.replaceNode(newnode)
++ newnode = deepcopy(ph_node)
++ newnode.tail = child.tail
++ child.getparent().replace(child, newnode)
+ else:
+ repl = self.get_translated(ph_node, translations, strict=strict, lang=lang)
+- child.replaceNode(repl)
+- scan_node(child)
+- try:
+- scan_node(trnode)
++ repl.tail = child.tail
++ child.getparent().replace(child, repl)
+ except:
++ raise
+ if strict:
+ raise
+ else:
+ sys.stderr.write('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
+ (lang + ' ') if lang is not None else '',
+ msgstr.encode('utf-8')))
+- self._xml_err = ''
+- ctxt.doc().freeDoc()
+ return node
+- retnode = node.copyNode(2)
+- retnode.setTreeDoc(self._doc)
+- for child in xml_child_iter(trnode):
+- newnode = child.copyNode(1)
+- newnode.setTreeDoc(self._doc)
+- retnode.addChild(newnode)
++ retnode = self._doc.getroot().makeelement(node.tag, node.attrib, node.nsmap)
++ retnode.text = trnode.text
++ for child in trnode.iterchildren():
++ retnode.append(child)
+
+- ctxt.doc().freeDoc()
+ return retnode
+
+ def generate_messages(self, comments=True):
+ if self._itst_credits is not None:
+ self._msgs.add_credits()
+- for child in xml_child_iter(self._doc):
+- if child.type == 'element':
+- self.generate_message(child, None, comments=comments)
+- break
++ if self._doc is not None:
++ self.generate_message(self._doc.getroot(), None, comments=comments)
+
+ def generate_message(self, node, msg, comments=True, path=None):
+- if node.type in ('text', 'cdata') and msg is not None:
+- msg.add_text(node.content)
++ if isinstance(node, etree._Entity):
++ msg.add_entity_ref(node.name)
+ return
+- if node.type == 'entity_ref':
+- msg.add_entity_ref(node.name);
+- if node.type != 'element':
++ # Only allow elements
++ if isinstance(node, XMLAttr) or not isinstance(node.tag, str):
+ return
+- if node.hasNsProp('drop', NS_ITST) and node.nsProp('drop', NS_ITST) == 'yes':
++ if node.get('{' + NS_ITST + '}drop', 'no') == 'yes':
+ return
+ if self._itst_drop_nodes.get(node, 'no') == 'yes':
+ return
+@@ -1158,9 +1074,7 @@ class Document (object):
+ if msg is not None:
+ msg.add_placeholder(node)
+ msg = Message()
+- ctxt = None
+- if node.hasNsProp('context', NS_ITST):
+- ctxt = node.nsProp('context', NS_ITST)
++ ctxt = node.get('{' + NS_ITST + '}context')
+ if ctxt is None:
+ ctxt = self._itst_contexts.get(node)
+ if ctxt is not None:
+@@ -1173,27 +1087,38 @@ class Document (object):
+ msg.set_preserve_space()
+ if self.get_its_locale_filter(node) != ('*', 'include'):
+ msg.set_locale_filter(self.get_its_locale_filter(node))
+- msg.add_source('%s:%i' % (self._doc.name, node.lineNo()))
+- msg.add_marker('%s/%s' % (ustr(node.parent.name, 'utf-8'), ustr(node.name, 'utf-8')))
++ msg.add_source('%s:%i' % (self._doc.docinfo.URL, node.sourceline))
++ parent = node.getparent()
++ if parent is None:
++ ptag = '#root'
++ else:
++ ptag = xml_localname(parent)
++ msg.add_marker('%s/%s' % (ustr(ptag, 'utf-8'), ustr(xml_localname(node), 'utf-8')))
+ else:
+ withinText = True
+ msg.add_start_tag(node)
+
+ if not withinText:
+ # Add msg for translatable node attributes
+- for attr in xml_attr_iter(node):
++ for attrname, attrval in node.items():
++ attr = XMLAttr(node, attrname)
+ if self._its_translate_nodes.get(attr, 'no') == 'yes':
+ attr_msg = Message()
+ if self.get_preserve_space(attr):
+ attr_msg.set_preserve_space()
+- attr_msg.add_source('%s:%i' % (self._doc.name, node.lineNo()))
+- attr_msg.add_marker('%s/%s@%s' % (node.parent.name, node.name, attr.name))
+- attr_msg.add_text(attr.content)
++ attr_msg.add_source('%s:%i' % (self._doc.docinfo.URL, node.sourceline))
++ attr_msg.add_marker('%s/%s@%s' % (
++ xml_localname(node.getparent()),
++ xml_localname(node),
++ etree.QName(attrname).localname))
++ attr_msg.add_text(attrval)
+ if comments:
+ for locnote in self.get_its_loc_notes(attr):
+ comment = Comment(locnote)
+ comment.add_marker ('%s/%s@%s' % (
+- node.parent.name, node.name, attr.name))
++ xml_localname(node.getparent()),
++ xml_localname(node),
++ etree.QName(attrname).localname))
+ attr_msg.add_comment(comment)
+ self._msgs.add_message(attr_msg, attr)
+
+@@ -1204,15 +1129,16 @@ class Document (object):
+ for locnote in self.get_its_loc_notes(cnode, inherit=(not withinText)):
+ comment = Comment(locnote)
+ if withinText:
+- comment.add_marker('.%s/%s' % (path, cnode.name))
++ comment.add_marker('.%s/%s' % (path, xml_localname(cnode)))
+ msg.add_comment(comment)
+ hasnote = True
+ if hasnote or not is_unit:
+ break
+- cnode = cnode.parent
++ cnode = cnode.getparent()
+
+ self.generate_external_resource_message(node)
+- for attr in xml_attr_iter(node):
++ for attrname in node.keys():
++ attr = XMLAttr(node, attrname)
+ self.generate_external_resource_message(attr)
+ idvalue = self.get_its_id_value(attr)
+ if idvalue is not None:
+@@ -1220,9 +1146,13 @@ class Document (object):
+ msg.add_id_value(basename + '#' + idvalue)
+
+ if withinText:
+- path = path + '/' + node.name
+- for child in xml_child_iter(node):
++ path = path + '/' + node.tag
++ if node.text is not None and msg is not None:
++ msg.add_text(node.text)
++ for child in node.iterchildren():
+ self.generate_message(child, msg, comments=comments, path=path)
++ if child.tail is not None and msg is not None:
++ msg.add_text(child.tail)
+
+ if translate:
+ if is_unit and not msg.is_empty():
+@@ -1234,12 +1164,17 @@ class Document (object):
+ if node not in self._its_externals:
+ return
+ resref = self._its_externals[node]
+- if node.type == 'element':
+- translate = self.get_its_translate(node)
+- marker = '%s/%s' % (node.parent.name, node.name)
++ if isinstance(node, XMLAttr):
++ elem = node.getparent()
++ translate = self.get_its_translate(elem)
++ marker = '%s/%s/@%s' % (
++ xml_localname(elem.getparent()),
++ xml_localname(elem),
++ xml_localname(node))
+ else:
+- translate = self.get_its_translate(node.parent)
+- marker = '%s/%s/@%s' % (node.parent.parent.name, node.parent.name, node.name)
++ translate = self.get_its_translate(node)
++ marker = '%s/%s' % (xml_localname(node.getparent()),
++ xml_localname(node))
+ if translate == 'no':
+ return
+ msg = Message()
+@@ -1253,7 +1188,7 @@ class Document (object):
+ txt = "external ref='%s' md5='%s'" % (resref, filemd5)
+ msg.set_context('_')
+ msg.add_text(txt)
+- msg.add_source('%s:%i' % (self._doc.name, node.lineNo()))
++ msg.add_source('%s:%i' % (self._doc.docinfo.URL, node.sourceline))
+ msg.add_marker(marker)
+ msg.add_comment(Comment('This is a reference to an external file such as an image or'
+ ' video. When the file changes, the md5 hash will change to'
+@@ -1265,44 +1200,41 @@ class Document (object):
+ def is_translation_unit (self, node):
+ return self.get_its_within_text(node) != 'yes'
+
+- def has_child_elements(self, node):
+- return len([child for child in xml_child_iter(node) if child.type=='element'])
+-
+ def get_preserve_space (self, node):
+- while node.type in ('attribute', 'element'):
+- if node.getSpacePreserve() == 1:
++ while node is not None:
++ if node.get('{' + NS_XML + '}space') == 'preserve':
+ return True
+ if node in self._its_preserve_space_nodes:
+ return (self._its_preserve_space_nodes[node] == 'preserve')
+- node = node.parent
++ node = node.getparent()
+ return False
+
+ def get_its_translate(self, node):
+ if node in self._its_translate_nodes_cache:
+ return self._its_translate_nodes_cache[node]
+ val = None
+- if node.hasNsProp('translate', NS_ITS):
+- val = node.nsProp('translate', NS_ITS)
+- elif xml_is_ns_name(node, NS_ITS, 'span') and node.hasNsProp('translate', None):
+- val = node.nsProp('translate', None)
++ if '{' + NS_ITS + '}translate' in node.attrib:
++ val = node.get('{' + NS_ITS + '}translate')
++ elif node.tag == '{' + NS_ITS + '}span' and 'translate' in node.attrib:
++ val = node.get('translate')
+ elif node in self._its_translate_nodes:
+ val = self._its_translate_nodes[node]
+ if val is not None:
+ self._its_translate_nodes_cache[node] = val
+ return val
+- if node.type == 'attribute':
++ if isinstance(node, XMLAttr):
+ return 'no'
+- if node.parent.type == 'element':
+- parval = self.get_its_translate(node.parent)
++ if node.getparent() is not None:
++ parval = self.get_its_translate(node.getparent())
+ self._its_translate_nodes_cache[node] = parval
+ return parval
+ return 'yes'
+
+ def get_its_within_text(self, node):
+- if node.hasNsProp('withinText', NS_ITS):
+- val = node.nsProp('withinText', NS_ITS)
+- elif xml_is_ns_name(node, NS_ITS, 'span') and node.hasNsProp('withinText', None):
+- val = node.nsProp('withinText', None)
++ if '{' + NS_ITS + '}withinText' in node.attrib:
++ val = node.get('{' + NS_ITS + '}withinText')
++ elif node.tag == '{' + NS_ITS + '}span' and 'withinText' in node.attrib:
++ val = node.get('withinText')
+ else:
+ return self._its_within_text_nodes.get(node, 'no')
+ if val in ('yes', 'nested'):
+@@ -1312,73 +1244,63 @@ class Document (object):
+ def get_its_locale_filter(self, node):
+ if node in self._its_locale_filters_cache:
+ return self._its_locale_filters_cache[node]
+- if node.hasNsProp('localeFilterList', NS_ITS) or node.hasNsProp('localeFilterType', NS_ITS):
+- if node.hasNsProp('localeFilterList', NS_ITS):
+- lst = node.nsProp('localeFilterList', NS_ITS)
+- else:
+- lst = '*'
+- if node.hasNsProp('localeFilterType', NS_ITS):
+- typ = node.nsProp('localeFilterType', NS_ITS)
+- else:
+- typ = 'include'
++ if ('{' + NS_ITS + '}localeFilterList' in node.attrib or
++ '{' + NS_ITS + '}localeFilterType' in node.attrib):
++ lst = node.get('{' + NS_ITS + '}localeFilterList', '*')
++ typ = node.get('{' + NS_ITS + '}localeFilterType', 'include')
+ return (lst, typ)
+- if (xml_is_ns_name(node, NS_ITS, 'span') and
+- (node.hasNsProp('localeFilterList', None) or node.hasNsProp('localeFilterType', None))):
+- if node.hasNsProp('localeFilterList', None):
+- lst = node.nsProp('localeFilterList', None)
+- else:
+- lst = '*'
+- if node.hasNsProp('localeFilterType', None):
+- typ = node.nsProp('localeFilterType', None)
+- else:
+- typ = 'include'
++ if (node.tag == '{' + NS_ITS + '}span' and
++ ('localeFilterList' in node.attrib or 'localeFilterType' in node.attrib)):
++ lst = node.get('localeFilterList', '*')
++ typ = node.get('localeFilterType', 'include')
+ return (lst, typ)
+ if node in self._its_locale_filters:
+ return self._its_locale_filters[node]
+- if node.parent.type == 'element':
+- parval = self.get_its_locale_filter(node.parent)
++ if node.getparent() is not None:
++ parval = self.get_its_locale_filter(node.getparent())
+ self._its_locale_filters_cache[node] = parval
+ return parval
+ return ('*', 'include')
+
+ def get_itst_drop(self, node):
+- if node.hasNsProp('drop', NS_ITST) and node.nsProp('drop', NS_ITST) == 'yes':
++ if node.get('{' + NS_ITST + '}drop') == 'yes':
+ return 'yes'
+ if self._itst_drop_nodes.get(node, 'no') == 'yes':
+ return 'yes'
+ return 'no'
+
+ def get_its_id_value(self, node):
+- if node.hasNsProp('id', NS_XML):
+- return node.nsProp('id', NS_XML)
++ if '{' + NS_XML + '}id' in node.attrib:
++ return node.get('{' + NS_XML + '}id')
+ return self._its_id_values.get(node, None)
+
+ def get_its_loc_notes(self, node, inherit=True):
+ if node in self._its_loc_notes_cache:
+ return self._its_loc_notes_cache[node]
+ ret = []
+- if ( node.hasNsProp('locNote', NS_ITS) or
+- node.hasNsProp('locNoteRef', NS_ITS) or
+- node.hasNsProp('locNoteType', NS_ITS) ):
+- notetype = node.nsProp('locNoteType', NS_ITS)
+- if node.hasNsProp('locNote', NS_ITS):
+- ret.append(LocNote(locnote=node.nsProp('locNote', NS_ITS), locnotetype=notetype))
+- elif node.hasNsProp('locNoteRef', NS_ITS):
+- ret.append(LocNote(locnoteref=node.nsProp('locNoteRef', NS_ITS), locnotetype=notetype))
+- elif xml_is_ns_name(node, NS_ITS, 'span'):
+- if ( node.hasNsProp('locNote', None) or
+- node.hasNsProp('locNoteRef', None) or
+- node.hasNsProp('locNoteType', None) ):
+- notetype = node.nsProp('locNoteType', None)
+- if node.hasNsProp('locNote', None):
+- ret.append(LocNote(locnote=node.nsProp('locNote', None), locnotetype=notetype))
+- elif node.hasNsProp('locNoteRef', None):
+- ret.append(LocNote(locnoteref=node.nsProp('locNoteRef', None), locnotetype=notetype))
++ if ( '{' + NS_ITS + '}locNote' in node.attrib or
++ '{' + NS_ITS + '}locNoteRef' in node.attrib or
++ '{' + NS_ITS + '}locNoteType' in node.attrib ):
++ notetype = node.get('{' + NS_ITS + '}locNoteType')
++ if '{' + NS_ITS + '}locNote' in node.attrib:
++ ret.append(LocNote(locnote=node.get('{' + NS_ITS + '}locNote'), locnotetype=notetype))
++ elif '{' + NS_ITS + '}locNoteRef' in node.attrib:
++ ret.append(LocNote(locnoteref=node.get('{' + NS_ITS + '}locNoteRef'), locnotetype=notetype))
++ elif node.tag == '{' + NS_ITS + '}span':
++ if ( 'locNote' in node.attrib or
++ 'locNoteRef' in node.attrib or
++ 'locNoteType' in node.attrib ):
++ notetype = node.get('locNoteType')
++ if 'locNote' in node.attrib:
++ ret.append(LocNote(locnote=node.get('locNote'), locnotetype=notetype))
++ elif 'locNoteRef' in node.attrib:
++ ret.append(LocNote(locnoteref=node.get('locNoteRef'), locnotetype=notetype))
+ for locnote in reversed(self._its_loc_notes.get(node, [])):
+ ret.append(locnote)
+ if (len(ret) == 0 and inherit and
+- node.type != 'attribute' and node.parent is not None and node.parent.type == 'element'):
+- parval = self.get_its_loc_notes(node.parent)
++ not isinstance(node, XMLAttr) and
++ node.getparent() is not None):
++ parval = self.get_its_loc_notes(node.getparent())
+ self._its_loc_notes_cache[node] = parval
+ return parval
+ self._its_loc_notes_cache[node] = ret
+@@ -1386,12 +1308,12 @@ class Document (object):
+
+ def output_test_data(self, category, out, node=None):
+ if node is None:
+- node = self._doc.getRootElement()
++ node = self._doc.getroot()
+ compval = ''
+ if category == 'translate':
+ compval = 'translate="%s"' % self.get_its_translate(node)
+ elif category == 'withinText':
+- if node.type != 'attribute':
++ if not isinstance(node, XMLAttr):
+ compval = 'withinText="%s"' % self.get_its_within_text(node)
+ elif category == 'localeFilter':
+ compval = 'localeFilterList="%s"\tlocaleFilterType="%s"' % self.get_its_locale_filter(node)
+@@ -1422,16 +1344,32 @@ class Document (object):
+ out.write('%s\t%s\r\n' % (xml_get_node_path(node), compval))
+ else:
+ out.write('%s\r\n' % (xml_get_node_path(node)))
+- for attr in sorted(xml_attr_iter(node), key=ustr):
++ for attrname in sorted(node.keys(), key=ustr):
++ attr = XMLAttr(node, attrname)
+ self.output_test_data(category, out, attr)
+- for child in xml_child_iter(node):
+- if child.type == 'element':
+- self.output_test_data(category, out, child)
++ for child in node.iterchildren():
++ self.output_test_data(category, out, child)
+
+- @staticmethod
+- def _try_xpath_eval (xpath, expr):
++ def _try_xpath_eval (self, xpath, expr, node=None):
++ if node is None:
++ node = self._doc
++ elif isinstance(node, XMLAttr):
++ # lxml doesn't support attributes as XPath context nodes.
++ if expr == '.':
++ return [ node ]
++ sys.stderr.write('Warning: Unsupported XPath on attribute: %s\n' % expr)
++ return []
+ try:
+- return xpath.xpathEval(expr)
++ result = node.xpath(expr, namespaces=xpath[0], **xpath[1])
++ if not isinstance(result, str):
++ for i in range(len(result)):
++ val = result[i]
++ # Use lxml's "smart string" feature to determine
++ # the attribute node.
++ if (isinstance(val, etree._ElementUnicodeResult) and
++ val.is_attribute):
++ result[i] = XMLAttr(val.getparent(), val.attrname)
++ return result
+ except:
+ sys.stderr.write('Warning: Invalid XPath: %s\n' % expr)
+ return []
+@@ -1636,11 +1574,11 @@ if __name__ == '__main__':
+ raise
+ sys.stderr.write('Error: Could not merge translations:\n%s\n' % ustr(e))
+ sys.exit(1)
+- serialized = doc._doc.serialize('utf-8')
+- if PY3:
+- # For some reason, under py3, our serialized data is returns as a str.
+- # Let's encode it to bytes
+- serialized = serialized.encode('utf-8')
++ # lxml generates XML declarations with single quotes.
++ serialized = (
++ b'<?xml version="1.0" encoding="utf-8"?>\n' +
++ etree.tostring(doc._doc, encoding='utf-8') +
++ b'\n')
+ fout = out
+ fout_is_str = isinstance(fout, string_types)
+ if fout_is_str:
+@@ -1675,11 +1613,11 @@ if __name__ == '__main__':
+ for itsfile in opts.itsfile:
+ doc.apply_its_file(itsfile, userparams=userparams)
+ doc.join_translations(translations, strict=opts.strict)
+- serialized = doc._doc.serialize('utf-8')
+- if PY3:
+- # For some reason, under py3, our serialized data is returns as a str.
+- # Let's encode it to bytes
+- serialized = serialized.encode('utf-8')
++ # lxml generates XML declarations with single quotes.
++ serialized = (
++ b'<?xml version="1.0" encoding="utf-8"?>\n' +
++ etree.tostring(doc._doc, encoding='utf-8') +
++ b'\n')
+ out.write(serialized)
+ out.flush()
+