From: Dan Prince Date: Mon, 4 Feb 2013 03:25:12 +0000 (-0500) Subject: Add a safe_minidom_parse_string function. X-Git-Url: https://review.openstack.org/gitweb?p=openstack%2Fcinder.git;a=commitdiff_plain;h=fcf249d1f06938280d841cb13b61556971a58e0c Add a safe_minidom_parse_string function. Adds a new utils.safe_minidom_parse_string function and updates external API facing Cinder modules to use it. This ensures we have safe defaults on our incoming API XML parsing. Internally safe_minidom_parse_string uses a ProtectedExpatParser class to disable DTDs and entities from being parsed when using minidom. Fixes LP Bug #1100282 for Folsom. Change-Id: Ie8ae7a6e12fbf51de406d10ca21072140374abf5 --- diff --git a/cinder/api/openstack/common.py b/cinder/api/openstack/common.py index 255a0a7..91e488f 100644 --- a/cinder/api/openstack/common.py +++ b/cinder/api/openstack/common.py @@ -25,6 +25,7 @@ from cinder import flags from cinder.api.openstack import wsgi from cinder.api.openstack import xmlutil from cinder.openstack.common import log as logging +from cinder import utils LOG = logging.getLogger(__name__) @@ -247,7 +248,7 @@ class ViewBuilder(object): class MetadataDeserializer(wsgi.MetadataXMLDeserializer): def deserialize(self, text): - dom = minidom.parseString(text) + dom = utils.safe_minidom_parse_string(text) metadata_node = self.find_first_child_named(dom, "metadata") metadata = self.extract_metadata(metadata_node) return {'body': {'metadata': metadata}} @@ -255,7 +256,7 @@ class MetadataDeserializer(wsgi.MetadataXMLDeserializer): class MetaItemDeserializer(wsgi.MetadataXMLDeserializer): def deserialize(self, text): - dom = minidom.parseString(text) + dom = utils.safe_minidom_parse_string(text) metadata_item = self.extract_metadata(dom) return {'body': {'meta': metadata_item}} @@ -273,7 +274,7 @@ class MetadataXMLDeserializer(wsgi.XMLDeserializer): return metadata def _extract_metadata_container(self, datastring): - dom = minidom.parseString(datastring) + dom = 
utils.safe_minidom_parse_string(datastring) metadata_node = self.find_first_child_named(dom, "metadata") metadata = self.extract_metadata(metadata_node) return {'body': {'metadata': metadata}} @@ -285,7 +286,7 @@ class MetadataXMLDeserializer(wsgi.XMLDeserializer): return self._extract_metadata_container(datastring) def update(self, datastring): - dom = minidom.parseString(datastring) + dom = utils.safe_minidom_parse_string(datastring) metadata_item = self.extract_metadata(dom) return {'body': {'meta': metadata_item}} diff --git a/cinder/api/openstack/volume/contrib/volume_actions.py b/cinder/api/openstack/volume/contrib/volume_actions.py index 5c62766..eac8f17 100644 --- a/cinder/api/openstack/volume/contrib/volume_actions.py +++ b/cinder/api/openstack/volume/contrib/volume_actions.py @@ -13,7 +13,6 @@ # under the License. import webob -from xml.dom import minidom from cinder.api.openstack import extensions from cinder.api.openstack import wsgi @@ -23,6 +22,7 @@ from cinder import exception from cinder import flags from cinder.openstack.common import log as logging from cinder.openstack.common.rpc import common as rpc_common +from cinder import utils FLAGS = flags.FLAGS @@ -54,7 +54,7 @@ class VolumeToImageSerializer(xmlutil.TemplateBuilder): class VolumeToImageDeserializer(wsgi.XMLDeserializer): """Deserializer to handle xml-formatted requests""" def default(self, string): - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) action_node = dom.childNodes[0] action_name = action_node.tagName diff --git a/cinder/api/openstack/volume/volumes.py b/cinder/api/openstack/volume/volumes.py index 2c6852b..675c51f 100644 --- a/cinder/api/openstack/volume/volumes.py +++ b/cinder/api/openstack/volume/volumes.py @@ -17,7 +17,6 @@ from webob import exc import webob -from xml.dom import minidom from cinder.api.openstack import common from cinder.api.openstack import wsgi @@ -194,7 +193,7 @@ class CreateDeserializer(CommonDeserializer): def 
default(self, string): """Deserialize an xml-formatted volume create request.""" - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) volume = self._extract_volume(dom) return {'body': {'volume': volume}} diff --git a/cinder/api/openstack/wsgi.py b/cinder/api/openstack/wsgi.py index fa0baea..6a19e02 100644 --- a/cinder/api/openstack/wsgi.py +++ b/cinder/api/openstack/wsgi.py @@ -24,6 +24,7 @@ from cinder import exception from cinder import wsgi from cinder.openstack.common import log as logging from cinder.openstack.common import jsonutils +from cinder import utils from lxml import etree from xml.dom import minidom @@ -151,7 +152,7 @@ class XMLDeserializer(TextDeserializer): plurals = set(self.metadata.get('plurals', {})) try: - node = minidom.parseString(datastring).childNodes[0] + node = utils.safe_minidom_parse_string(datastring).childNodes[0] return {node.nodeName: self._from_xml_node(node, plurals)} except expat.ExpatError: msg = _("cannot understand XML") @@ -548,7 +549,7 @@ def action_peek_json(body): def action_peek_xml(body): """Determine action to invoke.""" - dom = minidom.parseString(body) + dom = utils.safe_minidom_parse_string(body) action_node = dom.childNodes[0] return action_node.tagName diff --git a/cinder/tests/test_utils.py b/cinder/tests/test_utils.py index 92be797..c7cf47d 100644 --- a/cinder/tests/test_utils.py +++ b/cinder/tests/test_utils.py @@ -423,6 +423,39 @@ class GenericUtilsTestCase(test.TestCase): result = utils.service_is_up(service) self.assertFalse(result) + def test_safe_parse_xml(self): + + normal_body = (""" + <?xml version="1.0" ?><foo> + <bar> + <v1>hey</v1> + <v2>there</v2> + </bar> + </foo>""").strip() + + def killer_body(): + return (("""<!DOCTYPE x [ + <!ENTITY a "%(a)s"> + <!ENTITY b "%(b)s"> + <!ENTITY c "%(c)s">]> + <foo> + <bar> + <v1>%(d)s</v1> + </bar> + </foo>""") % { + 'a': 'A' * 10, + 'b': '&a;' * 10, + 'c': '&b;' * 10, + 'd': '&c;' * 9999, + }).strip() + + dom = utils.safe_minidom_parse_string(normal_body) + self.assertEqual(normal_body, str(dom.toxml())) + + self.assertRaises(ValueError, + utils.safe_minidom_parse_string, + killer_body()) + def 
test_xhtml_escape(self): self.assertEqual('&quot;foo&quot;', utils.xhtml_escape('"foo"')) self.assertEqual('&apos;foo&apos;', utils.xhtml_escape("'foo'")) diff --git a/cinder/utils.py b/cinder/utils.py index 100bbd6..6733369 100644 --- a/cinder/utils.py +++ b/cinder/utils.py @@ -42,6 +42,10 @@ import time import types import uuid import warnings +from xml.dom import minidom +from xml.parsers import expat +from xml import sax +from xml.sax import expatreader from xml.sax import saxutils from eventlet import event @@ -542,6 +546,46 @@ class LoopingCall(object): return self.done.wait() +class ProtectedExpatParser(expatreader.ExpatParser): + """An expat parser which disables DTD's and entities by default.""" + + def __init__(self, forbid_dtd=True, forbid_entities=True, + *args, **kwargs): + # Python 2.x old style class + expatreader.ExpatParser.__init__(self, *args, **kwargs) + self.forbid_dtd = forbid_dtd + self.forbid_entities = forbid_entities + + def start_doctype_decl(self, name, sysid, pubid, has_internal_subset): + raise ValueError("Inline DTD forbidden") + + def entity_decl(self, entityName, is_parameter_entity, value, base, + systemId, publicId, notationName): + raise ValueError("<!ENTITY> forbidden") + + def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name): + # expat 1.2 + raise ValueError("<!ENTITY> forbidden") + + def reset(self): + expatreader.ExpatParser.reset(self) + if self.forbid_dtd: + self._parser.StartDoctypeDeclHandler = self.start_doctype_decl + if self.forbid_entities: + self._parser.EntityDeclHandler = self.entity_decl + self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl + + +def safe_minidom_parse_string(xml_string): + """Parse an XML string using minidom safely. + + """ + try: + return minidom.parseString(xml_string, parser=ProtectedExpatParser()) + except sax.SAXParseException as se: + raise expat.ExpatError() + + def xhtml_escape(value): """Escapes a string so it is valid within XML or XHTML.