From 0e4275bd1e43bfa4c6db887eabecc9661d2548a1 Mon Sep 17 00:00:00 2001
From: rlaphoenix
Date: Mon, 26 Dec 2022 22:35:23 +0000
Subject: [PATCH] Create and use utility to strip namespaces from XML data

Namespaces cause problems with the xpath calls when dealing with
PlayReadyHeader's on some versions.
---
 pywidevine/pssh.py  |  4 ++--
 pywidevine/utils.py | 35 ++++++++++++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/pywidevine/pssh.py b/pywidevine/pssh.py
index e9167fc..e4a4a2e 100644
--- a/pywidevine/pssh.py
+++ b/pywidevine/pssh.py
@@ -9,10 +9,10 @@ from uuid import UUID
 import construct
 from construct import Container
 from google.protobuf.message import DecodeError
-from lxml import etree
 from pymp4.parser import Box
 
 from pywidevine.license_protocol_pb2 import WidevinePsshData
+from pywidevine.utils import load_xml
 
 
 class PSSH:
@@ -215,7 +215,7 @@ class PSSH:
             xml_string = self.init_data.decode("utf-16-le")
             # some of these init data has garbage(?) in front of it
             xml_string = xml_string[xml_string.index("<"):]
-            xml = etree.fromstring(xml_string)
+            xml = load_xml(xml_string)
             header_version = xml.attrib["version"]
             if header_version == "4.0.0.0":
                 key_ids = xml.xpath("DATA/KID/text()")
diff --git a/pywidevine/utils.py b/pywidevine/utils.py
index 6556d3e..559d2cf 100644
--- a/pywidevine/utils.py
+++ b/pywidevine/utils.py
@@ -1,6 +1,9 @@
 import shutil
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Union
+
+from lxml import etree
+from lxml.etree import ElementTree
 
 
 def get_binary_path(*names: str) -> Optional[Path]:
@@ -10,3 +13,33 @@ def get_binary_path(*names: str) -> Optional[Path]:
         if path:
             return Path(path)
     return None
+
+
+def load_xml(xml: Union[str, bytes]) -> ElementTree:
+    """
+    Parse XML data and return its root element with all namespaces stripped.
+
+    Namespaced names break prefix-less xpath queries such as
+    "DATA/KID/text()", so every element tag and attribute name is replaced
+    by its local name and the unused namespace declarations are dropped.
+
+    A str is encoded as UTF-8 before parsing; bytes are parsed as-is.
+    """
+    if not isinstance(xml, bytes):
+        xml = xml.encode("utf8")
+    root = etree.fromstring(xml)
+    for elem in root.iter():
+        if not isinstance(elem.tag, str):
+            # comments, processing instructions and entities have a callable
+            # as their tag instead of a name, so there is nothing to strip
+            continue
+        elem.tag = etree.QName(elem).localname
+        # items() returns a list, so attributes can be replaced while looping
+        for name, value in elem.attrib.items():
+            local_name = etree.QName(name).localname
+            if local_name == name:
+                continue
+            del elem.attrib[name]
+            elem.attrib[local_name] = value
+    etree.cleanup_namespaces(root)
+    return root