Create and use utility to strip namespaces from XML data

Namespaces cause problems with the xpath calls when dealing with PlayReadyHeaders on some versions.
This commit is contained in:
rlaphoenix 2022-12-26 22:35:23 +00:00
parent e0365ff2bb
commit 0e4275bd1e
2 changed files with 26 additions and 3 deletions

View File

@ -9,10 +9,10 @@ from uuid import UUID
import construct
from construct import Container
from google.protobuf.message import DecodeError
from lxml import etree
from pymp4.parser import Box
from pywidevine.license_protocol_pb2 import WidevinePsshData
from pywidevine.utils import load_xml
class PSSH:
@ -215,7 +215,7 @@ class PSSH:
xml_string = self.init_data.decode("utf-16-le")
# some of these init data has garbage(?) in front of it
xml_string = xml_string[xml_string.index("<"):]
xml = etree.fromstring(xml_string)
xml = load_xml(xml_string)
header_version = xml.attrib["version"]
if header_version == "4.0.0.0":
key_ids = xml.xpath("DATA/KID/text()")

View File

@ -1,6 +1,9 @@
import shutil
from pathlib import Path
from typing import Optional
from typing import Optional, Union
from lxml import etree
from lxml.etree import ElementTree
def get_binary_path(*names: str) -> Optional[Path]:
@ -10,3 +13,23 @@ def get_binary_path(*names: str) -> Optional[Path]:
if path:
return Path(path)
return None
def load_xml(xml: Union[str, bytes]) -> etree._Element:
    """
    Parse XML data and return its root element with all namespaces removed.

    Every element tag and attribute name in the tree is rewritten to its
    local name, so callers can use plain, prefix-free XPath expressions
    such as ``DATA/KID`` regardless of which namespace the document declares.

    Parameters:
        xml: The XML document, either as text or as already-encoded bytes.
            Text is encoded as UTF-8 before parsing.

    Returns:
        The root element of the parsed tree (what ``etree.fromstring``
        returns, not an ElementTree wrapper), modified in place.

    Raises:
        Whatever ``etree.fromstring`` raises for malformed input; nothing is
        caught here.

    Note:
        If a namespaced attribute has the same local name as another
        attribute on the same element, the one processed last overwrites
        the earlier value.
    """
    if not isinstance(xml, bytes):
        # NOTE(review): presumably done so the parser always receives bytes;
        # confirm behaviour for text that carries an XML encoding declaration.
        xml = xml.encode("utf8")

    root = etree.fromstring(xml)

    # iter() is the supported replacement for the deprecated getiterator().
    for elem in root.iter():
        if not isinstance(elem.tag, str):
            # Non-element nodes (e.g. comments) do not have a string tag and
            # therefore have no qualified name to strip.
            continue

        elem.tag = etree.QName(elem).localname

        # Iterate over a snapshot because the mapping is mutated in the loop.
        for name, value in list(elem.attrib.items()):
            local_name = etree.QName(name).localname
            if local_name == name:
                # Attribute is not namespaced; leave it where it is.
                continue
            del elem.attrib[name]
            elem.attrib[local_name] = value

    # Drop the namespace declarations that are no longer referenced.
    etree.cleanup_namespaces(root)
    return root