Create and use utility to strip namespaces from XML data

Namespaces cause problems with the XPath calls when dealing with some versions of PlayReadyHeaders.
This commit is contained in:
rlaphoenix 2022-12-26 22:35:23 +00:00
parent e0365ff2bb
commit 0e4275bd1e
2 changed files with 26 additions and 3 deletions

View File

@ -9,10 +9,10 @@ from uuid import UUID
import construct import construct
from construct import Container from construct import Container
from google.protobuf.message import DecodeError from google.protobuf.message import DecodeError
from lxml import etree
from pymp4.parser import Box from pymp4.parser import Box
from pywidevine.license_protocol_pb2 import WidevinePsshData from pywidevine.license_protocol_pb2 import WidevinePsshData
from pywidevine.utils import load_xml
class PSSH: class PSSH:
@ -215,7 +215,7 @@ class PSSH:
xml_string = self.init_data.decode("utf-16-le") xml_string = self.init_data.decode("utf-16-le")
# some of these init data has garbage(?) in front of it # some of these init data has garbage(?) in front of it
xml_string = xml_string[xml_string.index("<"):] xml_string = xml_string[xml_string.index("<"):]
xml = etree.fromstring(xml_string) xml = load_xml(xml_string)
header_version = xml.attrib["version"] header_version = xml.attrib["version"]
if header_version == "4.0.0.0": if header_version == "4.0.0.0":
key_ids = xml.xpath("DATA/KID/text()") key_ids = xml.xpath("DATA/KID/text()")

View File

@ -1,6 +1,9 @@
import shutil import shutil
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional, Union
from lxml import etree
from lxml.etree import ElementTree
def get_binary_path(*names: str) -> Optional[Path]: def get_binary_path(*names: str) -> Optional[Path]:
@ -10,3 +13,23 @@ def get_binary_path(*names: str) -> Optional[Path]:
if path: if path:
return Path(path) return Path(path)
return None return None
def load_xml(xml: Union[str, bytes]) -> ElementTree:
    """Parse XML data and strip all namespaces from the resulting tree.

    Every element tag and every attribute name is replaced by its local
    name, so later XPath queries can address nodes without namespace
    prefixes.

    Args:
        xml: The XML document. A ``str`` is encoded to UTF-8 bytes before
            parsing; ``bytes`` are handed to the parser untouched.

    Returns:
        The root element produced by ``etree.fromstring``, modified in place
        so that no tag or attribute name carries a namespace.
    """
    if not isinstance(xml, bytes):
        xml = xml.encode("utf8")

    root = etree.fromstring(xml)

    # iter() is the supported replacement for the deprecated getiterator().
    for elem in root.iter():
        if not isinstance(elem.tag, str):
            # Comments, processing instructions and similar nodes do not
            # have a string tag, so there is nothing to strip.
            continue

        elem.tag = etree.QName(elem).localname

        # Snapshot the attributes first: the mapping is modified in the loop.
        for name, value in list(elem.attrib.items()):
            local_name = etree.QName(name).localname
            if local_name == name:
                # Attribute is already un-namespaced.
                continue
            # Re-key the attribute under its local name. An existing
            # attribute with that same local name is overwritten.
            del elem.attrib[name]
            elem.attrib[local_name] = value

    # Drop namespace declarations that are no longer referenced.
    etree.cleanup_namespaces(root)
    return root