359 lines
12 KiB
Python
Executable File
359 lines
12 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# Copyright 2013 The Chromium Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
"""Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text
|
|
at 80 chars, enforcing standard attribute ordering, and standardizing
|
|
indentation.
|
|
|
|
This is quite a bit more complicated than just calling tree.toprettyxml();
|
|
we need additional customization, like special attribute ordering in tags
|
|
and wrapping text nodes, so we implement our own full custom XML pretty-printer.
|
|
"""
|
|
|
|
from __future__ import with_statement
|
|
|
|
import diffutil
|
|
import json
|
|
import logging
|
|
import shutil
|
|
import sys
|
|
import textwrap
|
|
import xml.dom.minidom
|
|
|
|
|
|
# Maximum width, in columns, of a line in the pretty-printed output.
WRAP_COLUMN = 80

# Desired order for tag attributes. Attributes are emitted in the order given
# here; PrettyPrintNode rejects any attribute that is not listed for its tag.
# { tag_name: [attribute_name, ...] }
ATTRIBUTE_ORDER = {
  'enum': ['name', 'type'],
  'histogram': ['name', 'enum', 'units'],
  'int': ['value', 'label'],
  'fieldtrial': ['name', 'separator', 'ordering'],
  'group': ['name', 'label'],
  'affected-histogram': ['name'],
  'with-group': ['name'],
}

# Tag names for top-level nodes whose children are printed at the same indent
# level as the tag itself.
TAGS_THAT_DONT_INDENT = [
  'histogram-configuration',
  'histograms',
  'fieldtrials',
  'enums',
]

# Extra vertical spacing rules for special tag names.
# {tag_name: (newlines_after_open, newlines_before_close, newlines_after_close)}
TAGS_THAT_HAVE_EXTRA_NEWLINE = {
  'histogram-configuration': (2, 1, 1),
  'histograms': (2, 1, 1),
  'fieldtrials': (2, 1, 1),
  'enums': (2, 1, 1),
  'histogram': (1, 1, 1),
  'enum': (1, 1, 1),
  'fieldtrial': (1, 1, 1),
}

# Tags that may be collapsed onto a single line when their content fits.
TAGS_THAT_ALLOW_SINGLE_LINE = [
  'summary',
  'int',
]


def _LowercaseNameKey(n):
  """Sort key: the node's 'name' attribute, lower-cased."""
  return n.attributes['name'].value.lower()


def _IntValueKey(n):
  """Sort key: the node's 'value' attribute parsed as an integer."""
  return int(n.attributes['value'].value)


# Tags whose children we want to alphabetize. The key is the parent tag name,
# and the value is a pair of (tag name of the children to sort, key function
# mapping each such child node to its sort key).
ALPHABETIZATION_RULES = {
  'histograms': ('histogram', _LowercaseNameKey),
  'enums': ('enum', _LowercaseNameKey),
  'enum': ('int', _IntValueKey),
  'fieldtrials': ('fieldtrial', _LowercaseNameKey),
  'fieldtrial': ('affected-histogram', _LowercaseNameKey),
}
|
|
|
|
|
|
class Error(Exception):
  """Raised when the XML cannot be pretty-printed.

  PrettyPrintNode raises this for unrecognized attributes and for unsupported
  node types; main() catches it and exits with a failure status.
  """
|
|
|
|
|
|
def LastLineLength(s):
  """Measures the final line of a (possibly multi-line) string.

  Args:
    s: A string that may contain newline characters.

  Returns:
    The number of characters following the last newline in s, or the length
    of the whole string when it contains no newline.
  """
  # rfind() yields -1 when there is no newline, so the slice then starts at
  # index 0 and spans the entire string.
  return len(s[s.rfind('\n') + 1:])
|
|
|
|
|
|
def XmlEscape(s):
  """XML-escapes the given string.

  Replaces the XML magic characters (& < > ") with their entity equivalents.
  The ampersand is handled first so that the ampersands introduced by the
  other replacements are not escaped a second time.

  Args:
    s: The string to escape.

  Returns:
    The escaped string.
  """
  s = s.replace('&', '&amp;').replace('<', '&lt;')
  s = s.replace('"', '&quot;').replace('>', '&gt;')
  return s
|
|
|
|
|
|
def PrettyPrintNode(node, indent=0):
  """Renders a minidom node and its subtree as pretty-printed XML text.

  Args:
    node: The minidom node to render.
    indent: The number of spaces by which this node is indented.

  Returns:
    The formatted XML as a string (including embedded newlines).

  Raises:
    Error if an element carries an unrecognized attribute, or if the node is
    of a type other than document, text, element or comment.
  """
  node_types = xml.dom.minidom.Node
  kind = node.nodeType

  # Document node: render every top-level child at indent 0.
  if kind == node_types.DOCUMENT_NODE:
    return '\n'.join(PrettyPrintNode(child) for child in node.childNodes)

  # Text node: re-wrap each paragraph to fit within WRAP_COLUMN.
  if kind == node_types.TEXT_NODE:
    padding = ' ' * indent
    wrapper = textwrap.TextWrapper(
        width=WRAP_COLUMN,
        initial_indent=padding,
        subsequent_indent=padding,
        break_on_hyphens=False,
        break_long_words=False)
    # Drop surrounding newlines, then any indentation common to all lines.
    text = textwrap.dedent(XmlEscape(node.data).strip('\n'))
    # Group the lines into paragraphs. A blank line closes the current
    # paragraph, unless that paragraph has not collected any line yet.
    paragraphs = [[]]
    for line in text.split('\n'):
      if line.strip() or not paragraphs[-1]:
        paragraphs[-1].append(line)
      else:
        paragraphs.append([])
    # Discard a trailing paragraph that never received a line.
    if paragraphs and not paragraphs[-1]:
      paragraphs.pop()
    # Paragraphs are separated by exactly one blank line in the output.
    return '\n\n'.join(wrapper.fill('\n'.join(p)) for p in paragraphs)

  # Element node.
  if kind == node_types.ELEMENT_NODE:
    tag = node.tagName
    newlines_after_open, newlines_before_close, newlines_after_close = (
        TAGS_THAT_HAVE_EXTRA_NEWLINE.get(tag, (1, 1, 0)))

    # Opening of the tag.
    s = ' ' * indent + '<' + tag

    # Room to reserve for the terminator: '>' with children, '/>' without.
    closing_chars = 1 if node.childNodes else 2

    # Attributes.
    attributes = node.attributes.keys()
    if attributes:
      # Put the attributes into the canonical order for this tag.
      if tag in ATTRIBUTE_ORDER:
        ordering = ATTRIBUTE_ORDER[tag]
        unrecognized_attributes = [a for a in attributes if a not in ordering]
        attributes = [a for a in ordering if a in attributes]
      else:
        # No ordering is registered for this tag, so none of its attributes
        # are recognized.
        unrecognized_attributes = attributes

      for a in unrecognized_attributes:
        logging.error(
            'Unrecognized attribute "%s" in tag "%s"' % (a, node.tagName))
      if unrecognized_attributes:
        raise Error()

      for a in attributes:
        # Splitting on whitespace collapses runs of whitespace in the value.
        words = XmlEscape(node.attributes[a].value).split()
        a_str = ' %s="%s"' % (a, ' '.join(words))
        # Move to a continuation line if the whole attribute would overflow.
        if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN:
          s += '\n' + ' ' * (indent + 3)
        # Emit everything up to and including the opening quote.
        s += ' %s="' % (a)
        value_indent_level = LastLineLength(s)
        # Emit the value word by word, wrapping where necessary. Wrapped
        # lines are aligned with the first character of the value.
        column = value_indent_level
        for i, word in enumerate(words):
          # Slightly conservative: not every word is actually followed by
          # the closing characters.
          if i > 0 and (column + len(word) + 1 + closing_chars > WRAP_COLUMN):
            s = s.rstrip()  # no trailing whitespace before the line break
            s += '\n' + ' ' * value_indent_level
            column = value_indent_level
          s += word + ' '
          column += len(word) + 1
        s = s.rstrip()  # drop the space appended after the last word
        s += '"'
      s = s.rstrip()  # remove any trailing whitespace

    # Children.
    if node.childNodes:
      s += '>'
      # Children of the designated top-level tags are not indented further.
      if tag in TAGS_THAT_DONT_INDENT:
        new_indent = indent
      else:
        new_indent = indent + 2

      # Render the children recursively, dropping whitespace-only results.
      rendered = [PrettyPrintNode(n, indent=new_indent)
                  for n in node.childNodes]
      rendered = [c for c in rendered if len(c.strip()) > 0]

      # Decide whether the whole element fits on a single line.
      close_tag = '</%s>' % tag
      space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)
      fits_on_one_line = (
          tag in TAGS_THAT_ALLOW_SINGLE_LINE and
          len(rendered) == 1 and
          len(rendered[0].strip()) <= space_left)
      if fits_on_one_line:
        s += rendered[0].strip()
      else:
        s += '\n' * newlines_after_open + '\n'.join(rendered)
        s += '\n' * newlines_before_close + ' ' * indent
      s += close_tag
    else:
      s += '/>'
    s += '\n' * newlines_after_close
    return s

  # Comment node: emitted verbatim, without indentation.
  if kind == node_types.COMMENT_NODE:
    return '<!--%s-->\n' % node.data

  # Any other node type is rejected. This could be a processing instruction
  # (<? ... ?>) or a CDATA section (<![CDATA[...]]>), neither of which is
  # legal in the histograms XML at present.
  logging.error('Ignoring unrecognized node data: %s' % node.toxml())
  raise Error()
|
|
|
|
|
|
def unsafeAppendChild(parent, child):
  """Appends child to parent without detaching it from its previous parent.

  The child's parentNode is cleared before calling appendChild(), so the
  child is not looked up in, or removed from, the childNodes list of its
  former parent. The caller must therefore discard the former parent (or
  have emptied it already), otherwise the DOM stops being a tree.

  Skipping that removal avoids traversing the old parent's entire list of
  children for each insertion, which removes an O(n^2) cost when moving many
  children.

  Args:
    parent: The node that receives the child.
    child: The node to append.
  """
  # Hide the previous parent from appendChild().
  child.parentNode = None
  parent.appendChild(child)
  # Make sure the child points at its new parent.
  child.parentNode = parent
|
|
|
|
|
|
def TransformByAlphabetizing(node):
  """Transform the given XML by alphabetizing specific node types according to
  the rules in ALPHABETIZATION_RULES.

  Children that are not subject to a rule (text, comments, other tags) are
  given the sort key of the closest preceding sorted sibling, so they travel
  with it. Children that precede the first sorted sibling always stay first.

  Args:
    node: The minidom node to transform.

  Returns:
    The minidom node, with children appropriately alphabetized. Note that the
    transformation is done in-place, i.e. the original minidom tree is modified
    directly.
  """
  if (node.nodeType != xml.dom.minidom.Node.ELEMENT_NODE or
      node.tagName not in ALPHABETIZATION_RULES):
    # Nothing to reorder at this level; only descend into the children.
    for c in node.childNodes:
      TransformByAlphabetizing(c)
    return node

  # Pair every child with a sort key to allow for custom sorting.
  subtag, key_function = ALPHABETIZATION_RULES[node.tagName]
  subnodes = []
  # Keys are tuples so that the "leading node" key (0,) orders before every
  # real key (1, key) regardless of the type or value of `key`. A plain -1
  # sentinel would sort after integer keys below -1 and cannot be compared
  # with string keys on Python 3.
  last_key = (0,)
  for c in node.childNodes:
    if (c.nodeType == xml.dom.minidom.Node.ELEMENT_NODE and
        c.tagName == subtag):
      last_key = (1, key_function(c))
    # Subnodes that we don't want to rearrange use the last node's key,
    # so they stay in the same relative position (the sort is stable).
    subnodes.append((c, last_key))

  # Sort the subnode list.
  subnodes.sort(key=lambda pair: pair[1])

  # Re-add the subnodes, transforming each recursively.
  while node.firstChild:
    node.removeChild(node.firstChild)
  for (c, _) in subnodes:
    unsafeAppendChild(node, TransformByAlphabetizing(c))
  return node
|
|
|
|
|
|
def PrettyPrint(raw_xml):
  """Pretty-print the given XML.

  The input is parsed with minidom, its children are reordered by
  TransformByAlphabetizing, and the resulting tree is rendered by
  PrettyPrintNode.

  Args:
    raw_xml: The contents of the histograms XML file, as a string.

  Returns:
    The pretty-printed version.
  """
  document = xml.dom.minidom.parseString(raw_xml)
  return PrettyPrintNode(TransformByAlphabetizing(document))
|
|
|
|
|
|
def main():
  """Pretty-prints histograms.xml in the current working directory.

  When '--presubmit' is among the command-line arguments the file is only
  checked: the process exits with status 1 if it is not already formatted.
  Otherwise the user is shown a diff and, on acceptance, the original is
  moved to histograms.before.pretty-print.xml and the pretty-printed
  version is written to histograms.xml.
  """
  logging.basicConfig(level=logging.INFO)

  presubmit = '--presubmit' in sys.argv

  logging.info('Loading histograms.xml...')
  with open('histograms.xml', 'rb') as f:
    original = f.read()

  # Check there are no CR ('\r') characters in the file.
  if '\r' in original:
    logging.info('DOS-style line endings (CR characters) detected - these are '
                 'not allowed. Please run dos2unix histograms.xml')
    sys.exit(1)

  logging.info('Pretty-printing...')
  try:
    pretty = PrettyPrint(original)
  except Error:
    logging.error('Aborting parsing due to fatal errors.')
    sys.exit(1)

  # Nothing to do when the file is already in canonical form.
  if original == pretty:
    logging.info('histograms.xml is correctly pretty-printed.')
    sys.exit(0)

  # In presubmit mode, report the problem without modifying anything.
  if presubmit:
    logging.info('histograms.xml is not formatted correctly; run '
                 'pretty_print.py to fix.')
    sys.exit(1)

  accepted = diffutil.PromptUserToAcceptDiff(
      original, pretty, 'Is the prettified version acceptable?')
  if not accepted:
    logging.error('Aborting')
    return

  logging.info('Creating backup file histograms.before.pretty-print.xml')
  shutil.move('histograms.xml', 'histograms.before.pretty-print.xml')

  logging.info('Writing new histograms.xml file')
  with open('histograms.xml', 'wb') as f:
    f.write(pretty)
|
|
|
|
|
# Run the pretty-printer only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
  main()
|