xref: /openbmc/qemu/scripts/xml-preprocess.py (revision 40886c4cf58fdadaa600dabb8c86c9b4394b9ac8)
1*9b286e76SMarc-André Lureau#!/usr/bin/env python3
2*9b286e76SMarc-André Lureau#
3*9b286e76SMarc-André Lureau# Copyright (c) 2017-2019 Tony Su
4*9b286e76SMarc-André Lureau# Copyright (c) 2023 Red Hat, Inc.
5*9b286e76SMarc-André Lureau#
6*9b286e76SMarc-André Lureau# SPDX-License-Identifier: MIT
7*9b286e76SMarc-André Lureau#
8*9b286e76SMarc-André Lureau# Adapted from https://github.com/peitaosu/XML-Preprocessor
9*9b286e76SMarc-André Lureau#
"""This is an XML preprocessor which can be used to process your XML file before
you use it: it handles conditional statements, variables, iteration
statements, errors/warnings, command execution, etc.
13*9b286e76SMarc-André Lureau
14*9b286e76SMarc-André Lureau## XML Schema
15*9b286e76SMarc-André Lureau
16*9b286e76SMarc-André Lureau### Include Files
17*9b286e76SMarc-André Lureau```
18*9b286e76SMarc-André Lureau<?include path/to/file ?>
19*9b286e76SMarc-André Lureau```
20*9b286e76SMarc-André Lureau
21*9b286e76SMarc-André Lureau### Variables
22*9b286e76SMarc-André Lureau```
23*9b286e76SMarc-André Lureau$(env.EnvironmentVariable)
24*9b286e76SMarc-André Lureau
25*9b286e76SMarc-André Lureau$(sys.SystemVariable)
26*9b286e76SMarc-André Lureau
27*9b286e76SMarc-André Lureau$(var.CustomVariable)
28*9b286e76SMarc-André Lureau```
29*9b286e76SMarc-André Lureau
30*9b286e76SMarc-André Lureau### Conditional Statements
31*9b286e76SMarc-André Lureau```
32*9b286e76SMarc-André Lureau<?if ?>
33*9b286e76SMarc-André Lureau
34*9b286e76SMarc-André Lureau<?ifdef ?>
35*9b286e76SMarc-André Lureau
36*9b286e76SMarc-André Lureau<?ifndef ?>
37*9b286e76SMarc-André Lureau
38*9b286e76SMarc-André Lureau<?else?>
39*9b286e76SMarc-André Lureau
40*9b286e76SMarc-André Lureau<?elseif ?>
41*9b286e76SMarc-André Lureau
42*9b286e76SMarc-André Lureau<?endif?>
43*9b286e76SMarc-André Lureau```
44*9b286e76SMarc-André Lureau
45*9b286e76SMarc-André Lureau### Iteration Statements
46*9b286e76SMarc-André Lureau```
47*9b286e76SMarc-André Lureau<?foreach VARNAME in 1;2;3?>
48*9b286e76SMarc-André Lureau    $(var.VARNAME)
49*9b286e76SMarc-André Lureau<?endforeach?>
50*9b286e76SMarc-André Lureau```
51*9b286e76SMarc-André Lureau
52*9b286e76SMarc-André Lureau### Errors and Warnings
53*9b286e76SMarc-André Lureau```
54*9b286e76SMarc-André Lureau<?error "This is error message!" ?>
55*9b286e76SMarc-André Lureau
56*9b286e76SMarc-André Lureau<?warning "This is warning message!" ?>
57*9b286e76SMarc-André Lureau```
58*9b286e76SMarc-André Lureau
59*9b286e76SMarc-André Lureau### Commands
60*9b286e76SMarc-André Lureau```
<?cmd "echo hello world" ?>
62*9b286e76SMarc-André Lureau```
63*9b286e76SMarc-André Lureau"""
64*9b286e76SMarc-André Lureau
65*9b286e76SMarc-André Lureauimport os
66*9b286e76SMarc-André Lureauimport platform
67*9b286e76SMarc-André Lureauimport re
68*9b286e76SMarc-André Lureauimport subprocess
69*9b286e76SMarc-André Lureauimport sys
70*9b286e76SMarc-André Lureaufrom typing import Optional
71*9b286e76SMarc-André Lureaufrom xml.dom import minidom
72*9b286e76SMarc-André Lureau
73*9b286e76SMarc-André Lureau
class Preprocessor():
    """Hold the XML preprocessing state and run the directive passes.

    State kept on the instance:

    * ``sys_vars``: values substituted for ``$(sys.NAME)`` references.
    * ``cus_vars``: values set by ``<?define NAME = value ?>`` and by
      ``<?foreach?>`` loops, substituted for ``$(var.NAME)`` references.

    The public entry point is :meth:`preprocess`; the ``_pp_*`` methods are
    the individual text-rewriting passes it applies.
    """

    # Opening tag of a foreach loop; captures the loop variable name and the
    # ';'-separated list of values.
    _FOREACH_OPEN_RE = re.compile(r"<\?foreach\s+(\w+)\s+in\s+([\w;]+)\s*\?>")
    # Matches either a foreach opener or a closer. Group 1 is set only when
    # the token is the closer, which lets the scanner track nesting depth.
    _FOREACH_TOKEN_RE = re.compile(
        r"<\?foreach\s+\w+\s+in\s+[\w;]+\s*\?>|(<\?endforeach\?>)"
    )

    def __init__(self):
        self.sys_vars = {
            "ARCH": platform.architecture()[0],
            "SOURCE": os.path.abspath(__file__),
            "CURRENT": os.getcwd(),
        }
        self.cus_vars = {}

    def _pp_include(self, xml_str: str) -> str:
        """Replace each ``<?include path ?>`` with the content of that file.

        The path is passed to ``open()`` as written, and the file is read as
        UTF-8. Errors from ``open()`` propagate to the caller.
        """
        include_regex = r"(<\?include([\w\s\\/.:_-]+)\s*\?>)"
        for directive, raw_path in re.findall(include_regex, xml_str):
            with open(raw_path.strip(), "r", encoding="utf-8") as inc_file:
                xml_str = xml_str.replace(directive, inc_file.read())
        return xml_str

    def _pp_env_var(self, xml_str: str) -> str:
        """Substitute ``$(env.NAME)`` with ``os.environ[NAME]``.

        Raises:
            KeyError: if a referenced environment variable is not set.
        """
        envvar_regex = r"(\$\(env\.(\w+)\))"
        for reference, name in re.findall(envvar_regex, xml_str):
            xml_str = xml_str.replace(reference, os.environ[name])
        return xml_str

    def _pp_sys_var(self, xml_str: str) -> str:
        """Substitute ``$(sys.NAME)`` with the matching ``sys_vars`` entry.

        Raises:
            KeyError: if NAME is not a key of ``self.sys_vars``.
        """
        sysvar_regex = r"(\$\(sys\.(\w+)\))"
        for reference, name in re.findall(sysvar_regex, xml_str):
            xml_str = xml_str.replace(reference, self.sys_vars[name])
        return xml_str

    def _pp_cus_var(self, xml_str: str) -> str:
        """Record ``<?define?>`` directives and expand ``$(var.NAME)``.

        Each define is stored in ``self.cus_vars`` (surrounding whitespace
        and double quotes stripped from the value) and removed from the
        text. References to undefined variables expand to an empty string.
        """
        define_regex = r"(<\?define\s*(\w+)\s*=\s*([\w\s\"]+)\s*\?>)"
        for directive, name, value in re.findall(define_regex, xml_str):
            self.cus_vars[name.strip()] = value.strip().strip("\"")
            xml_str = xml_str.replace(directive, "")
        cusvar_regex = r"(\$\(var\.(\w+)\))"
        for reference, name in re.findall(cusvar_regex, xml_str):
            xml_str = xml_str.replace(reference, self.cus_vars.get(name, ""))
        return xml_str

    def _find_endforeach(self, xml_str: str,
                         start: int) -> "Optional[re.Match]":
        """Return the ``<?endforeach?>`` closing the loop opened before *start*.

        Scans forward from *start*, counting nested openers so that the
        closer returned is the one balancing the already-consumed opener.
        Returns None when the text ends before the loop is closed.
        """
        depth = 1
        for token in self._FOREACH_TOKEN_RE.finditer(xml_str, start):
            if token.group(1) is None:
                # Nested opener: its closer must be skipped as well.
                depth += 1
                continue
            depth -= 1
            if depth == 0:
                return token
        return None

    def _pp_foreach(self, xml_str: str) -> str:
        """Expand ``<?foreach NAME in a;b;c?>body<?endforeach?>`` loops.

        The body is emitted once per value, with ``cus_vars[NAME]`` set to
        that value and the body run through :meth:`_pp_cus_var`. The loop
        variable is left in ``cus_vars`` (set to the last value) afterwards.

        Each opener is paired with its balancing closer, so sibling loops
        are expanded independently and a body may span several lines. Only
        outermost loops are expanded per call; loops nested in a body are
        left in the expanded text for a later pass. Text from an opener
        with no balancing closer onwards is returned unchanged.
        """
        pieces = []
        pos = 0
        while True:
            opener = self._FOREACH_OPEN_RE.search(xml_str, pos)
            if opener is None:
                break
            closer = self._find_endforeach(xml_str, opener.end())
            if closer is None:
                # Unterminated loop: nothing sensible to expand.
                break
            name, values = opener.groups()
            body = xml_str[opener.end():closer.start()]
            pieces.append(xml_str[pos:opener.start()])
            for value in values.split(";"):
                self.cus_vars[name] = value
                pieces.append(self._pp_cus_var(body))
            pos = closer.end()
        pieces.append(xml_str[pos:])
        return "".join(pieces)

    def _pp_error_warning(self, xml_str: str) -> str:
        """Handle ``<?error "msg" ?>`` and ``<?warning "msg" ?>``.

        Warnings are reported on stderr and removed from the text. They must
        not go to stdout, because the resulting XML may be written there.

        Raises:
            RuntimeError: for the first ``<?error?>`` directive found.
        """
        error_regex = r"<\?error\s*\"([^\"]+)\"\s*\?>"
        first_error = re.search(error_regex, xml_str)
        if first_error:
            raise RuntimeError("[Error]: " + first_error.group(1))
        warning_regex = r"(<\?warning\s*\"([^\"]+)\"\s*\?>)"
        for directive, message in re.findall(warning_regex, xml_str):
            print("[Warning]: " + message, file=sys.stderr)
            xml_str = xml_str.replace(directive, "")
        return xml_str

    def _pp_if_eval(self, xml_str: str) -> str:
        """Evaluate ``<?if L op R ?>`` / ``<?elseif L op R ?>`` conditions.

        Each matching directive is rewritten to ``<?if True?>`` or
        ``<?if False?>`` (respectively ``elseif``) for :meth:`_pp_if_elseif`.
        Operators containing ``<`` or ``>`` compare the operands as Python
        expressions; any other operator compares them as strings.
        """
        ifelif_regex = (
            r"(<\?(if|elseif)\s*([^\"\s=<>!]+)\s*([!=<>]+)\s*\"*([^\"=<>!]+)\"*\s*\?>)"
        )
        for ifelif, tag, left, operator, right in re.findall(ifelif_regex,
                                                            xml_str):
            # NOTE(security): eval() runs text taken from the XML document.
            # Only preprocess trusted input; a malicious condition can run
            # arbitrary Python code here.
            if "<" in operator or ">" in operator:
                result = eval(f"{left} {operator} {right}")
            else:
                result = eval(f'"{left}" {operator} "{right}"')
            xml_str = xml_str.replace(ifelif, f"<?{tag} {result}?>")
        return xml_str

    def _pp_ifdef_ifndef(self, xml_str: str) -> str:
        """Rewrite ``<?ifdef NAME ?>`` / ``<?ifndef NAME ?>`` to ``<?if BOOL?>``.

        A name counts as defined when it is a key of ``self.cus_vars``.
        """
        ifndef_regex = r"(<\?(ifdef|ifndef)\s*([\w]+)\s*\?>)"
        for directive, tag, name in re.findall(ifndef_regex, xml_str):
            defined = name in self.cus_vars
            result = defined if tag == "ifdef" else not defined
            xml_str = xml_str.replace(directive, f"<?if {result}?>")
        return xml_str

    def _pp_if_elseif(self, xml_str: str) -> str:
        """Resolve already-evaluated conditional blocks to the chosen branch.

        Works on directives of the form ``<?if True?>`` / ``<?if False?>``.
        Three shapes are handled, in this order: if/elseif/else/endif (a
        single ``elseif`` only), if/else/endif, and if/endif. A block with
        no selected branch is replaced by an empty string.
        """
        if_elif_else_regex = (
            r"(<\?if\s(True|False)\?>"
            r"(.*?)"
            r"<\?elseif\s(True|False)\?>"
            r"(.*?)"
            r"<\?else\?>"
            r"(.*?)"
            r"<\?endif\?>)"
        )
        if_else_regex = (
            r"(<\?if\s(True|False)\?>"
            r"(.*?)"
            r"<\?else\?>"
            r"(.*?)"
            r"<\?endif\?>)"
        )
        if_regex = r"(<\?if\s(True|False)\?>(.*?)<\?endif\?>)"

        matches = re.findall(if_elif_else_regex, xml_str, re.DOTALL)
        for (block, if_flag, if_text, elif_flag,
             elif_text, else_text) in matches:
            if if_flag == "True":
                chosen = if_text
            elif elif_flag == "True":
                chosen = elif_text
            else:
                chosen = else_text
            xml_str = xml_str.replace(block, chosen)

        matches = re.findall(if_else_regex, xml_str, re.DOTALL)
        for block, if_flag, if_text, else_text in matches:
            chosen = if_text if if_flag == "True" else else_text
            xml_str = xml_str.replace(block, chosen)

        matches = re.findall(if_regex, xml_str, re.DOTALL)
        for block, if_flag, if_text in matches:
            chosen = if_text if if_flag == "True" else ""
            xml_str = xml_str.replace(block, chosen)
        return xml_str

    def _pp_command(self, xml_str: str) -> str:
        """Replace ``<?cmd "command" ?>`` with the command's output.

        The output is the combined stdout and stderr of the command.

        Raises:
            subprocess.CalledProcessError: if the command exits non-zero.
        """
        cmd_regex = r"(<\?cmd\s*\"([^\"]+)\"\s*\?>)"
        for directive, command in re.findall(cmd_regex, xml_str):
            # NOTE(security): the command string comes from the XML document
            # and is run through the shell; only preprocess trusted input.
            output = subprocess.check_output(
                command, shell=True,
                text=True, stderr=subprocess.STDOUT
            )
            xml_str = xml_str.replace(directive, output)
        return xml_str

    def _pp_blanks(self, xml_str: str) -> str:
        """Strip whitespace that directly follows ``>`` or precedes ``<``."""
        xml_str = re.sub(r">\s*", ">", xml_str)
        xml_str = re.sub(r"\s*<", "<", xml_str)
        return xml_str

    def preprocess(self, xml_str: str) -> str:
        """Run all passes over *xml_str* until the text stops changing.

        The passes run in a fixed order, and the whole sequence is repeated
        because one pass can produce text another pass must handle (for
        example an included file that itself contains directives).

        Returns:
            The fully preprocessed XML text.
        """
        fns = [
            self._pp_blanks,
            self._pp_include,
            self._pp_foreach,
            self._pp_env_var,
            self._pp_sys_var,
            self._pp_cus_var,
            self._pp_if_eval,
            self._pp_ifdef_ifndef,
            self._pp_if_elseif,
            self._pp_command,
            self._pp_error_warning,
        ]

        while True:
            changed = False
            for func in fns:
                out_xml = func(xml_str)
                changed = changed or out_xml != xml_str
                xml_str = out_xml
            if not changed:
                return xml_str
262*9b286e76SMarc-André Lureau
263*9b286e76SMarc-André Lureau
def preprocess_xml(path: str) -> str:
    """Read the UTF-8 file at *path* and return its preprocessed XML text.

    A fresh Preprocessor is used for every call, so variables defined by one
    file are not visible when another file is processed.
    """
    with open(path, "r", encoding="utf-8") as source:
        raw_xml = source.read()

        return Preprocessor().preprocess(raw_xml)
270*9b286e76SMarc-André Lureau
271*9b286e76SMarc-André Lureau
def save_xml(xml_str: str, path: Optional[str]):
    """Pretty-print *xml_str* to the file at *path*, or to stdout.

    Args:
        xml_str: XML text; it is parsed with minidom, so it must be
            well-formed.
        path: output file, written as UTF-8. When None or empty, the result
            is written to ``sys.stdout`` instead.
    """
    pretty = minidom.parseString(xml_str).toprettyxml()
    if path:
        with open(path, "w", encoding="utf-8") as output_file:
            output_file.write(pretty)
    else:
        # Do not use sys.stdout as a context manager: leaving the "with"
        # block would close it and break any later output of the process.
        sys.stdout.write(pretty)
276*9b286e76SMarc-André Lureau
277*9b286e76SMarc-André Lureau
def main():
    """Command-line entry point: preprocess one XML file.

    Usage: ``xml-preprocessor input.xml [output.xml]``. With no input file
    the usage line is printed and the process exits with status 1. The
    output path is used only when exactly two arguments are given;
    otherwise the result goes to stdout.
    """
    args = sys.argv
    if len(args) < 2:
        print("Usage: xml-preprocessor input.xml [output.xml]")
        sys.exit(1)

    input_file = args[1]
    output_file = args[2] if len(args) == 3 else None

    save_xml(preprocess_xml(input_file), output_file)
291*9b286e76SMarc-André Lureau
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
294