#!/usr/bin/env python3
#
# Copyright (c) 2017-2019 Tony Su
# Copyright (c) 2023 Red Hat, Inc.
#
# SPDX-License-Identifier: MIT
#
# Adapted from https://github.com/peitaosu/XML-Preprocessor
#
"""This is an XML Preprocessor which can be used to process your XML file
before you use it, to process conditional statements, variables, iteration
statements, error/warning, execute command, etc.

## XML Schema

### Include Files
```
<?include path/to/file ?>
```

### Variables
```
<?define CustomVariable = "value" ?>

$(env.EnvironmentVariable)

$(sys.SystemVariable)

$(var.CustomVariable)
```

### Conditional Statements
```
<?if ?>

<?ifdef ?>

<?ifndef ?>

<?else?>

<?elseif ?>

<?endif?>
```

### Iteration Statements
```
<?foreach VARNAME in 1;2;3?>
    $(var.VARNAME)
<?endforeach?>
```

### Errors and Warnings
```
<?error "This is error message!" ?>

<?warning "This is warning message!" ?>
```

### Commands
```
<?cmd "echo hello world" ?>
```
"""

import os
import platform
import re
import subprocess
import sys
from typing import Optional
from xml.dom import minidom


class Preprocessor():
    """This class holds the XML preprocessing state"""

    def __init__(self):
        # Values substituted for $(sys.NAME) references.
        self.sys_vars = {
            "ARCH": platform.architecture()[0],
            "SOURCE": os.path.abspath(__file__),
            "CURRENT": os.getcwd(),
        }
        # Values substituted for $(var.NAME); filled by <?define?> and
        # <?foreach?> while preprocessing.
        self.cus_vars = {}

    def _pp_include(self, xml_str: str) -> str:
        """Replace each <?include path ?> with the content of that file.

        Files are read as UTF-8; a missing file raises OSError from open().
        """
        include_regex = r"(<\?include([\w\s\\/.:_-]+)\s*\?>)"
        for directive, raw_path in re.findall(include_regex, xml_str):
            inc_file_path = raw_path.strip()
            with open(inc_file_path, "r", encoding="utf-8") as inc_file:
                inc_file_content = inc_file.read()
            xml_str = xml_str.replace(directive, inc_file_content)
        return xml_str

    def _pp_env_var(self, xml_str: str) -> str:
        """Expand $(env.NAME) from os.environ.

        Raises KeyError when NAME is not set in the environment.
        """
        envvar_regex = r"(\$\(env\.(\w+)\))"
        for reference, name in re.findall(envvar_regex, xml_str):
            xml_str = xml_str.replace(reference, os.environ[name])
        return xml_str

    def _pp_sys_var(self, xml_str: str) -> str:
        """Expand $(sys.NAME) from self.sys_vars.

        Raises KeyError when NAME is not one of the known system variables.
        """
        sysvar_regex = r"(\$\(sys\.(\w+)\))"
        for reference, name in re.findall(sysvar_regex, xml_str):
            xml_str = xml_str.replace(reference, self.sys_vars[name])
        return xml_str

    def _pp_cus_var(self, xml_str: str) -> str:
        """Process <?define NAME = value ?> and expand $(var.NAME).

        Every define directive is recorded in self.cus_vars (surrounding
        whitespace and double quotes stripped from the value) and removed
        from the text. References to undefined variables expand to the
        empty string.
        """
        define_regex = r"(<\?define\s*(\w+)\s*=\s*([\w\s\"]+)\s*\?>)"
        for directive, name, value in re.findall(define_regex, xml_str):
            self.cus_vars[name.strip()] = value.strip().strip("\"")
            xml_str = xml_str.replace(directive, "")

        cusvar_regex = r"(\$\(var\.(\w+)\))"
        for reference, name in re.findall(cusvar_regex, xml_str):
            xml_str = xml_str.replace(reference, self.cus_vars.get(name, ""))
        return xml_str

    def _pp_foreach(self, xml_str: str) -> str:
        """Expand <?foreach NAME in a;b;c?>body<?endforeach?> loops.

        The body is emitted once per ';'-separated value, with NAME bound
        in self.cus_vars to that value while the body's variables are
        expanded. NAME keeps the last value after the loop.
        """
        # The body is matched non-greedily and across newlines so that
        # several loops in one document are expanded independently and a
        # loop body may span multiple lines.
        foreach_regex = (
            r"(<\?foreach\s+(\w+)\s+in\s+([\w;]+)\s*\?>(.*?)<\?endforeach\?>)"
        )
        matches = re.findall(foreach_regex, xml_str, re.DOTALL)
        for directive, name, values, body in matches:
            expanded = []
            for value in values.split(";"):
                self.cus_vars[name] = value
                expanded.append(self._pp_cus_var(body))
            xml_str = xml_str.replace(directive, "".join(expanded))
        return xml_str

    def _pp_error_warning(self, xml_str: str) -> str:
        """Handle <?error "msg" ?> and <?warning "msg" ?> directives.

        The first error directive found raises RuntimeError. Warnings are
        printed to standard output and removed from the text.
        """
        error_regex = r"<\?error\s*\"([^\"]+)\"\s*\?>"
        error = re.search(error_regex, xml_str)
        if error:
            raise RuntimeError("[Error]: " + error.group(1))

        warning_regex = r"(<\?warning\s*\"([^\"]+)\"\s*\?>)"
        for directive, message in re.findall(warning_regex, xml_str):
            print("[Warning]: " + message)
            xml_str = xml_str.replace(directive, "")
        return xml_str

    def _pp_if_eval(self, xml_str: str) -> str:
        """Evaluate `<?if L op R ?>` / `<?elseif L op R ?>` conditions.

        Each directive with a comparison is rewritten to `<?if True?>` or
        `<?if False?>` (respectively `<?elseif ...?>`) so that
        _pp_if_elseif() can select the branch. Operators containing '<'
        or '>' compare the operands as Python expressions; other operators
        compare them as strings.
        """
        ifelif_regex = (
            r"(<\?(if|elseif)\s*([^\"\s=<>!]+)\s*([!=<>]+)\s*\"*([^\"=<>!]+)\"*\s*\?>)"
        )
        matches = re.findall(ifelif_regex, xml_str)
        for ifelif, tag, left, operator, right in matches:
            # NOTE(security): eval() executes text taken from the document
            # being preprocessed; only run this on trusted input.
            if "<" in operator or ">" in operator:
                result = eval(f"{left} {operator} {right}")
            else:
                result = eval(f'"{left}" {operator} "{right}"')
            xml_str = xml_str.replace(ifelif, f"<?{tag} {result}?>")
        return xml_str

    def _pp_ifdef_ifndef(self, xml_str: str) -> str:
        """Rewrite <?ifdef NAME?> / <?ifndef NAME?> to <?if True|False?>.

        The result reflects whether NAME is currently in self.cus_vars.
        """
        ifndef_regex = r"(<\?(ifdef|ifndef)\s*([\w]+)\s*\?>)"
        for directive, tag, name in re.findall(ifndef_regex, xml_str):
            defined = name in self.cus_vars
            result = defined if tag == "ifdef" else not defined
            xml_str = xml_str.replace(directive, f"<?if {result}?>")
        return xml_str

    def _pp_if_elseif(self, xml_str: str) -> str:
        """Resolve already-evaluated if/elseif/else/endif groups.

        Works on directives of the form `<?if True?>` / `<?if False?>`
        and replaces each group with the text of the selected branch (or
        nothing). The three shapes are handled from the most specific
        (if/elseif/else) to the least specific (plain if).
        """
        if_elif_else_regex = (
            r"(<\?if\s(True|False)\?>"
            r"(.*?)"
            r"<\?elseif\s(True|False)\?>"
            r"(.*?)"
            r"<\?else\?>"
            r"(.*?)"
            r"<\?endif\?>)"
        )
        if_else_regex = (
            r"(<\?if\s(True|False)\?>"
            r"(.*?)"
            r"<\?else\?>"
            r"(.*?)"
            r"<\?endif\?>)"
        )
        if_regex = r"(<\?if\s(True|False)\?>(.*?)<\?endif\?>)"

        matches = re.findall(if_elif_else_regex, xml_str, re.DOTALL)
        for (group_full, group_if, group_if_elif, group_elif,
             group_elif_else, group_else) in matches:
            if group_if == "True":
                result = group_if_elif
            elif group_elif == "True":
                result = group_elif_else
            else:
                result = group_else
            xml_str = xml_str.replace(group_full, result)

        matches = re.findall(if_else_regex, xml_str, re.DOTALL)
        for group_full, group_if, group_if_else, group_else in matches:
            result = group_if_else if group_if == "True" else group_else
            xml_str = xml_str.replace(group_full, result)

        matches = re.findall(if_regex, xml_str, re.DOTALL)
        for group_full, group_if, group_text in matches:
            result = group_text if group_if == "True" else ""
            xml_str = xml_str.replace(group_full, result)
        return xml_str

    def _pp_command(self, xml_str: str) -> str:
        """Replace each <?cmd "command" ?> with the command's output.

        stderr is merged into the captured output. A non-zero exit status
        raises subprocess.CalledProcessError.
        """
        cmd_regex = r"(<\?cmd\s*\"([^\"]+)\"\s*\?>)"
        for directive, command in re.findall(cmd_regex, xml_str):
            # NOTE(security): the command string comes from the document
            # and is run through the shell; only process trusted input.
            output = subprocess.check_output(
                command, shell=True,
                text=True, stderr=subprocess.STDOUT
            )
            xml_str = xml_str.replace(directive, output)
        return xml_str

    def _pp_blanks(self, xml_str: str) -> str:
        """Remove whitespace that directly follows '>' or precedes '<'."""
        xml_str = re.sub(r">\s*", ">", xml_str)
        xml_str = re.sub(r"\s*<", "<", xml_str)
        return xml_str

    def preprocess(self, xml_str: str) -> str:
        """Run all preprocessing passes until the text stops changing.

        Returns the fully preprocessed XML text. Exceptions raised by the
        individual passes (for example RuntimeError from an <?error?>
        directive) propagate to the caller.
        """
        fns = [
            self._pp_blanks,
            self._pp_include,
            self._pp_foreach,
            self._pp_env_var,
            self._pp_sys_var,
            self._pp_cus_var,
            self._pp_if_eval,
            self._pp_ifdef_ifndef,
            self._pp_if_elseif,
            self._pp_command,
            self._pp_error_warning,
        ]

        # A pass may produce text that an earlier pass needs to look at
        # again (e.g. an include that contains variables), so iterate to a
        # fixed point.
        while True:
            changed = False
            for func in fns:
                out_xml = func(xml_str)
                if out_xml != xml_str:
                    changed = True
                xml_str = out_xml
            if not changed:
                break

        return xml_str


def preprocess_xml(path: str) -> str:
    """Read the UTF-8 file at *path* and return its preprocessed text."""
    with open(path, "r", encoding="utf-8") as original_file:
        input_xml = original_file.read()

    proc = Preprocessor()
    return proc.preprocess(input_xml)


def save_xml(xml_str: str, path: Optional[str]):
    """Pretty-print *xml_str* to *path*, or to standard output.

    The text is parsed with minidom first, so malformed XML raises before
    any output file is created. When *path* is empty or None the result is
    written to sys.stdout, which is left open.
    """
    pretty_xml = minidom.parseString(xml_str).toprettyxml()
    if path:
        with open(path, "w", encoding="utf-8") as output_file:
            output_file.write(pretty_xml)
    else:
        # Do not use sys.stdout as a context manager: leaving the "with"
        # block would close it for the rest of the process.
        sys.stdout.write(pretty_xml)


def main():
    """Command-line entry point: xml-preprocessor input.xml [output.xml]"""
    if len(sys.argv) < 2:
        print("Usage: xml-preprocessor input.xml [output.xml]")
        sys.exit(1)

    output_file = None
    if len(sys.argv) == 3:
        output_file = sys.argv[2]

    input_file = sys.argv[1]
    output_xml = preprocess_xml(input_file)
    save_xml(output_xml, output_file)


if __name__ == "__main__":
    main()