"""Integration code for CSS selectors using Soup Sieve (pypi: soupsieve)."""

# We don't use soupsieve: the module is deliberately not imported here, so
# the default API is None and CSS() only works when a replacement API
# object is passed in explicitly.
soupsieve = None


class CSS(object):
    """A proxy object against the soupsieve library, to simplify its
    CSS selector API.

    Acquire this object through the .css attribute on the
    BeautifulSoup object, or on the Tag you want to use as the
    starting point for a CSS selector.

    The main advantage of doing this is that the tag to be selected
    against doesn't need to be explicitly specified in the function
    calls, since it's already scoped to a tag.
    """

    def __init__(self, tag, api=soupsieve):
        """Constructor.

        You don't need to instantiate this class yourself; instead,
        access the .css attribute on the BeautifulSoup object, or on
        the Tag you want to use as the starting point for your CSS
        selector.

        :param tag: All CSS selectors will use this as their starting
         point.

        :param api: A plug-in replacement for the soupsieve module,
         designed mainly for use in tests.

        :raise NotImplementedError: If ``api`` is None, i.e. no
         selector implementation is available.
        """
        if api is None:
            raise NotImplementedError(
                "Cannot execute CSS selectors because the soupsieve package is not installed."
            )
        self.api = api
        self.tag = tag

    def escape(self, ident):
        """Escape a CSS identifier.

        This is a simple wrapper around soupsieve.escape(). See the
        documentation for that function for more information.

        :param ident: The identifier to escape.

        :return: Whatever the API's escape() function returns for
         ``ident``.

        :raise NotImplementedError: If this object has no API to
         delegate to.
        """
        # Check the API this object was actually given, not the module
        # global: the global is always None in this file, and testing it
        # would make escape() fail even when a working replacement API
        # was injected through the constructor.
        if self.api is None:
            raise NotImplementedError(
                "Cannot escape CSS identifiers because the soupsieve package is not installed."
            )
        return self.api.escape(ident)

    def _ns(self, ns, select):
        """Normalize a dictionary of namespaces.

        :param ns: The namespaces supplied by the caller, or None.

        :param select: The selector the namespaces will be used with;
         either a selector string or a precompiled pattern.

        :return: ``ns`` unchanged if it was provided or if ``select``
         is a precompiled pattern; otherwise the namespaces recorded
         on the starting tag.
        """
        # A precompiled pattern already has a namespace context
        # compiled in, which cannot be replaced, so the tag's
        # namespaces are only substituted for uncompiled selectors
        # when the caller gave none.
        if not isinstance(select, self.api.SoupSieve) and ns is None:
            ns = self.tag._namespaces
        return ns

    def _rs(self, results):
        """Normalize a list of results to a ResultSet.

        A ResultSet is more consistent with the rest of Beautiful
        Soup's API, and ResultSet.__getattr__ has a helpful error
        message if you try to treat a list of results as a single
        result (a common mistake).

        :param results: The results to wrap.

        :return: A ResultSet containing ``results``.
        """
        # Import here to avoid circular import
        from bs4.element import ResultSet
        return ResultSet(None, results)

    def compile(self, select, namespaces=None, flags=0, **kwargs):
        """Pre-compile a selector and return the compiled object.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
         used in the CSS selector to namespace URIs. By default,
         Beautiful Soup will use the prefixes it encountered while
         parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
         soupsieve.compile() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
         soupsieve.compile() method.

        :return: A precompiled selector object.
        :rtype: soupsieve.SoupSieve
        """
        return self.api.compile(
            select, self._ns(namespaces, select), flags, **kwargs
        )

    def select_one(self, select, namespaces=None, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag and return the
        first result.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select_one()
        method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
         used in the CSS selector to namespace URIs. By default,
         Beautiful Soup will use the prefixes it encountered while
         parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
         soupsieve.select_one() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
         soupsieve.select_one() method.

        :return: A Tag, or None if the selector has no match.
        :rtype: bs4.element.Tag
        """
        return self.api.select_one(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def select(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select()
        method.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
         used in the CSS selector to namespace URIs. By default,
         Beautiful Soup will pass in the prefixes it encountered while
         parsing the document.

        :param limit: After finding this number of results, stop looking.
         None is treated the same as 0.

        :param flags: Flags to be passed into Soup Sieve's
         soupsieve.select() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
         soupsieve.select() method.

        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet
        """
        if limit is None:
            limit = 0

        return self._rs(
            self.api.select(
                select, self.tag, self._ns(namespaces, select), limit, flags,
                **kwargs
            )
        )

    def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.iselect()
        method. It is the same as select(), but it returns a generator
        instead of a list.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
         used in the CSS selector to namespace URIs. By default,
         Beautiful Soup will pass in the prefixes it encountered while
         parsing the document.

        :param limit: After finding this number of results, stop looking.
         None is treated the same as 0.

        :param flags: Flags to be passed into Soup Sieve's
         soupsieve.iselect() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
         soupsieve.iselect() method.

        :return: A generator
        :rtype: types.GeneratorType
        """
        # Mirror select(): callers may pass limit=None to mean "no
        # explicit limit", so hand the API an integer in that case too.
        if limit is None:
            limit = 0

        return self.api.iselect(
            select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
        )

    def closest(self, select, namespaces=None, flags=0, **kwargs):
        """Find the Tag closest to this one that matches the given selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.closest()
        method.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
         used in the CSS selector to namespace URIs. By default,
         Beautiful Soup will pass in the prefixes it encountered while
         parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
         soupsieve.closest() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
         soupsieve.closest() method.

        :return: A Tag, or None if there is no match.
        :rtype: bs4.Tag
        """
        return self.api.closest(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def match(self, select, namespaces=None, flags=0, **kwargs):
        """Check whether this Tag matches the given CSS selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.match()
        method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
         used in the CSS selector to namespace URIs. By default,
         Beautiful Soup will pass in the prefixes it encountered while
         parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
         soupsieve.match() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
         soupsieve.match() method.

        :return: True if this Tag matches the selector; False otherwise.
        :rtype: bool
        """
        return self.api.match(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def filter(self, select, namespaces=None, flags=0, **kwargs):
        """Filter this Tag's direct children based on the given CSS selector.

        This uses the Soup Sieve library. It works the same way as
        passing this Tag into that library's soupsieve.filter()
        method. For more information, see the documentation for
        soupsieve.filter().

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
         used in the CSS selector to namespace URIs. By default,
         Beautiful Soup will pass in the prefixes it encountered while
         parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
         soupsieve.filter() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
         soupsieve.filter() method.

        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet
        """
        return self._rs(
            self.api.filter(
                select, self.tag, self._ns(namespaces, select), flags, **kwargs
            )
        )