xref: /openbmc/openbmc/poky/bitbake/lib/bs4/__init__.py (revision edff49234e31f23dc79f823473c9e286a21596c1)
1"""Beautiful Soup Elixir and Tonic - "The Screen-Scraper's Friend".
2
3http://www.crummy.com/software/BeautifulSoup/
4
5Beautiful Soup uses a pluggable XML or HTML parser to parse a
6(possibly invalid) document into a tree representation. Beautiful Soup
7provides methods and Pythonic idioms that make it easy to navigate,
8search, and modify the parse tree.
9
10Beautiful Soup works with Python 3.6 and up. It works better if lxml
11and/or html5lib is installed.
12
13For more than you ever wanted to know about Beautiful Soup, see the
14documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
15"""
16
# Package metadata.
__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.12.3"
__copyright__ = "Copyright (c) 2004-2024 Leonard Richardson"
# Use of this source code is governed by the MIT license.
__license__ = "MIT"

# Public API: the only name exported by a star-import of this module.
__all__ = ['BeautifulSoup']
24
25from collections import Counter
26import os
27import re
28import sys
29import traceback
30import warnings
31
# The very first thing we do is give a useful error if someone is
# running this code under Python 2. This check sits ahead of the
# package-relative imports below, so it fires before any of them run.
if sys.version_info.major < 3:
    raise ImportError('You are trying to use a Python 3-specific version of Beautiful Soup under Python 2. This will not work. The final version of Beautiful Soup to support Python 2 was 4.9.3.')
36
37from .builder import (
38    builder_registry,
39    ParserRejectedMarkup,
40    XMLParsedAsHTMLWarning,
41    HTMLParserTreeBuilder
42)
43from .dammit import UnicodeDammit
44from .element import (
45    CData,
46    Comment,
47    CSS,
48    DEFAULT_OUTPUT_ENCODING,
49    Declaration,
50    Doctype,
51    NavigableString,
52    PageElement,
53    ProcessingInstruction,
54    PYTHON_SPECIFIC_ENCODINGS,
55    ResultSet,
56    Script,
57    Stylesheet,
58    SoupStrainer,
59    Tag,
60    TemplateString,
61    )
62
63# Define some custom warnings.
class GuessedAtParserWarning(UserWarning):
    """Warning category for an implicitly chosen parser.

    Issued when BeautifulSoup had to pick a parser by itself, which
    usually means none was named in the constructor call.
    """
68
class MarkupResemblesLocatorWarning(UserWarning):
    """Warning category for 'markup' that is really a locator.

    Issued when the value handed to BeautifulSoup as markup looks like
    a resource locator instead: a URL, or the path to a file on disk.
    """
74
75
76class BeautifulSoup(Tag):
77    """A data structure representing a parsed HTML or XML document.
78
79    Most of the methods you'll call on a BeautifulSoup object are inherited from
80    PageElement or Tag.
81
82    Internally, this class defines the basic interface called by the
83    tree builders when converting an HTML/XML document into a data
84    structure. The interface abstracts away the differences between
85    parsers. To write a new tree builder, you'll need to understand
86    these methods as a whole.
87
88    These methods will be called by the BeautifulSoup constructor:
89      * reset()
90      * feed(markup)
91
92    The tree builder may call these methods from its feed() implementation:
93      * handle_starttag(name, attrs) # See note about return value
94      * handle_endtag(name)
95      * handle_data(data) # Appends to the current data node
96      * endData(containerClass) # Ends the current data node
97
98    No matter how complicated the underlying parser is, you should be
99    able to build a tree using 'start tag' events, 'end tag' events,
100    'data' events, and "done with data" events.
101
102    If you encounter an empty-element tag (aka a self-closing tag,
103    like HTML's <br> tag), call handle_starttag and then
104    handle_endtag.
105    """
106
    # Since BeautifulSoup subclasses Tag, it's possible to treat it as
    # a Tag with a .name. This name makes it clear the BeautifulSoup
    # object isn't a real markup tag.
    ROOT_TAG_NAME = '[document]'

    # If the end-user gives no indication which tree builder they
    # want, look for one with these features.
    DEFAULT_BUILDER_FEATURES = ['html', 'fast']

    # A string containing all ASCII whitespace characters, used in
    # endData() to detect data chunks that seem 'empty'.
    ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'

    # %-style template for the GuessedAtParserWarning message. The
    # constructor fills it from a dict with the keys markup_type,
    # parser, line_number and filename.
    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"
121
    def __init__(self, markup="", features=None, builder=None,
                 parse_only=None, from_encoding=None, exclude_encodings=None,
                 element_classes=None, **kwargs):
        """Constructor.

        Chooses (or accepts) a TreeBuilder, issues warnings about
        questionable arguments, and then parses `markup` into this
        object.

        :param markup: A string or a file-like object representing
         markup to be parsed.

        :param features: Desirable features of the parser to be
         used. This may be the name of a specific parser ("lxml",
         "lxml-xml", "html.parser", or "html5lib") or it may be the
         type of markup to be used ("html", "html5", "xml"). It's
         recommended that you name a specific parser, so that
         Beautiful Soup gives you the same results across platforms
         and virtual environments.

        :param builder: A TreeBuilder subclass to instantiate (or
         instance to use) instead of looking one up based on
         `features`. You only need to use this if you've implemented a
         custom TreeBuilder.

        :param parse_only: A SoupStrainer. Only parts of the document
         matching the SoupStrainer will be considered. This is useful
         when parsing part of a document that would otherwise be too
         large to fit into memory.

        :param from_encoding: A string indicating the encoding of the
         document to be parsed. Pass this in if Beautiful Soup is
         guessing wrongly about the document's encoding.

        :param exclude_encodings: A list of strings indicating
         encodings known to be wrong. Pass this in if you don't know
         the document's encoding but you know Beautiful Soup's guess is
         wrong.

        :param element_classes: A dictionary mapping BeautifulSoup
         classes like Tag and NavigableString, to other classes you'd
         like to be instantiated instead as the parse tree is
         built. This is useful for subclassing Tag or NavigableString
         to modify default behavior.

        :param kwargs: For backwards compatibility purposes, the
         constructor accepts certain keyword arguments used in
         Beautiful Soup 3. None of these arguments do anything in
         Beautiful Soup 4; they will result in a warning and then be
         ignored.

         Apart from this, any keyword arguments passed into the
         BeautifulSoup constructor are propagated to the TreeBuilder
         constructor. This makes it possible to configure a
         TreeBuilder by passing in arguments, not just by saying which
         one to use.

        :raise FeatureNotFound: If no registered tree builder offers
         the requested features.
        :raise ParserRejectedMarkup: If the builder rejected every
         version of the markup it was offered.
        """
        # Beautiful Soup 3 arguments: warn about each one, and delete
        # it so it is not forwarded to the TreeBuilder constructor
        # through **kwargs.
        if 'convertEntities' in kwargs:
            del kwargs['convertEntities']
            warnings.warn(
                "BS4 does not respect the convertEntities argument to the "
                "BeautifulSoup constructor. Entities are always converted "
                "to Unicode characters.")

        if 'markupMassage' in kwargs:
            del kwargs['markupMassage']
            warnings.warn(
                "BS4 does not respect the markupMassage argument to the "
                "BeautifulSoup constructor. The tree builder is responsible "
                "for any necessary markup massage.")

        if 'smartQuotesTo' in kwargs:
            del kwargs['smartQuotesTo']
            warnings.warn(
                "BS4 does not respect the smartQuotesTo argument to the "
                "BeautifulSoup constructor. Smart quotes are always converted "
                "to Unicode characters.")

        if 'selfClosingTags' in kwargs:
            del kwargs['selfClosingTags']
            warnings.warn(
                "BS4 does not respect the selfClosingTags argument to the "
                "BeautifulSoup constructor. The tree builder is responsible "
                "for understanding self-closing tags.")

        if 'isHTML' in kwargs:
            del kwargs['isHTML']
            warnings.warn(
                "BS4 does not respect the isHTML argument to the "
                "BeautifulSoup constructor. Suggest you use "
                "features='lxml' for HTML and features='lxml-xml' for "
                "XML.")

        # Helper for arguments that were renamed rather than removed:
        # warns and returns the value passed under the old name, or
        # returns None if the old name was not used.
        def deprecated_argument(old_name, new_name):
            if old_name in kwargs:
                # stacklevel=3 skips this helper and __init__, so the
                # warning points at the code that called the constructor.
                warnings.warn(
                    'The "%s" argument to the BeautifulSoup constructor '
                    'has been renamed to "%s."' % (old_name, new_name),
                    DeprecationWarning, stacklevel=3
                )
                return kwargs.pop(old_name)
            return None

        # Because of the short-circuiting `or`, the deprecated spelling
        # is only looked at (and removed from kwargs) when the new-style
        # argument is falsy.
        parse_only = parse_only or deprecated_argument(
            "parseOnlyThese", "parse_only")

        from_encoding = from_encoding or deprecated_argument(
            "fromEncoding", "from_encoding")

        if from_encoding and isinstance(markup, str):
            warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
            from_encoding = None

        self.element_classes = element_classes or dict()

        # We need this information to track whether or not the builder
        # was specified well enough that we can omit the 'you need to
        # specify a parser' warning.
        original_builder = builder
        original_features = features

        if isinstance(builder, type):
            # A builder class was passed in; it needs to be instantiated.
            builder_class = builder
            builder = None
        elif builder is None:
            # Nothing was passed in; look a builder class up by feature.
            if isinstance(features, str):
                features = [features]
            if features is None or len(features) == 0:
                features = self.DEFAULT_BUILDER_FEATURES
            builder_class = builder_registry.lookup(*features)
            if builder_class is None:
                # NOTE(review): FeatureNotFound is not defined in the
                # visible part of this file -- presumably it is defined
                # further down the module; confirm.
                raise FeatureNotFound(
                    "Couldn't find a tree builder with the features you "
                    "requested: %s. Do you need to install a parser library?"
                    % ",".join(features))
        # Any other value of `builder` falls through untouched and is
        # used as-is below.

        # At this point either we have a TreeBuilder instance in
        # builder, or we have a builder_class that we can instantiate
        # with the remaining **kwargs.
        if builder is None:
            builder = builder_class(**kwargs)
            if not original_builder and not (
                    original_features == builder.NAME or
                    original_features in builder.ALTERNATE_NAMES
            ) and markup:
                # The user did not tell us which TreeBuilder to use,
                # and we had to guess. Issue a warning.
                if builder.is_xml:
                    markup_type = "XML"
                else:
                    markup_type = "HTML"

                # This code adapted from warnings.py so that we get the same line
                # of code as our warnings.warn() call gets, even if the answer is wrong
                # (as it may be in a multithreading situation).
                caller = None
                try:
                    # Frame 1 is the frame that called this constructor.
                    caller = sys._getframe(1)
                except ValueError:
                    pass
                if caller:
                    globals = caller.f_globals
                    line_number = caller.f_lineno
                else:
                    globals = sys.__dict__
                    line_number= 1
                filename = globals.get('__file__')
                if filename:
                    fnl = filename.lower()
                    if fnl.endswith((".pyc", ".pyo")):
                        # Drop the trailing 'c'/'o' so the name ends
                        # in '.py'.
                        filename = filename[:-1]
                if filename:
                    # If there is no filename at all, the user is most likely in a REPL,
                    # and the warning is not necessary.
                    values = dict(
                        filename=filename,
                        line_number=line_number,
                        parser=builder.NAME,
                        markup_type=markup_type
                    )
                    warnings.warn(
                        self.NO_PARSER_SPECIFIED_WARNING % values,
                        GuessedAtParserWarning, stacklevel=2
                    )
        else:
            # A ready-made builder was supplied, so there is no
            # constructor call to hand the leftover kwargs to.
            if kwargs:
                warnings.warn("Keyword arguments to the BeautifulSoup constructor will be ignored. These would normally be passed into the TreeBuilder constructor, but a TreeBuilder instance was passed in as `builder`.")

        self.builder = builder
        self.is_xml = builder.is_xml
        self.known_xml = self.is_xml
        self._namespaces = dict()
        self.parse_only = parse_only

        # NOTE(review): len() is applied to any markup that has no
        # read() method, so a value without a length (e.g. None) raises
        # TypeError on the elif below.
        if hasattr(markup, 'read'):        # It's a file-type object.
            markup = markup.read()
        elif len(markup) <= 256 and (
                (isinstance(markup, bytes) and not b'<' in markup)
                or (isinstance(markup, str) and not '<' in markup)
        ):
            # Issue warnings for a couple beginner problems
            # involving passing non-markup to Beautiful Soup.
            # Beautiful Soup will still parse the input as markup,
            # since that is sometimes the intended behavior.
            if not self._markup_is_url(markup):
                self._markup_resembles_filename(markup)

        # Each item produced by prepare_markup() is unpacked straight
        # onto this object as (markup, original_encoding,
        # declared_html_encoding, contains_replacement_characters).
        # The first one the parser accepts wins; rejections are
        # collected for the error message.
        rejections = []
        success = False
        for (self.markup, self.original_encoding, self.declared_html_encoding,
         self.contains_replacement_characters) in (
             self.builder.prepare_markup(
                 markup, from_encoding, exclude_encodings=exclude_encodings)):
            self.reset()
            self.builder.initialize_soup(self)
            try:
                self._feed()
                success = True
                break
            except ParserRejectedMarkup as e:
                rejections.append(e)
                pass

        if not success:
            other_exceptions = [str(e) for e in rejections]
            raise ParserRejectedMarkup(
                "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions)
            )

        # Clear out the markup and remove the builder's circular
        # reference to this object.
        self.markup = None
        self.builder.soup = None
352
353    def _clone(self):
354        """Create a new BeautifulSoup object with the same TreeBuilder,
355        but not associated with any markup.
356
357        This is the first step of the deepcopy process.
358        """
359        clone = type(self)("", None, self.builder)
360
361        # Keep track of the encoding of the original document,
362        # since we won't be parsing it again.
363        clone.original_encoding = self.original_encoding
364        return clone
365
366    def __getstate__(self):
367        # Frequently a tree builder can't be pickled.
368        d = dict(self.__dict__)
369        if 'builder' in d and d['builder'] is not None and not self.builder.picklable:
370            d['builder'] = type(self.builder)
371        # Store the contents as a Unicode string.
372        d['contents'] = []
373        d['markup'] = self.decode()
374
375        # If _most_recent_element is present, it's a Tag object left
376        # over from initial parse. It might not be picklable and we
377        # don't need it.
378        if '_most_recent_element' in d:
379            del d['_most_recent_element']
380        return d
381
382    def __setstate__(self, state):
383        # If necessary, restore the TreeBuilder by looking it up.
384        self.__dict__ = state
385        if isinstance(self.builder, type):
386            self.builder = self.builder()
387        elif not self.builder:
388            # We don't know which builder was used to build this
389            # parse tree, so use a default we know is always available.
390            self.builder = HTMLParserTreeBuilder()
391        self.builder.soup = self
392        self.reset()
393        self._feed()
394        return state
395
396
397    @classmethod
398    def _decode_markup(cls, markup):
399        """Ensure `markup` is bytes so it's safe to send into warnings.warn.
400
401        TODO: warnings.warn had this problem back in 2010 but it might not
402        anymore.
403        """
404        if isinstance(markup, bytes):
405            decoded = markup.decode('utf-8', 'replace')
406        else:
407            decoded = markup
408        return decoded
409
410    @classmethod
411    def _markup_is_url(cls, markup):
412        """Error-handling method to raise a warning if incoming markup looks
413        like a URL.
414
415        :param markup: A string.
416        :return: Whether or not the markup resembles a URL
417            closely enough to justify a warning.
418        """
419        if isinstance(markup, bytes):
420            space = b' '
421            cant_start_with = (b"http:", b"https:")
422        elif isinstance(markup, str):
423            space = ' '
424            cant_start_with = ("http:", "https:")
425        else:
426            return False
427
428        if any(markup.startswith(prefix) for prefix in cant_start_with):
429            if not space in markup:
430                warnings.warn(
431                    'The input looks more like a URL than markup. You may want to use'
432                    ' an HTTP client like requests to get the document behind'
433                    ' the URL, and feed that document to Beautiful Soup.',
434                    MarkupResemblesLocatorWarning,
435                    stacklevel=3
436                )
437                return True
438        return False
439
440    @classmethod
441    def _markup_resembles_filename(cls, markup):
442        """Error-handling method to raise a warning if incoming markup
443        resembles a filename.
444
445        :param markup: A bytestring or string.
446        :return: Whether or not the markup resembles a filename
447            closely enough to justify a warning.
448        """
449        path_characters = '/\\'
450        extensions = ['.html', '.htm', '.xml', '.xhtml', '.txt']
451        if isinstance(markup, bytes):
452            path_characters = path_characters.encode("utf8")
453            extensions = [x.encode('utf8') for x in extensions]
454        filelike = False
455        if any(x in markup for x in path_characters):
456            filelike = True
457        else:
458            lower = markup.lower()
459            if any(lower.endswith(ext) for ext in extensions):
460                filelike = True
461        if filelike:
462            warnings.warn(
463                'The input looks more like a filename than markup. You may'
464                ' want to open this file and pass the filehandle into'
465                ' Beautiful Soup.',
466                MarkupResemblesLocatorWarning, stacklevel=3
467            )
468            return True
469        return False
470
471    def _feed(self):
472        """Internal method that parses previously set markup, creating a large
473        number of Tag and NavigableString objects.
474        """
475        # Convert the document to Unicode.
476        self.builder.reset()
477
478        self.builder.feed(self.markup)
479        # Close out any unfinished strings and close all the open tags.
480        self.endData()
481        while self.currentTag.name != self.ROOT_TAG_NAME:
482            self.popTag()
483
484    def reset(self):
485        """Reset this object to a state as though it had never parsed any
486        markup.
487        """
488        Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
489        self.hidden = 1
490        self.builder.reset()
491        self.current_data = []
492        self.currentTag = None
493        self.tagStack = []
494        self.open_tag_counter = Counter()
495        self.preserve_whitespace_tag_stack = []
496        self.string_container_stack = []
497        self._most_recent_element = None
498        self.pushTag(self)
499
500    def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
501                sourceline=None, sourcepos=None, **kwattrs):
502        """Create a new Tag associated with this BeautifulSoup object.
503
504        :param name: The name of the new Tag.
505        :param namespace: The URI of the new Tag's XML namespace, if any.
506        :param prefix: The prefix for the new Tag's XML namespace, if any.
507        :param attrs: A dictionary of this Tag's attribute values; can
508            be used instead of `kwattrs` for attributes like 'class'
509            that are reserved words in Python.
510        :param sourceline: The line number where this tag was
511            (purportedly) found in its source document.
512        :param sourcepos: The character position within `sourceline` where this
513            tag was (purportedly) found.
514        :param kwattrs: Keyword arguments for the new Tag's attribute values.
515
516        """
517        kwattrs.update(attrs)
518        return self.element_classes.get(Tag, Tag)(
519            None, self.builder, name, namespace, nsprefix, kwattrs,
520            sourceline=sourceline, sourcepos=sourcepos
521        )
522
523    def string_container(self, base_class=None):
524        container = base_class or NavigableString
525
526        # There may be a general override of NavigableString.
527        container = self.element_classes.get(
528            container, container
529        )
530
531        # On top of that, we may be inside a tag that needs a special
532        # container class.
533        if self.string_container_stack and container is NavigableString:
534            container = self.builder.string_containers.get(
535                self.string_container_stack[-1].name, container
536            )
537        return container
538
539    def new_string(self, s, subclass=None):
540        """Create a new NavigableString associated with this BeautifulSoup
541        object.
542        """
543        container = self.string_container(subclass)
544        return container(s)
545
546    def insert_before(self, *args):
547        """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement
548        it because there is nothing before or after it in the parse tree.
549        """
550        raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
551
552    def insert_after(self, *args):
553        """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement
554        it because there is nothing before or after it in the parse tree.
555        """
556        raise NotImplementedError("BeautifulSoup objects don't support insert_after().")
557
558    def popTag(self):
559        """Internal method called by _popToTag when a tag is closed."""
560        tag = self.tagStack.pop()
561        if tag.name in self.open_tag_counter:
562            self.open_tag_counter[tag.name] -= 1
563        if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]:
564            self.preserve_whitespace_tag_stack.pop()
565        if self.string_container_stack and tag == self.string_container_stack[-1]:
566            self.string_container_stack.pop()
567        #print("Pop", tag.name)
568        if self.tagStack:
569            self.currentTag = self.tagStack[-1]
570        return self.currentTag
571
572    def pushTag(self, tag):
573        """Internal method called by handle_starttag when a tag is opened."""
574        #print("Push", tag.name)
575        if self.currentTag is not None:
576            self.currentTag.contents.append(tag)
577        self.tagStack.append(tag)
578        self.currentTag = self.tagStack[-1]
579        if tag.name != self.ROOT_TAG_NAME:
580            self.open_tag_counter[tag.name] += 1
581        if tag.name in self.builder.preserve_whitespace_tags:
582            self.preserve_whitespace_tag_stack.append(tag)
583        if tag.name in self.builder.string_containers:
584            self.string_container_stack.append(tag)
585
586    def endData(self, containerClass=None):
587        """Method called by the TreeBuilder when the end of a data segment
588        occurs.
589        """
590        if self.current_data:
591            current_data = ''.join(self.current_data)
592            # If whitespace is not preserved, and this string contains
593            # nothing but ASCII spaces, replace it with a single space
594            # or newline.
595            if not self.preserve_whitespace_tag_stack:
596                strippable = True
597                for i in current_data:
598                    if i not in self.ASCII_SPACES:
599                        strippable = False
600                        break
601                if strippable:
602                    if '\n' in current_data:
603                        current_data = '\n'
604                    else:
605                        current_data = ' '
606
607            # Reset the data collector.
608            self.current_data = []
609
610            # Should we add this string to the tree at all?
611            if self.parse_only and len(self.tagStack) <= 1 and \
612                   (not self.parse_only.text or \
613                    not self.parse_only.search(current_data)):
614                return
615
616            containerClass = self.string_container(containerClass)
617            o = containerClass(current_data)
618            self.object_was_parsed(o)
619
620    def object_was_parsed(self, o, parent=None, most_recent_element=None):
621        """Method called by the TreeBuilder to integrate an object into the parse tree."""
622        if parent is None:
623            parent = self.currentTag
624        if most_recent_element is not None:
625            previous_element = most_recent_element
626        else:
627            previous_element = self._most_recent_element
628
629        next_element = previous_sibling = next_sibling = None
630        if isinstance(o, Tag):
631            next_element = o.next_element
632            next_sibling = o.next_sibling
633            previous_sibling = o.previous_sibling
634            if previous_element is None:
635                previous_element = o.previous_element
636
637        fix = parent.next_element is not None
638
639        o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)
640
641        self._most_recent_element = o
642        parent.contents.append(o)
643
644        # Check if we are inserting into an already parsed node.
645        if fix:
646            self._linkage_fixer(parent)
647
648    def _linkage_fixer(self, el):
649        """Make sure linkage of this fragment is sound."""
650
651        first = el.contents[0]
652        child = el.contents[-1]
653        descendant = child
654
655        if child is first and el.parent is not None:
656            # Parent should be linked to first child
657            el.next_element = child
658            # We are no longer linked to whatever this element is
659            prev_el = child.previous_element
660            if prev_el is not None and prev_el is not el:
661                prev_el.next_element = None
662            # First child should be linked to the parent, and no previous siblings.
663            child.previous_element = el
664            child.previous_sibling = None
665
666        # We have no sibling as we've been appended as the last.
667        child.next_sibling = None
668
669        # This index is a tag, dig deeper for a "last descendant"
670        if isinstance(child, Tag) and child.contents:
671            descendant = child._last_descendant(False)
672
673        # As the final step, link last descendant. It should be linked
674        # to the parent's next sibling (if found), else walk up the chain
675        # and find a parent with a sibling. It should have no next sibling.
676        descendant.next_element = None
677        descendant.next_sibling = None
678        target = el
679        while True:
680            if target is None:
681                break
682            elif target.next_sibling is not None:
683                descendant.next_element = target.next_sibling
684                target.next_sibling.previous_element = child
685                break
686            target = target.parent
687
688    def _popToTag(self, name, nsprefix=None, inclusivePop=True):
689        """Pops the tag stack up to and including the most recent
690        instance of the given tag.
691
692        If there are no open tags with the given name, nothing will be
693        popped.
694
695        :param name: Pop up to the most recent tag with this name.
696        :param nsprefix: The namespace prefix that goes with `name`.
697        :param inclusivePop: It this is false, pops the tag stack up
698          to but *not* including the most recent instqance of the
699          given tag.
700
701        """
702        #print("Popping to %s" % name)
703        if name == self.ROOT_TAG_NAME:
704            # The BeautifulSoup object itself can never be popped.
705            return
706
707        most_recently_popped = None
708
709        stack_size = len(self.tagStack)
710        for i in range(stack_size - 1, 0, -1):
711            if not self.open_tag_counter.get(name):
712                break
713            t = self.tagStack[i]
714            if (name == t.name and nsprefix == t.prefix):
715                if inclusivePop:
716                    most_recently_popped = self.popTag()
717                break
718            most_recently_popped = self.popTag()
719
720        return most_recently_popped
721
722    def handle_starttag(self, name, namespace, nsprefix, attrs, sourceline=None,
723                        sourcepos=None, namespaces=None):
724        """Called by the tree builder when a new tag is encountered.
725
726        :param name: Name of the tag.
727        :param nsprefix: Namespace prefix for the tag.
728        :param attrs: A dictionary of attribute values.
729        :param sourceline: The line number where this tag was found in its
730            source document.
731        :param sourcepos: The character position within `sourceline` where this
732            tag was found.
733        :param namespaces: A dictionary of all namespace prefix mappings
734            currently in scope in the document.
735
736        If this method returns None, the tag was rejected by an active
737        SoupStrainer. You should proceed as if the tag had not occurred
738        in the document. For instance, if this was a self-closing tag,
739        don't call handle_endtag.
740        """
741        # print("Start tag %s: %s" % (name, attrs))
742        self.endData()
743
744        if (self.parse_only and len(self.tagStack) <= 1
745            and (self.parse_only.text
746                 or not self.parse_only.search_tag(name, attrs))):
747            return None
748
749        tag = self.element_classes.get(Tag, Tag)(
750            self, self.builder, name, namespace, nsprefix, attrs,
751            self.currentTag, self._most_recent_element,
752            sourceline=sourceline, sourcepos=sourcepos,
753            namespaces=namespaces
754        )
755        if tag is None:
756            return tag
757        if self._most_recent_element is not None:
758            self._most_recent_element.next_element = tag
759        self._most_recent_element = tag
760        self.pushTag(tag)
761        return tag
762
763    def handle_endtag(self, name, nsprefix=None):
764        """Called by the tree builder when an ending tag is encountered.
765
766        :param name: Name of the tag.
767        :param nsprefix: Namespace prefix for the tag.
768        """
769        #print("End tag: " + name)
770        self.endData()
771        self._popToTag(name, nsprefix)
772
773    def handle_data(self, data):
774        """Called by the tree builder when a chunk of textual data is encountered."""
775        self.current_data.append(data)
776
777    def decode(self, pretty_print=False,
778               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
779               formatter="minimal", iterator=None):
780        """Returns a string or Unicode representation of the parse tree
781            as an HTML or XML document.
782
783        :param pretty_print: If this is True, indentation will be used to
784            make the document more readable.
785        :param eventual_encoding: The encoding of the final document.
786            If this is None, the document will be a Unicode string.
787        """
788        if self.is_xml:
789            # Print the XML declaration
790            encoding_part = ''
791            if eventual_encoding in PYTHON_SPECIFIC_ENCODINGS:
792                # This is a special Python encoding; it can't actually
793                # go into an XML document because it means nothing
794                # outside of Python.
795                eventual_encoding = None
796            if eventual_encoding != None:
797                encoding_part = ' encoding="%s"' % eventual_encoding
798            prefix = '<?xml version="1.0"%s?>\n' % encoding_part
799        else:
800            prefix = ''
801        if not pretty_print:
802            indent_level = None
803        else:
804            indent_level = 0
805        return prefix + super(BeautifulSoup, self).decode(
806            indent_level, eventual_encoding, formatter, iterator)
807
# Short aliases for the main class, to make it easier to get started
# quickly, e.g. 'from bs4 import _soup'.
_s = _soup = BeautifulSoup
811
class BeautifulStoneSoup(BeautifulSoup):
    """Deprecated interface to an XML parser.

    Constructing an instance forces the `features` keyword argument to
    'xml', issues a DeprecationWarning and then defers to the
    BeautifulSoup constructor.
    """

    def __init__(self, *args, **kwargs):
        # Any caller-supplied 'features' value is overridden.
        kwargs['features'] = 'xml'
        message = (
            'The BeautifulStoneSoup class is deprecated. Instead of using '
            'it, pass features="xml" into the BeautifulSoup constructor.'
        )
        # stacklevel=2 attributes the warning to the code that
        # instantiated the class rather than to this constructor.
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        super().__init__(*args, **kwargs)
823
824
class StopParsing(Exception):
    """Raised by a TreeBuilder when it is unable to continue parsing."""
828
class FeatureNotFound(ValueError):
    """Raised by the BeautifulSoup constructor when no parser with the
    requested features is found.
    """
834
835
# If this file is run as a script, act as an HTML pretty-printer: the
# document is read from standard input and the prettified markup is
# printed. (sys is already imported at the top of the module.)
if __name__ == '__main__':
    soup = BeautifulSoup(sys.stdin)
    print(soup.prettify())
841