1"""Tests of the builder registry."""
2
3import unittest
4import warnings
5
6from bs4 import BeautifulSoup
7from bs4.builder import (
8    builder_registry as registry,
9    HTMLParserTreeBuilder,
10    TreeBuilderRegistry,
11)
12
13try:
14    from bs4.builder import HTML5TreeBuilder
15    HTML5LIB_PRESENT = True
16except ImportError:
17    HTML5LIB_PRESENT = False
18
19try:
20    from bs4.builder import (
21        LXMLTreeBuilderForXML,
22        LXMLTreeBuilder,
23        )
24    LXML_PRESENT = True
25except ImportError:
26    LXML_PRESENT = False
27
28
class BuiltInRegistryTest(unittest.TestCase):
    """Test the built-in registry with the default builders registered.

    lxml and html5lib are optional dependencies. Assertions that need
    them only run when the corresponding import at the top of this
    module succeeded (see LXML_PRESENT and HTML5LIB_PRESENT).
    """

    def test_combination(self):
        # Looking up a combination of features should give the builder
        # that advertises all of them.
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('fast', 'html'),
                             LXMLTreeBuilder)
            self.assertEqual(registry.lookup('permissive', 'xml'),
                             LXMLTreeBuilderForXML)

        # html.parser ships with Python, so this check is unconditional.
        self.assertEqual(registry.lookup('strict', 'html'),
                         HTMLParserTreeBuilder)

        if HTML5LIB_PRESENT:
            self.assertEqual(registry.lookup('html5lib', 'html'),
                             HTML5TreeBuilder)

    def test_lookup_by_markup_type(self):
        # The builder expected for a bare markup type depends on which
        # optional parsers are installed.
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
            self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
        else:
            # Without lxml, no registered builder is expected for 'xml'.
            self.assertIsNone(registry.lookup('xml'))
            if HTML5LIB_PRESENT:
                self.assertEqual(registry.lookup('html'), HTML5TreeBuilder)
            else:
                self.assertEqual(registry.lookup('html'),
                                 HTMLParserTreeBuilder)

    def test_named_library(self):
        # A builder can be looked up by the name of its parser library,
        # alone or combined with a markup type.
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('lxml', 'xml'),
                             LXMLTreeBuilderForXML)
            self.assertEqual(registry.lookup('lxml', 'html'),
                             LXMLTreeBuilder)
        if HTML5LIB_PRESENT:
            self.assertEqual(registry.lookup('html5lib'),
                             HTML5TreeBuilder)

        self.assertEqual(registry.lookup('html.parser'),
                         HTMLParserTreeBuilder)

    def test_beautifulsoup_constructor_does_lookup(self):
        # record=True captures any warnings issued while constructing
        # the soup objects instead of displaying them; this test does
        # not inspect them, so the recorded list is simply discarded.
        with warnings.catch_warnings(record=True):
            # You can pass in a string.
            BeautifulSoup("", features="html")
            # Or a list of strings.
            BeautifulSoup("", features=["html", "fast"])

        # You'll get an exception if BS can't find an appropriate
        # builder.
        with self.assertRaises(ValueError):
            BeautifulSoup("", features="no-such-feature")
85
class RegistryTest(unittest.TestCase):
    """Test the TreeBuilderRegistry class in general."""

    def setUp(self):
        # Every test starts from its own empty registry, independent of
        # the module-level registry of built-in builders.
        self.registry = TreeBuilderRegistry()

    def builder_for_features(self, *feature_list):
        """Create a stand-in builder class and register it.

        The class is named after its features (e.g. ``Builder_foo_bar``)
        and its only attribute is ``features``, set to the tuple of
        names passed in.

        :param feature_list: Feature names the new class advertises.
        :return: The newly created class, already registered with
            ``self.registry``.
        """
        cls = type('Builder_' + '_'.join(feature_list),
                   (object,), {'features': feature_list})

        self.registry.register(cls)
        return cls

    def test_register_with_no_features(self):
        builder = self.builder_for_features()

        # Since the builder advertises no features, you can't find it
        # by looking up features.
        self.assertIsNone(self.registry.lookup('foo'))

        # But you can find it by doing a lookup with no features, if
        # this happens to be the only registered builder.
        self.assertEqual(self.registry.lookup(), builder)

    def test_register_with_features_makes_lookup_succeed(self):
        builder = self.builder_for_features('foo', 'bar')
        self.assertEqual(self.registry.lookup('foo'), builder)
        self.assertEqual(self.registry.lookup('bar'), builder)

    def test_lookup_fails_when_no_builder_implements_feature(self):
        # Registered only for its side effect; 'baz' is not one of its
        # features.
        self.builder_for_features('foo', 'bar')
        self.assertIsNone(self.registry.lookup('baz'))

    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
        self.builder_for_features('foo')
        most_recent = self.builder_for_features('bar')
        self.assertEqual(self.registry.lookup(), most_recent)

    def test_lookup_fails_when_no_tree_builders_registered(self):
        self.assertIsNone(self.registry.lookup())

    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
        # Registration order matters here, so keep these calls in order.
        # Builders that have only one of the two requested features are
        # registered both before and after the ones that have both.
        self.builder_for_features('foo')
        self.builder_for_features('bar')
        has_both_early = self.builder_for_features('foo', 'bar', 'baz')
        has_both_late = self.builder_for_features('foo', 'bar', 'quux')
        self.builder_for_features('bar')
        self.builder_for_features('foo')

        # There are two builders featuring 'foo' and 'bar', but
        # the one that also features 'quux' was registered later.
        self.assertEqual(self.registry.lookup('foo', 'bar'),
                         has_both_late)

        # There is only one builder featuring 'foo', 'bar', and 'baz'.
        self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
                         has_both_early)

    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
        # Each feature is available, but no single builder offers both
        # 'bar' and 'baz'.
        self.builder_for_features('foo', 'bar')
        self.builder_for_features('foo', 'baz')
        self.assertIsNone(self.registry.lookup('bar', 'baz'))
148