"""Tests of the builder registry."""

import unittest
import warnings

from bs4 import BeautifulSoup
from bs4.builder import (
    builder_registry as registry,
    HTMLParserTreeBuilder,
    TreeBuilderRegistry,
)

# html5lib and lxml are optional; remember whether their tree builders
# could be imported so the tests below can adapt their expectations.
try:
    from bs4.builder import HTML5TreeBuilder
    HTML5LIB_PRESENT = True
except ImportError:
    HTML5LIB_PRESENT = False

try:
    from bs4.builder import (
        LXMLTreeBuilderForXML,
        LXMLTreeBuilder,
    )
    LXML_PRESENT = True
except ImportError:
    LXML_PRESENT = False


class BuiltInRegistryTest(unittest.TestCase):
    """Test the built-in registry with the default builders registered."""

    def test_combination(self):
        """Looking up a combination of features finds the expected builder."""
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('fast', 'html'),
                             LXMLTreeBuilder)
            self.assertEqual(registry.lookup('permissive', 'xml'),
                             LXMLTreeBuilderForXML)
        self.assertEqual(registry.lookup('strict', 'html'),
                         HTMLParserTreeBuilder)
        if HTML5LIB_PRESENT:
            self.assertEqual(registry.lookup('html5lib', 'html'),
                             HTML5TreeBuilder)

    def test_lookup_by_markup_type(self):
        """Looking up a markup type alone depends on which parsers exist."""
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
            self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
        else:
            # Without lxml, no XML builder is expected, and the HTML
            # builder expected depends on whether html5lib is present.
            self.assertIsNone(registry.lookup('xml'))
            if HTML5LIB_PRESENT:
                self.assertEqual(registry.lookup('html'), HTML5TreeBuilder)
            else:
                self.assertEqual(registry.lookup('html'),
                                 HTMLParserTreeBuilder)

    def test_named_library(self):
        """A builder can be looked up by the name of its parser library."""
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('lxml', 'xml'),
                             LXMLTreeBuilderForXML)
            self.assertEqual(registry.lookup('lxml', 'html'),
                             LXMLTreeBuilder)
        if HTML5LIB_PRESENT:
            self.assertEqual(registry.lookup('html5lib'),
                             HTML5TreeBuilder)

        self.assertEqual(registry.lookup('html.parser'),
                         HTMLParserTreeBuilder)

    def test_beautifulsoup_constructor_does_lookup(self):
        """The BeautifulSoup constructor accepts feature names."""
        with warnings.catch_warnings(record=True):
            # This will create a warning about not explicitly
            # specifying a parser, but we'll ignore it.

            # You can pass in a string.
            BeautifulSoup("", features="html")
            # Or a list of strings.
            BeautifulSoup("", features=["html", "fast"])

        # You'll get an exception if BS can't find an appropriate
        # builder.
        with self.assertRaises(ValueError):
            BeautifulSoup("", features="no-such-feature")


class RegistryTest(unittest.TestCase):
    """Test the TreeBuilderRegistry class in general."""

    def setUp(self):
        """Give every test its own empty registry."""
        self.registry = TreeBuilderRegistry()

    def builder_for_features(self, *feature_list):
        """Create, register and return a stand-in builder class.

        The new class is named after the given features, exposes them
        as its ``features`` attribute, and is registered with
        ``self.registry`` before being returned.
        """
        cls = type('Builder_' + '_'.join(feature_list),
                   (object,), {'features': feature_list})

        self.registry.register(cls)
        return cls

    def test_register_with_no_features(self):
        """A builder with no features is only found by a bare lookup."""
        builder = self.builder_for_features()

        # Since the builder advertises no features, you can't find it
        # by looking up features.
        self.assertIsNone(self.registry.lookup('foo'))

        # But you can find it by doing a lookup with no features, if
        # this happens to be the only registered builder.
        self.assertEqual(self.registry.lookup(), builder)

    def test_register_with_features_makes_lookup_succeed(self):
        """Each advertised feature can be used to find the builder."""
        builder = self.builder_for_features('foo', 'bar')
        self.assertEqual(self.registry.lookup('foo'), builder)
        self.assertEqual(self.registry.lookup('bar'), builder)

    def test_lookup_fails_when_no_builder_implements_feature(self):
        """Looking up a feature nobody advertises finds nothing."""
        self.builder_for_features('foo', 'bar')
        self.assertIsNone(self.registry.lookup('baz'))

    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
        """A bare lookup finds the most recently registered builder."""
        self.builder_for_features('foo')
        most_recent = self.builder_for_features('bar')
        self.assertEqual(self.registry.lookup(), most_recent)

    def test_lookup_fails_when_no_tree_builders_registered(self):
        """A bare lookup on an empty registry finds nothing."""
        self.assertIsNone(self.registry.lookup())

    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
        """Among builders with every requested feature, the latest wins."""
        # Registration order matters here; only the two builders that
        # offer both 'foo' and 'bar' need to be referred to afterwards.
        self.builder_for_features('foo')
        self.builder_for_features('bar')
        has_both_early = self.builder_for_features('foo', 'bar', 'baz')
        has_both_late = self.builder_for_features('foo', 'bar', 'quux')
        self.builder_for_features('bar')
        self.builder_for_features('foo')

        # There are two builders featuring 'foo' and 'bar', but
        # the one that also features 'quux' was registered later.
        self.assertEqual(self.registry.lookup('foo', 'bar'),
                         has_both_late)

        # There is only one builder featuring 'foo', 'bar', and 'baz'.
        self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
                         has_both_early)

    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
        """No builder is found when none offers all requested features."""
        self.builder_for_features('foo', 'bar')
        self.builder_for_features('foo', 'baz')
        self.assertIsNone(self.registry.lookup('bar', 'baz'))