"""Tests to ensure that the lxml tree builder generates good trees."""

import warnings

# lxml is an optional dependency: record whether it is importable (and which
# version) so the test classes below can be skipped cleanly when it is not.
try:
    import lxml.etree
    LXML_PRESENT = True
    LXML_VERSION = lxml.etree.LXML_VERSION
except ImportError:
    LXML_PRESENT = False
    LXML_VERSION = (0,)

if LXML_PRESENT:
    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML

from bs4 import BeautifulStoneSoup
from bs4.testing import skipIf
from bs4.testing import (
    HTMLTreeBuilderSmokeTest,
    XMLTreeBuilderSmokeTest,
    SoupTest,
    skipIf,
)


@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its tree builder.")
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    """See ``HTMLTreeBuilderSmokeTest``."""

    @property
    def default_builder(self):
        # Builder instance used by the inherited smoke tests.
        return LXMLTreeBuilder()

    def test_out_of_range_entity(self):
        # Numeric character references (two decimal, one hexadecimal) whose
        # values are far beyond the Unicode range are expected to vanish
        # from the output. The markup is assembled from pieces so that the
        # references cannot be collapsed by anything that decodes entities
        # in this source file.
        for reference in ("10000000000000", "x10000000000000", "1000000000"):
            self.assertSoupEquals(
                "<p>foo&#%s;bar</p>" % reference, "<p>foobar</p>")

    # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
    # test if an old version of lxml is installed.

    @skipIf(
        not LXML_PRESENT or LXML_VERSION < (2, 3, 5, 0),
        "Skipping doctype test for old version of lxml to avoid segfault.")
    def test_empty_doctype(self):
        soup = self.soup("<!DOCTYPE>")
        doctype = soup.contents[0]
        self.assertEqual("", doctype.strip())

    def test_beautifulstonesoup_is_xml_parser(self):
        # Make sure that the deprecated BSS class uses an xml builder
        # if one is installed.
        with warnings.catch_warnings(record=True) as w:
            # Record every warning regardless of the ambient filters
            # (e.g. -W ignore); otherwise the list below could be empty.
            warnings.simplefilter("always")
            soup = BeautifulStoneSoup("<b />")
        self.assertEqual("<b/>", str(soup.b))
        # With the "always" filter other warnings may be recorded too, so
        # look through all of them rather than assuming the first one.
        self.assertTrue(
            any("BeautifulStoneSoup class is deprecated" in str(item.message)
                for item in w))


@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its XML tree builder.")
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
    """See ``XMLTreeBuilderSmokeTest``."""

    @property
    def default_builder(self):
        # Builder instance used by the inherited XML smoke tests.
        return LXMLTreeBuilderForXML()