1# This file is part of Hypothesis, which may be found at 2# https://github.com/HypothesisWorks/hypothesis/ 3# 4# Most of this work is copyright (C) 2013-2021 David R. MacIver 5# (david@drmaciver.com), but it contains contributions by others. See 6# CONTRIBUTING.rst for a full list of people who may hold copyright, and 7# consult the git log if you need to determine who owns an individual 8# contribution. 9# 10# This Source Code Form is subject to the terms of the Mozilla Public License, 11# v. 2.0. If a copy of the MPL was not distributed with this file, You can 12# obtain one at https://mozilla.org/MPL/2.0/. 13# 14# END HEADER 15# 16# SPDX-License-Identifier: MPL-2.0 17 18"""This example demonstrates testing a run length encoding scheme. That is, we 19take a sequence and represent it by a shorter sequence where each 'run' of 20consecutive equal elements is represented as a single element plus a count. So 21e.g. 22 23[1, 1, 1, 1, 2, 1] is represented as [[1, 4], [2, 1], [1, 1]] 24 25This demonstrates the useful decode(encode(x)) == x invariant that is often 26a fruitful source of testing with Hypothesis. 27 28It also has an example of testing invariants in response to changes in the 29underlying data. 30""" 31 32from hypothesis import assume, given, strategies as st 33 34 35def run_length_encode(seq): 36 """Encode a sequence as a new run-length encoded sequence.""" 37 if not seq: 38 return [] 39 # By starting off the count at zero we simplify the iteration logic 40 # slightly. 41 result = [[seq[0], 0]] 42 for s in seq: 43 if ( 44 # If you uncomment this line this branch will be skipped and we'll 45 # always append a new run of length 1. Note which tests fail. 46 # False and 47 s 48 == result[-1][0] 49 # Try uncommenting this line and see what problems occur: 50 # and result[-1][-1] < 2 51 ): 52 result[-1][1] += 1 53 else: 54 result.append([s, 1]) 55 return result 56 57 58def run_length_decode(seq): 59 """Take a previously encoded sequence and reconstruct the original from 60 it.""" 61 result = [] 62 for s, i in seq: 63 for _ in range(i): 64 result.append(s) 65 return result 66 67 68# We use lists of a type that should have a relatively high duplication rate, 69# otherwise we'd almost never get any runs. 70Lists = st.lists(st.integers(0, 10)) 71 72 73@given(Lists) 74def test_decodes_to_starting_sequence(ls): 75 """If we encode a sequence and then decode the result, we should get the 76 original sequence back. 77 78 Otherwise we've done something very wrong. 79 """ 80 assert run_length_decode(run_length_encode(ls)) == ls 81 82 83@given(Lists, st.data()) 84def test_duplicating_an_element_does_not_increase_length(ls, data): 85 """The previous test could be passed by simply returning the input sequence 86 so we need something that tests the compression property of our encoding. 87 88 In this test we deliberately introduce or extend a run and assert 89 that this does not increase the length of our encoding, because they 90 should be part of the same run in the final result. 91 """ 92 # We use assume to get a valid index into the list. We could also have used 93 # e.g. flatmap, but this is relatively straightforward and will tend to 94 # perform better. 95 assume(ls) 96 i = data.draw(st.integers(0, len(ls) - 1)) 97 ls2 = list(ls) 98 # duplicating the value at i right next to it guarantees they are part of 99 # the same run in the resulting compression. 100 ls2.insert(i, ls2[i]) 101 assert len(run_length_encode(ls2)) == len(run_length_encode(ls)) 102