1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Most of this work is copyright (C) 2013-2021 David R. MacIver
5# (david@drmaciver.com), but it contains contributions by others. See
6# CONTRIBUTING.rst for a full list of people who may hold copyright, and
7# consult the git log if you need to determine who owns an individual
8# contribution.
9#
10# This Source Code Form is subject to the terms of the Mozilla Public License,
11# v. 2.0. If a copy of the MPL was not distributed with this file, You can
12# obtain one at https://mozilla.org/MPL/2.0/.
13#
14# END HEADER
15#
16# SPDX-License-Identifier: MPL-2.0
17
18"""This file demonstrates testing a binary search.
19
20It's a useful example because the result of the binary search is so clearly
21determined by the invariants it must satisfy, so we can simply test for those
22invariants.
23
24It also demonstrates the useful testing technique of testing how the answer
25should change (or not) in response to movements in the underlying data.
26"""
27
28from hypothesis import given, strategies as st
29
30
31def binary_search(ls, v):
32    """Take a list ls and a value v such that ls is sorted and v is comparable
33    with the elements of ls.
34
35    Return an index i such that 0 <= i <= len(v) with the properties:
36
37    1. ls.insert(i, v) is sorted
38    2. ls.insert(j, v) is not sorted for j < i
39    """
40    # Without this check we will get an index error on the next line when the
41    # list is empty.
42    if not ls:
43        return 0
44
45    # Without this check we will miss the case where the insertion point should
46    # be zero: The invariant we maintain in the next section is that lo is
47    # always strictly lower than the insertion point.
48    if v <= ls[0]:
49        return 0
50
51    # Invariant: There is no insertion point i with i <= lo
52    lo = 0
53
54    # Invariant: There is an insertion point i with i <= hi
55    hi = len(ls)
56    while lo + 1 < hi:
57        mid = (lo + hi) // 2
58        if v > ls[mid]:
59            # Inserting v anywhere below mid would result in an unsorted list
60            # because it's > the value at mid. Therefore mid is a valid new lo
61            lo = mid
62        # Uncommenting the following lines will cause this to return a valid
63        # insertion point which is not always minimal.
64        # elif v == ls[mid]:
65        #   return mid
66        else:
67            # Either v == ls[mid] in which case mid is a valid insertion point
68            # or v < ls[mid], in which case all valid insertion points must be
69            # < hi. Either way, mid is a valid new hi.
70            hi = mid
71    assert lo + 1 == hi
72    # We now know that there is a valid insertion point <= hi and there is no
73    # valid insertion point < hi because hi - 1 is lo. Therefore hi is the
74    # answer we were seeking
75    return hi
76
77
78def is_sorted(ls):
79    """Is this list sorted?"""
80    for i in range(len(ls) - 1):
81        if ls[i] > ls[i + 1]:
82            return False
83    return True
84
85
86Values = st.integers()
87
88# We generate arbitrary lists and turn this into generating sorting lists
89# by just sorting them.
90SortedLists = st.lists(Values).map(sorted)
91
92# We could also do it this way, but that would be a bad idea:
93# SortedLists = st.lists(Values).filter(is_sorted)
94# The problem is that Hypothesis will only generate long sorted lists with very
95# low probability, so we are much better off post-processing values into the
96# form we want than filtering them out.
97
98
99@given(ls=SortedLists, v=Values)
100def test_insert_is_sorted(ls, v):
101    """We test the first invariant: binary_search should return an index such
102    that inserting the value provided at that index would result in a sorted
103    set."""
104    ls.insert(binary_search(ls, v), v)
105    assert is_sorted(ls)
106
107
108@given(ls=SortedLists, v=Values)
109def test_is_minimal(ls, v):
110    """We test the second invariant: binary_search should return an index such
111    that no smaller index is a valid insertion point for v."""
112    for i in range(binary_search(ls, v)):
113        ls2 = list(ls)
114        ls2.insert(i, v)
115        assert not is_sorted(ls2)
116
117
118@given(ls=SortedLists, v=Values)
119def test_inserts_into_same_place_twice(ls, v):
120    """In this we test a *consequence* of the second invariant: When we insert
121    a value into a list twice, the insertion point should be the same both
122    times. This is because we know that v is > the previous element and == the
123    next element.
124
125    In theory if the former passes, this should always pass. In practice,
126    failures are detected by this test with much higher probability because it
127    deliberately puts the data into a shape that is likely to trigger a
128    failure.
129
130    This is an instance of a good general category of test: Testing how the
131    function moves in responses to changes in the underlying data.
132    """
133    i = binary_search(ls, v)
134    ls.insert(i, v)
135    assert binary_search(ls, v) == i
136