xref: /openbmc/openbmc/poky/bitbake/lib/hashserv/tests.py (revision c9537f57ab488bf5d90132917b0184e2527970a5)
1#! /usr/bin/env python3
2#
3# Copyright (C) 2018-2019 Garmin Ltd.
4#
5# SPDX-License-Identifier: GPL-2.0-only
6#
7
8from . import create_server, create_client
9from .server import DEFAULT_ANON_PERMS, ALL_PERMISSIONS
10from bb.asyncrpc import InvokeError
11import hashlib
12import logging
13import multiprocessing
14import os
15import sys
16import tempfile
17import threading
18import unittest
19import socket
20import time
21import signal
22import subprocess
23import json
24import re
25from pathlib import Path
26
27
28THIS_DIR = Path(__file__).parent
29BIN_DIR = THIS_DIR.parent.parent / "bin"
30
def server_prefunc(server, idx):
    """Set up per-server logging and output redirection.

    This is the default ``prefunc`` handed to ``serve_as_process`` by
    ``HashEquivalenceTestSetup.start_server``.

    Root logging is configured at DEBUG level into ``bbhashserv-<idx>.log``,
    one debug line is emitted through ``server.logger``, and finally both
    ``sys.stdout`` and ``sys.stderr`` are pointed at
    ``bbhashserv-stdout-<idx>.log``.
    """
    log_file = 'bbhashserv-%d.log' % idx
    logging.basicConfig(
        level=logging.DEBUG,
        filename=log_file,
        filemode='w',
        format='%(levelname)s %(filename)s:%(lineno)d %(message)s',
    )
    server.logger.debug("Running server %d" % idx)

    # The stream is not closed here: it keeps backing stdout and stderr
    # after this function returns.
    redirected = open('bbhashserv-stdout-%d.log' % idx, 'w')
    sys.stdout = redirected
    sys.stderr = redirected
37
class HashEquivalenceTestSetup(object):
    """Fixture mixin that starts hash equivalence servers and clients.

    The class is written as a mixin: it calls ``self.addCleanup``,
    ``self.assertEqual``, ``self.assertRaises`` and ``self.get_server_addr``,
    none of which are defined here, so the concrete test class has to
    provide them.
    """

    # Method name sent with every hash query/report made by these helpers.
    METHOD = 'TestMethod'

    # Counters used to derive unique server addresses / database names and
    # unique user names.
    server_index = 0
    client_index = 0

    def start_server(self, dbpath=None, upstream=None, read_only=False, prefunc=server_prefunc, anon_perms=DEFAULT_ANON_PERMS, admin_username=None, admin_password=None):
        """Create a server, run it as a separate process and return it.

        When ``dbpath`` is None a fresh path from ``make_dbpath()`` is used.
        The chosen path is stored on the returned object as ``dbpath``. A
        cleanup is registered that terminates and joins the server process
        unless it has already exited.
        """
        self.server_index += 1
        index = self.server_index
        database = self.make_dbpath() if dbpath is None else dbpath

        server = create_server(
            self.get_server_addr(index),
            database,
            upstream=upstream,
            read_only=read_only,
            anon_perms=anon_perms,
            admin_username=admin_username,
            admin_password=admin_password,
        )
        server.dbpath = database
        server.serve_as_process(prefunc=prefunc, args=(index,))

        def stop_server():
            # Nothing to do when the process has already finished
            if server.process.exitcode is None:
                server.process.terminate()
                server.process.join()

        self.addCleanup(stop_server)
        return server

    def make_dbpath(self):
        """Return a database path in the temp directory named after the current server index."""
        filename = "db%d.sqlite" % self.server_index
        return os.path.join(self.temp_dir.name, filename)

    def start_client(self, server_address, username=None, password=None):
        """Create a client for ``server_address`` and close it at cleanup time."""
        client = create_client(server_address, username=username, password=password)
        self.addCleanup(lambda: client.close())
        return client

    def start_test_server(self):
        """Start the default server, keep it as ``self.server`` and return its address."""
        self.server = self.start_server()
        return self.server.address

    def start_auth_server(self):
        """Start a second server on the default server's database with auth enabled.

        Anonymous users get no permissions and an ``admin``/``password``
        account is configured. The server address is stored in
        ``self.auth_server_address`` and an admin client is stored in
        ``self.admin_client`` and returned.
        """
        server = self.start_server(self.server.dbpath, anon_perms=[], admin_username="admin", admin_password="password")
        self.auth_server_address = server.address
        admin = self.start_client(server.address, username="admin", password="password")
        self.admin_client = admin
        return admin

    def auth_client(self, user):
        """Return a client for the auth server logged in with ``user``'s name and token."""
        return self.start_client(self.auth_server_address, user["username"], user["token"])

    def setUp(self):
        """Create the temp directory, the default server and the default client."""
        self.temp_dir = tempfile.TemporaryDirectory(prefix='bb-hashserv')
        self.addCleanup(self.temp_dir.cleanup)

        self.server_address = self.start_test_server()
        self.client = self.start_client(self.server_address)

    def assertClientGetHash(self, client, taskhash, unihash):
        """Assert that ``client`` resolves ``taskhash`` to ``unihash`` for ``METHOD``."""
        self.assertEqual(client.get_unihash(self.METHOD, taskhash), unihash)

    def assertUserPerms(self, user, permissions):
        """Assert that the server reports exactly ``permissions`` for ``user``."""
        with self.auth_client(user) as client:
            reported = client.get_user()
            expected = {
                "username": user["username"],
                "permissions": permissions,
            }
            self.assertEqual(reported, expected)

    def assertUserCanAuth(self, user):
        """Authenticate as ``user`` on a fresh connection; any error propagates."""
        with self.start_client(self.auth_server_address) as client:
            client.auth(user["username"], user["token"])

    def assertUserCannotAuth(self, user):
        """Assert that authenticating as ``user`` raises ``InvokeError``."""
        with self.start_client(self.auth_server_address) as client:
            with self.assertRaises(InvokeError):
                client.auth(user["username"], user["token"])

    def create_test_hash(self, client):
        """Report one fixed hash through ``client`` and return it.

        Returns the ``(taskhash, outhash, unihash)`` tuple that was reported.
        """
        taskhash = '35788efcb8dfb0a02659d81cf2bfd695fb30faf9'
        outhash = '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f'
        unihash = 'f46d3fbb439bd9b921095da657a4de906510d2cd'

        # The taskhash must be unknown before it is reported
        self.assertClientGetHash(client, taskhash, None)

        reply = client.report_unihash(taskhash, self.METHOD, outhash, unihash)
        self.assertEqual(reply['unihash'], unihash, 'Server returned bad unihash')
        return taskhash, outhash, unihash

    def run_hashclient(self, args, **kwargs):
        """Run ``bitbake-hashclient`` with ``args`` and return the completed process.

        stderr is merged into stdout and the combined output is printed.
        Extra keyword arguments are forwarded to ``subprocess.run``. If that
        call raises ``CalledProcessError`` its output is printed and the
        exception is re-raised.
        """
        command = [BIN_DIR / "bitbake-hashclient"] + args
        try:
            proc = subprocess.run(
                command,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                encoding="utf-8",
                **kwargs
            )
        except subprocess.CalledProcessError as e:
            print(e.output)
            raise

        print(proc.stdout)
        return proc
150
151
152class HashEquivalenceCommonTests(object):
153    def auth_perms(self, *permissions):
154        self.client_index += 1
155        user = self.create_user(f"user-{self.client_index}", permissions)
156        return self.auth_client(user)
157
158    def create_user(self, username, permissions, *, client=None):
159        def remove_user(username):
160            try:
161                self.admin_client.delete_user(username)
162            except bb.asyncrpc.InvokeError:
163                pass
164
165        if client is None:
166            client = self.admin_client
167
168        user = client.new_user(username, permissions)
169        self.addCleanup(remove_user, username)
170
171        return user
172
173    def test_create_hash(self):
174        return self.create_test_hash(self.client)
175
176    def test_create_equivalent(self):
177        # Tests that a second reported task with the same outhash will be
178        # assigned the same unihash
179        taskhash = '53b8dce672cb6d0c73170be43f540460bfc347b4'
180        outhash = '5a9cb1649625f0bf41fc7791b635cd9c2d7118c7f021ba87dcd03f72b67ce7a8'
181        unihash = 'f37918cc02eb5a520b1aff86faacbc0a38124646'
182
183        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
184        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')
185
186        # Report a different task with the same outhash. The returned unihash
187        # should match the first task
188        taskhash2 = '3bf6f1e89d26205aec90da04854fbdbf73afe6b4'
189        unihash2 = 'af36b199320e611fbb16f1f277d3ee1d619ca58b'
190        result = self.client.report_unihash(taskhash2, self.METHOD, outhash, unihash2)
191        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')
192
193    def test_duplicate_taskhash(self):
194        # Tests that duplicate reports of the same taskhash with different
195        # outhash & unihash always return the unihash from the first reported
196        # taskhash
197        taskhash = '8aa96fcffb5831b3c2c0cb75f0431e3f8b20554a'
198        outhash = 'afe240a439959ce86f5e322f8c208e1fedefea9e813f2140c81af866cc9edf7e'
199        unihash = '218e57509998197d570e2c98512d0105985dffc9'
200        self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
201
202        self.assertClientGetHash(self.client, taskhash, unihash)
203
204        outhash2 = '0904a7fe3dc712d9fd8a74a616ddca2a825a8ee97adf0bd3fc86082c7639914d'
205        unihash2 = 'ae9a7d252735f0dafcdb10e2e02561ca3a47314c'
206        self.client.report_unihash(taskhash, self.METHOD, outhash2, unihash2)
207
208        self.assertClientGetHash(self.client, taskhash, unihash)
209
210        outhash3 = '77623a549b5b1a31e3732dfa8fe61d7ce5d44b3370f253c5360e136b852967b4'
211        unihash3 = '9217a7d6398518e5dc002ed58f2cbbbc78696603'
212        self.client.report_unihash(taskhash, self.METHOD, outhash3, unihash3)
213
214        self.assertClientGetHash(self.client, taskhash, unihash)
215
216    def test_remove_taskhash(self):
217        taskhash, outhash, unihash = self.create_test_hash(self.client)
218        result = self.client.remove({"taskhash": taskhash})
219        self.assertGreater(result["count"], 0)
220        self.assertClientGetHash(self.client, taskhash, None)
221
222        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash)
223        self.assertIsNone(result_outhash)
224
225    def test_remove_unihash(self):
226        taskhash, outhash, unihash = self.create_test_hash(self.client)
227        result = self.client.remove({"unihash": unihash})
228        self.assertGreater(result["count"], 0)
229        self.assertClientGetHash(self.client, taskhash, None)
230
231    def test_remove_outhash(self):
232        taskhash, outhash, unihash = self.create_test_hash(self.client)
233        result = self.client.remove({"outhash": outhash})
234        self.assertGreater(result["count"], 0)
235
236        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash)
237        self.assertIsNone(result_outhash)
238
239    def test_remove_method(self):
240        taskhash, outhash, unihash = self.create_test_hash(self.client)
241        result = self.client.remove({"method": self.METHOD})
242        self.assertGreater(result["count"], 0)
243        self.assertClientGetHash(self.client, taskhash, None)
244
245        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash)
246        self.assertIsNone(result_outhash)
247
248    def test_clean_unused(self):
249        taskhash, outhash, unihash = self.create_test_hash(self.client)
250
251        # Clean the database, which should not remove anything because all hashes an in-use
252        result = self.client.clean_unused(0)
253        self.assertEqual(result["count"], 0)
254        self.assertClientGetHash(self.client, taskhash, unihash)
255
256        # Remove the unihash. The row in the outhash table should still be present
257        self.client.remove({"unihash": unihash})
258        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash, False)
259        self.assertIsNotNone(result_outhash)
260
261        # Now clean with no minimum age which will remove the outhash
262        result = self.client.clean_unused(0)
263        self.assertEqual(result["count"], 1)
264        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash, False)
265        self.assertIsNone(result_outhash)
266
267    def test_huge_message(self):
268        # Simple test that hashes can be created
269        taskhash = 'c665584ee6817aa99edfc77a44dd853828279370'
270        outhash = '3c979c3db45c569f51ab7626a4651074be3a9d11a84b1db076f5b14f7d39db44'
271        unihash = '90e9bc1d1f094c51824adca7f8ea79a048d68824'
272
273        self.assertClientGetHash(self.client, taskhash, None)
274
275        siginfo = "0" * (self.client.max_chunk * 4)
276
277        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash, {
278            'outhash_siginfo': siginfo
279        })
280        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')
281
282        result_unihash = self.client.get_taskhash(self.METHOD, taskhash, True)
283        self.assertEqual(result_unihash['taskhash'], taskhash)
284        self.assertEqual(result_unihash['unihash'], unihash)
285        self.assertEqual(result_unihash['method'], self.METHOD)
286
287        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash)
288        self.assertEqual(result_outhash['taskhash'], taskhash)
289        self.assertEqual(result_outhash['method'], self.METHOD)
290        self.assertEqual(result_outhash['unihash'], unihash)
291        self.assertEqual(result_outhash['outhash'], outhash)
292        self.assertEqual(result_outhash['outhash_siginfo'], siginfo)
293
294    def test_stress(self):
295        def query_server(failures):
296            client = Client(self.server_address)
297            try:
298                for i in range(1000):
299                    taskhash = hashlib.sha256()
300                    taskhash.update(str(i).encode('utf-8'))
301                    taskhash = taskhash.hexdigest()
302                    result = client.get_unihash(self.METHOD, taskhash)
303                    if result != taskhash:
304                        failures.append("taskhash mismatch: %s != %s" % (result, taskhash))
305            finally:
306                client.close()
307
308        # Report hashes
309        for i in range(1000):
310            taskhash = hashlib.sha256()
311            taskhash.update(str(i).encode('utf-8'))
312            taskhash = taskhash.hexdigest()
313            self.client.report_unihash(taskhash, self.METHOD, taskhash, taskhash)
314
315        failures = []
316        threads = [threading.Thread(target=query_server, args=(failures,)) for t in range(100)]
317
318        for t in threads:
319            t.start()
320
321        for t in threads:
322            t.join()
323
324        self.assertFalse(failures)
325
    def test_upstream_server(self):
        """Exercise a downstream server that proxies to the default test server."""
        # Tests upstream server support. This is done by creating two servers
        # that share a database file. The downstream server has its upstream
        # set to the test server, whereas the side server doesn't. This allows
        # verification that the hash requests are being proxied to the upstream
        # server by verifying that they appear on the downstream client, but not
        # the side client. It also verifies that the results are pulled into
        # the downstream database by checking that the downstream and side servers
        # match after the downstream is done waiting for all backfill tasks
        down_server = self.start_server(upstream=self.server_address)
        down_client = self.start_client(down_server.address)
        side_server = self.start_server(dbpath=down_server.dbpath)
        side_client = self.start_client(side_server.address)

        def check_hash(taskhash, unihash, old_sidehash):
            # old_sidehash is what the side server is expected to return
            # before the downstream server has backfilled this taskhash
            nonlocal down_client
            nonlocal side_client

            # check upstream server
            self.assertClientGetHash(self.client, taskhash, unihash)

            # Hash should *not* be present on the side server
            self.assertClientGetHash(side_client, taskhash, old_sidehash)

            # Hash should be present on the downstream server, since it
            # will defer to the upstream server. This will trigger
            # the backfill in the downstream server
            self.assertClientGetHash(down_client, taskhash, unihash)

            # After waiting for the downstream client to finish backfilling the
            # task from the upstream server, it should appear in the side server
            # since the database is populated
            down_client.backfill_wait()
            self.assertClientGetHash(side_client, taskhash, unihash)

        # Basic report
        taskhash = '8aa96fcffb5831b3c2c0cb75f0431e3f8b20554a'
        outhash = 'afe240a439959ce86f5e322f8c208e1fedefea9e813f2140c81af866cc9edf7e'
        unihash = '218e57509998197d570e2c98512d0105985dffc9'
        self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)

        check_hash(taskhash, unihash, None)

        # Duplicated taskhash with multiple output hashes and unihashes.
        # All servers should agree with the originally reported hash
        outhash2 = '0904a7fe3dc712d9fd8a74a616ddca2a825a8ee97adf0bd3fc86082c7639914d'
        unihash2 = 'ae9a7d252735f0dafcdb10e2e02561ca3a47314c'
        self.client.report_unihash(taskhash, self.METHOD, outhash2, unihash2)

        check_hash(taskhash, unihash, unihash)

        # Report an equivalent task. The sideload will originally report
        # no unihash until backfilled
        taskhash3 = "044c2ec8aaf480685a00ff6ff49e6162e6ad34e1"
        unihash3 = "def64766090d28f627e816454ed46894bb3aab36"
        self.client.report_unihash(taskhash3, self.METHOD, outhash, unihash3)

        check_hash(taskhash3, unihash, None)

        # Test that reporting a unihash in the downstream client isn't
        # propagating to the upstream server
        taskhash4 = "e3da00593d6a7fb435c7e2114976c59c5fd6d561"
        outhash4 = "1cf8713e645f491eb9c959d20b5cae1c47133a292626dda9b10709857cbe688a"
        unihash4 = "3b5d3d83f07f259e9086fcb422c855286e18a57d"
        down_client.report_unihash(taskhash4, self.METHOD, outhash4, unihash4)
        down_client.backfill_wait()

        self.assertClientGetHash(down_client, taskhash4, unihash4)
        self.assertClientGetHash(side_client, taskhash4, unihash4)
        self.assertClientGetHash(self.client, taskhash4, None)

        # Test that reporting a unihash in the downstream is able to find a
        # match which was previously reported to the upstream server
        taskhash5 = '35788efcb8dfb0a02659d81cf2bfd695fb30faf9'
        outhash5 = '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f'
        unihash5 = 'f46d3fbb439bd9b921095da657a4de906510d2cd'
        result = self.client.report_unihash(taskhash5, self.METHOD, outhash5, unihash5)

        taskhash6 = '35788efcb8dfb0a02659d81cf2bfd695fb30fafa'
        unihash6 = 'f46d3fbb439bd9b921095da657a4de906510d2ce'
        result = down_client.report_unihash(taskhash6, self.METHOD, outhash5, unihash6)
        self.assertEqual(result['unihash'], unihash5, 'Server failed to copy unihash from upstream')

        # Tests read-through from the upstream server: get_taskhash with its
        # third argument set to True
        # NOTE(review): the original comment ended mid-sentence ("from server
        # with"); confirm what the third argument selects
        taskhash7 = '9d81d76242cc7cfaf7bf74b94b9cd2e29324ed74'
        outhash7 = '8470d56547eea6236d7c81a644ce74670ca0bbda998e13c629ef6bb3f0d60b69'
        unihash7 = '05d2a63c81e32f0a36542ca677e8ad852365c538'
        self.client.report_unihash(taskhash7, self.METHOD, outhash7, unihash7)

        result = down_client.get_taskhash(self.METHOD, taskhash7, True)
        self.assertEqual(result['unihash'], unihash7, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['outhash'], outhash7, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['taskhash'], taskhash7, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['method'], self.METHOD)

        # Same read-through check, this time through get_outhash
        taskhash8 = '86978a4c8c71b9b487330b0152aade10c1ee58aa'
        outhash8 = 'ca8c128e9d9e4a28ef24d0508aa20b5cf880604eacd8f65c0e366f7e0cc5fbcf'
        unihash8 = 'd8bcf25369d40590ad7d08c84d538982f2023e01'
        self.client.report_unihash(taskhash8, self.METHOD, outhash8, unihash8)

        result = down_client.get_outhash(self.METHOD, outhash8, taskhash8)
        self.assertEqual(result['unihash'], unihash8, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['outhash'], outhash8, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['taskhash'], taskhash8, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['method'], self.METHOD)

        # And through get_taskhash with its third argument set to False; the
        # outhash is not checked in this case
        taskhash9 = 'ae6339531895ddf5b67e663e6a374ad8ec71d81c'
        outhash9 = 'afc78172c81880ae10a1fec994b5b4ee33d196a001a1b66212a15ebe573e00b5'
        unihash9 = '6662e699d6e3d894b24408ff9a4031ef9b038ee8'
        self.client.report_unihash(taskhash9, self.METHOD, outhash9, unihash9)

        result = down_client.get_taskhash(self.METHOD, taskhash9, False)
        self.assertEqual(result['unihash'], unihash9, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['taskhash'], taskhash9, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['method'], self.METHOD)
441
442    def test_unihash_exsits(self):
443        taskhash, outhash, unihash = self.create_test_hash(self.client)
444        self.assertTrue(self.client.unihash_exists(unihash))
445        self.assertFalse(self.client.unihash_exists('6662e699d6e3d894b24408ff9a4031ef9b038ee8'))
446
447    def test_ro_server(self):
448        rw_server = self.start_server()
449        rw_client = self.start_client(rw_server.address)
450
451        ro_server = self.start_server(dbpath=rw_server.dbpath, read_only=True)
452        ro_client = self.start_client(ro_server.address)
453
454        # Report a hash via the read-write server
455        taskhash = '35788efcb8dfb0a02659d81cf2bfd695fb30faf9'
456        outhash = '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f'
457        unihash = 'f46d3fbb439bd9b921095da657a4de906510d2cd'
458
459        result = rw_client.report_unihash(taskhash, self.METHOD, outhash, unihash)
460        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')
461
462        # Check the hash via the read-only server
463        self.assertClientGetHash(ro_client, taskhash, unihash)
464
465        # Ensure that reporting via the read-only server fails
466        taskhash2 = 'c665584ee6817aa99edfc77a44dd853828279370'
467        outhash2 = '3c979c3db45c569f51ab7626a4651074be3a9d11a84b1db076f5b14f7d39db44'
468        unihash2 = '90e9bc1d1f094c51824adca7f8ea79a048d68824'
469
470        result = ro_client.report_unihash(taskhash2, self.METHOD, outhash2, unihash2)
471        self.assertEqual(result['unihash'], unihash2)
472
473        # Ensure that the database was not modified
474        self.assertClientGetHash(rw_client, taskhash2, None)
475
476
477    def test_slow_server_start(self):
478        # Ensures that the server will exit correctly even if it gets a SIGTERM
479        # before entering the main loop
480
481        event = multiprocessing.Event()
482
483        def prefunc(server, idx):
484            nonlocal event
485            server_prefunc(server, idx)
486            event.wait()
487
488        def do_nothing(signum, frame):
489            pass
490
491        old_signal = signal.signal(signal.SIGTERM, do_nothing)
492        self.addCleanup(signal.signal, signal.SIGTERM, old_signal)
493
494        server = self.start_server(prefunc=prefunc)
495        server.process.terminate()
496        time.sleep(30)
497        event.set()
498        server.process.join(300)
499        self.assertIsNotNone(server.process.exitcode, "Server did not exit in a timely manner!")
500
501    def test_diverging_report_race(self):
502        # Tests that a reported task will correctly pick up an updated unihash
503
504        # This is a baseline report added to the database to ensure that there
505        # is something to match against as equivalent
506        outhash1 = 'afd11c366050bcd75ad763e898e4430e2a60659b26f83fbb22201a60672019fa'
507        taskhash1 = '3bde230c743fc45ab61a065d7a1815fbfa01c4740e4c895af2eb8dc0f684a4ab'
508        unihash1 = '3bde230c743fc45ab61a065d7a1815fbfa01c4740e4c895af2eb8dc0f684a4ab'
509        result = self.client.report_unihash(taskhash1, self.METHOD, outhash1, unihash1)
510
511        # Add a report that is equivalent to Task 1. It should ignore the
512        # provided unihash and report the unihash from task 1
513        taskhash2 = '6259ae8263bd94d454c086f501c37e64c4e83cae806902ca95b4ab513546b273'
514        unihash2 = taskhash2
515        result = self.client.report_unihash(taskhash2, self.METHOD, outhash1, unihash2)
516        self.assertEqual(result['unihash'], unihash1)
517
518        # Add another report for Task 2, but with a different outhash (e.g. the
519        # task is non-deterministic). It should still be marked with the Task 1
520        # unihash because it has the Task 2 taskhash, which is equivalent to
521        # Task 1
522        outhash3 = 'd2187ee3a8966db10b34fe0e863482288d9a6185cb8ef58a6c1c6ace87a2f24c'
523        result = self.client.report_unihash(taskhash2, self.METHOD, outhash3, unihash2)
524        self.assertEqual(result['unihash'], unihash1)
525
526
527    def test_diverging_report_reverse_race(self):
528        # Same idea as the previous test, but Tasks 2 and 3 are reported in
529        # reverse order the opposite order
530
531        outhash1 = 'afd11c366050bcd75ad763e898e4430e2a60659b26f83fbb22201a60672019fa'
532        taskhash1 = '3bde230c743fc45ab61a065d7a1815fbfa01c4740e4c895af2eb8dc0f684a4ab'
533        unihash1 = '3bde230c743fc45ab61a065d7a1815fbfa01c4740e4c895af2eb8dc0f684a4ab'
534        result = self.client.report_unihash(taskhash1, self.METHOD, outhash1, unihash1)
535
536        taskhash2 = '6259ae8263bd94d454c086f501c37e64c4e83cae806902ca95b4ab513546b273'
537        unihash2 = taskhash2
538
539        # Report Task 3 first. Since there is nothing else in the database it
540        # will use the client provided unihash
541        outhash3 = 'd2187ee3a8966db10b34fe0e863482288d9a6185cb8ef58a6c1c6ace87a2f24c'
542        result = self.client.report_unihash(taskhash2, self.METHOD, outhash3, unihash2)
543        self.assertEqual(result['unihash'], unihash2)
544
545        # Report Task 2. This is equivalent to Task 1 but there is already a mapping for
546        # taskhash2 so it will report unihash2
547        result = self.client.report_unihash(taskhash2, self.METHOD, outhash1, unihash2)
548        self.assertEqual(result['unihash'], unihash2)
549
550        # The originally reported unihash for Task 3 should be unchanged even if it
551        # shares a taskhash with Task 2
552        self.assertClientGetHash(self.client, taskhash2, unihash2)
553
554    def test_get_unihash_batch(self):
555        TEST_INPUT = (
556            # taskhash                                   outhash                                                            unihash
557            ('8aa96fcffb5831b3c2c0cb75f0431e3f8b20554a', 'afe240a439959ce86f5e322f8c208e1fedefea9e813f2140c81af866cc9edf7e','218e57509998197d570e2c98512d0105985dffc9'),
558            # Duplicated taskhash with multiple output hashes and unihashes.
559            ('8aa96fcffb5831b3c2c0cb75f0431e3f8b20554a', '0904a7fe3dc712d9fd8a74a616ddca2a825a8ee97adf0bd3fc86082c7639914d', 'ae9a7d252735f0dafcdb10e2e02561ca3a47314c'),
560            # Equivalent hash
561            ("044c2ec8aaf480685a00ff6ff49e6162e6ad34e1", '0904a7fe3dc712d9fd8a74a616ddca2a825a8ee97adf0bd3fc86082c7639914d', "def64766090d28f627e816454ed46894bb3aab36"),
562            ("e3da00593d6a7fb435c7e2114976c59c5fd6d561", "1cf8713e645f491eb9c959d20b5cae1c47133a292626dda9b10709857cbe688a", "3b5d3d83f07f259e9086fcb422c855286e18a57d"),
563            ('35788efcb8dfb0a02659d81cf2bfd695fb30faf9', '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f', 'f46d3fbb439bd9b921095da657a4de906510d2cd'),
564            ('35788efcb8dfb0a02659d81cf2bfd695fb30fafa', '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f', 'f46d3fbb439bd9b921095da657a4de906510d2ce'),
565            ('9d81d76242cc7cfaf7bf74b94b9cd2e29324ed74', '8470d56547eea6236d7c81a644ce74670ca0bbda998e13c629ef6bb3f0d60b69', '05d2a63c81e32f0a36542ca677e8ad852365c538'),
566        )
567        EXTRA_QUERIES = (
568            "6b6be7a84ab179b4240c4302518dc3f6",
569        )
570
571        for taskhash, outhash, unihash in TEST_INPUT:
572            self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
573
574
575        result = self.client.get_unihash_batch(
576            [(self.METHOD, data[0]) for data in TEST_INPUT] +
577            [(self.METHOD, e) for e in EXTRA_QUERIES]
578        )
579
580        self.assertListEqual(result, [
581            "218e57509998197d570e2c98512d0105985dffc9",
582            "218e57509998197d570e2c98512d0105985dffc9",
583            "218e57509998197d570e2c98512d0105985dffc9",
584            "3b5d3d83f07f259e9086fcb422c855286e18a57d",
585            "f46d3fbb439bd9b921095da657a4de906510d2cd",
586            "f46d3fbb439bd9b921095da657a4de906510d2cd",
587            "05d2a63c81e32f0a36542ca677e8ad852365c538",
588            None,
589        ])
590
591    def test_unihash_exists_batch(self):
592        TEST_INPUT = (
593            # taskhash                                   outhash                                                            unihash
594            ('8aa96fcffb5831b3c2c0cb75f0431e3f8b20554a', 'afe240a439959ce86f5e322f8c208e1fedefea9e813f2140c81af866cc9edf7e','218e57509998197d570e2c98512d0105985dffc9'),
595            # Duplicated taskhash with multiple output hashes and unihashes.
596            ('8aa96fcffb5831b3c2c0cb75f0431e3f8b20554a', '0904a7fe3dc712d9fd8a74a616ddca2a825a8ee97adf0bd3fc86082c7639914d', 'ae9a7d252735f0dafcdb10e2e02561ca3a47314c'),
597            # Equivalent hash
598            ("044c2ec8aaf480685a00ff6ff49e6162e6ad34e1", '0904a7fe3dc712d9fd8a74a616ddca2a825a8ee97adf0bd3fc86082c7639914d', "def64766090d28f627e816454ed46894bb3aab36"),
599            ("e3da00593d6a7fb435c7e2114976c59c5fd6d561", "1cf8713e645f491eb9c959d20b5cae1c47133a292626dda9b10709857cbe688a", "3b5d3d83f07f259e9086fcb422c855286e18a57d"),
600            ('35788efcb8dfb0a02659d81cf2bfd695fb30faf9', '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f', 'f46d3fbb439bd9b921095da657a4de906510d2cd'),
601            ('35788efcb8dfb0a02659d81cf2bfd695fb30fafa', '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f', 'f46d3fbb439bd9b921095da657a4de906510d2ce'),
602            ('9d81d76242cc7cfaf7bf74b94b9cd2e29324ed74', '8470d56547eea6236d7c81a644ce74670ca0bbda998e13c629ef6bb3f0d60b69', '05d2a63c81e32f0a36542ca677e8ad852365c538'),
603        )
604        EXTRA_QUERIES = (
605            "6b6be7a84ab179b4240c4302518dc3f6",
606        )
607
608        result_unihashes = set()
609
610
611        for taskhash, outhash, unihash in TEST_INPUT:
612            result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
613            result_unihashes.add(result["unihash"])
614
615        query = []
616        expected = []
617
618        for _, _, unihash in TEST_INPUT:
619            query.append(unihash)
620            expected.append(unihash in result_unihashes)
621
622
623        for unihash in EXTRA_QUERIES:
624            query.append(unihash)
625            expected.append(False)
626
627        result = self.client.unihash_exists_batch(query)
628        self.assertListEqual(result, expected)
629
630    def test_auth_read_perms(self):
631        admin_client = self.start_auth_server()
632
633        # Create hashes with non-authenticated server
634        taskhash, outhash, unihash = self.create_test_hash(self.client)
635
636        # Validate hash can be retrieved using authenticated client
637        with self.auth_perms("@read") as client:
638            self.assertClientGetHash(client, taskhash, unihash)
639
640        with self.auth_perms() as client, self.assertRaises(InvokeError):
641            self.assertClientGetHash(client, taskhash, unihash)
642
643    def test_auth_report_perms(self):
644        admin_client = self.start_auth_server()
645
646        # Without read permission, the user is completely denied
647        with self.auth_perms() as client, self.assertRaises(InvokeError):
648            self.create_test_hash(client)
649
650        # Read permission allows the call to succeed, but it doesn't record
651        # anythin in the database
652        with self.auth_perms("@read") as client:
653            taskhash, outhash, unihash = self.create_test_hash(client)
654            self.assertClientGetHash(client, taskhash, None)
655
656        # Report permission alone is insufficient
657        with self.auth_perms("@report") as client, self.assertRaises(InvokeError):
658            self.create_test_hash(client)
659
660        # Read and report permission actually modify the database
661        with self.auth_perms("@read", "@report") as client:
662            taskhash, outhash, unihash = self.create_test_hash(client)
663            self.assertClientGetHash(client, taskhash, unihash)
664
665    def test_auth_no_token_refresh_from_anon_user(self):
666        self.start_auth_server()
667
668        with self.start_client(self.auth_server_address) as client, self.assertRaises(InvokeError):
669            client.refresh_token()
670
671    def test_auth_self_token_refresh(self):
672        admin_client = self.start_auth_server()
673
674        # Create a new user with no permissions
675        user = self.create_user("test-user", [])
676
677        with self.auth_client(user) as client:
678            new_user = client.refresh_token()
679
680        self.assertEqual(user["username"], new_user["username"])
681        self.assertNotEqual(user["token"], new_user["token"])
682        self.assertUserCanAuth(new_user)
683        self.assertUserCannotAuth(user)
684
685        # Explicitly specifying with your own username is fine also
686        with self.auth_client(new_user) as client:
687            new_user2 = client.refresh_token(user["username"])
688
689        self.assertEqual(user["username"], new_user2["username"])
690        self.assertNotEqual(user["token"], new_user2["token"])
691        self.assertUserCanAuth(new_user2)
692        self.assertUserCannotAuth(new_user)
693        self.assertUserCannotAuth(user)
694
695    def test_auth_token_refresh(self):
696        admin_client = self.start_auth_server()
697
698        user = self.create_user("test-user", [])
699
700        with self.auth_perms() as client, self.assertRaises(InvokeError):
701            client.refresh_token(user["username"])
702
703        with self.auth_perms("@user-admin") as client:
704            new_user = client.refresh_token(user["username"])
705
706        self.assertEqual(user["username"], new_user["username"])
707        self.assertNotEqual(user["token"], new_user["token"])
708        self.assertUserCanAuth(new_user)
709        self.assertUserCannotAuth(user)
710
711    def test_auth_self_get_user(self):
712        admin_client = self.start_auth_server()
713
714        user = self.create_user("test-user", [])
715        user_info = user.copy()
716        del user_info["token"]
717
718        with self.auth_client(user) as client:
719            info = client.get_user()
720            self.assertEqual(info, user_info)
721
722            # Explicitly asking for your own username is fine also
723            info = client.get_user(user["username"])
724            self.assertEqual(info, user_info)
725
726    def test_auth_get_user(self):
727        admin_client = self.start_auth_server()
728
729        user = self.create_user("test-user", [])
730        user_info = user.copy()
731        del user_info["token"]
732
733        with self.auth_perms() as client, self.assertRaises(InvokeError):
734            client.get_user(user["username"])
735
736        with self.auth_perms("@user-admin") as client:
737            info = client.get_user(user["username"])
738            self.assertEqual(info, user_info)
739
740            info = client.get_user("nonexist-user")
741            self.assertIsNone(info)
742
743    def test_auth_reconnect(self):
744        admin_client = self.start_auth_server()
745
746        user = self.create_user("test-user", [])
747        user_info = user.copy()
748        del user_info["token"]
749
750        with self.auth_client(user) as client:
751            info = client.get_user()
752            self.assertEqual(info, user_info)
753
754            client.disconnect()
755
756            info = client.get_user()
757            self.assertEqual(info, user_info)
758
759    def test_auth_delete_user(self):
760        admin_client = self.start_auth_server()
761
762        user = self.create_user("test-user", [])
763
764        # self service
765        with self.auth_client(user) as client:
766            client.delete_user(user["username"])
767
768        self.assertIsNone(admin_client.get_user(user["username"]))
769        user = self.create_user("test-user", [])
770
771        with self.auth_perms() as client, self.assertRaises(InvokeError):
772            client.delete_user(user["username"])
773
774        with self.auth_perms("@user-admin") as client:
775            client.delete_user(user["username"])
776
777        # User doesn't exist, so even though the permission is correct, it's an
778        # error
779        with self.auth_perms("@user-admin") as client, self.assertRaises(InvokeError):
780            client.delete_user(user["username"])
781
782    def test_auth_set_user_perms(self):
783        admin_client = self.start_auth_server()
784
785        user = self.create_user("test-user", [])
786
787        self.assertUserPerms(user, [])
788
789        # No self service to change permissions
790        with self.auth_client(user) as client, self.assertRaises(InvokeError):
791            client.set_user_perms(user["username"], ["@all"])
792        self.assertUserPerms(user, [])
793
794        with self.auth_perms() as client, self.assertRaises(InvokeError):
795            client.set_user_perms(user["username"], ["@all"])
796        self.assertUserPerms(user, [])
797
798        with self.auth_perms("@user-admin") as client:
799            client.set_user_perms(user["username"], ["@all"])
800        self.assertUserPerms(user, sorted(list(ALL_PERMISSIONS)))
801
802        # Bad permissions
803        with self.auth_perms("@user-admin") as client, self.assertRaises(InvokeError):
804            client.set_user_perms(user["username"], ["@this-is-not-a-permission"])
805        self.assertUserPerms(user, sorted(list(ALL_PERMISSIONS)))
806
807    def test_auth_get_all_users(self):
808        admin_client = self.start_auth_server()
809
810        user = self.create_user("test-user", [])
811
812        with self.auth_client(user) as client, self.assertRaises(InvokeError):
813            client.get_all_users()
814
815        # Give the test user the correct permission
816        admin_client.set_user_perms(user["username"], ["@user-admin"])
817
818        with self.auth_client(user) as client:
819            all_users = client.get_all_users()
820
821        # Convert to a dictionary for easier comparison
822        all_users = {u["username"]: u for u in all_users}
823
824        self.assertEqual(all_users,
825            {
826                "admin": {
827                    "username": "admin",
828                    "permissions": sorted(list(ALL_PERMISSIONS)),
829                },
830                "test-user": {
831                    "username": "test-user",
832                    "permissions": ["@user-admin"],
833                }
834            }
835        )
836
837    def test_auth_new_user(self):
838        self.start_auth_server()
839
840        permissions = ["@read", "@report", "@db-admin", "@user-admin"]
841        permissions.sort()
842
843        with self.auth_perms() as client, self.assertRaises(InvokeError):
844            self.create_user("test-user", permissions, client=client)
845
846        with self.auth_perms("@user-admin") as client:
847            user = self.create_user("test-user", permissions, client=client)
848            self.assertIn("token", user)
849            self.assertEqual(user["username"], "test-user")
850            self.assertEqual(user["permissions"], permissions)
851
852    def test_auth_become_user(self):
853        admin_client = self.start_auth_server()
854
855        user = self.create_user("test-user", ["@read", "@report"])
856        user_info = user.copy()
857        del user_info["token"]
858
859        with self.auth_perms() as client, self.assertRaises(InvokeError):
860            client.become_user(user["username"])
861
862        with self.auth_perms("@user-admin") as client:
863            become = client.become_user(user["username"])
864            self.assertEqual(become, user_info)
865
866            info = client.get_user()
867            self.assertEqual(info, user_info)
868
869            # Verify become user is preserved across disconnect
870            client.disconnect()
871
872            info = client.get_user()
873            self.assertEqual(info, user_info)
874
875            # test-user doesn't have become_user permissions, so this should
876            # not work
877            with self.assertRaises(InvokeError):
878                client.become_user(user["username"])
879
880        # No self-service of become
881        with self.auth_client(user) as client, self.assertRaises(InvokeError):
882            client.become_user(user["username"])
883
884        # Give test user permissions to become
885        admin_client.set_user_perms(user["username"], ["@user-admin"])
886
887        # It's possible to become yourself (effectively a noop)
888        with self.auth_perms("@user-admin") as client:
889            become = client.become_user(client.username)
890
891    def test_auth_gc(self):
892        admin_client = self.start_auth_server()
893
894        with self.auth_perms() as client, self.assertRaises(InvokeError):
895            client.gc_mark("ABC", {"unihash": "123"})
896
897        with self.auth_perms() as client, self.assertRaises(InvokeError):
898            client.gc_status()
899
900        with self.auth_perms() as client, self.assertRaises(InvokeError):
901            client.gc_sweep("ABC")
902
903        with self.auth_perms("@db-admin") as client:
904            client.gc_mark("ABC", {"unihash": "123"})
905
906        with self.auth_perms("@db-admin") as client:
907            client.gc_status()
908
909        with self.auth_perms("@db-admin") as client:
910            client.gc_sweep("ABC")
911
912    def test_get_db_usage(self):
913        usage = self.client.get_db_usage()
914
915        self.assertTrue(isinstance(usage, dict))
916        for name in usage.keys():
917            self.assertTrue(isinstance(usage[name], dict))
918            self.assertIn("rows", usage[name])
919            self.assertTrue(isinstance(usage[name]["rows"], int))
920
921    def test_get_db_query_columns(self):
922        columns = self.client.get_db_query_columns()
923
924        self.assertTrue(isinstance(columns, list))
925        self.assertTrue(len(columns) > 0)
926
927        for col in columns:
928            self.client.remove({col: ""})
929
930    def test_auth_is_owner(self):
931        admin_client = self.start_auth_server()
932
933        user = self.create_user("test-user", ["@read", "@report"])
934        with self.auth_client(user) as client:
935            taskhash, outhash, unihash = self.create_test_hash(client)
936            data = client.get_taskhash(self.METHOD, taskhash, True)
937            self.assertEqual(data["owner"], user["username"])
938
939    def test_gc(self):
940        taskhash = '53b8dce672cb6d0c73170be43f540460bfc347b4'
941        outhash = '5a9cb1649625f0bf41fc7791b635cd9c2d7118c7f021ba87dcd03f72b67ce7a8'
942        unihash = 'f37918cc02eb5a520b1aff86faacbc0a38124646'
943
944        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
945        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')
946
947        taskhash2 = '3bf6f1e89d26205aec90da04854fbdbf73afe6b4'
948        outhash2 = '77623a549b5b1a31e3732dfa8fe61d7ce5d44b3370f253c5360e136b852967b4'
949        unihash2 = 'af36b199320e611fbb16f1f277d3ee1d619ca58b'
950
951        result = self.client.report_unihash(taskhash2, self.METHOD, outhash2, unihash2)
952        self.assertClientGetHash(self.client, taskhash2, unihash2)
953
954        # Mark the first unihash to be kept
955        ret = self.client.gc_mark("ABC", {"unihash": unihash, "method": self.METHOD})
956        self.assertEqual(ret, {"count": 1})
957
958        ret = self.client.gc_status()
959        self.assertEqual(ret, {"mark": "ABC", "keep": 1, "remove": 1})
960
961        # Second hash is still there; mark doesn't delete hashes
962        self.assertClientGetHash(self.client, taskhash2, unihash2)
963
964        ret = self.client.gc_sweep("ABC")
965        self.assertEqual(ret, {"count": 1})
966
967        # Hash is gone. Taskhash is returned for second hash
968        self.assertClientGetHash(self.client, taskhash2, None)
969        # First hash is still present
970        self.assertClientGetHash(self.client, taskhash, unihash)
971
972    def test_gc_stream(self):
973        taskhash = '53b8dce672cb6d0c73170be43f540460bfc347b4'
974        outhash = '5a9cb1649625f0bf41fc7791b635cd9c2d7118c7f021ba87dcd03f72b67ce7a8'
975        unihash = 'f37918cc02eb5a520b1aff86faacbc0a38124646'
976
977        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
978        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')
979
980        taskhash2 = '3bf6f1e89d26205aec90da04854fbdbf73afe6b4'
981        outhash2 = '77623a549b5b1a31e3732dfa8fe61d7ce5d44b3370f253c5360e136b852967b4'
982        unihash2 = 'af36b199320e611fbb16f1f277d3ee1d619ca58b'
983
984        result = self.client.report_unihash(taskhash2, self.METHOD, outhash2, unihash2)
985        self.assertClientGetHash(self.client, taskhash2, unihash2)
986
987        taskhash3 = 'a1117c1f5a7c9ab2f5a39cc6fe5e6152169d09c0'
988        outhash3 = '7289c414905303700a1117c1f5a7c9ab2f5a39cc6fe5e6152169d09c04f9a53c'
989        unihash3 = '905303700a1117c1f5a7c9ab2f5a39cc6fe5e615'
990
991        result = self.client.report_unihash(taskhash3, self.METHOD, outhash3, unihash3)
992        self.assertClientGetHash(self.client, taskhash3, unihash3)
993
994        # Mark the first unihash to be kept
995        ret = self.client.gc_mark_stream("ABC", (f"unihash {h}" for h in [unihash, unihash2]))
996        self.assertEqual(ret, {"count": 2})
997
998        ret = self.client.gc_status()
999        self.assertEqual(ret, {"mark": "ABC", "keep": 2, "remove": 1})
1000
1001        # Third hash is still there; mark doesn't delete hashes
1002        self.assertClientGetHash(self.client, taskhash3, unihash3)
1003
1004        ret = self.client.gc_sweep("ABC")
1005        self.assertEqual(ret, {"count": 1})
1006
1007        # Hash is gone. Taskhash is returned for second hash
1008        self.assertClientGetHash(self.client, taskhash3, None)
1009        # First hash is still present
1010        self.assertClientGetHash(self.client, taskhash, unihash)
1011        # Second hash is still present
1012        self.assertClientGetHash(self.client, taskhash2, unihash2)
1013
1014    def test_gc_switch_mark(self):
1015        taskhash = '53b8dce672cb6d0c73170be43f540460bfc347b4'
1016        outhash = '5a9cb1649625f0bf41fc7791b635cd9c2d7118c7f021ba87dcd03f72b67ce7a8'
1017        unihash = 'f37918cc02eb5a520b1aff86faacbc0a38124646'
1018
1019        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
1020        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')
1021
1022        taskhash2 = '3bf6f1e89d26205aec90da04854fbdbf73afe6b4'
1023        outhash2 = '77623a549b5b1a31e3732dfa8fe61d7ce5d44b3370f253c5360e136b852967b4'
1024        unihash2 = 'af36b199320e611fbb16f1f277d3ee1d619ca58b'
1025
1026        result = self.client.report_unihash(taskhash2, self.METHOD, outhash2, unihash2)
1027        self.assertClientGetHash(self.client, taskhash2, unihash2)
1028
1029        # Mark the first unihash to be kept
1030        ret = self.client.gc_mark("ABC", {"unihash": unihash, "method": self.METHOD})
1031        self.assertEqual(ret, {"count": 1})
1032
1033        ret = self.client.gc_status()
1034        self.assertEqual(ret, {"mark": "ABC", "keep": 1, "remove": 1})
1035
1036        # Second hash is still there; mark doesn't delete hashes
1037        self.assertClientGetHash(self.client, taskhash2, unihash2)
1038
1039        # Switch to a different mark and mark the second hash. This will start
1040        # a new collection cycle
1041        ret = self.client.gc_mark("DEF", {"unihash": unihash2, "method": self.METHOD})
1042        self.assertEqual(ret, {"count": 1})
1043
1044        ret = self.client.gc_status()
1045        self.assertEqual(ret, {"mark": "DEF", "keep": 1, "remove": 1})
1046
1047        # Both hashes are still present
1048        self.assertClientGetHash(self.client, taskhash2, unihash2)
1049        self.assertClientGetHash(self.client, taskhash, unihash)
1050
1051        # Sweep with the new mark
1052        ret = self.client.gc_sweep("DEF")
1053        self.assertEqual(ret, {"count": 1})
1054
1055        # First hash is gone, second is kept
1056        self.assertClientGetHash(self.client, taskhash2, unihash2)
1057        self.assertClientGetHash(self.client, taskhash, None)
1058
1059    def test_gc_switch_sweep_mark(self):
1060        taskhash = '53b8dce672cb6d0c73170be43f540460bfc347b4'
1061        outhash = '5a9cb1649625f0bf41fc7791b635cd9c2d7118c7f021ba87dcd03f72b67ce7a8'
1062        unihash = 'f37918cc02eb5a520b1aff86faacbc0a38124646'
1063
1064        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
1065        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')
1066
1067        taskhash2 = '3bf6f1e89d26205aec90da04854fbdbf73afe6b4'
1068        outhash2 = '77623a549b5b1a31e3732dfa8fe61d7ce5d44b3370f253c5360e136b852967b4'
1069        unihash2 = 'af36b199320e611fbb16f1f277d3ee1d619ca58b'
1070
1071        result = self.client.report_unihash(taskhash2, self.METHOD, outhash2, unihash2)
1072        self.assertClientGetHash(self.client, taskhash2, unihash2)
1073
1074        # Mark the first unihash to be kept
1075        ret = self.client.gc_mark("ABC", {"unihash": unihash, "method": self.METHOD})
1076        self.assertEqual(ret, {"count": 1})
1077
1078        ret = self.client.gc_status()
1079        self.assertEqual(ret, {"mark": "ABC", "keep": 1, "remove": 1})
1080
1081        # Sweeping with a different mark raises an error
1082        with self.assertRaises(InvokeError):
1083            self.client.gc_sweep("DEF")
1084
1085        # Both hashes are present
1086        self.assertClientGetHash(self.client, taskhash2, unihash2)
1087        self.assertClientGetHash(self.client, taskhash, unihash)
1088
1089    def test_gc_new_hashes(self):
1090        taskhash = '53b8dce672cb6d0c73170be43f540460bfc347b4'
1091        outhash = '5a9cb1649625f0bf41fc7791b635cd9c2d7118c7f021ba87dcd03f72b67ce7a8'
1092        unihash = 'f37918cc02eb5a520b1aff86faacbc0a38124646'
1093
1094        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
1095        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')
1096
1097        # Start a new garbage collection
1098        ret = self.client.gc_mark("ABC", {"unihash": unihash, "method": self.METHOD})
1099        self.assertEqual(ret, {"count": 1})
1100
1101        ret = self.client.gc_status()
1102        self.assertEqual(ret, {"mark": "ABC", "keep": 1, "remove": 0})
1103
1104        # Add second hash. It should inherit the mark from the current garbage
1105        # collection operation
1106
1107        taskhash2 = '3bf6f1e89d26205aec90da04854fbdbf73afe6b4'
1108        outhash2 = '77623a549b5b1a31e3732dfa8fe61d7ce5d44b3370f253c5360e136b852967b4'
1109        unihash2 = 'af36b199320e611fbb16f1f277d3ee1d619ca58b'
1110
1111        result = self.client.report_unihash(taskhash2, self.METHOD, outhash2, unihash2)
1112        self.assertClientGetHash(self.client, taskhash2, unihash2)
1113
1114        # Sweep should remove nothing
1115        ret = self.client.gc_sweep("ABC")
1116        self.assertEqual(ret, {"count": 0})
1117
1118        # Both hashes are present
1119        self.assertClientGetHash(self.client, taskhash2, unihash2)
1120        self.assertClientGetHash(self.client, taskhash, unihash)
1121
1122
class TestHashEquivalenceClient(HashEquivalenceTestSetup, unittest.TestCase):
    # Drives the client through self.run_hashclient() against a server that
    # listens on a UNIX domain socket in the test's temporary directory.

    def get_server_addr(self, server_idx):
        # One socket per server index inside the temporary directory
        return "unix://" + os.path.join(self.temp_dir.name, 'sock%d' % server_idx)

    @staticmethod
    def _parse_token(output):
        # Return the value of the last "Token: <value>" line in output, or
        # None when no line matches
        token = None
        for line in output.splitlines():
            m = re.match(r'Token: +(.*)$', line.rstrip())
            if m is not None:
                token = m.group(1)
        return token

    def test_get(self):
        # "get" prints the stored entry as JSON
        taskhash, outhash, unihash = self.create_test_hash(self.client)

        p = self.run_hashclient(["--address", self.server_address, "get", self.METHOD, taskhash])
        data = json.loads(p.stdout)
        self.assertEqual(data["unihash"], unihash)
        self.assertEqual(data["outhash"], outhash)
        self.assertEqual(data["taskhash"], taskhash)
        self.assertEqual(data["method"], self.METHOD)

    def test_get_outhash(self):
        # "get-outhash" prints the stored entry as JSON
        taskhash, outhash, unihash = self.create_test_hash(self.client)

        p = self.run_hashclient(["--address", self.server_address, "get-outhash", self.METHOD, outhash, taskhash])
        data = json.loads(p.stdout)
        self.assertEqual(data["unihash"], unihash)
        self.assertEqual(data["outhash"], outhash)
        self.assertEqual(data["taskhash"], taskhash)
        self.assertEqual(data["method"], self.METHOD)

    def test_stats(self):
        # "stats" must succeed and print valid JSON
        p = self.run_hashclient(["--address", self.server_address, "stats"], check=True)
        json.loads(p.stdout)

    def test_stress(self):
        # "stress" only has to run to completion successfully
        self.run_hashclient(["--address", self.server_address, "stress"], check=True)

    def test_unihash_exsits(self):
        # "unihash-exists" prints "true" or "false"
        _, _, unihash = self.create_test_hash(self.client)

        p = self.run_hashclient([
            "--address", self.server_address,
            "unihash-exists", unihash,
        ], check=True)
        self.assertEqual(p.stdout.strip(), "true")

        p = self.run_hashclient([
            "--address", self.server_address,
            "unihash-exists", '6662e699d6e3d894b24408ff9a4031ef9b038ee8',
        ], check=True)
        self.assertEqual(p.stdout.strip(), "false")

    def test_unihash_exsits_quiet(self):
        # With --quiet nothing is printed; the result is the exit status, so
        # the runs below deliberately do not pass check=True
        _, _, unihash = self.create_test_hash(self.client)

        p = self.run_hashclient([
            "--address", self.server_address,
            "unihash-exists", unihash,
            "--quiet",
        ])
        self.assertEqual(p.returncode, 0)
        self.assertEqual(p.stdout.strip(), "")

        p = self.run_hashclient([
            "--address", self.server_address,
            "unihash-exists", '6662e699d6e3d894b24408ff9a4031ef9b038ee8',
            "--quiet",
        ])
        self.assertEqual(p.returncode, 1)
        self.assertEqual(p.stdout.strip(), "")

    def test_remove_taskhash(self):
        # Removing by taskhash drops both the unihash and the outhash entry
        taskhash, outhash, _ = self.create_test_hash(self.client)
        self.run_hashclient([
            "--address", self.server_address,
            "remove",
            "--where", "taskhash", taskhash,
        ], check=True)
        self.assertClientGetHash(self.client, taskhash, None)

        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash)
        self.assertIsNone(result_outhash)

    def test_remove_unihash(self):
        # Removing by unihash makes the taskhash lookup come back empty
        taskhash, _, unihash = self.create_test_hash(self.client)
        self.run_hashclient([
            "--address", self.server_address,
            "remove",
            "--where", "unihash", unihash,
        ], check=True)
        self.assertClientGetHash(self.client, taskhash, None)

    def test_remove_outhash(self):
        # Removing by outhash drops the outhash entry
        taskhash, outhash, _ = self.create_test_hash(self.client)
        self.run_hashclient([
            "--address", self.server_address,
            "remove",
            "--where", "outhash", outhash,
        ], check=True)

        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash)
        self.assertIsNone(result_outhash)

    def test_remove_method(self):
        # Removing by method drops both the unihash and the outhash entry
        taskhash, outhash, _ = self.create_test_hash(self.client)
        self.run_hashclient([
            "--address", self.server_address,
            "remove",
            "--where", "method", self.METHOD,
        ], check=True)
        self.assertClientGetHash(self.client, taskhash, None)

        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash)
        self.assertIsNone(result_outhash)

    def test_clean_unused(self):
        taskhash, outhash, unihash = self.create_test_hash(self.client)

        # Clean the database, which should not remove anything because all hashes are in-use
        self.run_hashclient([
            "--address", self.server_address,
            "clean-unused", "0",
        ], check=True)
        self.assertClientGetHash(self.client, taskhash, unihash)

        # Remove the unihash. The row in the outhash table should still be present
        self.run_hashclient([
            "--address", self.server_address,
            "remove",
            "--where", "unihash", unihash,
        ], check=True)
        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash, False)
        self.assertIsNotNone(result_outhash)

        # Now clean with no minimum age which will remove the outhash
        self.run_hashclient([
            "--address", self.server_address,
            "clean-unused", "0",
        ], check=True)
        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash, False)
        self.assertIsNone(result_outhash)

    def test_refresh_token(self):
        # "refresh-token" prints a new token that can be used to log in
        admin_client = self.start_auth_server()

        user = admin_client.new_user("test-user", ["@read", "@report"])

        p = self.run_hashclient([
            "--address", self.auth_server_address,
            "--login", user["username"],
            "--password", user["token"],
            "refresh-token"
        ], check=True)

        new_token = self._parse_token(p.stdout)
        self.assertTrue(new_token)

        # Logging in with the refreshed token must work
        self.run_hashclient([
            "--address", self.auth_server_address,
            "--login", user["username"],
            "--password", new_token,
            "get-user"
        ], check=True)

    def test_set_user_perms(self):
        # "set-user-perms" run as admin replaces the user's permissions
        admin_client = self.start_auth_server()

        user = admin_client.new_user("test-user", ["@read"])

        self.run_hashclient([
            "--address", self.auth_server_address,
            "--login", admin_client.username,
            "--password", admin_client.password,
            "set-user-perms",
            "-u", user["username"],
            "@read", "@report",
        ], check=True)

        new_user = admin_client.get_user(user["username"])

        self.assertEqual(set(new_user["permissions"]), {"@read", "@report"})

    def test_get_user(self):
        # "get-user" works for admin (with -u) and for the user themselves
        admin_client = self.start_auth_server()

        user = admin_client.new_user("test-user", ["@read"])

        p = self.run_hashclient([
            "--address", self.auth_server_address,
            "--login", admin_client.username,
            "--password", admin_client.password,
            "get-user",
            "-u", user["username"],
        ], check=True)

        self.assertIn("Username:", p.stdout)
        self.assertIn("Permissions:", p.stdout)

        p = self.run_hashclient([
            "--address", self.auth_server_address,
            "--login", user["username"],
            "--password", user["token"],
            "get-user",
        ], check=True)

        self.assertIn("Username:", p.stdout)
        self.assertIn("Permissions:", p.stdout)

    def test_get_all_users(self):
        # "get-all-users" output mentions admin and every created user
        admin_client = self.start_auth_server()

        admin_client.new_user("test-user1", ["@read"])
        admin_client.new_user("test-user2", ["@read"])

        p = self.run_hashclient([
            "--address", self.auth_server_address,
            "--login", admin_client.username,
            "--password", admin_client.password,
            "get-all-users",
        ], check=True)

        self.assertIn("admin", p.stdout)
        self.assertIn("test-user1", p.stdout)
        self.assertIn("test-user2", p.stdout)

    def test_new_user(self):
        # "new-user" prints a token that authenticates the created user
        admin_client = self.start_auth_server()

        p = self.run_hashclient([
            "--address", self.auth_server_address,
            "--login", admin_client.username,
            "--password", admin_client.password,
            "new-user",
            "-u", "test-user",
            "@read", "@report",
        ], check=True)

        new_token = self._parse_token(p.stdout)
        self.assertTrue(new_token)

        user = {
            "username": "test-user",
            "token": new_token,
        }

        self.assertUserPerms(user, ["@read", "@report"])

    def test_delete_user(self):
        # "delete-user" run as admin removes the account
        admin_client = self.start_auth_server()

        user = admin_client.new_user("test-user", ["@read"])

        self.run_hashclient([
            "--address", self.auth_server_address,
            "--login", admin_client.username,
            "--password", admin_client.password,
            "delete-user",
            "-u", user["username"],
        ], check=True)

        self.assertIsNone(admin_client.get_user(user["username"]))

    def test_get_db_usage(self):
        # "get-db-usage" only has to exit successfully
        self.run_hashclient([
            "--address", self.server_address,
            "get-db-usage",
        ], check=True)

    def test_get_db_query_columns(self):
        # "get-db-query-columns" only has to exit successfully
        self.run_hashclient([
            "--address", self.server_address,
            "get-db-query-columns",
        ], check=True)

    def test_gc(self):
        # Mark one of two hashes with "gc-mark", run "gc-sweep", and check
        # that only the marked hash survives
        taskhash = '53b8dce672cb6d0c73170be43f540460bfc347b4'
        outhash = '5a9cb1649625f0bf41fc7791b635cd9c2d7118c7f021ba87dcd03f72b67ce7a8'
        unihash = 'f37918cc02eb5a520b1aff86faacbc0a38124646'

        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')

        taskhash2 = '3bf6f1e89d26205aec90da04854fbdbf73afe6b4'
        outhash2 = '77623a549b5b1a31e3732dfa8fe61d7ce5d44b3370f253c5360e136b852967b4'
        unihash2 = 'af36b199320e611fbb16f1f277d3ee1d619ca58b'

        self.client.report_unihash(taskhash2, self.METHOD, outhash2, unihash2)
        self.assertClientGetHash(self.client, taskhash2, unihash2)

        # Mark the first unihash to be kept
        self.run_hashclient([
            "--address", self.server_address,
            "gc-mark", "ABC",
            "--where", "unihash", unihash,
            "--where", "method", self.METHOD
        ], check=True)

        # Second hash is still there; mark doesn't delete hashes
        self.assertClientGetHash(self.client, taskhash2, unihash2)

        self.run_hashclient([
            "--address", self.server_address,
            "gc-sweep", "ABC",
        ], check=True)

        # The unmarked second hash was swept; its lookup now expects None
        self.assertClientGetHash(self.client, taskhash2, None)
        # First hash is still present
        self.assertClientGetHash(self.client, taskhash, unihash)
1440
1441
class TestHashEquivalenceUnixServer(HashEquivalenceTestSetup, HashEquivalenceCommonTests, unittest.TestCase):
    """Run the common hash equivalence tests over a Unix domain socket."""

    def get_server_addr(self, server_idx):
        """Return the unix:// address for server number *server_idx*.

        The socket file is named after the index and placed directly
        inside self.temp_dir.
        """
        sock_path = os.path.join(self.temp_dir.name, 'sock%d' % server_idx)
        return f"unix://{sock_path}"
1445
1446
class TestHashEquivalenceUnixServerLongPath(HashEquivalenceTestSetup, unittest.TestCase):
    """Check that the server is usable when its Unix socket path is very long."""

    # Deliberately long nested directory that is inserted into the socket path.
    DEEP_DIRECTORY = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ccccccccccccccccccccccccccccccccccccccccccc"

    def get_server_addr(self, server_idx):
        """Return a unix:// address whose socket lives below DEEP_DIRECTORY.

        The nested directory is created inside self.temp_dir if it does not
        exist yet.
        """
        socket_dir = os.path.join(self.temp_dir.name, self.DEEP_DIRECTORY)
        os.makedirs(socket_dir, exist_ok=True)
        socket_name = 'sock%d' % server_idx
        return "unix://" + os.path.join(socket_dir, socket_name)

    def test_long_sock_path(self):
        # Basic round trip over the long socket path: the task hash is
        # looked up with None as the expected result, then reported, and
        # the server's reply must carry the reported unihash.
        task = '35788efcb8dfb0a02659d81cf2bfd695fb30faf9'
        out = '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f'
        uni = 'f46d3fbb439bd9b921095da657a4de906510d2cd'

        self.assertClientGetHash(self.client, task, None)

        reply = self.client.report_unihash(task, self.METHOD, out, uni)
        self.assertEqual(reply['unihash'], uni, 'Server returned bad unihash')
1464
1465
class TestHashEquivalenceTCPServer(HashEquivalenceTestSetup, HashEquivalenceCommonTests, unittest.TestCase):
    """Run the common hash equivalence tests over a plain TCP connection."""

    def get_server_addr(self, server_idx):
        """Return a "<host>:0" address; *server_idx* is not used."""
        # On some hosts the asyncio module misbehaves when IPv6 is not
        # enabled. Using "localhost" directly should be safe when IPv6 is
        # enabled, but in the general case resolving the IP address
        # explicitly is more reliable.
        host = socket.gethostbyname("localhost")
        return host + ":0"
1472
1473
class TestHashEquivalenceWebsocketServer(HashEquivalenceTestSetup, HashEquivalenceCommonTests, unittest.TestCase):
    """Run the common hash equivalence tests over a ws:// address."""

    def setUp(self):
        """Skip the test when the websockets module cannot be imported."""
        try:
            # Imported only to find out whether the module is available.
            import websockets
        except ImportError as exc:
            self.skipTest(str(exc))

        super().setUp()

    def get_server_addr(self, server_idx):
        """Return a "ws://<host>:0" address; *server_idx* is not used."""
        # On some hosts the asyncio module misbehaves when IPv6 is not
        # enabled. Using "localhost" directly should be safe when IPv6 is
        # enabled, but in the general case resolving the IP address
        # explicitly is more reliable.
        return "ws://%s:0" % socket.gethostbyname("localhost")
1489
1490
class TestHashEquivalenceWebsocketsSQLAlchemyServer(TestHashEquivalenceWebsocketServer):
    """Websocket test variant whose database path is an SQLAlchemy aiosqlite URL."""

    def setUp(self):
        """Skip the test when sqlalchemy or aiosqlite cannot be imported."""
        try:
            # Imported only to find out whether the modules are available.
            import sqlalchemy
            import aiosqlite
        except ImportError as exc:
            self.skipTest(str(exc))

        super().setUp()

    def make_dbpath(self):
        """Return a sqlite+aiosqlite URL for a per-server file in self.temp_dir."""
        db_name = "db%d.sqlite" % self.server_index
        db_file = os.path.join(self.temp_dir.name, db_name)
        return "sqlite+aiosqlite:///%s" % db_file
1503
1504
class TestHashEquivalenceExternalServer(HashEquivalenceTestSetup, HashEquivalenceCommonTests, unittest.TestCase):
    """Run the common tests against an already running, external server.

    The configuration is taken from the environment:

      BB_TEST_HASHSERV           address of the server to test
      BB_TEST_HASHSERV_USERNAME  user to log in as (optional for setUp)
      BB_TEST_HASHSERV_PASSWORD  password for that user

    A test is skipped when a variable it needs is unset or empty.
    """

    def get_env(self, name):
        """Return environment variable *name*; skip the test if it is unset or empty."""
        v = os.environ.get(name)
        if not v:
            self.skipTest(f'{name} not defined to test an external server')
        return v

    def start_test_server(self):
        """Return the address from BB_TEST_HASHSERV instead of starting a server."""
        return self.get_env('BB_TEST_HASHSERV')

    def start_server(self, *args, **kwargs):
        """Skip the calling test: no local server is started in this mode."""
        self.skipTest('Cannot start local server when testing external servers')

    def start_auth_server(self):
        """Return a client logged in with the credentials from the environment.

        The external server itself is used as the authentication server, so
        self.auth_server_address is set to self.server_address. The client
        is also stored in self.admin_client.
        """
        self.auth_server_address = self.server_address
        self.admin_client = self.start_client(
            self.server_address,
            username=self.get_env('BB_TEST_HASHSERV_USERNAME'),
            password=self.get_env('BB_TEST_HASHSERV_PASSWORD'),
        )
        return self.admin_client

    def setUp(self):
        """Prepare the client and remove entries left over for self.METHOD."""
        super().setUp()
        # Treat an empty username like an unset one, matching get_env().
        username = os.environ.get("BB_TEST_HASHSERV_USERNAME")
        if username:
            # A username without a password skips the test with a clear
            # message (as start_auth_server() does) instead of failing
            # with a bare KeyError.
            self.client = self.start_client(
                self.server_address,
                username=username,
                password=self.get_env("BB_TEST_HASHSERV_PASSWORD"),
            )
        self.client.remove({"method": self.METHOD})

    def tearDown(self):
        """Remove the entries stored for self.METHOD, then run the base tearDown."""
        self.client.remove({"method": self.METHOD})
        super().tearDown()

    def test_auth_get_all_users(self):
        # Replaces the test of the same name with an unconditional skip
        # when running against an external server.
        self.skipTest("Cannot test all users with external server")
1545
1546