xref: /openbmc/linux/kernel/rcu/refscale.c (revision 83c4a4ee)
1 // SPDX-License-Identifier: GPL-2.0+
2 //
3 // Scalability test comparing RCU vs other mechanisms
4 // for acquiring references on objects.
5 //
6 // Copyright (C) Google, 2020.
7 //
8 // Author: Joel Fernandes <joel@joelfernandes.org>
9 
10 #define pr_fmt(fmt) fmt
11 
12 #include <linux/atomic.h>
13 #include <linux/bitops.h>
14 #include <linux/completion.h>
15 #include <linux/cpu.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/init.h>
19 #include <linux/interrupt.h>
20 #include <linux/kthread.h>
21 #include <linux/kernel.h>
22 #include <linux/mm.h>
23 #include <linux/module.h>
24 #include <linux/moduleparam.h>
25 #include <linux/notifier.h>
26 #include <linux/percpu.h>
27 #include <linux/rcupdate.h>
28 #include <linux/rcupdate_trace.h>
29 #include <linux/reboot.h>
30 #include <linux/sched.h>
31 #include <linux/spinlock.h>
32 #include <linux/smp.h>
33 #include <linux/stat.h>
34 #include <linux/srcu.h>
35 #include <linux/slab.h>
36 #include <linux/torture.h>
37 #include <linux/types.h>
38 
39 #include "rcu.h"
40 
#define SCALE_FLAG "-ref-scale: "

// Unconditional console output, prefixed with the selected scale type.
#define SCALEOUT(s, x...) \
	pr_alert("%s" SCALE_FLAG s, scale_type, ## x)

// Console output emitted only when the "verbose" module parameter is set.
#define VERBOSE_SCALEOUT(s, x...) \
	do { if (verbose) pr_alert("%s" SCALE_FLAG s, scale_type, ## x); } while (0)

// Verbose-only error output, tagged with "!!!" for easy grepping of logs.
#define VERBOSE_SCALEOUT_ERRSTRING(s, x...) \
	do { if (verbose) pr_alert("%s" SCALE_FLAG "!!! " s, scale_type, ## x); } while (0)
51 
52 MODULE_LICENSE("GPL");
53 MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>");
54 
55 static char *scale_type = "rcu";
56 module_param(scale_type, charp, 0444);
57 MODULE_PARM_DESC(scale_type, "Type of test (rcu, srcu, refcnt, rwsem, rwlock.");
58 
torture_param(int, verbose, 0, "Enable verbose debugging printk()s");

// Wait until there are multiple CPUs before starting test.
torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0,
	      "Holdoff time before test start (s)");
// Number of loops per experiment, all readers execute operations concurrently.
torture_param(long, loops, 10000, "Number of loops per experiment.");
// Number of readers, with -1 defaulting to about 75% of the CPUs.
torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs.");
// Number of runs.
torture_param(int, nruns, 30, "Number of experiments to run.");
// Reader delay in nanoseconds, 0 for no delay.
torture_param(int, readdelay, 0, "Read-side delay in nanoseconds.");

// Built-in runs power off automatically when the test completes; module
// runs default to staying up so that results can be collected first.
#ifdef MODULE
# define REFSCALE_SHUTDOWN 0
#else
# define REFSCALE_SHUTDOWN 1
#endif

torture_param(bool, shutdown, REFSCALE_SHUTDOWN,
	      "Shutdown at end of scalability tests.");
81 
// Per-reader-kthread state.
struct reader_task {
	struct task_struct *task;	// The reader kthread itself.
	int start_reader;		// Set to 1 by main_func to start an experiment.
	wait_queue_head_t wq;		// Reader sleeps here between experiments.
	u64 last_duration_ns;		// Read-side duration measured in the last experiment.
};
88 
// Kthread that powers off the system at end of test (when shutdown is set),
// and the waitqueue it sleeps on until shutdown_start becomes nonzero.
static struct task_struct *shutdown_task;
static wait_queue_head_t shutdown_wq;

// Orchestrator kthread (main_func) and the waitqueue it sleeps on while
// waiting for the readers of the current experiment to finish.
static struct task_struct *main_task;
static wait_queue_head_t main_wq;
// Set to 1 by main_func to release the shutdown kthread.
static int shutdown_start;

// Array of nreaders reader states, allocated in ref_scale_init().
static struct reader_task *reader_tasks;

// Number of readers that are part of the current experiment.
static atomic_t nreaders_exp;

// Use to wait for all threads to start.
static atomic_t n_init;
// Rendezvous countdowns for experiment start, warmup, and cooldown phases.
static atomic_t n_started;
static atomic_t n_warmedup;
static atomic_t n_cooleddown;

// Track which experiment is currently running.
static int exp_idx;
109 
// Operations vector for selecting different types of tests.
struct ref_scale_ops {
	void (*init)(void);	// Optional per-mechanism setup before the test.
	void (*cleanup)(void);	// Optional teardown; may be NULL.
	// Execute nloops+1 empty read-side sections.
	void (*readsection)(const int nloops);
	// As readsection, but with udl us + ndl ns of delay inside each section.
	void (*delaysection)(const int nloops, const int udl, const int ndl);
	const char *name;	// Matched against the scale_type module parameter.
};

// Ops vector selected by scale_type; NULL if initialization failed early.
static struct ref_scale_ops *cur_ops;
120 
// Delay for udl microseconds followed by ndl nanoseconds, skipping
// whichever component is zero.
static void un_delay(const int udl, const int ndl)
{
	if (udl != 0)
		udelay(udl);
	if (ndl != 0)
		ndelay(ndl);
}
128 
// Execute nloops+1 empty rcu_read_lock()/rcu_read_unlock() pairs.
static void ref_rcu_read_section(const int nloops)
{
	int loop;

	for (loop = 0; loop <= nloops; loop++) {
		rcu_read_lock();
		rcu_read_unlock();
	}
}
138 
// Execute nloops+1 RCU read-side sections, each delaying for udl
// microseconds plus ndl nanoseconds while the lock is held.
static void ref_rcu_delay_section(const int nloops, const int udl, const int ndl)
{
	int loop;

	for (loop = 0; loop <= nloops; loop++) {
		rcu_read_lock();
		un_delay(udl, ndl);
		rcu_read_unlock();
	}
}
149 
// Empty init for mechanisms (RCU and friends) that need no runtime setup.
static void rcu_sync_scale_init(void)
{
}

// Classic RCU readers.
static struct ref_scale_ops rcu_ops = {
	.init		= rcu_sync_scale_init,
	.readsection	= ref_rcu_read_section,
	.delaysection	= ref_rcu_delay_section,
	.name		= "rcu"
};
160 
// Definitions for SRCU ref scale testing.
DEFINE_STATIC_SRCU(srcu_refctl_scale);
// All SRCU readers go through this pointer to the static srcu_struct.
static struct srcu_struct *srcu_ctlp = &srcu_refctl_scale;
164 
165 static void srcu_ref_scale_read_section(const int nloops)
166 {
167 	int i;
168 	int idx;
169 
170 	for (i = nloops; i >= 0; i--) {
171 		idx = srcu_read_lock(srcu_ctlp);
172 		srcu_read_unlock(srcu_ctlp, idx);
173 	}
174 }
175 
176 static void srcu_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
177 {
178 	int i;
179 	int idx;
180 
181 	for (i = nloops; i >= 0; i--) {
182 		idx = srcu_read_lock(srcu_ctlp);
183 		un_delay(udl, ndl);
184 		srcu_read_unlock(srcu_ctlp, idx);
185 	}
186 }
187 
// SRCU readers.
static struct ref_scale_ops srcu_ops = {
	.init		= rcu_sync_scale_init,
	.readsection	= srcu_ref_scale_read_section,
	.delaysection	= srcu_ref_scale_delay_section,
	.name		= "srcu"
};
194 
// Definitions for RCU Tasks ref scale testing: Empty read markers.
// These definitions also work for RCU Rude readers.
static void rcu_tasks_ref_scale_read_section(const int nloops)
{
	int loop;

	// RCU Tasks readers have no explicit markers, so just loop.
	for (loop = 0; loop <= nloops; loop++)
		continue;
}
204 
// Marker-free read-side sections, each delaying via un_delay().
static void rcu_tasks_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int loop;

	for (loop = 0; loop <= nloops; loop++)
		un_delay(udl, ndl);
}
212 
// RCU Tasks readers (empty read-side markers).
static struct ref_scale_ops rcu_tasks_ops = {
	.init		= rcu_sync_scale_init,
	.readsection	= rcu_tasks_ref_scale_read_section,
	.delaysection	= rcu_tasks_ref_scale_delay_section,
	.name		= "rcu-tasks"
};
219 
// Definitions for RCU Tasks Trace ref scale testing.
static void rcu_trace_ref_scale_read_section(const int nloops)
{
	int loop;

	for (loop = 0; loop <= nloops; loop++) {
		rcu_read_lock_trace();
		rcu_read_unlock_trace();
	}
}
230 
// RCU Tasks Trace read-side sections with an in-section delay.
static void rcu_trace_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int loop;

	for (loop = 0; loop <= nloops; loop++) {
		rcu_read_lock_trace();
		un_delay(udl, ndl);
		rcu_read_unlock_trace();
	}
}
241 
// RCU Tasks Trace readers.
static struct ref_scale_ops rcu_trace_ops = {
	.init		= rcu_sync_scale_init,
	.readsection	= rcu_trace_ref_scale_read_section,
	.delaysection	= rcu_trace_ref_scale_delay_section,
	.name		= "rcu-trace"
};
248 
// Definitions for reference count
// Single shared counter that every reader increments and decrements.
static atomic_t refcnt;
251 
252 static void ref_refcnt_section(const int nloops)
253 {
254 	int i;
255 
256 	for (i = nloops; i >= 0; i--) {
257 		atomic_inc(&refcnt);
258 		atomic_dec(&refcnt);
259 	}
260 }
261 
262 static void ref_refcnt_delay_section(const int nloops, const int udl, const int ndl)
263 {
264 	int i;
265 
266 	for (i = nloops; i >= 0; i--) {
267 		atomic_inc(&refcnt);
268 		un_delay(udl, ndl);
269 		atomic_dec(&refcnt);
270 	}
271 }
272 
// Atomic reference-count readers.
static struct ref_scale_ops refcnt_ops = {
	.init		= rcu_sync_scale_init,
	.readsection	= ref_refcnt_section,
	.delaysection	= ref_refcnt_delay_section,
	.name		= "refcnt"
};
279 
// Definitions for rwlock
static rwlock_t test_rwlock;

// Runtime initialization of the shared rwlock (invoked via cur_ops->init).
static void ref_rwlock_init(void)
{
	rwlock_init(&test_rwlock);
}
287 
288 static void ref_rwlock_section(const int nloops)
289 {
290 	int i;
291 
292 	for (i = nloops; i >= 0; i--) {
293 		read_lock(&test_rwlock);
294 		read_unlock(&test_rwlock);
295 	}
296 }
297 
298 static void ref_rwlock_delay_section(const int nloops, const int udl, const int ndl)
299 {
300 	int i;
301 
302 	for (i = nloops; i >= 0; i--) {
303 		read_lock(&test_rwlock);
304 		un_delay(udl, ndl);
305 		read_unlock(&test_rwlock);
306 	}
307 }
308 
// Reader-writer-lock readers.
static struct ref_scale_ops rwlock_ops = {
	.init		= ref_rwlock_init,
	.readsection	= ref_rwlock_section,
	.delaysection	= ref_rwlock_delay_section,
	.name		= "rwlock"
};
315 
// Definitions for rwsem
static struct rw_semaphore test_rwsem;

// Runtime initialization of the shared rwsem (invoked via cur_ops->init).
static void ref_rwsem_init(void)
{
	init_rwsem(&test_rwsem);
}
323 
324 static void ref_rwsem_section(const int nloops)
325 {
326 	int i;
327 
328 	for (i = nloops; i >= 0; i--) {
329 		down_read(&test_rwsem);
330 		up_read(&test_rwsem);
331 	}
332 }
333 
334 static void ref_rwsem_delay_section(const int nloops, const int udl, const int ndl)
335 {
336 	int i;
337 
338 	for (i = nloops; i >= 0; i--) {
339 		down_read(&test_rwsem);
340 		un_delay(udl, ndl);
341 		up_read(&test_rwsem);
342 	}
343 }
344 
// Reader-writer-semaphore readers.
static struct ref_scale_ops rwsem_ops = {
	.init		= ref_rwsem_init,
	.readsection	= ref_rwsem_section,
	.delaysection	= ref_rwsem_delay_section,
	.name		= "rwsem"
};
351 
352 static void rcu_scale_one_reader(void)
353 {
354 	if (readdelay <= 0)
355 		cur_ops->readsection(loops);
356 	else
357 		cur_ops->delaysection(loops, readdelay / 1000, readdelay % 1000);
358 }
359 
360 // Reader kthread.  Repeatedly does empty RCU read-side
361 // critical section, minimizing update-side interference.
362 static int
363 ref_scale_reader(void *arg)
364 {
365 	unsigned long flags;
366 	long me = (long)arg;
367 	struct reader_task *rt = &(reader_tasks[me]);
368 	u64 start;
369 	s64 duration;
370 
371 	VERBOSE_SCALEOUT("ref_scale_reader %ld: task started", me);
372 	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
373 	set_user_nice(current, MAX_NICE);
374 	atomic_inc(&n_init);
375 	if (holdoff)
376 		schedule_timeout_interruptible(holdoff * HZ);
377 repeat:
378 	VERBOSE_SCALEOUT("ref_scale_reader %ld: waiting to start next experiment on cpu %d", me, smp_processor_id());
379 
380 	// Wait for signal that this reader can start.
381 	wait_event(rt->wq, (atomic_read(&nreaders_exp) && smp_load_acquire(&rt->start_reader)) ||
382 			   torture_must_stop());
383 
384 	if (torture_must_stop())
385 		goto end;
386 
387 	// Make sure that the CPU is affinitized appropriately during testing.
388 	WARN_ON_ONCE(smp_processor_id() != me);
389 
390 	WRITE_ONCE(rt->start_reader, 0);
391 	if (!atomic_dec_return(&n_started))
392 		while (atomic_read_acquire(&n_started))
393 			cpu_relax();
394 
395 	VERBOSE_SCALEOUT("ref_scale_reader %ld: experiment %d started", me, exp_idx);
396 
397 
398 	// To reduce noise, do an initial cache-warming invocation, check
399 	// in, and then keep warming until everyone has checked in.
400 	rcu_scale_one_reader();
401 	if (!atomic_dec_return(&n_warmedup))
402 		while (atomic_read_acquire(&n_warmedup))
403 			rcu_scale_one_reader();
404 	// Also keep interrupts disabled.  This also has the effect
405 	// of preventing entries into slow path for rcu_read_unlock().
406 	local_irq_save(flags);
407 	start = ktime_get_mono_fast_ns();
408 
409 	rcu_scale_one_reader();
410 
411 	duration = ktime_get_mono_fast_ns() - start;
412 	local_irq_restore(flags);
413 
414 	rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration;
415 	// To reduce runtime-skew noise, do maintain-load invocations until
416 	// everyone is done.
417 	if (!atomic_dec_return(&n_cooleddown))
418 		while (atomic_read_acquire(&n_cooleddown))
419 			rcu_scale_one_reader();
420 
421 	if (atomic_dec_and_test(&nreaders_exp))
422 		wake_up(&main_wq);
423 
424 	VERBOSE_SCALEOUT("ref_scale_reader %ld: experiment %d ended, (readers remaining=%d)",
425 			me, exp_idx, atomic_read(&nreaders_exp));
426 
427 	if (!torture_must_stop())
428 		goto repeat;
429 end:
430 	torture_kthread_stopping("ref_scale_reader");
431 	return 0;
432 }
433 
434 static void reset_readers(void)
435 {
436 	int i;
437 	struct reader_task *rt;
438 
439 	for (i = 0; i < nreaders; i++) {
440 		rt = &(reader_tasks[i]);
441 
442 		rt->last_duration_ns = 0;
443 	}
444 }
445 
// Print the results of each reader and return the sum of all their durations.
// Returns 0 (with no output) on allocation failure.
static u64 process_durations(int n)
{
	int i;
	struct reader_task *rt;
	char buf1[64];
	char *buf;
	u64 sum = 0;

	// Sized for the header plus one "<id>: <ns>\t" entry per reader.
	buf = kmalloc(128 + nreaders * 32, GFP_KERNEL);
	if (!buf)
		return 0;
	buf[0] = 0;
	sprintf(buf, "Experiment #%d (Format: <THREAD-NUM>:<Total loop time in ns>)",
		exp_idx);

	for (i = 0; i < n && !torture_must_stop(); i++) {
		rt = &(reader_tasks[i]);
		sprintf(buf1, "%d: %llu\t", i, rt->last_duration_ns);

		// Break the output into lines of five entries each.
		if (i % 5 == 0)
			strcat(buf, "\n");
		strcat(buf, buf1);

		sum += rt->last_duration_ns;
	}
	strcat(buf, "\n");

	SCALEOUT("%s\n", buf);

	kfree(buf);
	return sum;
}
479 
// The main_func is the main orchestrator, it performs a bunch of
// experiments.  For every experiment, it orders all the readers
// involved to start and waits for them to finish the experiment. It
// then reads their timestamps and starts the next experiment. Each
// experiment progresses from 1 concurrent reader to N of them at which
// point all the timestamps are printed.
static int main_func(void *arg)
{
	bool errexit = false;
	int exp, r;
	char buf1[64];
	char *buf;
	u64 *result_avg;

	// Run on the CPU just past those used by the readers (modulo wrap).
	set_cpus_allowed_ptr(current, cpumask_of(nreaders % nr_cpu_ids));
	set_user_nice(current, MAX_NICE);

	VERBOSE_SCALEOUT("main_func task started");
	// One per-run average, plus a report buffer with a line per run.
	result_avg = kzalloc(nruns * sizeof(*result_avg), GFP_KERNEL);
	buf = kzalloc(64 + nruns * 32, GFP_KERNEL);
	if (!result_avg || !buf) {
		VERBOSE_SCALEOUT_ERRSTRING("out of memory");
		errexit = true;
	}
	if (holdoff)
		schedule_timeout_interruptible(holdoff * HZ);

	// Wait for all threads to start.
	atomic_inc(&n_init);
	while (atomic_read(&n_init) < nreaders + 1)
		schedule_timeout_uninterruptible(1);

	// Start exp readers up per experiment
	for (exp = 0; exp < nruns && !torture_must_stop(); exp++) {
		if (errexit)
			break;
		if (torture_must_stop())
			goto end;

		// Re-arm per-reader state and the rendezvous counters.
		reset_readers();
		atomic_set(&nreaders_exp, nreaders);
		atomic_set(&n_started, nreaders);
		atomic_set(&n_warmedup, nreaders);
		atomic_set(&n_cooleddown, nreaders);

		exp_idx = exp;

		// Release the readers; this store-release pairs with the
		// smp_load_acquire() in ref_scale_reader().
		for (r = 0; r < nreaders; r++) {
			smp_store_release(&reader_tasks[r].start_reader, 1);
			wake_up(&reader_tasks[r].wq);
		}

		VERBOSE_SCALEOUT("main_func: experiment started, waiting for %d readers",
				nreaders);

		// The last reader to finish wakes us via main_wq.
		wait_event(main_wq,
			   !atomic_read(&nreaders_exp) || torture_must_stop());

		VERBOSE_SCALEOUT("main_func: experiment ended");

		if (torture_must_stop())
			goto end;

		// Per-loop average in units of ns/1000 (three decimal places).
		result_avg[exp] = div_u64(1000 * process_durations(nreaders), nreaders * loops);
	}

	// Print the average of all experiments
	SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n");

	if (!errexit) {
		buf[0] = 0;
		strcat(buf, "\n");
		strcat(buf, "Runs\tTime(ns)\n");
	}

	for (exp = 0; exp < nruns; exp++) {
		u64 avg;
		u32 rem;

		if (errexit)
			break;
		// Split the scaled average back into whole ns and thousandths.
		avg = div_u64_rem(result_avg[exp], 1000, &rem);
		sprintf(buf1, "%d\t%llu.%03u\n", exp + 1, avg, rem);
		strcat(buf, buf1);
	}

	if (!errexit)
		SCALEOUT("%s", buf);

	// This will shutdown everything including us.
	if (shutdown) {
		shutdown_start = 1;
		wake_up(&shutdown_wq);
	}

	// Wait for torture to stop us
	while (!torture_must_stop())
		schedule_timeout_uninterruptible(1);

end:
	torture_kthread_stopping("main_func");
	kfree(result_avg);
	kfree(buf);
	return 0;
}
585 
// Log all module parameters along with the given tag (e.g., "Start of test").
static void
ref_scale_print_module_parms(struct ref_scale_ops *cur_ops, const char *tag)
{
	pr_alert("%s" SCALE_FLAG
		 "--- %s:  verbose=%d shutdown=%d holdoff=%d loops=%ld nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag,
		 verbose, shutdown, holdoff, loops, nreaders, nruns, readdelay);
}
593 
// Stop all kthreads, run type-specific cleanup, and release memory.
// Invoked both at module exit and from ref_scale_init()'s unwind path.
static void
ref_scale_cleanup(void)
{
	int i;

	if (torture_cleanup_begin())
		return;

	// cur_ops is NULL if initialization failed before ops selection.
	if (!cur_ops) {
		torture_cleanup_end();
		return;
	}

	if (reader_tasks) {
		for (i = 0; i < nreaders; i++)
			torture_stop_kthread("ref_scale_reader",
					     reader_tasks[i].task);
	}
	kfree(reader_tasks);

	torture_stop_kthread("main_task", main_task);
	// NOTE(review): main_task points at a kthread's task_struct, whose
	// lifetime is managed by the kthread machinery — confirm that this
	// kfree() is intended and safe.
	kfree(main_task);

	// Do scale-type-specific cleanup operations.
	if (cur_ops->cleanup != NULL)
		cur_ops->cleanup();

	torture_cleanup_end();
}
623 
// Shutdown kthread.  Just waits to be awakened, then shuts down system.
static int
ref_scale_shutdown(void *arg)
{
	// shutdown_start is set by main_func() once all runs complete.
	wait_event(shutdown_wq, shutdown_start);

	smp_mb(); // Wake before output.
	ref_scale_cleanup();
	kernel_power_off();

	// Unreached: kernel_power_off() does not return.
	return -EINVAL;
}
636 
// Module/boot-time initialization: select the ops vector, sanitize the
// module parameters, and spawn the shutdown, reader, and main kthreads.
// Returns 0 on success or a negative errno after unwinding on failure.
static int __init
ref_scale_init(void)
{
	long i;
	int firsterr = 0;
	static struct ref_scale_ops *scale_ops[] = {
		&rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops,
		&refcnt_ops, &rwlock_ops, &rwsem_ops,
	};

	if (!torture_init_begin(scale_type, verbose))
		return -EBUSY;

	// Select the ops whose name matches the scale_type module parameter.
	for (i = 0; i < ARRAY_SIZE(scale_ops); i++) {
		cur_ops = scale_ops[i];
		if (strcmp(scale_type, cur_ops->name) == 0)
			break;
	}
	if (i == ARRAY_SIZE(scale_ops)) {
		pr_alert("rcu-scale: invalid scale type: \"%s\"\n", scale_type);
		pr_alert("rcu-scale types:");
		for (i = 0; i < ARRAY_SIZE(scale_ops); i++)
			pr_cont(" %s", scale_ops[i]->name);
		pr_cont("\n");
		firsterr = -EINVAL;
		cur_ops = NULL;	// Tells ref_scale_cleanup() there is nothing to do.
		goto unwind;
	}
	if (cur_ops->init)
		cur_ops->init();

	ref_scale_print_module_parms(cur_ops, "Start of test");

	// Shutdown task
	if (shutdown) {
		init_waitqueue_head(&shutdown_wq);
		firsterr = torture_create_kthread(ref_scale_shutdown, NULL,
						  shutdown_task);
		if (firsterr)
			goto unwind;
		schedule_timeout_uninterruptible(1);
	}

	// Reader tasks (default to ~75% of online CPUs).
	if (nreaders < 0)
		nreaders = (num_online_cpus() >> 1) + (num_online_cpus() >> 2);
	if (WARN_ONCE(loops <= 0, "%s: loops = %ld, adjusted to 1\n", __func__, loops))
		loops = 1;
	if (WARN_ONCE(nreaders <= 0, "%s: nreaders = %d, adjusted to 1\n", __func__, nreaders))
		nreaders = 1;
	if (WARN_ONCE(nruns <= 0, "%s: nruns = %d, adjusted to 1\n", __func__, nruns))
		nruns = 1;
	reader_tasks = kcalloc(nreaders, sizeof(reader_tasks[0]),
			       GFP_KERNEL);
	if (!reader_tasks) {
		VERBOSE_SCALEOUT_ERRSTRING("out of memory");
		firsterr = -ENOMEM;
		goto unwind;
	}

	VERBOSE_SCALEOUT("Starting %d reader threads\n", nreaders);

	for (i = 0; i < nreaders; i++) {
		firsterr = torture_create_kthread(ref_scale_reader, (void *)i,
						  reader_tasks[i].task);
		if (firsterr)
			goto unwind;

		init_waitqueue_head(&(reader_tasks[i].wq));
	}

	// Main Task
	init_waitqueue_head(&main_wq);
	firsterr = torture_create_kthread(main_func, NULL, main_task);
	if (firsterr)
		goto unwind;

	torture_init_end();
	return 0;

unwind:
	torture_init_end();
	ref_scale_cleanup();
	if (shutdown) {
		// A built-in test cannot be unloaded, so power off rather
		// than leave the system wedged.
		WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST));
		kernel_power_off();
	}
	return firsterr;
}
726 
// Register module entry and exit points with the module loader.
module_init(ref_scale_init);
module_exit(ref_scale_cleanup);
729