xref: /openbmc/linux/drivers/hwtracing/intel_th/msu.c (revision 615c164d)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Intel(R) Trace Hub Memory Storage Unit
4  *
5  * Copyright (C) 2014-2015 Intel Corporation.
6  */
7 
8 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
9 
10 #include <linux/types.h>
11 #include <linux/module.h>
12 #include <linux/device.h>
13 #include <linux/uaccess.h>
14 #include <linux/sizes.h>
15 #include <linux/printk.h>
16 #include <linux/slab.h>
17 #include <linux/mm.h>
18 #include <linux/fs.h>
19 #include <linux/io.h>
20 #include <linux/workqueue.h>
21 #include <linux/dma-mapping.h>
22 
23 #ifdef CONFIG_X86
24 #include <asm/set_memory.h>
25 #endif
26 
27 #include <linux/intel_th.h>
28 #include "intel_th.h"
29 #include "msu.h"
30 
/* Shorthand for the struct device of the intel_th_device behind an MSC (x->thdev->dev) */
#define msc_dev(x) (&(x)->thdev->dev)
32 
/*
 * Lockout state transitions:
 *   READY -> INUSE -+-> LOCKED -+-> READY -> etc.
 *                   \-----------/
 * WIN_READY:	window can be used by HW
 * WIN_INUSE:	window is in use
 * WIN_LOCKED:	window is filled up and is being processed by the buffer
 *		handling code
 *
 * All state transitions happen automatically, except for the LOCKED->READY,
 * which needs to be signalled by the buffer code by calling
 * intel_th_msc_window_unlock().
 *
 * When the interrupt handler has to switch to the next window, it checks
 * whether it's READY, and if it is, it performs the switch and tracing
 * continues. If it's LOCKED, it stops the trace.
 *
 * The state is stored in msc_window::lockout and is changed under
 * msc_window::lo_lock by msc_win_set_lockout().
 */
enum lockout_state {
	WIN_READY = 0,	/* also the state a newly allocated window starts in */
	WIN_INUSE,
	WIN_LOCKED
};
55 
/**
 * struct msc_window - multiblock mode window descriptor
 * @entry:	window list linkage (msc::win_list)
 * @pgoff:	page offset into the buffer that this window starts at
 * @lockout:	lockout state, see the comment above enum lockout_state
 * @lo_lock:	lockout state serialization
 * @nr_blocks:	number of blocks (pages) in this window
 * @nr_segs:	number of segments in this window (<= @nr_blocks)
 * @msc:	pointer to the MSC device that this window belongs to
 * @_sgt:	embedded sg_table; @sgt is initialized to point at it
 * @sgt:	sg_table describing this window's blocks; a pointer to this
 *		field is handed to the MSU buffer's alloc_window() callback,
 *		if there is one
 */
struct msc_window {
	struct list_head	entry;
	unsigned long		pgoff;
	enum lockout_state	lockout;
	spinlock_t		lo_lock;
	unsigned int		nr_blocks;
	unsigned int		nr_segs;
	struct msc		*msc;
	struct sg_table		_sgt;
	struct sg_table		*sgt;
};
78 
/**
 * struct msc_iter - iterator for msc buffer
 * @entry:		msc::iter_list linkage
 * @msc:		pointer to the MSC device
 * @start_win:		oldest window; NULL until iteration has started
 * @win:		current window
 * @offset:		current logical offset into the buffer
 * @start_block:	oldest block in the window; -1 until it has been
 *			determined for the current window
 * @block:		block number in the window
 * @block_off:		offset into current block
 * @wrap_count:		block wrapping handling: set to 2 when the starting
 *			block has wrapped and decremented on each visit to it
 * @eof:		end of buffer reached
 */
struct msc_iter {
	struct list_head	entry;
	struct msc		*msc;
	struct msc_window	*start_win;
	struct msc_window	*win;
	unsigned long		offset;
	int			start_block;
	int			block;
	unsigned int		block_off;
	unsigned int		wrap_count;
	unsigned int		eof;
};
104 
/**
 * struct msc - MSC device representation
 * @reg_base:		register window base address
 * @msu_base:		base address used to reach the MSU-wide registers
 *			(REG_MSU_MINTCTL, REG_MSU_MSUSTS)
 * @thdev:		intel_th_device pointer
 * @mbuf:		MSU buffer, if assigned
 * @mbuf_priv:		MSU buffer's private data, if @mbuf
 * @work:		work item (its handler and queueing are not visible in
 *			this part of the file)
 * @win_list:		list of windows in multiblock mode
 * @single_sgt:		single mode buffer
 * @cur_win:		current window
 * @nr_pages:		total number of pages allocated for this buffer
 * @single_sz:		amount of data in single mode
 * @single_wrap:	single mode wrap occurred
 * @base:		buffer's base pointer
 * @base_addr:		buffer's base address
 * @user_count:		number of users of the buffer
 * @mmap_count:		number of mappings
 * @buf_mutex:		mutex to serialize access to buffer-related bits
 * @iter_list:		list of active iterators (msc_iter::entry)
 * @enabled:		MSC is enabled
 * @wrap:		wrapping is enabled
 * @do_irq:		use the MSU interrupt; cleared by intel_th_msu_init()
 *			if MINTCTL ignores writes
 * @mode:		MSC operating mode
 * @burst_len:		write burst length
 * @index:		number of this MSC in the MSU
 */
struct msc {
	void __iomem		*reg_base;
	void __iomem		*msu_base;
	struct intel_th_device	*thdev;

	const struct msu_buffer	*mbuf;
	void			*mbuf_priv;

	struct work_struct	work;
	struct list_head	win_list;
	struct sg_table		single_sgt;
	struct msc_window	*cur_win;
	unsigned long		nr_pages;
	unsigned long		single_sz;
	unsigned int		single_wrap : 1;
	void			*base;
	dma_addr_t		base_addr;

	/* <0: no buffer, 0: no users, >0: active users */
	atomic_t		user_count;

	atomic_t		mmap_count;
	struct mutex		buf_mutex;

	struct list_head	iter_list;

	/* config */
	unsigned int		enabled : 1,
				wrap	: 1,
				do_irq	: 1;
	unsigned int		mode;
	unsigned int		burst_len;
	unsigned int		index;
};
163 
/* All registered MSU buffers, linked through msu_buffer_entry::entry */
static LIST_HEAD(msu_buffer_list);
/*
 * Serializes access to msu_buffer_list. It has to be statically initialized:
 * nothing calls mutex_init() on it before it is first locked.
 */
static DEFINE_MUTEX(msu_buffer_mutex);
166 
/**
 * struct msu_buffer_entry - internal MSU buffer bookkeeping
 * @entry:	link to msu_buffer_list
 * @mbuf:	MSU buffer object; its name is the lookup key
 * @owner:	module that provides this MSU buffer; a reference on it is
 *		taken in msu_buffer_get() and dropped in msu_buffer_put()
 */
struct msu_buffer_entry {
	struct list_head	entry;
	const struct msu_buffer	*mbuf;
	struct module		*owner;
};
178 
179 static struct msu_buffer_entry *__msu_buffer_entry_find(const char *name)
180 {
181 	struct msu_buffer_entry *mbe;
182 
183 	lockdep_assert_held(&msu_buffer_mutex);
184 
185 	list_for_each_entry(mbe, &msu_buffer_list, entry) {
186 		if (!strcmp(mbe->mbuf->name, name))
187 			return mbe;
188 	}
189 
190 	return NULL;
191 }
192 
193 static const struct msu_buffer *
194 msu_buffer_get(const char *name)
195 {
196 	struct msu_buffer_entry *mbe;
197 
198 	mutex_lock(&msu_buffer_mutex);
199 	mbe = __msu_buffer_entry_find(name);
200 	if (mbe && !try_module_get(mbe->owner))
201 		mbe = NULL;
202 	mutex_unlock(&msu_buffer_mutex);
203 
204 	return mbe ? mbe->mbuf : NULL;
205 }
206 
207 static void msu_buffer_put(const struct msu_buffer *mbuf)
208 {
209 	struct msu_buffer_entry *mbe;
210 
211 	mutex_lock(&msu_buffer_mutex);
212 	mbe = __msu_buffer_entry_find(mbuf->name);
213 	if (mbe)
214 		module_put(mbe->owner);
215 	mutex_unlock(&msu_buffer_mutex);
216 }
217 
218 int intel_th_msu_buffer_register(const struct msu_buffer *mbuf,
219 				 struct module *owner)
220 {
221 	struct msu_buffer_entry *mbe;
222 	int ret = 0;
223 
224 	mbe = kzalloc(sizeof(*mbe), GFP_KERNEL);
225 	if (!mbe)
226 		return -ENOMEM;
227 
228 	mutex_lock(&msu_buffer_mutex);
229 	if (__msu_buffer_entry_find(mbuf->name)) {
230 		ret = -EEXIST;
231 		kfree(mbe);
232 		goto unlock;
233 	}
234 
235 	mbe->mbuf = mbuf;
236 	mbe->owner = owner;
237 	list_add_tail(&mbe->entry, &msu_buffer_list);
238 unlock:
239 	mutex_unlock(&msu_buffer_mutex);
240 
241 	return ret;
242 }
243 EXPORT_SYMBOL_GPL(intel_th_msu_buffer_register);
244 
245 void intel_th_msu_buffer_unregister(const struct msu_buffer *mbuf)
246 {
247 	struct msu_buffer_entry *mbe;
248 
249 	mutex_lock(&msu_buffer_mutex);
250 	mbe = __msu_buffer_entry_find(mbuf->name);
251 	if (mbe) {
252 		list_del(&mbe->entry);
253 		kfree(mbe);
254 	}
255 	mutex_unlock(&msu_buffer_mutex);
256 }
257 EXPORT_SYMBOL_GPL(intel_th_msu_buffer_unregister);
258 
259 static inline bool msc_block_is_empty(struct msc_block_desc *bdesc)
260 {
261 	/* header hasn't been written */
262 	if (!bdesc->valid_dw)
263 		return true;
264 
265 	/* valid_dw includes the header */
266 	if (!msc_data_sz(bdesc))
267 		return true;
268 
269 	return false;
270 }
271 
272 static inline struct msc_block_desc *
273 msc_win_block(struct msc_window *win, unsigned int block)
274 {
275 	return sg_virt(&win->sgt->sgl[block]);
276 }
277 
278 static inline size_t
279 msc_win_actual_bsz(struct msc_window *win, unsigned int block)
280 {
281 	return win->sgt->sgl[block].length;
282 }
283 
284 static inline dma_addr_t
285 msc_win_baddr(struct msc_window *win, unsigned int block)
286 {
287 	return sg_dma_address(&win->sgt->sgl[block]);
288 }
289 
290 static inline unsigned long
291 msc_win_bpfn(struct msc_window *win, unsigned int block)
292 {
293 	return msc_win_baddr(win, block) >> PAGE_SHIFT;
294 }
295 
296 /**
297  * msc_is_last_win() - check if a window is the last one for a given MSC
298  * @win:	window
299  * Return:	true if @win is the last window in MSC's multiblock buffer
300  */
301 static inline bool msc_is_last_win(struct msc_window *win)
302 {
303 	return win->entry.next == &win->msc->win_list;
304 }
305 
306 /**
307  * msc_next_window() - return next window in the multiblock buffer
308  * @win:	current window
309  *
310  * Return:	window following the current one
311  */
312 static struct msc_window *msc_next_window(struct msc_window *win)
313 {
314 	if (msc_is_last_win(win))
315 		return list_first_entry(&win->msc->win_list, struct msc_window,
316 					entry);
317 
318 	return list_next_entry(win, entry);
319 }
320 
321 static size_t msc_win_total_sz(struct msc_window *win)
322 {
323 	unsigned int blk;
324 	size_t size = 0;
325 
326 	for (blk = 0; blk < win->nr_segs; blk++) {
327 		struct msc_block_desc *bdesc = msc_win_block(win, blk);
328 
329 		if (msc_block_wrapped(bdesc))
330 			return win->nr_blocks << PAGE_SHIFT;
331 
332 		size += msc_total_sz(bdesc);
333 		if (msc_block_last_written(bdesc))
334 			break;
335 	}
336 
337 	return size;
338 }
339 
340 /**
341  * msc_find_window() - find a window matching a given sg_table
342  * @msc:	MSC device
343  * @sgt:	SG table of the window
344  * @nonempty:	skip over empty windows
345  *
346  * Return:	MSC window structure pointer or NULL if the window
347  *		could not be found.
348  */
349 static struct msc_window *
350 msc_find_window(struct msc *msc, struct sg_table *sgt, bool nonempty)
351 {
352 	struct msc_window *win;
353 	unsigned int found = 0;
354 
355 	if (list_empty(&msc->win_list))
356 		return NULL;
357 
358 	/*
359 	 * we might need a radix tree for this, depending on how
360 	 * many windows a typical user would allocate; ideally it's
361 	 * something like 2, in which case we're good
362 	 */
363 	list_for_each_entry(win, &msc->win_list, entry) {
364 		if (win->sgt == sgt)
365 			found++;
366 
367 		/* skip the empty ones */
368 		if (nonempty && msc_block_is_empty(msc_win_block(win, 0)))
369 			continue;
370 
371 		if (found)
372 			return win;
373 	}
374 
375 	return NULL;
376 }
377 
378 /**
379  * msc_oldest_window() - locate the window with oldest data
380  * @msc:	MSC device
381  *
382  * This should only be used in multiblock mode. Caller should hold the
383  * msc::user_count reference.
384  *
385  * Return:	the oldest window with valid data
386  */
387 static struct msc_window *msc_oldest_window(struct msc *msc)
388 {
389 	struct msc_window *win;
390 
391 	if (list_empty(&msc->win_list))
392 		return NULL;
393 
394 	win = msc_find_window(msc, msc_next_window(msc->cur_win)->sgt, true);
395 	if (win)
396 		return win;
397 
398 	return list_first_entry(&msc->win_list, struct msc_window, entry);
399 }
400 
401 /**
402  * msc_win_oldest_block() - locate the oldest block in a given window
403  * @win:	window to look at
404  *
405  * Return:	index of the block with the oldest data
406  */
407 static unsigned int msc_win_oldest_block(struct msc_window *win)
408 {
409 	unsigned int blk;
410 	struct msc_block_desc *bdesc = msc_win_block(win, 0);
411 
412 	/* without wrapping, first block is the oldest */
413 	if (!msc_block_wrapped(bdesc))
414 		return 0;
415 
416 	/*
417 	 * with wrapping, last written block contains both the newest and the
418 	 * oldest data for this window.
419 	 */
420 	for (blk = 0; blk < win->nr_segs; blk++) {
421 		bdesc = msc_win_block(win, blk);
422 
423 		if (msc_block_last_written(bdesc))
424 			return blk;
425 	}
426 
427 	return 0;
428 }
429 
430 static struct msc_block_desc *msc_iter_bdesc(struct msc_iter *iter)
431 {
432 	return msc_win_block(iter->win, iter->block);
433 }
434 
435 static void msc_iter_init(struct msc_iter *iter)
436 {
437 	memset(iter, 0, sizeof(*iter));
438 	iter->start_block = -1;
439 	iter->block = -1;
440 }
441 
442 static struct msc_iter *msc_iter_install(struct msc *msc)
443 {
444 	struct msc_iter *iter;
445 
446 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
447 	if (!iter)
448 		return ERR_PTR(-ENOMEM);
449 
450 	mutex_lock(&msc->buf_mutex);
451 
452 	/*
453 	 * Reading and tracing are mutually exclusive; if msc is
454 	 * enabled, open() will fail; otherwise existing readers
455 	 * will prevent enabling the msc and the rest of fops don't
456 	 * need to worry about it.
457 	 */
458 	if (msc->enabled) {
459 		kfree(iter);
460 		iter = ERR_PTR(-EBUSY);
461 		goto unlock;
462 	}
463 
464 	msc_iter_init(iter);
465 	iter->msc = msc;
466 
467 	list_add_tail(&iter->entry, &msc->iter_list);
468 unlock:
469 	mutex_unlock(&msc->buf_mutex);
470 
471 	return iter;
472 }
473 
474 static void msc_iter_remove(struct msc_iter *iter, struct msc *msc)
475 {
476 	mutex_lock(&msc->buf_mutex);
477 	list_del(&iter->entry);
478 	mutex_unlock(&msc->buf_mutex);
479 
480 	kfree(iter);
481 }
482 
483 static void msc_iter_block_start(struct msc_iter *iter)
484 {
485 	if (iter->start_block != -1)
486 		return;
487 
488 	iter->start_block = msc_win_oldest_block(iter->win);
489 	iter->block = iter->start_block;
490 	iter->wrap_count = 0;
491 
492 	/*
493 	 * start with the block with oldest data; if data has wrapped
494 	 * in this window, it should be in this block
495 	 */
496 	if (msc_block_wrapped(msc_iter_bdesc(iter)))
497 		iter->wrap_count = 2;
498 
499 }
500 
501 static int msc_iter_win_start(struct msc_iter *iter, struct msc *msc)
502 {
503 	/* already started, nothing to do */
504 	if (iter->start_win)
505 		return 0;
506 
507 	iter->start_win = msc_oldest_window(msc);
508 	if (!iter->start_win)
509 		return -EINVAL;
510 
511 	iter->win = iter->start_win;
512 	iter->start_block = -1;
513 
514 	msc_iter_block_start(iter);
515 
516 	return 0;
517 }
518 
519 static int msc_iter_win_advance(struct msc_iter *iter)
520 {
521 	iter->win = msc_next_window(iter->win);
522 	iter->start_block = -1;
523 
524 	if (iter->win == iter->start_win) {
525 		iter->eof++;
526 		return 1;
527 	}
528 
529 	msc_iter_block_start(iter);
530 
531 	return 0;
532 }
533 
/*
 * Step @iter to the next block, moving on to the next window when the
 * current one has been read out.
 *
 * The in-block offset is reset first. A wrapped start block is visited twice
 * (see msc_iter::wrap_count); the window is left after the second visit.
 * Without wrapping, the window is left after the block marked as last
 * written, or once the block index has come back around to start_block.
 *
 * Returns whatever msc_iter_win_advance() returns if the window changed
 * (1 means end of buffer), 0 if only the block changed.
 */
static int msc_iter_block_advance(struct msc_iter *iter)
{
	iter->block_off = 0;

	/* wrapping */
	if (iter->wrap_count && iter->block == iter->start_block) {
		iter->wrap_count--;
		if (!iter->wrap_count)
			/* copied newest data from the wrapped block */
			return msc_iter_win_advance(iter);
	}

	/* no wrapping, check for last written block */
	if (!iter->wrap_count && msc_block_last_written(msc_iter_bdesc(iter)))
		/* copied newest data for the window */
		return msc_iter_win_advance(iter);

	/* block advance; block indices wrap around within the window */
	if (++iter->block == iter->win->nr_segs)
		iter->block = 0;

	/* no wrapping, sanity check in case there is no last written block */
	if (!iter->wrap_count && iter->block == iter->start_block)
		return msc_iter_win_advance(iter);

	return 0;
}
561 
/**
 * msc_buffer_iterate() - go through multiblock buffer's data
 * @iter:	iterator structure
 * @size:	amount of data to scan
 * @data:	callback's private data
 * @fn:		iterator callback; called as fn(@data, chunk, chunk_size), its
 *		return value is taken as the number of bytes of the chunk that
 *		it did NOT consume
 *
 * This will start at the window which will be written to next (containing
 * the oldest data) and work its way to the current window, calling @fn
 * for each chunk of data as it goes.
 *
 * The position is kept in @iter, so a subsequent call picks up where this
 * one stopped. Once the iterator has reached the end of the buffer
 * (msc_iter::eof), nothing more is scanned.
 *
 * Caller should have msc::user_count reference to make sure the buffer
 * doesn't disappear from under us.
 *
 * Return:	amount of data actually scanned.
 */
static ssize_t
msc_buffer_iterate(struct msc_iter *iter, size_t size, void *data,
		   unsigned long (*fn)(void *, void *, size_t))
{
	struct msc *msc = iter->msc;
	size_t len = size;
	unsigned int advance;

	if (iter->eof)
		return 0;

	/* start with the oldest window */
	if (msc_iter_win_start(iter, msc))
		return 0;

	do {
		unsigned long data_bytes = msc_data_sz(msc_iter_bdesc(iter));
		/* payload follows the block descriptor, MSC_BDESC bytes in */
		void *src = (void *)msc_iter_bdesc(iter) + MSC_BDESC;
		size_t tocopy = data_bytes, copied = 0;
		size_t remaining = 0;

		/* move on to the next block unless this one is cut short */
		advance = 1;

		/*
		 * If block wrapping happened, we need to visit the last block
		 * twice, because it contains both the oldest and the newest
		 * data in this window.
		 *
		 * First time (wrap_count==2), in the very beginning, to collect
		 * the oldest data, which is in the range
		 * (data_bytes..DATA_IN_PAGE).
		 *
		 * Second time (wrap_count==1), it's just like any other block,
		 * containing data in the range of [MSC_BDESC..data_bytes].
		 */
		if (iter->block == iter->start_block && iter->wrap_count == 2) {
			tocopy = DATA_IN_PAGE - data_bytes;
			src += data_bytes;
		}

		/* nothing to take from this block */
		if (!tocopy)
			goto next_block;

		/* skip what an earlier call has already taken from this block */
		tocopy -= iter->block_off;
		src += iter->block_off;

		/* the request ends inside this block: stay on it */
		if (len < tocopy) {
			tocopy = len;
			advance = 0;
		}

		remaining = fn(data, src, tocopy);

		/* the callback fell short: stay on this block as well */
		if (remaining)
			advance = 0;

		copied = tocopy - remaining;
		len -= copied;
		iter->block_off += copied;
		iter->offset += copied;

		if (!advance)
			break;

next_block:
		/* non-zero means that the end of the buffer has been reached */
		if (msc_iter_block_advance(iter))
			break;

	} while (len);

	return size - len;
}
650 
651 /**
652  * msc_buffer_clear_hw_header() - clear hw header for multiblock
653  * @msc:	MSC device
654  */
655 static void msc_buffer_clear_hw_header(struct msc *msc)
656 {
657 	struct msc_window *win;
658 
659 	list_for_each_entry(win, &msc->win_list, entry) {
660 		unsigned int blk;
661 		size_t hw_sz = sizeof(struct msc_block_desc) -
662 			offsetof(struct msc_block_desc, hw_tag);
663 
664 		for (blk = 0; blk < win->nr_segs; blk++) {
665 			struct msc_block_desc *bdesc = msc_win_block(win, blk);
666 
667 			memset(&bdesc->hw_tag, 0, hw_sz);
668 		}
669 	}
670 }
671 
672 static int intel_th_msu_init(struct msc *msc)
673 {
674 	u32 mintctl, msusts;
675 
676 	if (!msc->do_irq)
677 		return 0;
678 
679 	if (!msc->mbuf)
680 		return 0;
681 
682 	mintctl = ioread32(msc->msu_base + REG_MSU_MINTCTL);
683 	mintctl |= msc->index ? M1BLIE : M0BLIE;
684 	iowrite32(mintctl, msc->msu_base + REG_MSU_MINTCTL);
685 	if (mintctl != ioread32(msc->msu_base + REG_MSU_MINTCTL)) {
686 		dev_info(msc_dev(msc), "MINTCTL ignores writes: no usable interrupts\n");
687 		msc->do_irq = 0;
688 		return 0;
689 	}
690 
691 	msusts = ioread32(msc->msu_base + REG_MSU_MSUSTS);
692 	iowrite32(msusts, msc->msu_base + REG_MSU_MSUSTS);
693 
694 	return 0;
695 }
696 
697 static void intel_th_msu_deinit(struct msc *msc)
698 {
699 	u32 mintctl;
700 
701 	if (!msc->do_irq)
702 		return;
703 
704 	mintctl = ioread32(msc->msu_base + REG_MSU_MINTCTL);
705 	mintctl &= msc->index ? ~M1BLIE : ~M0BLIE;
706 	iowrite32(mintctl, msc->msu_base + REG_MSU_MINTCTL);
707 }
708 
709 static int msc_win_set_lockout(struct msc_window *win,
710 			       enum lockout_state expect,
711 			       enum lockout_state new)
712 {
713 	enum lockout_state old;
714 	unsigned long flags;
715 	int ret = 0;
716 
717 	if (!win->msc->mbuf)
718 		return 0;
719 
720 	spin_lock_irqsave(&win->lo_lock, flags);
721 	old = win->lockout;
722 
723 	if (old != expect) {
724 		ret = -EINVAL;
725 		dev_warn_ratelimited(msc_dev(win->msc),
726 				     "expected lockout state %d, got %d\n",
727 				     expect, old);
728 		goto unlock;
729 	}
730 
731 	win->lockout = new;
732 
733 unlock:
734 	spin_unlock_irqrestore(&win->lo_lock, flags);
735 
736 	if (ret) {
737 		if (expect == WIN_READY && old == WIN_LOCKED)
738 			return -EBUSY;
739 
740 		/* from intel_th_msc_window_unlock(), don't warn if not locked */
741 		if (expect == WIN_LOCKED && old == new)
742 			return 0;
743 	}
744 
745 	return ret;
746 }
/**
 * msc_configure() - set up MSC hardware
 * @msc:	the MSC device to configure
 *
 * Program storage mode, wrapping, burst length and trace buffer address
 * into a given MSC. Then, enable tracing and set msc::enabled.
 * The latter is serialized on msc::buf_mutex, so make sure to hold it.
 *
 * Return:	0 on success, -ENOTSUPP if msc::mode is beyond MSC_MODE_MULTI,
 *		-EBUSY if, in multiblock mode, the current window could not be
 *		switched from READY to INUSE.
 */
static int msc_configure(struct msc *msc)
{
	u32 reg;

	lockdep_assert_held(&msc->buf_mutex);

	if (msc->mode > MSC_MODE_MULTI)
		return -ENOTSUPP;

	if (msc->mode == MSC_MODE_MULTI) {
		/* claim the current window before touching the hardware */
		if (msc_win_set_lockout(msc->cur_win, WIN_READY, WIN_INUSE))
			return -EBUSY;

		msc_buffer_clear_hw_header(msc);
	}

	/* the base address register takes a page frame number */
	reg = msc->base_addr >> PAGE_SHIFT;
	iowrite32(reg, msc->reg_base + REG_MSU_MSC0BAR);

	/* the size register is only programmed in single mode, in pages */
	if (msc->mode == MSC_MODE_SINGLE) {
		reg = msc->nr_pages;
		iowrite32(reg, msc->reg_base + REG_MSU_MSC0SIZE);
	}

	reg = ioread32(msc->reg_base + REG_MSU_MSC0CTL);
	reg &= ~(MSC_MODE | MSC_WRAPEN | MSC_EN | MSC_RD_HDR_OVRD);

	reg |= MSC_EN;
	reg |= msc->mode << __ffs(MSC_MODE);
	reg |= msc->burst_len << __ffs(MSC_LEN);

	if (msc->wrap)
		reg |= MSC_WRAPEN;

	iowrite32(reg, msc->reg_base + REG_MSU_MSC0CTL);

	/* return value not checked: intel_th_msu_init() always returns 0 */
	intel_th_msu_init(msc);

	msc->thdev->output.multiblock = msc->mode == MSC_MODE_MULTI;
	intel_th_trace_enable(msc->thdev);
	msc->enabled = 1;

	/* let the MSU buffer know, if it wants to */
	if (msc->mbuf && msc->mbuf->activate)
		msc->mbuf->activate(msc->mbuf_priv);

	return 0;
}
802 
/**
 * msc_disable() - disable MSC hardware
 * @msc:	MSC device to disable
 *
 * If @msc is enabled, disable tracing on the switch and then disable MSC
 * storage. Caller must hold msc::buf_mutex.
 *
 * This function does not look at msc::enabled itself; callers are expected
 * to check it (as intel_th_msc_deactivate() does).
 */
static void msc_disable(struct msc *msc)
{
	/* sampled up front; this is the window that gets reported as ready */
	struct msc_window *win = msc->cur_win;
	u32 reg;

	lockdep_assert_held(&msc->buf_mutex);

	if (msc->mode == MSC_MODE_MULTI)
		msc_win_set_lockout(win, WIN_INUSE, WIN_LOCKED);

	if (msc->mbuf && msc->mbuf->deactivate)
		msc->mbuf->deactivate(msc->mbuf_priv);
	intel_th_msu_deinit(msc);
	intel_th_trace_disable(msc->thdev);

	if (msc->mode == MSC_MODE_SINGLE) {
		/* remember whether the buffer wrapped and how much was written */
		reg = ioread32(msc->reg_base + REG_MSU_MSC0STS);
		msc->single_wrap = !!(reg & MSCSTS_WRAPSTAT);

		reg = ioread32(msc->reg_base + REG_MSU_MSC0MWP);
		msc->single_sz = reg & ((msc->nr_pages << PAGE_SHIFT) - 1);
		dev_dbg(msc_dev(msc), "MSCnMWP: %08x/%08lx, wrap: %d\n",
			reg, msc->single_sz, msc->single_wrap);
	}

	reg = ioread32(msc->reg_base + REG_MSU_MSC0CTL);
	reg &= ~MSC_EN;
	iowrite32(reg, msc->reg_base + REG_MSU_MSC0CTL);

	/* hand the window over to the MSU buffer, if it wants to know */
	if (msc->mbuf && msc->mbuf->ready)
		msc->mbuf->ready(msc->mbuf_priv, win->sgt,
				 msc_win_total_sz(win));

	msc->enabled = 0;

	iowrite32(0, msc->reg_base + REG_MSU_MSC0BAR);
	iowrite32(0, msc->reg_base + REG_MSU_MSC0SIZE);

	dev_dbg(msc_dev(msc), "MSCnNWSA: %08x\n",
		ioread32(msc->reg_base + REG_MSU_MSC0NWSA));

	reg = ioread32(msc->reg_base + REG_MSU_MSC0STS);
	dev_dbg(msc_dev(msc), "MSCnSTS: %08x\n", reg);

	/*
	 * Write this MSC's BLAST bit back to MSUSTS if it is set (presumably
	 * write-1-to-clear; confirm against the register description).
	 *
	 * NOTE(review): MSUSTS is reached through msc::reg_base here, while
	 * intel_th_msu_init() goes through msc::msu_base for the same register
	 * offset. Worth confirming that this is right for index != 0.
	 */
	reg = ioread32(msc->reg_base + REG_MSU_MSUSTS);
	reg &= msc->index ? MSUSTS_MSC1BLAST : MSUSTS_MSC0BLAST;
	iowrite32(reg, msc->reg_base + REG_MSU_MSUSTS);
}
858 
859 static int intel_th_msc_activate(struct intel_th_device *thdev)
860 {
861 	struct msc *msc = dev_get_drvdata(&thdev->dev);
862 	int ret = -EBUSY;
863 
864 	if (!atomic_inc_unless_negative(&msc->user_count))
865 		return -ENODEV;
866 
867 	mutex_lock(&msc->buf_mutex);
868 
869 	/* if there are readers, refuse */
870 	if (list_empty(&msc->iter_list))
871 		ret = msc_configure(msc);
872 
873 	mutex_unlock(&msc->buf_mutex);
874 
875 	if (ret)
876 		atomic_dec(&msc->user_count);
877 
878 	return ret;
879 }
880 
881 static void intel_th_msc_deactivate(struct intel_th_device *thdev)
882 {
883 	struct msc *msc = dev_get_drvdata(&thdev->dev);
884 
885 	mutex_lock(&msc->buf_mutex);
886 	if (msc->enabled) {
887 		msc_disable(msc);
888 		atomic_dec(&msc->user_count);
889 	}
890 	mutex_unlock(&msc->buf_mutex);
891 }
892 
893 /**
894  * msc_buffer_contig_alloc() - allocate a contiguous buffer for SINGLE mode
895  * @msc:	MSC device
896  * @size:	allocation size in bytes
897  *
898  * This modifies msc::base, which requires msc::buf_mutex to serialize, so the
899  * caller is expected to hold it.
900  *
901  * Return:	0 on success, -errno otherwise.
902  */
903 static int msc_buffer_contig_alloc(struct msc *msc, unsigned long size)
904 {
905 	unsigned long nr_pages = size >> PAGE_SHIFT;
906 	unsigned int order = get_order(size);
907 	struct page *page;
908 	int ret;
909 
910 	if (!size)
911 		return 0;
912 
913 	ret = sg_alloc_table(&msc->single_sgt, 1, GFP_KERNEL);
914 	if (ret)
915 		goto err_out;
916 
917 	ret = -ENOMEM;
918 	page = alloc_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32, order);
919 	if (!page)
920 		goto err_free_sgt;
921 
922 	split_page(page, order);
923 	sg_set_buf(msc->single_sgt.sgl, page_address(page), size);
924 
925 	ret = dma_map_sg(msc_dev(msc)->parent->parent, msc->single_sgt.sgl, 1,
926 			 DMA_FROM_DEVICE);
927 	if (ret < 0)
928 		goto err_free_pages;
929 
930 	msc->nr_pages = nr_pages;
931 	msc->base = page_address(page);
932 	msc->base_addr = sg_dma_address(msc->single_sgt.sgl);
933 
934 	return 0;
935 
936 err_free_pages:
937 	__free_pages(page, order);
938 
939 err_free_sgt:
940 	sg_free_table(&msc->single_sgt);
941 
942 err_out:
943 	return ret;
944 }
945 
946 /**
947  * msc_buffer_contig_free() - free a contiguous buffer
948  * @msc:	MSC configured in SINGLE mode
949  */
950 static void msc_buffer_contig_free(struct msc *msc)
951 {
952 	unsigned long off;
953 
954 	dma_unmap_sg(msc_dev(msc)->parent->parent, msc->single_sgt.sgl,
955 		     1, DMA_FROM_DEVICE);
956 	sg_free_table(&msc->single_sgt);
957 
958 	for (off = 0; off < msc->nr_pages << PAGE_SHIFT; off += PAGE_SIZE) {
959 		struct page *page = virt_to_page(msc->base + off);
960 
961 		page->mapping = NULL;
962 		__free_page(page);
963 	}
964 
965 	msc->nr_pages = 0;
966 }
967 
968 /**
969  * msc_buffer_contig_get_page() - find a page at a given offset
970  * @msc:	MSC configured in SINGLE mode
971  * @pgoff:	page offset
972  *
973  * Return:	page, if @pgoff is within the range, NULL otherwise.
974  */
975 static struct page *msc_buffer_contig_get_page(struct msc *msc,
976 					       unsigned long pgoff)
977 {
978 	if (pgoff >= msc->nr_pages)
979 		return NULL;
980 
981 	return virt_to_page(msc->base + (pgoff << PAGE_SHIFT));
982 }
983 
984 static int __msc_buffer_win_alloc(struct msc_window *win,
985 				  unsigned int nr_segs)
986 {
987 	struct scatterlist *sg_ptr;
988 	void *block;
989 	int i, ret;
990 
991 	ret = sg_alloc_table(win->sgt, nr_segs, GFP_KERNEL);
992 	if (ret)
993 		return -ENOMEM;
994 
995 	for_each_sg(win->sgt->sgl, sg_ptr, nr_segs, i) {
996 		block = dma_alloc_coherent(msc_dev(win->msc)->parent->parent,
997 					  PAGE_SIZE, &sg_dma_address(sg_ptr),
998 					  GFP_KERNEL);
999 		if (!block)
1000 			goto err_nomem;
1001 
1002 		sg_set_buf(sg_ptr, block, PAGE_SIZE);
1003 	}
1004 
1005 	return nr_segs;
1006 
1007 err_nomem:
1008 	for (i--; i >= 0; i--)
1009 		dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE,
1010 				  msc_win_block(win, i),
1011 				  msc_win_baddr(win, i));
1012 
1013 	sg_free_table(win->sgt);
1014 
1015 	return -ENOMEM;
1016 }
1017 
#ifdef CONFIG_X86
/* Mark the first @nr_segs blocks of @win as uncached, one page each */
static void msc_buffer_set_uc(struct msc_window *win, unsigned int nr_segs)
{
	unsigned int seg;

	for (seg = 0; seg < nr_segs; seg++) {
		unsigned long vaddr = (unsigned long)msc_win_block(win, seg);

		set_memory_uc(vaddr, 1);
	}
}

/* Put all of @win's blocks back to write-back caching, one page each */
static void msc_buffer_set_wb(struct msc_window *win)
{
	unsigned int seg;

	for (seg = 0; seg < win->nr_segs; seg++) {
		unsigned long vaddr = (unsigned long)msc_win_block(win, seg);

		set_memory_wb(vaddr, 1);
	}
}
#else /* !X86 */
/* Nothing is done about caching attributes on other architectures */
static inline void
msc_buffer_set_uc(struct msc_window *win, unsigned int nr_segs) {}
static inline void msc_buffer_set_wb(struct msc_window *win) {}
#endif /* CONFIG_X86 */
1041 
1042 /**
1043  * msc_buffer_win_alloc() - alloc a window for a multiblock mode
1044  * @msc:	MSC device
1045  * @nr_blocks:	number of pages in this window
1046  *
1047  * This modifies msc::win_list and msc::base, which requires msc::buf_mutex
1048  * to serialize, so the caller is expected to hold it.
1049  *
1050  * Return:	0 on success, -errno otherwise.
1051  */
1052 static int msc_buffer_win_alloc(struct msc *msc, unsigned int nr_blocks)
1053 {
1054 	struct msc_window *win;
1055 	int ret = -ENOMEM;
1056 
1057 	if (!nr_blocks)
1058 		return 0;
1059 
1060 	/*
1061 	 * This limitation hold as long as we need random access to the
1062 	 * block. When that changes, this can go away.
1063 	 */
1064 	if (nr_blocks > SG_MAX_SINGLE_ALLOC)
1065 		return -EINVAL;
1066 
1067 	win = kzalloc(sizeof(*win), GFP_KERNEL);
1068 	if (!win)
1069 		return -ENOMEM;
1070 
1071 	win->msc = msc;
1072 	win->sgt = &win->_sgt;
1073 	win->lockout = WIN_READY;
1074 	spin_lock_init(&win->lo_lock);
1075 
1076 	if (!list_empty(&msc->win_list)) {
1077 		struct msc_window *prev = list_last_entry(&msc->win_list,
1078 							  struct msc_window,
1079 							  entry);
1080 
1081 		win->pgoff = prev->pgoff + prev->nr_blocks;
1082 	}
1083 
1084 	if (msc->mbuf && msc->mbuf->alloc_window)
1085 		ret = msc->mbuf->alloc_window(msc->mbuf_priv, &win->sgt,
1086 					      nr_blocks << PAGE_SHIFT);
1087 	else
1088 		ret = __msc_buffer_win_alloc(win, nr_blocks);
1089 
1090 	if (ret <= 0)
1091 		goto err_nomem;
1092 
1093 	msc_buffer_set_uc(win, ret);
1094 
1095 	win->nr_segs = ret;
1096 	win->nr_blocks = nr_blocks;
1097 
1098 	if (list_empty(&msc->win_list)) {
1099 		msc->base = msc_win_block(win, 0);
1100 		msc->base_addr = msc_win_baddr(win, 0);
1101 		msc->cur_win = win;
1102 	}
1103 
1104 	list_add_tail(&win->entry, &msc->win_list);
1105 	msc->nr_pages += nr_blocks;
1106 
1107 	return 0;
1108 
1109 err_nomem:
1110 	kfree(win);
1111 
1112 	return ret;
1113 }
1114 
1115 static void __msc_buffer_win_free(struct msc *msc, struct msc_window *win)
1116 {
1117 	int i;
1118 
1119 	for (i = 0; i < win->nr_segs; i++) {
1120 		struct page *page = sg_page(&win->sgt->sgl[i]);
1121 
1122 		page->mapping = NULL;
1123 		dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE,
1124 				  msc_win_block(win, i), msc_win_baddr(win, i));
1125 	}
1126 	sg_free_table(win->sgt);
1127 }
1128 
1129 /**
1130  * msc_buffer_win_free() - free a window from MSC's window list
1131  * @msc:	MSC device
1132  * @win:	window to free
1133  *
1134  * This modifies msc::win_list and msc::base, which requires msc::buf_mutex
1135  * to serialize, so the caller is expected to hold it.
1136  */
1137 static void msc_buffer_win_free(struct msc *msc, struct msc_window *win)
1138 {
1139 	msc->nr_pages -= win->nr_blocks;
1140 
1141 	list_del(&win->entry);
1142 	if (list_empty(&msc->win_list)) {
1143 		msc->base = NULL;
1144 		msc->base_addr = 0;
1145 	}
1146 
1147 	msc_buffer_set_wb(win);
1148 
1149 	if (msc->mbuf && msc->mbuf->free_window)
1150 		msc->mbuf->free_window(msc->mbuf_priv, win->sgt);
1151 	else
1152 		__msc_buffer_win_free(msc, win);
1153 
1154 	kfree(win);
1155 }
1156 
1157 /**
1158  * msc_buffer_relink() - set up block descriptors for multiblock mode
1159  * @msc:	MSC device
1160  *
1161  * This traverses msc::win_list, which requires msc::buf_mutex to serialize,
1162  * so the caller is expected to hold it.
1163  */
1164 static void msc_buffer_relink(struct msc *msc)
1165 {
1166 	struct msc_window *win, *next_win;
1167 
1168 	/* call with msc::mutex locked */
1169 	list_for_each_entry(win, &msc->win_list, entry) {
1170 		unsigned int blk;
1171 		u32 sw_tag = 0;
1172 
1173 		/*
1174 		 * Last window's next_win should point to the first window
1175 		 * and MSC_SW_TAG_LASTWIN should be set.
1176 		 */
1177 		if (msc_is_last_win(win)) {
1178 			sw_tag |= MSC_SW_TAG_LASTWIN;
1179 			next_win = list_first_entry(&msc->win_list,
1180 						    struct msc_window, entry);
1181 		} else {
1182 			next_win = list_next_entry(win, entry);
1183 		}
1184 
1185 		for (blk = 0; blk < win->nr_segs; blk++) {
1186 			struct msc_block_desc *bdesc = msc_win_block(win, blk);
1187 
1188 			memset(bdesc, 0, sizeof(*bdesc));
1189 
1190 			bdesc->next_win = msc_win_bpfn(next_win, 0);
1191 
1192 			/*
1193 			 * Similarly to last window, last block should point
1194 			 * to the first one.
1195 			 */
1196 			if (blk == win->nr_segs - 1) {
1197 				sw_tag |= MSC_SW_TAG_LASTBLK;
1198 				bdesc->next_blk = msc_win_bpfn(win, 0);
1199 			} else {
1200 				bdesc->next_blk = msc_win_bpfn(win, blk + 1);
1201 			}
1202 
1203 			bdesc->sw_tag = sw_tag;
1204 			bdesc->block_sz = msc_win_actual_bsz(win, blk) / 64;
1205 		}
1206 	}
1207 
1208 	/*
1209 	 * Make the above writes globally visible before tracing is
1210 	 * enabled to make sure hardware sees them coherently.
1211 	 */
1212 	wmb();
1213 }
1214 
1215 static void msc_buffer_multi_free(struct msc *msc)
1216 {
1217 	struct msc_window *win, *iter;
1218 
1219 	list_for_each_entry_safe(win, iter, &msc->win_list, entry)
1220 		msc_buffer_win_free(msc, win);
1221 }
1222 
/*
 * Allocate @nr_wins windows, the n-th one being nr_pages[n] pages long, and
 * link their block descriptors together. If any window fails to allocate,
 * all windows on the list are freed and the error is returned.
 */
static int msc_buffer_multi_alloc(struct msc *msc, unsigned long *nr_pages,
				  unsigned int nr_wins)
{
	int idx, err;

	for (idx = 0; idx < nr_wins; idx++) {
		err = msc_buffer_win_alloc(msc, nr_pages[idx]);
		if (!err)
			continue;

		/* undo whatever has been allocated so far */
		msc_buffer_multi_free(msc);
		return err;
	}

	msc_buffer_relink(msc);

	return 0;
}
1240 
1241 /**
1242  * msc_buffer_free() - free buffers for MSC
1243  * @msc:	MSC device
1244  *
1245  * Free MSC's storage buffers.
1246  *
1247  * This modifies msc::win_list and msc::base, which requires msc::buf_mutex to
1248  * serialize, so the caller is expected to hold it.
1249  */
1250 static void msc_buffer_free(struct msc *msc)
1251 {
1252 	if (msc->mode == MSC_MODE_SINGLE)
1253 		msc_buffer_contig_free(msc);
1254 	else if (msc->mode == MSC_MODE_MULTI)
1255 		msc_buffer_multi_free(msc);
1256 }
1257 
1258 /**
1259  * msc_buffer_alloc() - allocate a buffer for MSC
1260  * @msc:	MSC device
1261  * @size:	allocation size in bytes
1262  *
1263  * Allocate a storage buffer for MSC, depending on the msc::mode, it will be
1264  * either done via msc_buffer_contig_alloc() for SINGLE operation mode or
1265  * msc_buffer_win_alloc() for multiblock operation. The latter allocates one
1266  * window per invocation, so in multiblock mode this can be called multiple
1267  * times for the same MSC to allocate multiple windows.
1268  *
1269  * This modifies msc::win_list and msc::base, which requires msc::buf_mutex
1270  * to serialize, so the caller is expected to hold it.
1271  *
1272  * Return:	0 on success, -errno otherwise.
1273  */
1274 static int msc_buffer_alloc(struct msc *msc, unsigned long *nr_pages,
1275 			    unsigned int nr_wins)
1276 {
1277 	int ret;
1278 
1279 	/* -1: buffer not allocated */
1280 	if (atomic_read(&msc->user_count) != -1)
1281 		return -EBUSY;
1282 
1283 	if (msc->mode == MSC_MODE_SINGLE) {
1284 		if (nr_wins != 1)
1285 			return -EINVAL;
1286 
1287 		ret = msc_buffer_contig_alloc(msc, nr_pages[0] << PAGE_SHIFT);
1288 	} else if (msc->mode == MSC_MODE_MULTI) {
1289 		ret = msc_buffer_multi_alloc(msc, nr_pages, nr_wins);
1290 	} else {
1291 		ret = -ENOTSUPP;
1292 	}
1293 
1294 	if (!ret) {
1295 		/* allocation should be visible before the counter goes to 0 */
1296 		smp_mb__before_atomic();
1297 
1298 		if (WARN_ON_ONCE(atomic_cmpxchg(&msc->user_count, -1, 0) != -1))
1299 			return -EINVAL;
1300 	}
1301 
1302 	return ret;
1303 }
1304 
1305 /**
1306  * msc_buffer_unlocked_free_unless_used() - free a buffer unless it's in use
1307  * @msc:	MSC device
1308  *
1309  * This will free MSC buffer unless it is in use or there is no allocated
1310  * buffer.
1311  * Caller needs to hold msc::buf_mutex.
1312  *
1313  * Return:	0 on successful deallocation or if there was no buffer to
1314  *		deallocate, -EBUSY if there are active users.
1315  */
1316 static int msc_buffer_unlocked_free_unless_used(struct msc *msc)
1317 {
1318 	int count, ret = 0;
1319 
1320 	count = atomic_cmpxchg(&msc->user_count, 0, -1);
1321 
1322 	/* > 0: buffer is allocated and has users */
1323 	if (count > 0)
1324 		ret = -EBUSY;
1325 	/* 0: buffer is allocated, no users */
1326 	else if (!count)
1327 		msc_buffer_free(msc);
1328 	/* < 0: no buffer, nothing to do */
1329 
1330 	return ret;
1331 }
1332 
1333 /**
1334  * msc_buffer_free_unless_used() - free a buffer unless it's in use
1335  * @msc:	MSC device
1336  *
1337  * This is a locked version of msc_buffer_unlocked_free_unless_used().
1338  */
1339 static int msc_buffer_free_unless_used(struct msc *msc)
1340 {
1341 	int ret;
1342 
1343 	mutex_lock(&msc->buf_mutex);
1344 	ret = msc_buffer_unlocked_free_unless_used(msc);
1345 	mutex_unlock(&msc->buf_mutex);
1346 
1347 	return ret;
1348 }
1349 
1350 /**
1351  * msc_buffer_get_page() - get MSC buffer page at a given offset
1352  * @msc:	MSC device
1353  * @pgoff:	page offset into the storage buffer
1354  *
1355  * This traverses msc::win_list, so holding msc::buf_mutex is expected from
1356  * the caller.
1357  *
1358  * Return:	page if @pgoff corresponds to a valid buffer page or NULL.
1359  */
1360 static struct page *msc_buffer_get_page(struct msc *msc, unsigned long pgoff)
1361 {
1362 	struct msc_window *win;
1363 	unsigned int blk;
1364 
1365 	if (msc->mode == MSC_MODE_SINGLE)
1366 		return msc_buffer_contig_get_page(msc, pgoff);
1367 
1368 	list_for_each_entry(win, &msc->win_list, entry)
1369 		if (pgoff >= win->pgoff && pgoff < win->pgoff + win->nr_blocks)
1370 			goto found;
1371 
1372 	return NULL;
1373 
1374 found:
1375 	pgoff -= win->pgoff;
1376 
1377 	for (blk = 0; blk < win->nr_segs; blk++) {
1378 		struct page *page = sg_page(&win->sgt->sgl[blk]);
1379 		size_t pgsz = PFN_DOWN(msc_win_actual_bsz(win, blk));
1380 
1381 		if (pgoff < pgsz)
1382 			return page + pgoff;
1383 
1384 		pgoff -= pgsz;
1385 	}
1386 
1387 	return NULL;
1388 }
1389 
1390 /**
1391  * struct msc_win_to_user_struct - data for copy_to_user() callback
1392  * @buf:	userspace buffer to copy data to
1393  * @offset:	running offset
1394  */
1395 struct msc_win_to_user_struct {
1396 	char __user	*buf;
1397 	unsigned long	offset;
1398 };
1399 
1400 /**
1401  * msc_win_to_user() - iterator for msc_buffer_iterate() to copy data to user
1402  * @data:	callback's private data
1403  * @src:	source buffer
1404  * @len:	amount of data to copy from the source buffer
1405  */
1406 static unsigned long msc_win_to_user(void *data, void *src, size_t len)
1407 {
1408 	struct msc_win_to_user_struct *u = data;
1409 	unsigned long ret;
1410 
1411 	ret = copy_to_user(u->buf + u->offset, src, len);
1412 	u->offset += len - ret;
1413 
1414 	return ret;
1415 }
1416 
1417 
1418 /*
1419  * file operations' callbacks
1420  */
1421 
1422 static int intel_th_msc_open(struct inode *inode, struct file *file)
1423 {
1424 	struct intel_th_device *thdev = file->private_data;
1425 	struct msc *msc = dev_get_drvdata(&thdev->dev);
1426 	struct msc_iter *iter;
1427 
1428 	if (!capable(CAP_SYS_RAWIO))
1429 		return -EPERM;
1430 
1431 	iter = msc_iter_install(msc);
1432 	if (IS_ERR(iter))
1433 		return PTR_ERR(iter);
1434 
1435 	file->private_data = iter;
1436 
1437 	return nonseekable_open(inode, file);
1438 }
1439 
1440 static int intel_th_msc_release(struct inode *inode, struct file *file)
1441 {
1442 	struct msc_iter *iter = file->private_data;
1443 	struct msc *msc = iter->msc;
1444 
1445 	msc_iter_remove(iter, msc);
1446 
1447 	return 0;
1448 }
1449 
1450 static ssize_t
1451 msc_single_to_user(struct msc *msc, char __user *buf, loff_t off, size_t len)
1452 {
1453 	unsigned long size = msc->nr_pages << PAGE_SHIFT, rem = len;
1454 	unsigned long start = off, tocopy = 0;
1455 
1456 	if (msc->single_wrap) {
1457 		start += msc->single_sz;
1458 		if (start < size) {
1459 			tocopy = min(rem, size - start);
1460 			if (copy_to_user(buf, msc->base + start, tocopy))
1461 				return -EFAULT;
1462 
1463 			buf += tocopy;
1464 			rem -= tocopy;
1465 			start += tocopy;
1466 		}
1467 
1468 		start &= size - 1;
1469 		if (rem) {
1470 			tocopy = min(rem, msc->single_sz - start);
1471 			if (copy_to_user(buf, msc->base + start, tocopy))
1472 				return -EFAULT;
1473 
1474 			rem -= tocopy;
1475 		}
1476 
1477 		return len - rem;
1478 	}
1479 
1480 	if (copy_to_user(buf, msc->base + start, rem))
1481 		return -EFAULT;
1482 
1483 	return len;
1484 }
1485 
1486 static ssize_t intel_th_msc_read(struct file *file, char __user *buf,
1487 				 size_t len, loff_t *ppos)
1488 {
1489 	struct msc_iter *iter = file->private_data;
1490 	struct msc *msc = iter->msc;
1491 	size_t size;
1492 	loff_t off = *ppos;
1493 	ssize_t ret = 0;
1494 
1495 	if (!atomic_inc_unless_negative(&msc->user_count))
1496 		return 0;
1497 
1498 	if (msc->mode == MSC_MODE_SINGLE && !msc->single_wrap)
1499 		size = msc->single_sz;
1500 	else
1501 		size = msc->nr_pages << PAGE_SHIFT;
1502 
1503 	if (!size)
1504 		goto put_count;
1505 
1506 	if (off >= size)
1507 		goto put_count;
1508 
1509 	if (off + len >= size)
1510 		len = size - off;
1511 
1512 	if (msc->mode == MSC_MODE_SINGLE) {
1513 		ret = msc_single_to_user(msc, buf, off, len);
1514 		if (ret >= 0)
1515 			*ppos += ret;
1516 	} else if (msc->mode == MSC_MODE_MULTI) {
1517 		struct msc_win_to_user_struct u = {
1518 			.buf	= buf,
1519 			.offset	= 0,
1520 		};
1521 
1522 		ret = msc_buffer_iterate(iter, len, &u, msc_win_to_user);
1523 		if (ret >= 0)
1524 			*ppos = iter->offset;
1525 	} else {
1526 		ret = -ENOTSUPP;
1527 	}
1528 
1529 put_count:
1530 	atomic_dec(&msc->user_count);
1531 
1532 	return ret;
1533 }
1534 
1535 /*
1536  * vm operations callbacks (vm_ops)
1537  */
1538 
1539 static void msc_mmap_open(struct vm_area_struct *vma)
1540 {
1541 	struct msc_iter *iter = vma->vm_file->private_data;
1542 	struct msc *msc = iter->msc;
1543 
1544 	atomic_inc(&msc->mmap_count);
1545 }
1546 
1547 static void msc_mmap_close(struct vm_area_struct *vma)
1548 {
1549 	struct msc_iter *iter = vma->vm_file->private_data;
1550 	struct msc *msc = iter->msc;
1551 	unsigned long pg;
1552 
1553 	if (!atomic_dec_and_mutex_lock(&msc->mmap_count, &msc->buf_mutex))
1554 		return;
1555 
1556 	/* drop page _refcounts */
1557 	for (pg = 0; pg < msc->nr_pages; pg++) {
1558 		struct page *page = msc_buffer_get_page(msc, pg);
1559 
1560 		if (WARN_ON_ONCE(!page))
1561 			continue;
1562 
1563 		if (page->mapping)
1564 			page->mapping = NULL;
1565 	}
1566 
1567 	/* last mapping -- drop user_count */
1568 	atomic_dec(&msc->user_count);
1569 	mutex_unlock(&msc->buf_mutex);
1570 }
1571 
1572 static vm_fault_t msc_mmap_fault(struct vm_fault *vmf)
1573 {
1574 	struct msc_iter *iter = vmf->vma->vm_file->private_data;
1575 	struct msc *msc = iter->msc;
1576 
1577 	vmf->page = msc_buffer_get_page(msc, vmf->pgoff);
1578 	if (!vmf->page)
1579 		return VM_FAULT_SIGBUS;
1580 
1581 	get_page(vmf->page);
1582 	vmf->page->mapping = vmf->vma->vm_file->f_mapping;
1583 	vmf->page->index = vmf->pgoff;
1584 
1585 	return 0;
1586 }
1587 
1588 static const struct vm_operations_struct msc_mmap_ops = {
1589 	.open	= msc_mmap_open,
1590 	.close	= msc_mmap_close,
1591 	.fault	= msc_mmap_fault,
1592 };
1593 
1594 static int intel_th_msc_mmap(struct file *file, struct vm_area_struct *vma)
1595 {
1596 	unsigned long size = vma->vm_end - vma->vm_start;
1597 	struct msc_iter *iter = vma->vm_file->private_data;
1598 	struct msc *msc = iter->msc;
1599 	int ret = -EINVAL;
1600 
1601 	if (!size || offset_in_page(size))
1602 		return -EINVAL;
1603 
1604 	if (vma->vm_pgoff)
1605 		return -EINVAL;
1606 
1607 	/* grab user_count once per mmap; drop in msc_mmap_close() */
1608 	if (!atomic_inc_unless_negative(&msc->user_count))
1609 		return -EINVAL;
1610 
1611 	if (msc->mode != MSC_MODE_SINGLE &&
1612 	    msc->mode != MSC_MODE_MULTI)
1613 		goto out;
1614 
1615 	if (size >> PAGE_SHIFT != msc->nr_pages)
1616 		goto out;
1617 
1618 	atomic_set(&msc->mmap_count, 1);
1619 	ret = 0;
1620 
1621 out:
1622 	if (ret)
1623 		atomic_dec(&msc->user_count);
1624 
1625 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1626 	vma->vm_flags |= VM_DONTEXPAND | VM_DONTCOPY;
1627 	vma->vm_ops = &msc_mmap_ops;
1628 	return ret;
1629 }
1630 
1631 static const struct file_operations intel_th_msc_fops = {
1632 	.open		= intel_th_msc_open,
1633 	.release	= intel_th_msc_release,
1634 	.read		= intel_th_msc_read,
1635 	.mmap		= intel_th_msc_mmap,
1636 	.llseek		= no_llseek,
1637 	.owner		= THIS_MODULE,
1638 };
1639 
1640 static void intel_th_msc_wait_empty(struct intel_th_device *thdev)
1641 {
1642 	struct msc *msc = dev_get_drvdata(&thdev->dev);
1643 	unsigned long count;
1644 	u32 reg;
1645 
1646 	for (reg = 0, count = MSC_PLE_WAITLOOP_DEPTH;
1647 	     count && !(reg & MSCSTS_PLE); count--) {
1648 		reg = __raw_readl(msc->reg_base + REG_MSU_MSC0STS);
1649 		cpu_relax();
1650 	}
1651 
1652 	if (!count)
1653 		dev_dbg(msc_dev(msc), "timeout waiting for MSC0 PLE\n");
1654 }
1655 
1656 static int intel_th_msc_init(struct msc *msc)
1657 {
1658 	atomic_set(&msc->user_count, -1);
1659 
1660 	msc->mode = MSC_MODE_MULTI;
1661 	mutex_init(&msc->buf_mutex);
1662 	INIT_LIST_HEAD(&msc->win_list);
1663 	INIT_LIST_HEAD(&msc->iter_list);
1664 
1665 	msc->burst_len =
1666 		(ioread32(msc->reg_base + REG_MSU_MSC0CTL) & MSC_LEN) >>
1667 		__ffs(MSC_LEN);
1668 
1669 	return 0;
1670 }
1671 
1672 static void msc_win_switch(struct msc *msc)
1673 {
1674 	struct msc_window *first;
1675 
1676 	first = list_first_entry(&msc->win_list, struct msc_window, entry);
1677 
1678 	if (msc_is_last_win(msc->cur_win))
1679 		msc->cur_win = first;
1680 	else
1681 		msc->cur_win = list_next_entry(msc->cur_win, entry);
1682 
1683 	msc->base = msc_win_block(msc->cur_win, 0);
1684 	msc->base_addr = msc_win_baddr(msc->cur_win, 0);
1685 
1686 	intel_th_trace_switch(msc->thdev);
1687 }
1688 
1689 /**
1690  * intel_th_msc_window_unlock - put the window back in rotation
1691  * @dev:	MSC device to which this relates
1692  * @sgt:	buffer's sg_table for the window, does nothing if NULL
1693  */
1694 void intel_th_msc_window_unlock(struct device *dev, struct sg_table *sgt)
1695 {
1696 	struct msc *msc = dev_get_drvdata(dev);
1697 	struct msc_window *win;
1698 
1699 	if (!sgt)
1700 		return;
1701 
1702 	win = msc_find_window(msc, sgt, false);
1703 	if (!win)
1704 		return;
1705 
1706 	msc_win_set_lockout(win, WIN_LOCKED, WIN_READY);
1707 }
1708 EXPORT_SYMBOL_GPL(intel_th_msc_window_unlock);
1709 
1710 static void msc_work(struct work_struct *work)
1711 {
1712 	struct msc *msc = container_of(work, struct msc, work);
1713 
1714 	intel_th_msc_deactivate(msc->thdev);
1715 }
1716 
1717 static irqreturn_t intel_th_msc_interrupt(struct intel_th_device *thdev)
1718 {
1719 	struct msc *msc = dev_get_drvdata(&thdev->dev);
1720 	u32 msusts = ioread32(msc->msu_base + REG_MSU_MSUSTS);
1721 	u32 mask = msc->index ? MSUSTS_MSC1BLAST : MSUSTS_MSC0BLAST;
1722 	struct msc_window *win, *next_win;
1723 
1724 	if (!msc->do_irq || !msc->mbuf)
1725 		return IRQ_NONE;
1726 
1727 	msusts &= mask;
1728 
1729 	if (!msusts)
1730 		return msc->enabled ? IRQ_HANDLED : IRQ_NONE;
1731 
1732 	iowrite32(msusts, msc->msu_base + REG_MSU_MSUSTS);
1733 
1734 	if (!msc->enabled)
1735 		return IRQ_NONE;
1736 
1737 	/* grab the window before we do the switch */
1738 	win = msc->cur_win;
1739 	if (!win)
1740 		return IRQ_HANDLED;
1741 	next_win = msc_next_window(win);
1742 	if (!next_win)
1743 		return IRQ_HANDLED;
1744 
1745 	/* next window: if READY, proceed, if LOCKED, stop the trace */
1746 	if (msc_win_set_lockout(next_win, WIN_READY, WIN_INUSE)) {
1747 		schedule_work(&msc->work);
1748 		return IRQ_HANDLED;
1749 	}
1750 
1751 	/* current window: INUSE -> LOCKED */
1752 	msc_win_set_lockout(win, WIN_INUSE, WIN_LOCKED);
1753 
1754 	msc_win_switch(msc);
1755 
1756 	if (msc->mbuf && msc->mbuf->ready)
1757 		msc->mbuf->ready(msc->mbuf_priv, win->sgt,
1758 				 msc_win_total_sz(win));
1759 
1760 	return IRQ_HANDLED;
1761 }
1762 
1763 static const char * const msc_mode[] = {
1764 	[MSC_MODE_SINGLE]	= "single",
1765 	[MSC_MODE_MULTI]	= "multi",
1766 	[MSC_MODE_EXI]		= "ExI",
1767 	[MSC_MODE_DEBUG]	= "debug",
1768 };
1769 
1770 static ssize_t
1771 wrap_show(struct device *dev, struct device_attribute *attr, char *buf)
1772 {
1773 	struct msc *msc = dev_get_drvdata(dev);
1774 
1775 	return scnprintf(buf, PAGE_SIZE, "%d\n", msc->wrap);
1776 }
1777 
1778 static ssize_t
1779 wrap_store(struct device *dev, struct device_attribute *attr, const char *buf,
1780 	   size_t size)
1781 {
1782 	struct msc *msc = dev_get_drvdata(dev);
1783 	unsigned long val;
1784 	int ret;
1785 
1786 	ret = kstrtoul(buf, 10, &val);
1787 	if (ret)
1788 		return ret;
1789 
1790 	msc->wrap = !!val;
1791 
1792 	return size;
1793 }
1794 
1795 static DEVICE_ATTR_RW(wrap);
1796 
1797 static void msc_buffer_unassign(struct msc *msc)
1798 {
1799 	lockdep_assert_held(&msc->buf_mutex);
1800 
1801 	if (!msc->mbuf)
1802 		return;
1803 
1804 	msc->mbuf->unassign(msc->mbuf_priv);
1805 	msu_buffer_put(msc->mbuf);
1806 	msc->mbuf_priv = NULL;
1807 	msc->mbuf = NULL;
1808 }
1809 
1810 static ssize_t
1811 mode_show(struct device *dev, struct device_attribute *attr, char *buf)
1812 {
1813 	struct msc *msc = dev_get_drvdata(dev);
1814 	const char *mode = msc_mode[msc->mode];
1815 	ssize_t ret;
1816 
1817 	mutex_lock(&msc->buf_mutex);
1818 	if (msc->mbuf)
1819 		mode = msc->mbuf->name;
1820 	ret = scnprintf(buf, PAGE_SIZE, "%s\n", mode);
1821 	mutex_unlock(&msc->buf_mutex);
1822 
1823 	return ret;
1824 }
1825 
1826 static ssize_t
1827 mode_store(struct device *dev, struct device_attribute *attr, const char *buf,
1828 	   size_t size)
1829 {
1830 	const struct msu_buffer *mbuf = NULL;
1831 	struct msc *msc = dev_get_drvdata(dev);
1832 	size_t len = size;
1833 	char *cp, *mode;
1834 	int i, ret;
1835 
1836 	if (!capable(CAP_SYS_RAWIO))
1837 		return -EPERM;
1838 
1839 	cp = memchr(buf, '\n', len);
1840 	if (cp)
1841 		len = cp - buf;
1842 
1843 	mode = kstrndup(buf, len, GFP_KERNEL);
1844 	i = match_string(msc_mode, ARRAY_SIZE(msc_mode), mode);
1845 	if (i >= 0)
1846 		goto found;
1847 
1848 	/* Buffer sinks only work with a usable IRQ */
1849 	if (!msc->do_irq) {
1850 		kfree(mode);
1851 		return -EINVAL;
1852 	}
1853 
1854 	mbuf = msu_buffer_get(mode);
1855 	kfree(mode);
1856 	if (mbuf)
1857 		goto found;
1858 
1859 	return -EINVAL;
1860 
1861 found:
1862 	mutex_lock(&msc->buf_mutex);
1863 	ret = 0;
1864 
1865 	/* Same buffer: do nothing */
1866 	if (mbuf && mbuf == msc->mbuf) {
1867 		/* put the extra reference we just got */
1868 		msu_buffer_put(mbuf);
1869 		goto unlock;
1870 	}
1871 
1872 	ret = msc_buffer_unlocked_free_unless_used(msc);
1873 	if (ret)
1874 		goto unlock;
1875 
1876 	if (mbuf) {
1877 		void *mbuf_priv = mbuf->assign(dev, &i);
1878 
1879 		if (!mbuf_priv) {
1880 			ret = -ENOMEM;
1881 			goto unlock;
1882 		}
1883 
1884 		msc_buffer_unassign(msc);
1885 		msc->mbuf_priv = mbuf_priv;
1886 		msc->mbuf = mbuf;
1887 	} else {
1888 		msc_buffer_unassign(msc);
1889 	}
1890 
1891 	msc->mode = i;
1892 
1893 unlock:
1894 	if (ret && mbuf)
1895 		msu_buffer_put(mbuf);
1896 	mutex_unlock(&msc->buf_mutex);
1897 
1898 	return ret ? ret : size;
1899 }
1900 
1901 static DEVICE_ATTR_RW(mode);
1902 
1903 static ssize_t
1904 nr_pages_show(struct device *dev, struct device_attribute *attr, char *buf)
1905 {
1906 	struct msc *msc = dev_get_drvdata(dev);
1907 	struct msc_window *win;
1908 	size_t count = 0;
1909 
1910 	mutex_lock(&msc->buf_mutex);
1911 
1912 	if (msc->mode == MSC_MODE_SINGLE)
1913 		count = scnprintf(buf, PAGE_SIZE, "%ld\n", msc->nr_pages);
1914 	else if (msc->mode == MSC_MODE_MULTI) {
1915 		list_for_each_entry(win, &msc->win_list, entry) {
1916 			count += scnprintf(buf + count, PAGE_SIZE - count,
1917 					   "%d%c", win->nr_blocks,
1918 					   msc_is_last_win(win) ? '\n' : ',');
1919 		}
1920 	} else {
1921 		count = scnprintf(buf, PAGE_SIZE, "unsupported\n");
1922 	}
1923 
1924 	mutex_unlock(&msc->buf_mutex);
1925 
1926 	return count;
1927 }
1928 
1929 static ssize_t
1930 nr_pages_store(struct device *dev, struct device_attribute *attr,
1931 	       const char *buf, size_t size)
1932 {
1933 	struct msc *msc = dev_get_drvdata(dev);
1934 	unsigned long val, *win = NULL, *rewin;
1935 	size_t len = size;
1936 	const char *p = buf;
1937 	char *end, *s;
1938 	int ret, nr_wins = 0;
1939 
1940 	if (!capable(CAP_SYS_RAWIO))
1941 		return -EPERM;
1942 
1943 	ret = msc_buffer_free_unless_used(msc);
1944 	if (ret)
1945 		return ret;
1946 
1947 	/* scan the comma-separated list of allocation sizes */
1948 	end = memchr(buf, '\n', len);
1949 	if (end)
1950 		len = end - buf;
1951 
1952 	do {
1953 		end = memchr(p, ',', len);
1954 		s = kstrndup(p, end ? end - p : len, GFP_KERNEL);
1955 		if (!s) {
1956 			ret = -ENOMEM;
1957 			goto free_win;
1958 		}
1959 
1960 		ret = kstrtoul(s, 10, &val);
1961 		kfree(s);
1962 
1963 		if (ret || !val)
1964 			goto free_win;
1965 
1966 		if (nr_wins && msc->mode == MSC_MODE_SINGLE) {
1967 			ret = -EINVAL;
1968 			goto free_win;
1969 		}
1970 
1971 		nr_wins++;
1972 		rewin = krealloc(win, sizeof(*win) * nr_wins, GFP_KERNEL);
1973 		if (!rewin) {
1974 			kfree(win);
1975 			return -ENOMEM;
1976 		}
1977 
1978 		win = rewin;
1979 		win[nr_wins - 1] = val;
1980 
1981 		if (!end)
1982 			break;
1983 
1984 		/* consume the number and the following comma, hence +1 */
1985 		len -= end - p + 1;
1986 		p = end + 1;
1987 	} while (len);
1988 
1989 	mutex_lock(&msc->buf_mutex);
1990 	ret = msc_buffer_alloc(msc, win, nr_wins);
1991 	mutex_unlock(&msc->buf_mutex);
1992 
1993 free_win:
1994 	kfree(win);
1995 
1996 	return ret ? ret : size;
1997 }
1998 
1999 static DEVICE_ATTR_RW(nr_pages);
2000 
2001 static ssize_t
2002 win_switch_store(struct device *dev, struct device_attribute *attr,
2003 		 const char *buf, size_t size)
2004 {
2005 	struct msc *msc = dev_get_drvdata(dev);
2006 	unsigned long val;
2007 	int ret;
2008 
2009 	ret = kstrtoul(buf, 10, &val);
2010 	if (ret)
2011 		return ret;
2012 
2013 	if (val != 1)
2014 		return -EINVAL;
2015 
2016 	mutex_lock(&msc->buf_mutex);
2017 	/*
2018 	 * Window switch can only happen in the "multi" mode.
2019 	 * If a external buffer is engaged, they have the full
2020 	 * control over window switching.
2021 	 */
2022 	if (msc->mode != MSC_MODE_MULTI || msc->mbuf)
2023 		ret = -ENOTSUPP;
2024 	else
2025 		msc_win_switch(msc);
2026 	mutex_unlock(&msc->buf_mutex);
2027 
2028 	return ret ? ret : size;
2029 }
2030 
2031 static DEVICE_ATTR_WO(win_switch);
2032 
2033 static struct attribute *msc_output_attrs[] = {
2034 	&dev_attr_wrap.attr,
2035 	&dev_attr_mode.attr,
2036 	&dev_attr_nr_pages.attr,
2037 	&dev_attr_win_switch.attr,
2038 	NULL,
2039 };
2040 
2041 static struct attribute_group msc_output_group = {
2042 	.attrs	= msc_output_attrs,
2043 };
2044 
2045 static int intel_th_msc_probe(struct intel_th_device *thdev)
2046 {
2047 	struct device *dev = &thdev->dev;
2048 	struct resource *res;
2049 	struct msc *msc;
2050 	void __iomem *base;
2051 	int err;
2052 
2053 	res = intel_th_device_get_resource(thdev, IORESOURCE_MEM, 0);
2054 	if (!res)
2055 		return -ENODEV;
2056 
2057 	base = devm_ioremap(dev, res->start, resource_size(res));
2058 	if (!base)
2059 		return -ENOMEM;
2060 
2061 	msc = devm_kzalloc(dev, sizeof(*msc), GFP_KERNEL);
2062 	if (!msc)
2063 		return -ENOMEM;
2064 
2065 	res = intel_th_device_get_resource(thdev, IORESOURCE_IRQ, 1);
2066 	if (!res)
2067 		msc->do_irq = 1;
2068 
2069 	msc->index = thdev->id;
2070 
2071 	msc->thdev = thdev;
2072 	msc->reg_base = base + msc->index * 0x100;
2073 	msc->msu_base = base;
2074 
2075 	INIT_WORK(&msc->work, msc_work);
2076 	err = intel_th_msc_init(msc);
2077 	if (err)
2078 		return err;
2079 
2080 	dev_set_drvdata(dev, msc);
2081 
2082 	return 0;
2083 }
2084 
2085 static void intel_th_msc_remove(struct intel_th_device *thdev)
2086 {
2087 	struct msc *msc = dev_get_drvdata(&thdev->dev);
2088 	int ret;
2089 
2090 	intel_th_msc_deactivate(thdev);
2091 
2092 	/*
2093 	 * Buffers should not be used at this point except if the
2094 	 * output character device is still open and the parent
2095 	 * device gets detached from its bus, which is a FIXME.
2096 	 */
2097 	ret = msc_buffer_free_unless_used(msc);
2098 	WARN_ON_ONCE(ret);
2099 }
2100 
2101 static struct intel_th_driver intel_th_msc_driver = {
2102 	.probe	= intel_th_msc_probe,
2103 	.remove	= intel_th_msc_remove,
2104 	.irq		= intel_th_msc_interrupt,
2105 	.wait_empty	= intel_th_msc_wait_empty,
2106 	.activate	= intel_th_msc_activate,
2107 	.deactivate	= intel_th_msc_deactivate,
2108 	.fops	= &intel_th_msc_fops,
2109 	.attr_group	= &msc_output_group,
2110 	.driver	= {
2111 		.name	= "msc",
2112 		.owner	= THIS_MODULE,
2113 	},
2114 };
2115 
2116 module_driver(intel_th_msc_driver,
2117 	      intel_th_driver_register,
2118 	      intel_th_driver_unregister);
2119 
2120 MODULE_LICENSE("GPL v2");
2121 MODULE_DESCRIPTION("Intel(R) Trace Hub Memory Storage Unit driver");
2122 MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
2123