xref: /openbmc/linux/drivers/s390/cio/cmf.c (revision d0807da7)
1 /*
2  * Linux on zSeries Channel Measurement Facility support
3  *
4  * Copyright IBM Corp. 2000, 2006
5  *
6  * Authors: Arnd Bergmann <arndb@de.ibm.com>
7  *	    Cornelia Huck <cornelia.huck@de.ibm.com>
8  *
9  * original idea from Natarajan Krishnaswami <nkrishna@us.ibm.com>
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2, or (at your option)
14  * any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24  */
25 
26 #define KMSG_COMPONENT "cio"
27 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
28 
29 #include <linux/bootmem.h>
30 #include <linux/device.h>
31 #include <linux/init.h>
32 #include <linux/list.h>
33 #include <linux/export.h>
34 #include <linux/moduleparam.h>
35 #include <linux/slab.h>
36 #include <linux/timex.h>	/* get_tod_clock() */
37 
38 #include <asm/ccwdev.h>
39 #include <asm/cio.h>
40 #include <asm/cmb.h>
41 #include <asm/div64.h>
42 
43 #include "cio.h"
44 #include "css.h"
45 #include "device.h"
46 #include "ioasm.h"
47 #include "chsc.h"
48 
49 /*
50  * parameter to enable cmf during boot, possible uses are:
51  *  "s390cmf" -- enable cmf and allocate 2 MB of ram so measuring can be
52  *               used on any subchannel
53  *  "s390cmf=<num>" -- enable cmf and allocate enough memory to measure
54  *                     <num> subchannel, where <num> is an integer
55  *                     between 1 and 65535, default is 1024
56  */
#define ARGSTRING "s390cmf"

/*
 * Indices understood by the ->read() callback of struct cmb_operations
 * (READCMB). avg_utilization is a pseudo index: it is not a field of the
 * measurement block but is computed from several of its fields.
 */
enum cmb_index {
	avg_utilization			  = -1,
	/* available in basic and extended format: */
	cmb_ssch_rsch_count		  = 0,
	cmb_sample_count		  = 1,
	cmb_device_connect_time		  = 2,
	cmb_function_pending_time	  = 3,
	cmb_device_disconnect_time	  = 4,
	cmb_control_unit_queuing_time	  = 5,
	cmb_device_active_only_time	  = 6,
	/* available in extended format only: */
	cmb_device_busy_time		  = 7,
	cmb_initial_command_response_time = 8,
};
74 
75 /**
76  * enum cmb_format - types of supported measurement block formats
77  *
78  * @CMF_BASIC:      traditional channel measurement blocks supported
79  *		    by all machines that we run on
80  * @CMF_EXTENDED:   improved format that was introduced with the z990
81  *		    machine
82  * @CMF_AUTODETECT: default: use extended format when running on a machine
83  *		    supporting extended format, otherwise fall back to
84  *		    basic format
85  */
86 enum cmb_format {
87 	CMF_BASIC,
88 	CMF_EXTENDED,
89 	CMF_AUTODETECT = -1,
90 };
91 
92 /*
93  * format - actual format for all measurement blocks
94  *
95  * The format module parameter can be set to a value of 0 (zero)
96  * or 1, indicating basic or extended format as described for
97  * enum cmb_format.
98  */
99 static int format = CMF_AUTODETECT;
100 module_param(format, bint, 0444);
101 
102 /**
103  * struct cmb_operations - functions to use depending on cmb_format
104  *
105  * Most of these functions operate on a struct ccw_device. There is only
106  * one instance of struct cmb_operations because the format of the measurement
107  * data is guaranteed to be the same for every ccw_device.
108  *
109  * @alloc:	allocate memory for a channel measurement block,
110  *		either with the help of a special pool or with kmalloc
111  * @free:	free memory allocated with @alloc
112  * @set:	enable or disable measurement
113  * @read:	read a measurement entry at an index
114  * @readall:	read a measurement block in a common format
115  * @reset:	clear the data in the associated measurement block and
116  *		reset its time stamp
117  */
118 struct cmb_operations {
119 	int  (*alloc)  (struct ccw_device *);
120 	void (*free)   (struct ccw_device *);
121 	int  (*set)    (struct ccw_device *, u32);
122 	u64  (*read)   (struct ccw_device *, int);
123 	int  (*readall)(struct ccw_device *, struct cmbdata *);
124 	void (*reset)  (struct ccw_device *);
125 /* private: */
126 	struct attribute_group *attr_group;
127 };
128 static struct cmb_operations *cmbops;
129 
/*
 * struct cmb_data - per-device bookkeeping for one measurement block
 *
 * hw_block:	block that is updated by the hardware
 * last_block:	software copy, holding the last changed block copied from
 *		hw_block
 * size:	size in bytes of hw_block and of last_block
 * last_update:	TOD clock value taken when last_block was last refreshed;
 *		0 means that no copy has been taken since the last reset
 */
struct cmb_data {
	void *hw_block;
	void *last_block;
	int size;
	unsigned long long last_update;
};
136 
137 /*
138  * Our user interface is designed in terms of nanoseconds,
139  * while the hardware measures total times in its own
140  * unit.
141  */
142 static inline u64 time_to_nsec(u32 value)
143 {
144 	return ((u64)value) * 128000ull;
145 }
146 
147 /*
148  * Users are usually interested in average times,
149  * not accumulated time.
150  * This also helps us with atomicity problems
151  * when reading sinlge values.
152  */
153 static inline u64 time_to_avg_nsec(u32 value, u32 count)
154 {
155 	u64 ret;
156 
157 	/* no samples yet, avoid division by 0 */
158 	if (count == 0)
159 		return 0;
160 
161 	/* value comes in units of 128 µsec */
162 	ret = time_to_nsec(value);
163 	do_div(ret, count);
164 
165 	return ret;
166 }
167 
168 #define CMF_OFF 0
169 #define CMF_ON	2
170 
171 /*
172  * Activate or deactivate the channel monitor. When area is NULL,
173  * the monitor is deactivated. The channel monitor needs to
174  * be active in order to measure subchannels, which also need
175  * to be enabled.
176  */
177 static inline void cmf_activate(void *area, unsigned int onoff)
178 {
179 	register void * __gpr2 asm("2");
180 	register long __gpr1 asm("1");
181 
182 	__gpr2 = area;
183 	__gpr1 = onoff;
184 	/* activate channel measurement */
185 	asm("schm" : : "d" (__gpr2), "d" (__gpr1) );
186 }
187 
188 static int set_schib(struct ccw_device *cdev, u32 mme, int mbfc,
189 		     unsigned long address)
190 {
191 	struct subchannel *sch = to_subchannel(cdev->dev.parent);
192 	int ret;
193 
194 	sch->config.mme = mme;
195 	sch->config.mbfc = mbfc;
196 	/* address can be either a block address or a block index */
197 	if (mbfc)
198 		sch->config.mba = address;
199 	else
200 		sch->config.mbi = address;
201 
202 	ret = cio_commit_config(sch);
203 	if (!mme && ret == -ENODEV) {
204 		/*
205 		 * The task was to disable measurement block updates but
206 		 * the subchannel is already gone. Report success.
207 		 */
208 		ret = 0;
209 	}
210 	return ret;
211 }
212 
213 struct set_schib_struct {
214 	u32 mme;
215 	int mbfc;
216 	unsigned long address;
217 	wait_queue_head_t wait;
218 	int ret;
219 };
220 
221 #define CMF_PENDING 1
222 #define SET_SCHIB_TIMEOUT (10 * HZ)
223 
224 static int set_schib_wait(struct ccw_device *cdev, u32 mme,
225 			  int mbfc, unsigned long address)
226 {
227 	struct set_schib_struct set_data;
228 	int ret = -ENODEV;
229 
230 	spin_lock_irq(cdev->ccwlock);
231 	if (!cdev->private->cmb)
232 		goto out;
233 
234 	ret = set_schib(cdev, mme, mbfc, address);
235 	if (ret != -EBUSY)
236 		goto out;
237 
238 	/* if the device is not online, don't even try again */
239 	if (cdev->private->state != DEV_STATE_ONLINE)
240 		goto out;
241 
242 	init_waitqueue_head(&set_data.wait);
243 	set_data.mme = mme;
244 	set_data.mbfc = mbfc;
245 	set_data.address = address;
246 	set_data.ret = CMF_PENDING;
247 
248 	cdev->private->state = DEV_STATE_CMFCHANGE;
249 	cdev->private->cmb_wait = &set_data;
250 	spin_unlock_irq(cdev->ccwlock);
251 
252 	ret = wait_event_interruptible_timeout(set_data.wait,
253 					       set_data.ret != CMF_PENDING,
254 					       SET_SCHIB_TIMEOUT);
255 	spin_lock_irq(cdev->ccwlock);
256 	if (ret <= 0) {
257 		if (set_data.ret == CMF_PENDING) {
258 			set_data.ret = (ret == 0) ? -ETIME : ret;
259 			if (cdev->private->state == DEV_STATE_CMFCHANGE)
260 				cdev->private->state = DEV_STATE_ONLINE;
261 		}
262 	}
263 	cdev->private->cmb_wait = NULL;
264 	ret = set_data.ret;
265 out:
266 	spin_unlock_irq(cdev->ccwlock);
267 	return ret;
268 }
269 
270 void retry_set_schib(struct ccw_device *cdev)
271 {
272 	struct set_schib_struct *set_data = cdev->private->cmb_wait;
273 
274 	if (!set_data)
275 		return;
276 
277 	set_data->ret = set_schib(cdev, set_data->mme, set_data->mbfc,
278 				  set_data->address);
279 	wake_up(&set_data->wait);
280 }
281 
282 static int cmf_copy_block(struct ccw_device *cdev)
283 {
284 	struct subchannel *sch = to_subchannel(cdev->dev.parent);
285 	struct cmb_data *cmb_data;
286 	void *hw_block;
287 
288 	if (cio_update_schib(sch))
289 		return -ENODEV;
290 
291 	if (scsw_fctl(&sch->schib.scsw) & SCSW_FCTL_START_FUNC) {
292 		/* Don't copy if a start function is in progress. */
293 		if ((!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_SUSPENDED)) &&
294 		    (scsw_actl(&sch->schib.scsw) &
295 		     (SCSW_ACTL_DEVACT | SCSW_ACTL_SCHACT)) &&
296 		    (!(scsw_stctl(&sch->schib.scsw) & SCSW_STCTL_SEC_STATUS)))
297 			return -EBUSY;
298 	}
299 	cmb_data = cdev->private->cmb;
300 	hw_block = cmb_data->hw_block;
301 	memcpy(cmb_data->last_block, hw_block, cmb_data->size);
302 	cmb_data->last_update = get_tod_clock();
303 	return 0;
304 }
305 
306 struct copy_block_struct {
307 	wait_queue_head_t wait;
308 	int ret;
309 };
310 
311 static int cmf_cmb_copy_wait(struct ccw_device *cdev)
312 {
313 	struct copy_block_struct copy_block;
314 	int ret = -ENODEV;
315 
316 	spin_lock_irq(cdev->ccwlock);
317 	if (!cdev->private->cmb)
318 		goto out;
319 
320 	ret = cmf_copy_block(cdev);
321 	if (ret != -EBUSY)
322 		goto out;
323 
324 	if (cdev->private->state != DEV_STATE_ONLINE)
325 		goto out;
326 
327 	init_waitqueue_head(&copy_block.wait);
328 	copy_block.ret = CMF_PENDING;
329 
330 	cdev->private->state = DEV_STATE_CMFUPDATE;
331 	cdev->private->cmb_wait = &copy_block;
332 	spin_unlock_irq(cdev->ccwlock);
333 
334 	ret = wait_event_interruptible(copy_block.wait,
335 				       copy_block.ret != CMF_PENDING);
336 	spin_lock_irq(cdev->ccwlock);
337 	if (ret) {
338 		if (copy_block.ret == CMF_PENDING) {
339 			copy_block.ret = -ERESTARTSYS;
340 			if (cdev->private->state == DEV_STATE_CMFUPDATE)
341 				cdev->private->state = DEV_STATE_ONLINE;
342 		}
343 	}
344 	cdev->private->cmb_wait = NULL;
345 	ret = copy_block.ret;
346 out:
347 	spin_unlock_irq(cdev->ccwlock);
348 	return ret;
349 }
350 
351 void cmf_retry_copy_block(struct ccw_device *cdev)
352 {
353 	struct copy_block_struct *copy_block = cdev->private->cmb_wait;
354 
355 	if (!copy_block)
356 		return;
357 
358 	copy_block->ret = cmf_copy_block(cdev);
359 	wake_up(&copy_block->wait);
360 }
361 
362 static void cmf_generic_reset(struct ccw_device *cdev)
363 {
364 	struct cmb_data *cmb_data;
365 
366 	spin_lock_irq(cdev->ccwlock);
367 	cmb_data = cdev->private->cmb;
368 	if (cmb_data) {
369 		memset(cmb_data->last_block, 0, cmb_data->size);
370 		/*
371 		 * Need to reset hw block as well to make the hardware start
372 		 * from 0 again.
373 		 */
374 		memset(cmb_data->hw_block, 0, cmb_data->size);
375 		cmb_data->last_update = 0;
376 	}
377 	cdev->private->cmb_start_time = get_tod_clock();
378 	spin_unlock_irq(cdev->ccwlock);
379 }
380 
381 /**
382  * struct cmb_area - container for global cmb data
383  *
384  * @mem:	pointer to CMBs (only in basic measurement mode)
385  * @list:	contains a linked list of all subchannels
386  * @num_channels: number of channels to be measured
387  * @lock:	protect concurrent access to @mem and @list
388  */
389 struct cmb_area {
390 	struct cmb *mem;
391 	struct list_head list;
392 	int num_channels;
393 	spinlock_t lock;
394 };
395 
396 static struct cmb_area cmb_area = {
397 	.lock = __SPIN_LOCK_UNLOCKED(cmb_area.lock),
398 	.list = LIST_HEAD_INIT(cmb_area.list),
399 	.num_channels  = 1024,
400 };
401 
402 /* ****** old style CMB handling ********/
403 
404 /*
405  * Basic channel measurement blocks are allocated in one contiguous
406  * block of memory, which can not be moved as long as any channel
407  * is active. Therefore, a maximum number of subchannels needs to
408  * be defined somewhere. This is a module parameter, defaulting to
409  * a reasonable value of 1024, or 32 kb of memory.
410  * Current kernels don't allow kmalloc with more than 128kb, so the
411  * maximum is 4096.
412  */
413 
414 module_param_named(maxchannels, cmb_area.num_channels, uint, 0444);
415 
416 /**
417  * struct cmb - basic channel measurement block
418  * @ssch_rsch_count: number of ssch and rsch
419  * @sample_count: number of samples
420  * @device_connect_time: time of device connect
421  * @function_pending_time: time of function pending
422  * @device_disconnect_time: time of device disconnect
423  * @control_unit_queuing_time: time of control unit queuing
424  * @device_active_only_time: time of device active only
425  * @reserved: unused in basic measurement mode
426  *
427  * The measurement block as used by the hardware. The fields are described
428  * further in z/Architecture Principles of Operation, chapter 17.
429  *
430  * The cmb area made up from these blocks must be a contiguous array and may
431  * not be reallocated or freed.
432  * Only one cmb area can be present in the system.
433  */
434 struct cmb {
435 	u16 ssch_rsch_count;
436 	u16 sample_count;
437 	u32 device_connect_time;
438 	u32 function_pending_time;
439 	u32 device_disconnect_time;
440 	u32 control_unit_queuing_time;
441 	u32 device_active_only_time;
442 	u32 reserved[2];
443 };
444 
445 /*
446  * Insert a single device into the cmb_area list.
447  * Called with cmb_area.lock held from alloc_cmb.
448  */
449 static int alloc_cmb_single(struct ccw_device *cdev,
450 			    struct cmb_data *cmb_data)
451 {
452 	struct cmb *cmb;
453 	struct ccw_device_private *node;
454 	int ret;
455 
456 	spin_lock_irq(cdev->ccwlock);
457 	if (!list_empty(&cdev->private->cmb_list)) {
458 		ret = -EBUSY;
459 		goto out;
460 	}
461 
462 	/*
463 	 * Find first unused cmb in cmb_area.mem.
464 	 * This is a little tricky: cmb_area.list
465 	 * remains sorted by ->cmb->hw_data pointers.
466 	 */
467 	cmb = cmb_area.mem;
468 	list_for_each_entry(node, &cmb_area.list, cmb_list) {
469 		struct cmb_data *data;
470 		data = node->cmb;
471 		if ((struct cmb*)data->hw_block > cmb)
472 			break;
473 		cmb++;
474 	}
475 	if (cmb - cmb_area.mem >= cmb_area.num_channels) {
476 		ret = -ENOMEM;
477 		goto out;
478 	}
479 
480 	/* insert new cmb */
481 	list_add_tail(&cdev->private->cmb_list, &node->cmb_list);
482 	cmb_data->hw_block = cmb;
483 	cdev->private->cmb = cmb_data;
484 	ret = 0;
485 out:
486 	spin_unlock_irq(cdev->ccwlock);
487 	return ret;
488 }
489 
490 static int alloc_cmb(struct ccw_device *cdev)
491 {
492 	int ret;
493 	struct cmb *mem;
494 	ssize_t size;
495 	struct cmb_data *cmb_data;
496 
497 	/* Allocate private cmb_data. */
498 	cmb_data = kzalloc(sizeof(struct cmb_data), GFP_KERNEL);
499 	if (!cmb_data)
500 		return -ENOMEM;
501 
502 	cmb_data->last_block = kzalloc(sizeof(struct cmb), GFP_KERNEL);
503 	if (!cmb_data->last_block) {
504 		kfree(cmb_data);
505 		return -ENOMEM;
506 	}
507 	cmb_data->size = sizeof(struct cmb);
508 	spin_lock(&cmb_area.lock);
509 
510 	if (!cmb_area.mem) {
511 		/* there is no user yet, so we need a new area */
512 		size = sizeof(struct cmb) * cmb_area.num_channels;
513 		WARN_ON(!list_empty(&cmb_area.list));
514 
515 		spin_unlock(&cmb_area.lock);
516 		mem = (void*)__get_free_pages(GFP_KERNEL | GFP_DMA,
517 				 get_order(size));
518 		spin_lock(&cmb_area.lock);
519 
520 		if (cmb_area.mem) {
521 			/* ok, another thread was faster */
522 			free_pages((unsigned long)mem, get_order(size));
523 		} else if (!mem) {
524 			/* no luck */
525 			ret = -ENOMEM;
526 			goto out;
527 		} else {
528 			/* everything ok */
529 			memset(mem, 0, size);
530 			cmb_area.mem = mem;
531 			cmf_activate(cmb_area.mem, CMF_ON);
532 		}
533 	}
534 
535 	/* do the actual allocation */
536 	ret = alloc_cmb_single(cdev, cmb_data);
537 out:
538 	spin_unlock(&cmb_area.lock);
539 	if (ret) {
540 		kfree(cmb_data->last_block);
541 		kfree(cmb_data);
542 	}
543 	return ret;
544 }
545 
546 static void free_cmb(struct ccw_device *cdev)
547 {
548 	struct ccw_device_private *priv;
549 	struct cmb_data *cmb_data;
550 
551 	spin_lock(&cmb_area.lock);
552 	spin_lock_irq(cdev->ccwlock);
553 
554 	priv = cdev->private;
555 	cmb_data = priv->cmb;
556 	priv->cmb = NULL;
557 	if (cmb_data)
558 		kfree(cmb_data->last_block);
559 	kfree(cmb_data);
560 	list_del_init(&priv->cmb_list);
561 
562 	if (list_empty(&cmb_area.list)) {
563 		ssize_t size;
564 		size = sizeof(struct cmb) * cmb_area.num_channels;
565 		cmf_activate(NULL, CMF_OFF);
566 		free_pages((unsigned long)cmb_area.mem, get_order(size));
567 		cmb_area.mem = NULL;
568 	}
569 	spin_unlock_irq(cdev->ccwlock);
570 	spin_unlock(&cmb_area.lock);
571 }
572 
573 static int set_cmb(struct ccw_device *cdev, u32 mme)
574 {
575 	u16 offset;
576 	struct cmb_data *cmb_data;
577 	unsigned long flags;
578 
579 	spin_lock_irqsave(cdev->ccwlock, flags);
580 	if (!cdev->private->cmb) {
581 		spin_unlock_irqrestore(cdev->ccwlock, flags);
582 		return -EINVAL;
583 	}
584 	cmb_data = cdev->private->cmb;
585 	offset = mme ? (struct cmb *)cmb_data->hw_block - cmb_area.mem : 0;
586 	spin_unlock_irqrestore(cdev->ccwlock, flags);
587 
588 	return set_schib_wait(cdev, mme, 0, offset);
589 }
590 
591 /* calculate utilization in 0.1 percent units */
592 static u64 __cmb_utilization(u64 device_connect_time, u64 function_pending_time,
593 			     u64 device_disconnect_time, u64 start_time)
594 {
595 	u64 utilization, elapsed_time;
596 
597 	utilization = time_to_nsec(device_connect_time +
598 				   function_pending_time +
599 				   device_disconnect_time);
600 
601 	elapsed_time = get_tod_clock() - start_time;
602 	elapsed_time = tod_to_ns(elapsed_time);
603 	elapsed_time /= 1000;
604 
605 	return elapsed_time ? (utilization / elapsed_time) : 0;
606 }
607 
608 static u64 read_cmb(struct ccw_device *cdev, int index)
609 {
610 	struct cmb_data *cmb_data;
611 	unsigned long flags;
612 	struct cmb *cmb;
613 	u64 ret = 0;
614 	u32 val;
615 
616 	spin_lock_irqsave(cdev->ccwlock, flags);
617 	cmb_data = cdev->private->cmb;
618 	if (!cmb_data)
619 		goto out;
620 
621 	cmb = cmb_data->hw_block;
622 	switch (index) {
623 	case avg_utilization:
624 		ret = __cmb_utilization(cmb->device_connect_time,
625 					cmb->function_pending_time,
626 					cmb->device_disconnect_time,
627 					cdev->private->cmb_start_time);
628 		goto out;
629 	case cmb_ssch_rsch_count:
630 		ret = cmb->ssch_rsch_count;
631 		goto out;
632 	case cmb_sample_count:
633 		ret = cmb->sample_count;
634 		goto out;
635 	case cmb_device_connect_time:
636 		val = cmb->device_connect_time;
637 		break;
638 	case cmb_function_pending_time:
639 		val = cmb->function_pending_time;
640 		break;
641 	case cmb_device_disconnect_time:
642 		val = cmb->device_disconnect_time;
643 		break;
644 	case cmb_control_unit_queuing_time:
645 		val = cmb->control_unit_queuing_time;
646 		break;
647 	case cmb_device_active_only_time:
648 		val = cmb->device_active_only_time;
649 		break;
650 	default:
651 		goto out;
652 	}
653 	ret = time_to_avg_nsec(val, cmb->sample_count);
654 out:
655 	spin_unlock_irqrestore(cdev->ccwlock, flags);
656 	return ret;
657 }
658 
659 static int readall_cmb(struct ccw_device *cdev, struct cmbdata *data)
660 {
661 	struct cmb *cmb;
662 	struct cmb_data *cmb_data;
663 	u64 time;
664 	unsigned long flags;
665 	int ret;
666 
667 	ret = cmf_cmb_copy_wait(cdev);
668 	if (ret < 0)
669 		return ret;
670 	spin_lock_irqsave(cdev->ccwlock, flags);
671 	cmb_data = cdev->private->cmb;
672 	if (!cmb_data) {
673 		ret = -ENODEV;
674 		goto out;
675 	}
676 	if (cmb_data->last_update == 0) {
677 		ret = -EAGAIN;
678 		goto out;
679 	}
680 	cmb = cmb_data->last_block;
681 	time = cmb_data->last_update - cdev->private->cmb_start_time;
682 
683 	memset(data, 0, sizeof(struct cmbdata));
684 
685 	/* we only know values before device_busy_time */
686 	data->size = offsetof(struct cmbdata, device_busy_time);
687 
688 	data->elapsed_time = tod_to_ns(time);
689 
690 	/* copy data to new structure */
691 	data->ssch_rsch_count = cmb->ssch_rsch_count;
692 	data->sample_count = cmb->sample_count;
693 
694 	/* time fields are converted to nanoseconds while copying */
695 	data->device_connect_time = time_to_nsec(cmb->device_connect_time);
696 	data->function_pending_time = time_to_nsec(cmb->function_pending_time);
697 	data->device_disconnect_time =
698 		time_to_nsec(cmb->device_disconnect_time);
699 	data->control_unit_queuing_time
700 		= time_to_nsec(cmb->control_unit_queuing_time);
701 	data->device_active_only_time
702 		= time_to_nsec(cmb->device_active_only_time);
703 	ret = 0;
704 out:
705 	spin_unlock_irqrestore(cdev->ccwlock, flags);
706 	return ret;
707 }
708 
/* ->reset() for the basic format: the generic reset is all that is needed */
static void reset_cmb(struct ccw_device *cdev)
{
	cmf_generic_reset(cdev);
}
713 
714 static int cmf_enabled(struct ccw_device *cdev)
715 {
716 	int enabled;
717 
718 	spin_lock_irq(cdev->ccwlock);
719 	enabled = !!cdev->private->cmb;
720 	spin_unlock_irq(cdev->ccwlock);
721 
722 	return enabled;
723 }
724 
725 static struct attribute_group cmf_attr_group;
726 
727 static struct cmb_operations cmbops_basic = {
728 	.alloc	= alloc_cmb,
729 	.free	= free_cmb,
730 	.set	= set_cmb,
731 	.read	= read_cmb,
732 	.readall    = readall_cmb,
733 	.reset	    = reset_cmb,
734 	.attr_group = &cmf_attr_group,
735 };
736 
737 /* ******** extended cmb handling ********/
738 
739 /**
740  * struct cmbe - extended channel measurement block
741  * @ssch_rsch_count: number of ssch and rsch
742  * @sample_count: number of samples
743  * @device_connect_time: time of device connect
744  * @function_pending_time: time of function pending
745  * @device_disconnect_time: time of device disconnect
746  * @control_unit_queuing_time: time of control unit queuing
747  * @device_active_only_time: time of device active only
748  * @device_busy_time: time of device busy
749  * @initial_command_response_time: initial command response time
750  * @reserved: unused
751  *
752  * The measurement block as used by the hardware. May be in any 64 bit physical
753  * location.
754  * The fields are described further in z/Architecture Principles of Operation,
755  * third edition, chapter 17.
756  */
757 struct cmbe {
758 	u32 ssch_rsch_count;
759 	u32 sample_count;
760 	u32 device_connect_time;
761 	u32 function_pending_time;
762 	u32 device_disconnect_time;
763 	u32 control_unit_queuing_time;
764 	u32 device_active_only_time;
765 	u32 device_busy_time;
766 	u32 initial_command_response_time;
767 	u32 reserved[7];
768 } __packed __aligned(64);
769 
770 static struct kmem_cache *cmbe_cache;
771 
772 static int alloc_cmbe(struct ccw_device *cdev)
773 {
774 	struct cmb_data *cmb_data;
775 	struct cmbe *cmbe;
776 	int ret = -ENOMEM;
777 
778 	cmbe = kmem_cache_zalloc(cmbe_cache, GFP_KERNEL);
779 	if (!cmbe)
780 		return ret;
781 
782 	cmb_data = kzalloc(sizeof(*cmb_data), GFP_KERNEL);
783 	if (!cmb_data)
784 		goto out_free;
785 
786 	cmb_data->last_block = kzalloc(sizeof(struct cmbe), GFP_KERNEL);
787 	if (!cmb_data->last_block)
788 		goto out_free;
789 
790 	cmb_data->size = sizeof(*cmbe);
791 	cmb_data->hw_block = cmbe;
792 
793 	spin_lock(&cmb_area.lock);
794 	spin_lock_irq(cdev->ccwlock);
795 	if (cdev->private->cmb)
796 		goto out_unlock;
797 
798 	cdev->private->cmb = cmb_data;
799 
800 	/* activate global measurement if this is the first channel */
801 	if (list_empty(&cmb_area.list))
802 		cmf_activate(NULL, CMF_ON);
803 	list_add_tail(&cdev->private->cmb_list, &cmb_area.list);
804 
805 	spin_unlock_irq(cdev->ccwlock);
806 	spin_unlock(&cmb_area.lock);
807 	return 0;
808 
809 out_unlock:
810 	spin_unlock_irq(cdev->ccwlock);
811 	spin_unlock(&cmb_area.lock);
812 	ret = -EBUSY;
813 out_free:
814 	if (cmb_data)
815 		kfree(cmb_data->last_block);
816 	kfree(cmb_data);
817 	kmem_cache_free(cmbe_cache, cmbe);
818 
819 	return ret;
820 }
821 
822 static void free_cmbe(struct ccw_device *cdev)
823 {
824 	struct cmb_data *cmb_data;
825 
826 	spin_lock(&cmb_area.lock);
827 	spin_lock_irq(cdev->ccwlock);
828 	cmb_data = cdev->private->cmb;
829 	cdev->private->cmb = NULL;
830 	if (cmb_data) {
831 		kfree(cmb_data->last_block);
832 		kmem_cache_free(cmbe_cache, cmb_data->hw_block);
833 	}
834 	kfree(cmb_data);
835 
836 	/* deactivate global measurement if this is the last channel */
837 	list_del_init(&cdev->private->cmb_list);
838 	if (list_empty(&cmb_area.list))
839 		cmf_activate(NULL, CMF_OFF);
840 	spin_unlock_irq(cdev->ccwlock);
841 	spin_unlock(&cmb_area.lock);
842 }
843 
844 static int set_cmbe(struct ccw_device *cdev, u32 mme)
845 {
846 	unsigned long mba;
847 	struct cmb_data *cmb_data;
848 	unsigned long flags;
849 
850 	spin_lock_irqsave(cdev->ccwlock, flags);
851 	if (!cdev->private->cmb) {
852 		spin_unlock_irqrestore(cdev->ccwlock, flags);
853 		return -EINVAL;
854 	}
855 	cmb_data = cdev->private->cmb;
856 	mba = mme ? (unsigned long) cmb_data->hw_block : 0;
857 	spin_unlock_irqrestore(cdev->ccwlock, flags);
858 
859 	return set_schib_wait(cdev, mme, 1, mba);
860 }
861 
862 static u64 read_cmbe(struct ccw_device *cdev, int index)
863 {
864 	struct cmb_data *cmb_data;
865 	unsigned long flags;
866 	struct cmbe *cmb;
867 	u64 ret = 0;
868 	u32 val;
869 
870 	spin_lock_irqsave(cdev->ccwlock, flags);
871 	cmb_data = cdev->private->cmb;
872 	if (!cmb_data)
873 		goto out;
874 
875 	cmb = cmb_data->hw_block;
876 	switch (index) {
877 	case avg_utilization:
878 		ret = __cmb_utilization(cmb->device_connect_time,
879 					cmb->function_pending_time,
880 					cmb->device_disconnect_time,
881 					cdev->private->cmb_start_time);
882 		goto out;
883 	case cmb_ssch_rsch_count:
884 		ret = cmb->ssch_rsch_count;
885 		goto out;
886 	case cmb_sample_count:
887 		ret = cmb->sample_count;
888 		goto out;
889 	case cmb_device_connect_time:
890 		val = cmb->device_connect_time;
891 		break;
892 	case cmb_function_pending_time:
893 		val = cmb->function_pending_time;
894 		break;
895 	case cmb_device_disconnect_time:
896 		val = cmb->device_disconnect_time;
897 		break;
898 	case cmb_control_unit_queuing_time:
899 		val = cmb->control_unit_queuing_time;
900 		break;
901 	case cmb_device_active_only_time:
902 		val = cmb->device_active_only_time;
903 		break;
904 	case cmb_device_busy_time:
905 		val = cmb->device_busy_time;
906 		break;
907 	case cmb_initial_command_response_time:
908 		val = cmb->initial_command_response_time;
909 		break;
910 	default:
911 		goto out;
912 	}
913 	ret = time_to_avg_nsec(val, cmb->sample_count);
914 out:
915 	spin_unlock_irqrestore(cdev->ccwlock, flags);
916 	return ret;
917 }
918 
919 static int readall_cmbe(struct ccw_device *cdev, struct cmbdata *data)
920 {
921 	struct cmbe *cmb;
922 	struct cmb_data *cmb_data;
923 	u64 time;
924 	unsigned long flags;
925 	int ret;
926 
927 	ret = cmf_cmb_copy_wait(cdev);
928 	if (ret < 0)
929 		return ret;
930 	spin_lock_irqsave(cdev->ccwlock, flags);
931 	cmb_data = cdev->private->cmb;
932 	if (!cmb_data) {
933 		ret = -ENODEV;
934 		goto out;
935 	}
936 	if (cmb_data->last_update == 0) {
937 		ret = -EAGAIN;
938 		goto out;
939 	}
940 	time = cmb_data->last_update - cdev->private->cmb_start_time;
941 
942 	memset (data, 0, sizeof(struct cmbdata));
943 
944 	/* we only know values before device_busy_time */
945 	data->size = offsetof(struct cmbdata, device_busy_time);
946 
947 	data->elapsed_time = tod_to_ns(time);
948 
949 	cmb = cmb_data->last_block;
950 	/* copy data to new structure */
951 	data->ssch_rsch_count = cmb->ssch_rsch_count;
952 	data->sample_count = cmb->sample_count;
953 
954 	/* time fields are converted to nanoseconds while copying */
955 	data->device_connect_time = time_to_nsec(cmb->device_connect_time);
956 	data->function_pending_time = time_to_nsec(cmb->function_pending_time);
957 	data->device_disconnect_time =
958 		time_to_nsec(cmb->device_disconnect_time);
959 	data->control_unit_queuing_time
960 		= time_to_nsec(cmb->control_unit_queuing_time);
961 	data->device_active_only_time
962 		= time_to_nsec(cmb->device_active_only_time);
963 	data->device_busy_time = time_to_nsec(cmb->device_busy_time);
964 	data->initial_command_response_time
965 		= time_to_nsec(cmb->initial_command_response_time);
966 
967 	ret = 0;
968 out:
969 	spin_unlock_irqrestore(cdev->ccwlock, flags);
970 	return ret;
971 }
972 
/* ->reset() for the extended format: the generic reset is all that is needed */
static void reset_cmbe(struct ccw_device *cdev)
{
	cmf_generic_reset(cdev);
}
977 
978 static struct attribute_group cmf_attr_group_ext;
979 
980 static struct cmb_operations cmbops_extended = {
981 	.alloc	    = alloc_cmbe,
982 	.free	    = free_cmbe,
983 	.set	    = set_cmbe,
984 	.read	    = read_cmbe,
985 	.readall    = readall_cmbe,
986 	.reset	    = reset_cmbe,
987 	.attr_group = &cmf_attr_group_ext,
988 };
989 
990 static ssize_t cmb_show_attr(struct device *dev, char *buf, enum cmb_index idx)
991 {
992 	return sprintf(buf, "%lld\n",
993 		(unsigned long long) cmf_read(to_ccwdev(dev), idx));
994 }
995 
996 static ssize_t cmb_show_avg_sample_interval(struct device *dev,
997 					    struct device_attribute *attr,
998 					    char *buf)
999 {
1000 	struct ccw_device *cdev = to_ccwdev(dev);
1001 	unsigned long count;
1002 	long interval;
1003 
1004 	count = cmf_read(cdev, cmb_sample_count);
1005 	spin_lock_irq(cdev->ccwlock);
1006 	if (count) {
1007 		interval = get_tod_clock() - cdev->private->cmb_start_time;
1008 		interval = tod_to_ns(interval);
1009 		interval /= count;
1010 	} else
1011 		interval = -1;
1012 	spin_unlock_irq(cdev->ccwlock);
1013 	return sprintf(buf, "%ld\n", interval);
1014 }
1015 
1016 static ssize_t cmb_show_avg_utilization(struct device *dev,
1017 					struct device_attribute *attr,
1018 					char *buf)
1019 {
1020 	unsigned long u = cmf_read(to_ccwdev(dev), avg_utilization);
1021 
1022 	return sprintf(buf, "%02lu.%01lu%%\n", u / 10, u % 10);
1023 }
1024 
1025 #define cmf_attr(name) \
1026 static ssize_t show_##name(struct device *dev, \
1027 			   struct device_attribute *attr, char *buf)	\
1028 { return cmb_show_attr((dev), buf, cmb_##name); } \
1029 static DEVICE_ATTR(name, 0444, show_##name, NULL);
1030 
1031 #define cmf_attr_avg(name) \
1032 static ssize_t show_avg_##name(struct device *dev, \
1033 			       struct device_attribute *attr, char *buf) \
1034 { return cmb_show_attr((dev), buf, cmb_##name); } \
1035 static DEVICE_ATTR(avg_##name, 0444, show_avg_##name, NULL);
1036 
1037 cmf_attr(ssch_rsch_count);
1038 cmf_attr(sample_count);
1039 cmf_attr_avg(device_connect_time);
1040 cmf_attr_avg(function_pending_time);
1041 cmf_attr_avg(device_disconnect_time);
1042 cmf_attr_avg(control_unit_queuing_time);
1043 cmf_attr_avg(device_active_only_time);
1044 cmf_attr_avg(device_busy_time);
1045 cmf_attr_avg(initial_command_response_time);
1046 
1047 static DEVICE_ATTR(avg_sample_interval, 0444, cmb_show_avg_sample_interval,
1048 		   NULL);
1049 static DEVICE_ATTR(avg_utilization, 0444, cmb_show_avg_utilization, NULL);
1050 
1051 static struct attribute *cmf_attributes[] = {
1052 	&dev_attr_avg_sample_interval.attr,
1053 	&dev_attr_avg_utilization.attr,
1054 	&dev_attr_ssch_rsch_count.attr,
1055 	&dev_attr_sample_count.attr,
1056 	&dev_attr_avg_device_connect_time.attr,
1057 	&dev_attr_avg_function_pending_time.attr,
1058 	&dev_attr_avg_device_disconnect_time.attr,
1059 	&dev_attr_avg_control_unit_queuing_time.attr,
1060 	&dev_attr_avg_device_active_only_time.attr,
1061 	NULL,
1062 };
1063 
1064 static struct attribute_group cmf_attr_group = {
1065 	.name  = "cmf",
1066 	.attrs = cmf_attributes,
1067 };
1068 
/*
 * NULL-terminated attribute list for cmf_attr_group_ext.
 *
 * Contains every entry of cmf_attributes plus avg_device_busy_time and
 * avg_initial_command_response_time.
 * NOTE(review): presumably the set available with the extended
 * measurement block format -- confirm against cmbops_extended.
 */
static struct attribute *cmf_attributes_ext[] = {
	&dev_attr_avg_sample_interval.attr,
	&dev_attr_avg_utilization.attr,
	&dev_attr_ssch_rsch_count.attr,
	&dev_attr_sample_count.attr,
	&dev_attr_avg_device_connect_time.attr,
	&dev_attr_avg_function_pending_time.attr,
	&dev_attr_avg_device_disconnect_time.attr,
	&dev_attr_avg_control_unit_queuing_time.attr,
	&dev_attr_avg_device_active_only_time.attr,
	&dev_attr_avg_device_busy_time.attr,
	&dev_attr_avg_initial_command_response_time.attr,
	NULL,
};
1083 
/*
 * Named attribute group for the longer attribute list. It uses the same
 * group name "cmf" as cmf_attr_group.
 */
static struct attribute_group cmf_attr_group_ext = {
	.name  = "cmf",
	.attrs = cmf_attributes_ext,
};
1088 
1089 static ssize_t cmb_enable_show(struct device *dev,
1090 			       struct device_attribute *attr,
1091 			       char *buf)
1092 {
1093 	struct ccw_device *cdev = to_ccwdev(dev);
1094 
1095 	return sprintf(buf, "%d\n", cmf_enabled(cdev));
1096 }
1097 
1098 static ssize_t cmb_enable_store(struct device *dev,
1099 				struct device_attribute *attr, const char *buf,
1100 				size_t c)
1101 {
1102 	struct ccw_device *cdev = to_ccwdev(dev);
1103 	unsigned long val;
1104 	int ret;
1105 
1106 	ret = kstrtoul(buf, 16, &val);
1107 	if (ret)
1108 		return ret;
1109 
1110 	switch (val) {
1111 	case 0:
1112 		ret = disable_cmf(cdev);
1113 		break;
1114 	case 1:
1115 		ret = enable_cmf(cdev);
1116 		break;
1117 	default:
1118 		ret = -EINVAL;
1119 	}
1120 
1121 	return ret ? ret : c;
1122 }
1123 DEVICE_ATTR_RW(cmb_enable);
1124 
1125 int ccw_set_cmf(struct ccw_device *cdev, int enable)
1126 {
1127 	return cmbops->set(cdev, enable ? 2 : 0);
1128 }
1129 
1130 /**
1131  * enable_cmf() - switch on the channel measurement for a specific device
1132  *  @cdev:	The ccw device to be enabled
1133  *
1134  *  Returns %0 for success or a negative error value.
1135  *  Note: If this is called on a device for which channel measurement is already
1136  *	  enabled a reset of the measurement data is triggered.
1137  *  Context:
1138  *    non-atomic
1139  */
1140 int enable_cmf(struct ccw_device *cdev)
1141 {
1142 	int ret = 0;
1143 
1144 	device_lock(&cdev->dev);
1145 	if (cmf_enabled(cdev)) {
1146 		cmbops->reset(cdev);
1147 		goto out_unlock;
1148 	}
1149 	get_device(&cdev->dev);
1150 	ret = cmbops->alloc(cdev);
1151 	if (ret)
1152 		goto out;
1153 	cmbops->reset(cdev);
1154 	ret = sysfs_create_group(&cdev->dev.kobj, cmbops->attr_group);
1155 	if (ret) {
1156 		cmbops->free(cdev);
1157 		goto out;
1158 	}
1159 	ret = cmbops->set(cdev, 2);
1160 	if (ret) {
1161 		sysfs_remove_group(&cdev->dev.kobj, cmbops->attr_group);
1162 		cmbops->free(cdev);
1163 	}
1164 out:
1165 	if (ret)
1166 		put_device(&cdev->dev);
1167 out_unlock:
1168 	device_unlock(&cdev->dev);
1169 	return ret;
1170 }
1171 
1172 /**
1173  * __disable_cmf() - switch off the channel measurement for a specific device
1174  *  @cdev:	The ccw device to be disabled
1175  *
1176  *  Returns %0 for success or a negative error value.
1177  *
1178  *  Context:
1179  *    non-atomic, device_lock() held.
1180  */
1181 int __disable_cmf(struct ccw_device *cdev)
1182 {
1183 	int ret;
1184 
1185 	ret = cmbops->set(cdev, 0);
1186 	if (ret)
1187 		return ret;
1188 
1189 	sysfs_remove_group(&cdev->dev.kobj, cmbops->attr_group);
1190 	cmbops->free(cdev);
1191 	put_device(&cdev->dev);
1192 
1193 	return ret;
1194 }
1195 
1196 /**
1197  * disable_cmf() - switch off the channel measurement for a specific device
1198  *  @cdev:	The ccw device to be disabled
1199  *
1200  *  Returns %0 for success or a negative error value.
1201  *
1202  *  Context:
1203  *    non-atomic
1204  */
1205 int disable_cmf(struct ccw_device *cdev)
1206 {
1207 	int ret;
1208 
1209 	device_lock(&cdev->dev);
1210 	ret = __disable_cmf(cdev);
1211 	device_unlock(&cdev->dev);
1212 
1213 	return ret;
1214 }
1215 
1216 /**
1217  * cmf_read() - read one value from the current channel measurement block
1218  * @cdev:	the channel to be read
1219  * @index:	the index of the value to be read
1220  *
1221  * Returns the value read or %0 if the value cannot be read.
1222  *
1223  *  Context:
1224  *    any
1225  */
1226 u64 cmf_read(struct ccw_device *cdev, int index)
1227 {
1228 	return cmbops->read(cdev, index);
1229 }
1230 
1231 /**
1232  * cmf_readall() - read the current channel measurement block
1233  * @cdev:	the channel to be read
1234  * @data:	a pointer to a data block that will be filled
1235  *
1236  * Returns %0 on success, a negative error value otherwise.
1237  *
1238  *  Context:
1239  *    any
1240  */
1241 int cmf_readall(struct ccw_device *cdev, struct cmbdata *data)
1242 {
1243 	return cmbops->readall(cdev, data);
1244 }
1245 
1246 /* Reenable cmf when a disconnected device becomes available again. */
1247 int cmf_reenable(struct ccw_device *cdev)
1248 {
1249 	cmbops->reset(cdev);
1250 	return cmbops->set(cdev, 2);
1251 }
1252 
1253 /**
1254  * cmf_reactivate() - reactivate measurement block updates
1255  *
1256  * Use this during resume from hibernate.
1257  */
1258 void cmf_reactivate(void)
1259 {
1260 	spin_lock(&cmb_area.lock);
1261 	if (!list_empty(&cmb_area.list))
1262 		cmf_activate(cmb_area.mem, CMF_ON);
1263 	spin_unlock(&cmb_area.lock);
1264 }
1265 
1266 static int __init init_cmbe(void)
1267 {
1268 	cmbe_cache = kmem_cache_create("cmbe_cache", sizeof(struct cmbe),
1269 				       __alignof__(struct cmbe), 0, NULL);
1270 
1271 	return cmbe_cache ? 0 : -ENOMEM;
1272 }
1273 
1274 static int __init init_cmf(void)
1275 {
1276 	char *format_string;
1277 	char *detect_string;
1278 	int ret;
1279 
1280 	/*
1281 	 * If the user did not give a parameter, see if we are running on a
1282 	 * machine supporting extended measurement blocks, otherwise fall back
1283 	 * to basic mode.
1284 	 */
1285 	if (format == CMF_AUTODETECT) {
1286 		if (!css_general_characteristics.ext_mb) {
1287 			format = CMF_BASIC;
1288 		} else {
1289 			format = CMF_EXTENDED;
1290 		}
1291 		detect_string = "autodetected";
1292 	} else {
1293 		detect_string = "parameter";
1294 	}
1295 
1296 	switch (format) {
1297 	case CMF_BASIC:
1298 		format_string = "basic";
1299 		cmbops = &cmbops_basic;
1300 		break;
1301 	case CMF_EXTENDED:
1302 		format_string = "extended";
1303 		cmbops = &cmbops_extended;
1304 
1305 		ret = init_cmbe();
1306 		if (ret)
1307 			return ret;
1308 		break;
1309 	default:
1310 		return -EINVAL;
1311 	}
1312 	pr_info("Channel measurement facility initialized using format "
1313 		"%s (mode %s)\n", format_string, detect_string);
1314 	return 0;
1315 }
1316 device_initcall(init_cmf);
1317 
/*
 * Entry points exported to GPL-licensed modules: switching measurement
 * on and off for a device, and reading a single value or the complete
 * measurement data.
 */
EXPORT_SYMBOL_GPL(enable_cmf);
EXPORT_SYMBOL_GPL(disable_cmf);
EXPORT_SYMBOL_GPL(cmf_read);
EXPORT_SYMBOL_GPL(cmf_readall);
1322