xref: /openbmc/linux/drivers/block/drbd/drbd_proc.c (revision 151f4e2b)
1 /*
2    drbd_proc.c
3 
4    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5 
6    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9 
10    drbd is free software; you can redistribute it and/or modify
11    it under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 2, or (at your option)
13    any later version.
14 
15    drbd is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with drbd; see the file COPYING.  If not, write to
22    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 
24  */
25 
26 #include <linux/module.h>
27 
28 #include <linux/uaccess.h>
29 #include <linux/fs.h>
30 #include <linux/file.h>
31 #include <linux/proc_fs.h>
32 #include <linux/seq_file.h>
33 #include <linux/drbd.h>
34 #include "drbd_int.h"
35 
/* File-scope, non-static pointer to a procfs directory entry.  It is only
 * defined here; nothing in the visible part of this file assigns or reads
 * it -- presumably the module setup code elsewhere does (TODO confirm). */
struct proc_dir_entry *drbd_proc;
37 
/*
 * Print @v into @seq as a decimal number with ',' separating groups of
 * three digits.  Only the two lowest groups are split off: everything at
 * or above one million is emitted as one ungrouped leading number.
 *
 * @v is expected to be a rate in kB/sec, so the "millions" case means
 * GByte/sec and TiByte/sec rates are not specially formatted.
 */
static void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
{
	long millions, rest;

	/* Rare: a million kB/sec or more. */
	if (unlikely(v >= 1000000)) {
		millions = v / 1000000;
		rest = v % 1000000;
		seq_printf(seq, "%ld,", millions);
		seq_printf(seq, "%03ld,%03ld", rest / 1000, rest % 1000);
		return;
	}

	/* Less than one full group: nothing to separate. */
	if (unlikely(v < 1000)) {
		seq_printf(seq, "%ld", v);
		return;
	}

	/* The common case: exactly two groups. */
	seq_printf(seq, "%ld,%03ld", v / 1000, v % 1000);
}
51 
52 static void drbd_get_syncer_progress(struct drbd_device *device,
53 		union drbd_dev_state state, unsigned long *rs_total,
54 		unsigned long *bits_left, unsigned int *per_mil_done)
55 {
56 	/* this is to break it at compile time when we change that, in case we
57 	 * want to support more than (1<<32) bits on a 32bit arch. */
58 	typecheck(unsigned long, device->rs_total);
59 	*rs_total = device->rs_total;
60 
61 	/* note: both rs_total and rs_left are in bits, i.e. in
62 	 * units of BM_BLOCK_SIZE.
63 	 * for the percentage, we don't care. */
64 
65 	if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
66 		*bits_left = device->ov_left;
67 	else
68 		*bits_left = drbd_bm_total_weight(device) - device->rs_failed;
69 	/* >> 10 to prevent overflow,
70 	 * +1 to prevent division by zero */
71 	if (*bits_left > *rs_total) {
72 		/* D'oh. Maybe a logic bug somewhere.  More likely just a race
73 		 * between state change and reset of rs_total.
74 		 */
75 		*bits_left = *rs_total;
76 		*per_mil_done = *rs_total ? 0 : 1000;
77 	} else {
78 		/* Make sure the division happens in long context.
79 		 * We allow up to one petabyte storage right now,
80 		 * at a granularity of 4k per bit that is 2**38 bits.
81 		 * After shift right and multiplication by 1000,
82 		 * this should still fit easily into a 32bit long,
83 		 * so we don't need a 64bit division on 32bit arch.
84 		 * Note: currently we don't support such large bitmaps on 32bit
85 		 * arch anyways, but no harm done to be prepared for it here.
86 		 */
87 		unsigned int shift = *rs_total > UINT_MAX ? 16 : 10;
88 		unsigned long left = *bits_left >> shift;
89 		unsigned long total = 1UL + (*rs_total >> shift);
90 		unsigned long tmp = 1000UL - left * 1000UL/total;
91 		*per_mil_done = tmp;
92 	}
93 }
94 
95 
/*
 * Print the resync / online-verify progress lines for @device into @seq.
 *
 * Progress bars shamelessly adapted from driver/md/md.c.
 * With the code below the output looks roughly like
 *	[=====>..............] sync'ed: 33.5% (23456/123456)K
 *	finish: 2:20:20 speed: 6,345 (6,456) K/sec
 * where "sync'ed:" becomes "verified:" during online verify, the unit
 * suffix is K or M, and further fields (short term average, "want:",
 * "(stalled)", a sector position line) are added depending on
 * drbd_proc_details and on state.conn.
 *
 * @state is the device state snapshot taken by the caller; only
 * state.conn is examined here.
 */
static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq,
		union drbd_dev_state state)
{
	unsigned long db, dt, dbdt, rt, rs_total, rs_left;
	unsigned int res;
	int i, x, y;
	int stalled = 0;

	/* res is the completed share in 1/1000 units. */
	drbd_get_syncer_progress(device, state, &rs_total, &rs_left, &res);

	/* 20 cell bar: one cell per 50 per-mil (5%).  The '=' loop starts at
	 * 1 because the '>' head takes one of the x "done" cells. */
	x = res/50;
	y = 20-x;
	seq_puts(seq, "\t[");
	for (i = 1; i < x; i++)
		seq_putc(seq, '=');
	seq_putc(seq, '>');
	for (i = 0; i < y; i++)
		seq_putc(seq, '.');
	seq_puts(seq, "] ");

	if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
		seq_puts(seq, "verified:");
	else
		seq_puts(seq, "sync'ed:");
	/* per-mil printed as percent with one decimal place */
	seq_printf(seq, "%3u.%u%% ", res / 10, res % 10);

	/* if more than a few GB, display in MB */
	if (rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
		seq_printf(seq, "(%lu/%lu)M",
			    (unsigned long) Bit2KB(rs_left >> 10),
			    (unsigned long) Bit2KB(rs_total >> 10));
	else
		seq_printf(seq, "(%lu/%lu)K",
			    (unsigned long) Bit2KB(rs_left),
			    (unsigned long) Bit2KB(rs_total));

	seq_puts(seq, "\n\t");

	/* see drivers/md/md.c
	 * We do not want to overflow, so the order of operands and
	 * the * 100 / 100 trick are important. We do a +1 to be
	 * safe against division by zero. We only estimate anyway.
	 *
	 * dt: time from mark until now
	 * db: blocks written from mark until now
	 * rt: remaining time
	 */
	/* Rolling marks. last_mark+1 may just now be modified.  last_mark+2 is
	 * at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at
	 * least DRBD_SYNC_MARK_STEP time before it will be modified. */
	/* ------------------------ ~18s average ------------------------ */
	i = (device->rs_last_mark + 2) % DRBD_SYNC_MARKS;
	dt = (jiffies - device->rs_mark_time[i]) / HZ;
	/* a mark older than 180 seconds is reported as "(stalled)" below */
	if (dt > 180)
		stalled = 1;

	/* dt is used as a divisor further down */
	if (!dt)
		dt++;
	db = device->rs_mark_left[i] - rs_left;
	rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */

	/* remaining time as h:mm:ss */
	seq_printf(seq, "finish: %lu:%02lu:%02lu",
		rt / 3600, (rt % 3600) / 60, rt % 60);

	dbdt = Bit2KB(db/dt);
	seq_puts(seq, " speed: ");
	seq_printf_with_thousands_grouping(seq, dbdt);
	seq_puts(seq, " (");
	/* ------------------------- ~3s average ------------------------ */
	if (drbd_proc_details >= 1) {
		/* this is what drbd_rs_should_slow_down() uses */
		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
		dt = (jiffies - device->rs_mark_time[i]) / HZ;
		if (!dt)
			dt++;
		db = device->rs_mark_left[i] - rs_left;
		dbdt = Bit2KB(db/dt);
		seq_printf_with_thousands_grouping(seq, dbdt);
		seq_puts(seq, " -- ");
	}

	/* --------------------- long term average ---------------------- */
	/* mean speed since syncer started
	 * we do account for PausedSync periods */
	dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
	if (dt == 0)
		dt = 1;
	db = rs_total - rs_left;
	dbdt = Bit2KB(db/dt);
	seq_printf_with_thousands_grouping(seq, dbdt);
	seq_putc(seq, ')');

	/* the configured rate (device->c_sync_rate) is only shown in these
	 * two connection states */
	if (state.conn == C_SYNC_TARGET ||
	    state.conn == C_VERIFY_S) {
		seq_puts(seq, " want: ");
		seq_printf_with_thousands_grouping(seq, device->c_sync_rate);
	}
	seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : "");

	/* optional extra line: position within the bitmap, in sectors */
	if (drbd_proc_details >= 1) {
		/* 64 bit:
		 * we convert to sectors in the display below. */
		unsigned long bm_bits = drbd_bm_bits(device);
		unsigned long bit_pos;
		unsigned long long stop_sector = 0;
		if (state.conn == C_VERIFY_S ||
		    state.conn == C_VERIFY_T) {
			/* verify: position is what has been counted down so far */
			bit_pos = bm_bits - device->ov_left;
			if (verify_can_do_stop_sector(device))
				stop_sector = device->ov_stop_sector;
		} else
			bit_pos = device->bm_resync_fo;
		/* Total sectors may be slightly off for oddly
		 * sized devices. So what. */
		seq_printf(seq,
			"\t%3d%% sector pos: %llu/%llu",
			(int)(bit_pos / (bm_bits/100+1)),
			(unsigned long long)bit_pos * BM_SECT_PER_BIT,
			(unsigned long long)bm_bits * BM_SECT_PER_BIT);
		/* 0 and ULLONG_MAX both mean "no stop sector to show" here */
		if (stop_sector != 0 && stop_sector != ULLONG_MAX)
			seq_printf(seq, " stop sector: %llu", stop_sector);
		seq_putc(seq, '\n');
	}
}
226 
227 int drbd_seq_show(struct seq_file *seq, void *v)
228 {
229 	int i, prev_i = -1;
230 	const char *sn;
231 	struct drbd_device *device;
232 	struct net_conf *nc;
233 	union drbd_dev_state state;
234 	char wp;
235 
236 	static char write_ordering_chars[] = {
237 		[WO_NONE] = 'n',
238 		[WO_DRAIN_IO] = 'd',
239 		[WO_BDEV_FLUSH] = 'f',
240 	};
241 
242 	seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n",
243 		   API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX, drbd_buildtag());
244 
245 	/*
246 	  cs .. connection state
247 	  ro .. node role (local/remote)
248 	  ds .. disk state (local/remote)
249 	     protocol
250 	     various flags
251 	  ns .. network send
252 	  nr .. network receive
253 	  dw .. disk write
254 	  dr .. disk read
255 	  al .. activity log write count
256 	  bm .. bitmap update write count
257 	  pe .. pending (waiting for ack or data reply)
258 	  ua .. unack'd (still need to send ack or data reply)
259 	  ap .. application requests accepted, but not yet completed
260 	  ep .. number of epochs currently "on the fly", P_BARRIER_ACK pending
261 	  wo .. write ordering mode currently in use
262 	 oos .. known out-of-sync kB
263 	*/
264 
265 	rcu_read_lock();
266 	idr_for_each_entry(&drbd_devices, device, i) {
267 		if (prev_i != i - 1)
268 			seq_putc(seq, '\n');
269 		prev_i = i;
270 
271 		state = device->state;
272 		sn = drbd_conn_str(state.conn);
273 
274 		if (state.conn == C_STANDALONE &&
275 		    state.disk == D_DISKLESS &&
276 		    state.role == R_SECONDARY) {
277 			seq_printf(seq, "%2d: cs:Unconfigured\n", i);
278 		} else {
279 			/* reset device->congestion_reason */
280 			bdi_rw_congested(device->rq_queue->backing_dev_info);
281 
282 			nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
283 			wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' ';
284 			seq_printf(seq,
285 			   "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n"
286 			   "    ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
287 			   "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
288 			   i, sn,
289 			   drbd_role_str(state.role),
290 			   drbd_role_str(state.peer),
291 			   drbd_disk_str(state.disk),
292 			   drbd_disk_str(state.pdsk),
293 			   wp,
294 			   drbd_suspended(device) ? 's' : 'r',
295 			   state.aftr_isp ? 'a' : '-',
296 			   state.peer_isp ? 'p' : '-',
297 			   state.user_isp ? 'u' : '-',
298 			   device->congestion_reason ?: '-',
299 			   test_bit(AL_SUSPENDED, &device->flags) ? 's' : '-',
300 			   device->send_cnt/2,
301 			   device->recv_cnt/2,
302 			   device->writ_cnt/2,
303 			   device->read_cnt/2,
304 			   device->al_writ_cnt,
305 			   device->bm_writ_cnt,
306 			   atomic_read(&device->local_cnt),
307 			   atomic_read(&device->ap_pending_cnt) +
308 			   atomic_read(&device->rs_pending_cnt),
309 			   atomic_read(&device->unacked_cnt),
310 			   atomic_read(&device->ap_bio_cnt),
311 			   first_peer_device(device)->connection->epochs,
312 			   write_ordering_chars[device->resource->write_ordering]
313 			);
314 			seq_printf(seq, " oos:%llu\n",
315 				   Bit2KB((unsigned long long)
316 					   drbd_bm_total_weight(device)));
317 		}
318 		if (state.conn == C_SYNC_SOURCE ||
319 		    state.conn == C_SYNC_TARGET ||
320 		    state.conn == C_VERIFY_S ||
321 		    state.conn == C_VERIFY_T)
322 			drbd_syncer_progress(device, seq, state);
323 
324 		if (drbd_proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) {
325 			lc_seq_printf_stats(seq, device->resync);
326 			lc_seq_printf_stats(seq, device->act_log);
327 			put_ldev(device);
328 		}
329 
330 		if (drbd_proc_details >= 2)
331 			seq_printf(seq, "\tblocked on activity log: %d\n", atomic_read(&device->ap_actlog_cnt));
332 	}
333 	rcu_read_unlock();
334 
335 	return 0;
336 }
337