xref: /openbmc/linux/drivers/md/dm-stats.c (revision d3aa3e06)
13bd94003SHeinz Mauelshagen // SPDX-License-Identifier: GPL-2.0-only
2fd2ed4d2SMikulas Patocka #include <linux/errno.h>
3fd2ed4d2SMikulas Patocka #include <linux/numa.h>
4fd2ed4d2SMikulas Patocka #include <linux/slab.h>
5fd2ed4d2SMikulas Patocka #include <linux/rculist.h>
6fd2ed4d2SMikulas Patocka #include <linux/threads.h>
7fd2ed4d2SMikulas Patocka #include <linux/preempt.h>
8fd2ed4d2SMikulas Patocka #include <linux/irqflags.h>
9fd2ed4d2SMikulas Patocka #include <linux/vmalloc.h>
10fd2ed4d2SMikulas Patocka #include <linux/mm.h>
11fd2ed4d2SMikulas Patocka #include <linux/module.h>
12fd2ed4d2SMikulas Patocka #include <linux/device-mapper.h>
13fd2ed4d2SMikulas Patocka 
144cc96131SMike Snitzer #include "dm-core.h"
15fd2ed4d2SMikulas Patocka #include "dm-stats.h"
16fd2ed4d2SMikulas Patocka 
/* Prefix picked up by the DM logging macros used in this file (e.g. DMCRIT). */
#define DM_MSG_PREFIX "stats"

/*
 * Set to 1 by dm_stats_delete() whenever it queues dm_stat_free() through
 * call_rcu().  NOTE(review): the reader of this flag is outside this part
 * of the file - presumably it decides whether an rcu_barrier() is needed.
 */
static int dm_stat_need_rcu_barrier;
20fd2ed4d2SMikulas Patocka 
/*
 * Counters kept per CPU for a single statistics entry.
 *
 * Using 64-bit values to avoid overflow (which is a
 * problem that block/genhd.c's IO accounting has).
 *
 * The two-element arrays are indexed by the same direction index that is
 * used for dm_stat_shared.in_flight[] (READ / WRITE).
 */
struct dm_stat_percpu {
	/* sectors transferred by completed I/Os */
	unsigned long long sectors[2];
	/* number of completed I/Os */
	unsigned long long ios[2];
	/* sum of stats_aux->merged over completed I/Os */
	unsigned long long merges[2];
	/* summed I/O duration: jiffies, or ns with STAT_PRECISE_TIMESTAMPS */
	unsigned long long ticks[2];
	/* time accumulated while I/O of that direction was in flight */
	unsigned long long io_ticks[2];
	/* time accumulated while any I/O was in flight */
	unsigned long long io_ticks_total;
	/* elapsed time weighted by the number of in-flight I/Os */
	unsigned long long time_in_queue;
	/* n_histogram_entries + 1 buckets; only set up when a histogram is requested */
	unsigned long long *histogram;
};
35fd2ed4d2SMikulas Patocka 
36fd2ed4d2SMikulas Patocka struct dm_stat_shared {
37fd2ed4d2SMikulas Patocka 	atomic_t in_flight[2];
38c96aec34SMikulas Patocka 	unsigned long long stamp;
39fd2ed4d2SMikulas Patocka 	struct dm_stat_percpu tmp;
40fd2ed4d2SMikulas Patocka };
41fd2ed4d2SMikulas Patocka 
42fd2ed4d2SMikulas Patocka struct dm_stat {
43fd2ed4d2SMikulas Patocka 	struct list_head list_entry;
44fd2ed4d2SMikulas Patocka 	int id;
4586a3238cSHeinz Mauelshagen 	unsigned int stat_flags;
46fd2ed4d2SMikulas Patocka 	size_t n_entries;
47fd2ed4d2SMikulas Patocka 	sector_t start;
48fd2ed4d2SMikulas Patocka 	sector_t end;
49fd2ed4d2SMikulas Patocka 	sector_t step;
5086a3238cSHeinz Mauelshagen 	unsigned int n_histogram_entries;
51dfcfac3eSMikulas Patocka 	unsigned long long *histogram_boundaries;
52fd2ed4d2SMikulas Patocka 	const char *program_id;
53fd2ed4d2SMikulas Patocka 	const char *aux_data;
54fd2ed4d2SMikulas Patocka 	struct rcu_head rcu_head;
55fd2ed4d2SMikulas Patocka 	size_t shared_alloc_size;
56fd2ed4d2SMikulas Patocka 	size_t percpu_alloc_size;
57dfcfac3eSMikulas Patocka 	size_t histogram_alloc_size;
58fd2ed4d2SMikulas Patocka 	struct dm_stat_percpu *stat_percpu[NR_CPUS];
59b18ae8ddSGustavo A. R. Silva 	struct dm_stat_shared stat_shared[];
60fd2ed4d2SMikulas Patocka };
61fd2ed4d2SMikulas Patocka 
/* dm_stat.stat_flags bit: account time with ktime_get() (ns) instead of jiffies */
#define STAT_PRECISE_TIMESTAMPS		1
63c96aec34SMikulas Patocka 
64fd2ed4d2SMikulas Patocka struct dm_stats_last_position {
65fd2ed4d2SMikulas Patocka 	sector_t last_sector;
6686a3238cSHeinz Mauelshagen 	unsigned int last_rw;
67fd2ed4d2SMikulas Patocka };
68fd2ed4d2SMikulas Patocka 
/*
 * A typo on the command line could possibly make the kernel run out of memory
 * and crash. To prevent the crash we account all used memory. We fail if we
 * exhaust 1/4 of all memory or 1/2 of vmalloc space.
 */
#define DM_STATS_MEMORY_FACTOR		4
#define DM_STATS_VMALLOC_FACTOR		2

/* Protects shared_memory_amount. */
static DEFINE_SPINLOCK(shared_memory_lock);

/* Bytes currently claimed through claim_shared_memory(). */
static unsigned long shared_memory_amount;
80fd2ed4d2SMikulas Patocka 
__check_shared_memory(size_t alloc_size)81fd2ed4d2SMikulas Patocka static bool __check_shared_memory(size_t alloc_size)
82fd2ed4d2SMikulas Patocka {
83fd2ed4d2SMikulas Patocka 	size_t a;
84fd2ed4d2SMikulas Patocka 
85fd2ed4d2SMikulas Patocka 	a = shared_memory_amount + alloc_size;
86fd2ed4d2SMikulas Patocka 	if (a < shared_memory_amount)
87fd2ed4d2SMikulas Patocka 		return false;
88ca79b0c2SArun KS 	if (a >> PAGE_SHIFT > totalram_pages() / DM_STATS_MEMORY_FACTOR)
89fd2ed4d2SMikulas Patocka 		return false;
90fd2ed4d2SMikulas Patocka #ifdef CONFIG_MMU
91fd2ed4d2SMikulas Patocka 	if (a > (VMALLOC_END - VMALLOC_START) / DM_STATS_VMALLOC_FACTOR)
92fd2ed4d2SMikulas Patocka 		return false;
93fd2ed4d2SMikulas Patocka #endif
94fd2ed4d2SMikulas Patocka 	return true;
95fd2ed4d2SMikulas Patocka }
96fd2ed4d2SMikulas Patocka 
check_shared_memory(size_t alloc_size)97fd2ed4d2SMikulas Patocka static bool check_shared_memory(size_t alloc_size)
98fd2ed4d2SMikulas Patocka {
99fd2ed4d2SMikulas Patocka 	bool ret;
100fd2ed4d2SMikulas Patocka 
101fd2ed4d2SMikulas Patocka 	spin_lock_irq(&shared_memory_lock);
102fd2ed4d2SMikulas Patocka 
103fd2ed4d2SMikulas Patocka 	ret = __check_shared_memory(alloc_size);
104fd2ed4d2SMikulas Patocka 
105fd2ed4d2SMikulas Patocka 	spin_unlock_irq(&shared_memory_lock);
106fd2ed4d2SMikulas Patocka 
107fd2ed4d2SMikulas Patocka 	return ret;
108fd2ed4d2SMikulas Patocka }
109fd2ed4d2SMikulas Patocka 
claim_shared_memory(size_t alloc_size)110fd2ed4d2SMikulas Patocka static bool claim_shared_memory(size_t alloc_size)
111fd2ed4d2SMikulas Patocka {
112fd2ed4d2SMikulas Patocka 	spin_lock_irq(&shared_memory_lock);
113fd2ed4d2SMikulas Patocka 
114fd2ed4d2SMikulas Patocka 	if (!__check_shared_memory(alloc_size)) {
115fd2ed4d2SMikulas Patocka 		spin_unlock_irq(&shared_memory_lock);
116fd2ed4d2SMikulas Patocka 		return false;
117fd2ed4d2SMikulas Patocka 	}
118fd2ed4d2SMikulas Patocka 
119fd2ed4d2SMikulas Patocka 	shared_memory_amount += alloc_size;
120fd2ed4d2SMikulas Patocka 
121fd2ed4d2SMikulas Patocka 	spin_unlock_irq(&shared_memory_lock);
122fd2ed4d2SMikulas Patocka 
123fd2ed4d2SMikulas Patocka 	return true;
124fd2ed4d2SMikulas Patocka }
125fd2ed4d2SMikulas Patocka 
free_shared_memory(size_t alloc_size)126fd2ed4d2SMikulas Patocka static void free_shared_memory(size_t alloc_size)
127fd2ed4d2SMikulas Patocka {
128fd2ed4d2SMikulas Patocka 	unsigned long flags;
129fd2ed4d2SMikulas Patocka 
130fd2ed4d2SMikulas Patocka 	spin_lock_irqsave(&shared_memory_lock, flags);
131fd2ed4d2SMikulas Patocka 
132fd2ed4d2SMikulas Patocka 	if (WARN_ON_ONCE(shared_memory_amount < alloc_size)) {
133fd2ed4d2SMikulas Patocka 		spin_unlock_irqrestore(&shared_memory_lock, flags);
134fd2ed4d2SMikulas Patocka 		DMCRIT("Memory usage accounting bug.");
135fd2ed4d2SMikulas Patocka 		return;
136fd2ed4d2SMikulas Patocka 	}
137fd2ed4d2SMikulas Patocka 
138fd2ed4d2SMikulas Patocka 	shared_memory_amount -= alloc_size;
139fd2ed4d2SMikulas Patocka 
140fd2ed4d2SMikulas Patocka 	spin_unlock_irqrestore(&shared_memory_lock, flags);
141fd2ed4d2SMikulas Patocka }
142fd2ed4d2SMikulas Patocka 
dm_kvzalloc(size_t alloc_size,int node)143fd2ed4d2SMikulas Patocka static void *dm_kvzalloc(size_t alloc_size, int node)
144fd2ed4d2SMikulas Patocka {
145fd2ed4d2SMikulas Patocka 	void *p;
146fd2ed4d2SMikulas Patocka 
147fd2ed4d2SMikulas Patocka 	if (!claim_shared_memory(alloc_size))
148fd2ed4d2SMikulas Patocka 		return NULL;
149fd2ed4d2SMikulas Patocka 
150a7c3e901SMichal Hocko 	p = kvzalloc_node(alloc_size, GFP_KERNEL | __GFP_NOMEMALLOC, node);
151fd2ed4d2SMikulas Patocka 	if (p)
152fd2ed4d2SMikulas Patocka 		return p;
153fd2ed4d2SMikulas Patocka 
154fd2ed4d2SMikulas Patocka 	free_shared_memory(alloc_size);
155fd2ed4d2SMikulas Patocka 
156fd2ed4d2SMikulas Patocka 	return NULL;
157fd2ed4d2SMikulas Patocka }
158fd2ed4d2SMikulas Patocka 
/*
 * Counterpart of dm_kvzalloc(): un-account @alloc_size bytes and free @ptr.
 * A NULL @ptr is ignored and leaves the accounting untouched.
 */
static void dm_kvfree(void *ptr, size_t alloc_size)
{
	if (ptr) {
		free_shared_memory(alloc_size);
		kvfree(ptr);
	}
}
168fd2ed4d2SMikulas Patocka 
dm_stat_free(struct rcu_head * head)169fd2ed4d2SMikulas Patocka static void dm_stat_free(struct rcu_head *head)
170fd2ed4d2SMikulas Patocka {
171fd2ed4d2SMikulas Patocka 	int cpu;
172fd2ed4d2SMikulas Patocka 	struct dm_stat *s = container_of(head, struct dm_stat, rcu_head);
173fd2ed4d2SMikulas Patocka 
17460858318SMikulas Patocka 	kfree(s->histogram_boundaries);
175fd2ed4d2SMikulas Patocka 	kfree(s->program_id);
176fd2ed4d2SMikulas Patocka 	kfree(s->aux_data);
177dfcfac3eSMikulas Patocka 	for_each_possible_cpu(cpu) {
178dfcfac3eSMikulas Patocka 		dm_kvfree(s->stat_percpu[cpu][0].histogram, s->histogram_alloc_size);
179fd2ed4d2SMikulas Patocka 		dm_kvfree(s->stat_percpu[cpu], s->percpu_alloc_size);
180dfcfac3eSMikulas Patocka 	}
181dfcfac3eSMikulas Patocka 	dm_kvfree(s->stat_shared[0].tmp.histogram, s->histogram_alloc_size);
182fd2ed4d2SMikulas Patocka 	dm_kvfree(s, s->shared_alloc_size);
183fd2ed4d2SMikulas Patocka }
184fd2ed4d2SMikulas Patocka 
dm_stat_in_flight(struct dm_stat_shared * shared)185fd2ed4d2SMikulas Patocka static int dm_stat_in_flight(struct dm_stat_shared *shared)
186fd2ed4d2SMikulas Patocka {
187fd2ed4d2SMikulas Patocka 	return atomic_read(&shared->in_flight[READ]) +
188fd2ed4d2SMikulas Patocka 	       atomic_read(&shared->in_flight[WRITE]);
189fd2ed4d2SMikulas Patocka }
190fd2ed4d2SMikulas Patocka 
dm_stats_init(struct dm_stats * stats)191*d3aa3e06SJiasheng Jiang int dm_stats_init(struct dm_stats *stats)
192fd2ed4d2SMikulas Patocka {
193fd2ed4d2SMikulas Patocka 	int cpu;
194fd2ed4d2SMikulas Patocka 	struct dm_stats_last_position *last;
195fd2ed4d2SMikulas Patocka 
196fd2ed4d2SMikulas Patocka 	mutex_init(&stats->mutex);
197fd2ed4d2SMikulas Patocka 	INIT_LIST_HEAD(&stats->list);
1980cdb90f0SMike Snitzer 	stats->precise_timestamps = false;
199fd2ed4d2SMikulas Patocka 	stats->last = alloc_percpu(struct dm_stats_last_position);
200*d3aa3e06SJiasheng Jiang 	if (!stats->last)
201*d3aa3e06SJiasheng Jiang 		return -ENOMEM;
202*d3aa3e06SJiasheng Jiang 
203fd2ed4d2SMikulas Patocka 	for_each_possible_cpu(cpu) {
204fd2ed4d2SMikulas Patocka 		last = per_cpu_ptr(stats->last, cpu);
205fd2ed4d2SMikulas Patocka 		last->last_sector = (sector_t)ULLONG_MAX;
206fd2ed4d2SMikulas Patocka 		last->last_rw = UINT_MAX;
207fd2ed4d2SMikulas Patocka 	}
208*d3aa3e06SJiasheng Jiang 
209*d3aa3e06SJiasheng Jiang 	return 0;
210fd2ed4d2SMikulas Patocka }
211fd2ed4d2SMikulas Patocka 
dm_stats_cleanup(struct dm_stats * stats)212fd2ed4d2SMikulas Patocka void dm_stats_cleanup(struct dm_stats *stats)
213fd2ed4d2SMikulas Patocka {
214fd2ed4d2SMikulas Patocka 	size_t ni;
215fd2ed4d2SMikulas Patocka 	struct dm_stat *s;
216fd2ed4d2SMikulas Patocka 	struct dm_stat_shared *shared;
217fd2ed4d2SMikulas Patocka 
218fd2ed4d2SMikulas Patocka 	while (!list_empty(&stats->list)) {
219fd2ed4d2SMikulas Patocka 		s = container_of(stats->list.next, struct dm_stat, list_entry);
220fd2ed4d2SMikulas Patocka 		list_del(&s->list_entry);
221fd2ed4d2SMikulas Patocka 		for (ni = 0; ni < s->n_entries; ni++) {
222fd2ed4d2SMikulas Patocka 			shared = &s->stat_shared[ni];
223fd2ed4d2SMikulas Patocka 			if (WARN_ON(dm_stat_in_flight(shared))) {
224fd2ed4d2SMikulas Patocka 				DMCRIT("leaked in-flight counter at index %lu "
225fd2ed4d2SMikulas Patocka 				       "(start %llu, end %llu, step %llu): reads %d, writes %d",
226fd2ed4d2SMikulas Patocka 				       (unsigned long)ni,
227fd2ed4d2SMikulas Patocka 				       (unsigned long long)s->start,
228fd2ed4d2SMikulas Patocka 				       (unsigned long long)s->end,
229fd2ed4d2SMikulas Patocka 				       (unsigned long long)s->step,
230fd2ed4d2SMikulas Patocka 				       atomic_read(&shared->in_flight[READ]),
231fd2ed4d2SMikulas Patocka 				       atomic_read(&shared->in_flight[WRITE]));
232fd2ed4d2SMikulas Patocka 			}
233bfe2b014SMikulas Patocka 			cond_resched();
234fd2ed4d2SMikulas Patocka 		}
235fd2ed4d2SMikulas Patocka 		dm_stat_free(&s->rcu_head);
236fd2ed4d2SMikulas Patocka 	}
237fd2ed4d2SMikulas Patocka 	free_percpu(stats->last);
238d5ffebddSMike Snitzer 	mutex_destroy(&stats->mutex);
239fd2ed4d2SMikulas Patocka }
240fd2ed4d2SMikulas Patocka 
dm_stats_recalc_precise_timestamps(struct dm_stats * stats)2410cdb90f0SMike Snitzer static void dm_stats_recalc_precise_timestamps(struct dm_stats *stats)
2420cdb90f0SMike Snitzer {
2430cdb90f0SMike Snitzer 	struct list_head *l;
2440cdb90f0SMike Snitzer 	struct dm_stat *tmp_s;
2450cdb90f0SMike Snitzer 	bool precise_timestamps = false;
2460cdb90f0SMike Snitzer 
2470cdb90f0SMike Snitzer 	list_for_each(l, &stats->list) {
2480cdb90f0SMike Snitzer 		tmp_s = container_of(l, struct dm_stat, list_entry);
2490cdb90f0SMike Snitzer 		if (tmp_s->stat_flags & STAT_PRECISE_TIMESTAMPS) {
2500cdb90f0SMike Snitzer 			precise_timestamps = true;
2510cdb90f0SMike Snitzer 			break;
2520cdb90f0SMike Snitzer 		}
2530cdb90f0SMike Snitzer 	}
2540cdb90f0SMike Snitzer 	stats->precise_timestamps = precise_timestamps;
2550cdb90f0SMike Snitzer }
2560cdb90f0SMike Snitzer 
/*
 * Create a statistics region covering sectors [@start, @end) in areas of
 * @step sectors and link it into @stats.
 *
 * @stat_flags:            STAT_* flags for the region
 * @n_histogram_entries:   number of values in @histogram_boundaries
 * @histogram_boundaries:  copied; the caller keeps ownership
 * @program_id, @aux_data: copied; the caller keeps ownership
 * @suspend_callback, @resume_callback, @md: used to quiesce the device
 *                         around the insertion into the list
 *
 * Returns the id of the new region (>= 0), or a negative errno:
 * -EINVAL (bad range or step, or an unsorted list), -EOVERFLOW (sizes do
 * not fit), -ENOMEM (accounting limit or allocation failure), -ENFILE (no
 * id left).
 */
static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
			   sector_t step, unsigned int stat_flags,
			   unsigned int n_histogram_entries,
			   unsigned long long *histogram_boundaries,
			   const char *program_id, const char *aux_data,
			   void (*suspend_callback)(struct mapped_device *),
			   void (*resume_callback)(struct mapped_device *),
			   struct mapped_device *md)
{
	struct list_head *pos;
	struct dm_stat *s, *other;
	sector_t n_entries;
	size_t ni;
	size_t shared_alloc_size;
	size_t percpu_alloc_size;
	size_t histogram_alloc_size;
	int cpu;
	int ret_id;
	int r;

	if (end < start || !step)
		return -EINVAL;

	/* n_entries = (end - start) / step, rounded up */
	n_entries = end - start;
	if (dm_sector_div64(n_entries, step))
		n_entries++;

	if (n_entries != (size_t)n_entries || !(size_t)(n_entries + 1))
		return -EOVERFLOW;

	/* compute the three allocation sizes, rejecting any that overflowed */
	shared_alloc_size = struct_size(s, stat_shared, n_entries);
	if ((shared_alloc_size - sizeof(struct dm_stat)) / sizeof(struct dm_stat_shared) != n_entries)
		return -EOVERFLOW;

	percpu_alloc_size = (size_t)n_entries * sizeof(struct dm_stat_percpu);
	if (percpu_alloc_size / sizeof(struct dm_stat_percpu) != n_entries)
		return -EOVERFLOW;

	histogram_alloc_size = (n_histogram_entries + 1) * (size_t)n_entries * sizeof(unsigned long long);
	if (histogram_alloc_size / (n_histogram_entries + 1) != (size_t)n_entries * sizeof(unsigned long long))
		return -EOVERFLOW;

	/* cheap early rejection before anything is allocated */
	if (!check_shared_memory(shared_alloc_size + histogram_alloc_size +
				 num_possible_cpus() * (percpu_alloc_size + histogram_alloc_size)))
		return -ENOMEM;

	s = dm_kvzalloc(shared_alloc_size, NUMA_NO_NODE);
	if (!s)
		return -ENOMEM;

	s->stat_flags = stat_flags;
	s->n_entries = n_entries;
	s->start = start;
	s->end = end;
	s->step = step;
	s->shared_alloc_size = shared_alloc_size;
	s->percpu_alloc_size = percpu_alloc_size;
	s->histogram_alloc_size = histogram_alloc_size;
	s->n_histogram_entries = n_histogram_entries;

	/* every failure from here up to the suspend is an allocation failure */
	r = -ENOMEM;

	s->histogram_boundaries = kmemdup(histogram_boundaries,
					  s->n_histogram_entries * sizeof(unsigned long long), GFP_KERNEL);
	if (!s->histogram_boundaries)
		goto out;

	s->program_id = kstrdup(program_id, GFP_KERNEL);
	if (!s->program_id)
		goto out;

	s->aux_data = kstrdup(aux_data, GFP_KERNEL);
	if (!s->aux_data)
		goto out;

	for (ni = 0; ni < n_entries; ni++) {
		atomic_set(&s->stat_shared[ni].in_flight[READ], 0);
		atomic_set(&s->stat_shared[ni].in_flight[WRITE], 0);
		cond_resched();
	}

	if (s->n_histogram_entries) {
		/* one buffer, sliced into n_histogram_entries + 1 buckets per entry */
		unsigned long long *slice;

		slice = dm_kvzalloc(s->histogram_alloc_size, NUMA_NO_NODE);
		if (!slice)
			goto out;

		for (ni = 0; ni < n_entries; ni++) {
			s->stat_shared[ni].tmp.histogram = slice;
			slice += s->n_histogram_entries + 1;
			cond_resched();
		}
	}

	for_each_possible_cpu(cpu) {
		struct dm_stat_percpu *counters;

		counters = dm_kvzalloc(percpu_alloc_size, cpu_to_node(cpu));
		if (!counters)
			goto out;
		s->stat_percpu[cpu] = counters;

		if (s->n_histogram_entries) {
			unsigned long long *slice;

			slice = dm_kvzalloc(s->histogram_alloc_size, cpu_to_node(cpu));
			if (!slice)
				goto out;

			for (ni = 0; ni < n_entries; ni++) {
				counters[ni].histogram = slice;
				slice += s->n_histogram_entries + 1;
				cond_resched();
			}
		}
	}

	/*
	 * Suspend/resume to make sure there is no i/o in flight,
	 * so that newly created statistics will be exact.
	 *
	 * (note: we couldn't suspend earlier because we must not
	 * allocate memory while suspended)
	 */
	suspend_callback(md);

	mutex_lock(&stats->mutex);

	/*
	 * Walk the id-sorted list to find the lowest unused id; "pos" ends
	 * up at the first region with a larger id, or at the list head.
	 */
	s->id = 0;
	list_for_each(pos, &stats->list) {
		other = list_entry(pos, struct dm_stat, list_entry);
		if (WARN_ON(other->id < s->id)) {
			r = -EINVAL;
			goto out_unlock_resume;
		}
		if (other->id > s->id)
			break;
		if (unlikely(s->id == INT_MAX)) {
			r = -ENFILE;
			goto out_unlock_resume;
		}
		s->id++;
	}
	/* remember the id now: s is published by the list insertion below */
	ret_id = s->id;
	list_add_tail_rcu(&s->list_entry, pos);

	dm_stats_recalc_precise_timestamps(stats);

	if (!static_key_enabled(&stats_enabled.key))
		static_branch_enable(&stats_enabled);

	mutex_unlock(&stats->mutex);

	resume_callback(md);

	return ret_id;

out_unlock_resume:
	mutex_unlock(&stats->mutex);
	resume_callback(md);
out:
	dm_stat_free(&s->rcu_head);
	return r;
}
426fd2ed4d2SMikulas Patocka 
__dm_stats_find(struct dm_stats * stats,int id)427fd2ed4d2SMikulas Patocka static struct dm_stat *__dm_stats_find(struct dm_stats *stats, int id)
428fd2ed4d2SMikulas Patocka {
429fd2ed4d2SMikulas Patocka 	struct dm_stat *s;
430fd2ed4d2SMikulas Patocka 
431fd2ed4d2SMikulas Patocka 	list_for_each_entry(s, &stats->list, list_entry) {
432fd2ed4d2SMikulas Patocka 		if (s->id > id)
433fd2ed4d2SMikulas Patocka 			break;
434fd2ed4d2SMikulas Patocka 		if (s->id == id)
435fd2ed4d2SMikulas Patocka 			return s;
436fd2ed4d2SMikulas Patocka 	}
437fd2ed4d2SMikulas Patocka 
438fd2ed4d2SMikulas Patocka 	return NULL;
439fd2ed4d2SMikulas Patocka }
440fd2ed4d2SMikulas Patocka 
dm_stats_delete(struct dm_stats * stats,int id)441fd2ed4d2SMikulas Patocka static int dm_stats_delete(struct dm_stats *stats, int id)
442fd2ed4d2SMikulas Patocka {
443fd2ed4d2SMikulas Patocka 	struct dm_stat *s;
444fd2ed4d2SMikulas Patocka 	int cpu;
445fd2ed4d2SMikulas Patocka 
446fd2ed4d2SMikulas Patocka 	mutex_lock(&stats->mutex);
447fd2ed4d2SMikulas Patocka 
448fd2ed4d2SMikulas Patocka 	s = __dm_stats_find(stats, id);
449fd2ed4d2SMikulas Patocka 	if (!s) {
450fd2ed4d2SMikulas Patocka 		mutex_unlock(&stats->mutex);
451fd2ed4d2SMikulas Patocka 		return -ENOENT;
452fd2ed4d2SMikulas Patocka 	}
453fd2ed4d2SMikulas Patocka 
454fd2ed4d2SMikulas Patocka 	list_del_rcu(&s->list_entry);
4550cdb90f0SMike Snitzer 
4560cdb90f0SMike Snitzer 	dm_stats_recalc_precise_timestamps(stats);
4570cdb90f0SMike Snitzer 
458fd2ed4d2SMikulas Patocka 	mutex_unlock(&stats->mutex);
459fd2ed4d2SMikulas Patocka 
460fd2ed4d2SMikulas Patocka 	/*
461fd2ed4d2SMikulas Patocka 	 * vfree can't be called from RCU callback
462fd2ed4d2SMikulas Patocka 	 */
463fd2ed4d2SMikulas Patocka 	for_each_possible_cpu(cpu)
464dfcfac3eSMikulas Patocka 		if (is_vmalloc_addr(s->stat_percpu) ||
465dfcfac3eSMikulas Patocka 		    is_vmalloc_addr(s->stat_percpu[cpu][0].histogram))
466fd2ed4d2SMikulas Patocka 			goto do_sync_free;
467dfcfac3eSMikulas Patocka 	if (is_vmalloc_addr(s) ||
468dfcfac3eSMikulas Patocka 	    is_vmalloc_addr(s->stat_shared[0].tmp.histogram)) {
469fd2ed4d2SMikulas Patocka do_sync_free:
470fd2ed4d2SMikulas Patocka 		synchronize_rcu_expedited();
471fd2ed4d2SMikulas Patocka 		dm_stat_free(&s->rcu_head);
472fd2ed4d2SMikulas Patocka 	} else {
4736aa7de05SMark Rutland 		WRITE_ONCE(dm_stat_need_rcu_barrier, 1);
474fd2ed4d2SMikulas Patocka 		call_rcu(&s->rcu_head, dm_stat_free);
475fd2ed4d2SMikulas Patocka 	}
476fd2ed4d2SMikulas Patocka 	return 0;
477fd2ed4d2SMikulas Patocka }
478fd2ed4d2SMikulas Patocka 
dm_stats_list(struct dm_stats * stats,const char * program,char * result,unsigned int maxlen)479fd2ed4d2SMikulas Patocka static int dm_stats_list(struct dm_stats *stats, const char *program,
48086a3238cSHeinz Mauelshagen 			 char *result, unsigned int maxlen)
481fd2ed4d2SMikulas Patocka {
482fd2ed4d2SMikulas Patocka 	struct dm_stat *s;
483fd2ed4d2SMikulas Patocka 	sector_t len;
48486a3238cSHeinz Mauelshagen 	unsigned int sz = 0;
485fd2ed4d2SMikulas Patocka 
486fd2ed4d2SMikulas Patocka 	/*
487fd2ed4d2SMikulas Patocka 	 * Output format:
488fd2ed4d2SMikulas Patocka 	 *   <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
489fd2ed4d2SMikulas Patocka 	 */
490fd2ed4d2SMikulas Patocka 
491fd2ed4d2SMikulas Patocka 	mutex_lock(&stats->mutex);
492fd2ed4d2SMikulas Patocka 	list_for_each_entry(s, &stats->list, list_entry) {
493fd2ed4d2SMikulas Patocka 		if (!program || !strcmp(program, s->program_id)) {
494fd2ed4d2SMikulas Patocka 			len = s->end - s->start;
495bd49784fSMikulas Patocka 			DMEMIT("%d: %llu+%llu %llu %s %s", s->id,
496fd2ed4d2SMikulas Patocka 				(unsigned long long)s->start,
497fd2ed4d2SMikulas Patocka 				(unsigned long long)len,
498fd2ed4d2SMikulas Patocka 				(unsigned long long)s->step,
499fd2ed4d2SMikulas Patocka 				s->program_id,
500fd2ed4d2SMikulas Patocka 				s->aux_data);
501bd49784fSMikulas Patocka 			if (s->stat_flags & STAT_PRECISE_TIMESTAMPS)
502bd49784fSMikulas Patocka 				DMEMIT(" precise_timestamps");
503bd49784fSMikulas Patocka 			if (s->n_histogram_entries) {
50486a3238cSHeinz Mauelshagen 				unsigned int i;
5050ef0b471SHeinz Mauelshagen 
506bd49784fSMikulas Patocka 				DMEMIT(" histogram:");
507bd49784fSMikulas Patocka 				for (i = 0; i < s->n_histogram_entries; i++) {
508bd49784fSMikulas Patocka 					if (i)
509bd49784fSMikulas Patocka 						DMEMIT(",");
510bd49784fSMikulas Patocka 					DMEMIT("%llu", s->histogram_boundaries[i]);
511bd49784fSMikulas Patocka 				}
512bd49784fSMikulas Patocka 			}
513bd49784fSMikulas Patocka 			DMEMIT("\n");
514fd2ed4d2SMikulas Patocka 		}
515bfe2b014SMikulas Patocka 		cond_resched();
516fd2ed4d2SMikulas Patocka 	}
517fd2ed4d2SMikulas Patocka 	mutex_unlock(&stats->mutex);
518fd2ed4d2SMikulas Patocka 
519fd2ed4d2SMikulas Patocka 	return 1;
520fd2ed4d2SMikulas Patocka }
521fd2ed4d2SMikulas Patocka 
dm_stat_round(struct dm_stat * s,struct dm_stat_shared * shared,struct dm_stat_percpu * p)522c96aec34SMikulas Patocka static void dm_stat_round(struct dm_stat *s, struct dm_stat_shared *shared,
523c96aec34SMikulas Patocka 			  struct dm_stat_percpu *p)
524fd2ed4d2SMikulas Patocka {
525fd2ed4d2SMikulas Patocka 	/*
526fd2ed4d2SMikulas Patocka 	 * This is racy, but so is part_round_stats_single.
527fd2ed4d2SMikulas Patocka 	 */
528c96aec34SMikulas Patocka 	unsigned long long now, difference;
52986a3238cSHeinz Mauelshagen 	unsigned int in_flight_read, in_flight_write;
530fd2ed4d2SMikulas Patocka 
531c96aec34SMikulas Patocka 	if (likely(!(s->stat_flags & STAT_PRECISE_TIMESTAMPS)))
532c96aec34SMikulas Patocka 		now = jiffies;
533c96aec34SMikulas Patocka 	else
534c96aec34SMikulas Patocka 		now = ktime_to_ns(ktime_get());
535c96aec34SMikulas Patocka 
536c96aec34SMikulas Patocka 	difference = now - shared->stamp;
537fd2ed4d2SMikulas Patocka 	if (!difference)
538fd2ed4d2SMikulas Patocka 		return;
539c96aec34SMikulas Patocka 
54086a3238cSHeinz Mauelshagen 	in_flight_read = (unsigned int)atomic_read(&shared->in_flight[READ]);
54186a3238cSHeinz Mauelshagen 	in_flight_write = (unsigned int)atomic_read(&shared->in_flight[WRITE]);
542fd2ed4d2SMikulas Patocka 	if (in_flight_read)
543fd2ed4d2SMikulas Patocka 		p->io_ticks[READ] += difference;
544fd2ed4d2SMikulas Patocka 	if (in_flight_write)
545fd2ed4d2SMikulas Patocka 		p->io_ticks[WRITE] += difference;
546fd2ed4d2SMikulas Patocka 	if (in_flight_read + in_flight_write) {
547fd2ed4d2SMikulas Patocka 		p->io_ticks_total += difference;
548fd2ed4d2SMikulas Patocka 		p->time_in_queue += (in_flight_read + in_flight_write) * difference;
549fd2ed4d2SMikulas Patocka 	}
550fd2ed4d2SMikulas Patocka 	shared->stamp = now;
551fd2ed4d2SMikulas Patocka }
552fd2ed4d2SMikulas Patocka 
/*
 * Account the start (@end == false) or the completion (@end == true) of an
 * I/O against entry @entry of region @s, on the current CPU's counters.
 *
 * @idx:              direction index into the two-element counter arrays
 * @len:              sectors to add on completion
 * @stats_aux:        supplies ->merged and ->duration_ns on completion
 * @duration_jiffies: duration used when the region is not precise
 */
static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
			      int idx, sector_t len,
			      struct dm_stats_aux *stats_aux, bool end,
			      unsigned long duration_jiffies)
{
	struct dm_stat_shared *shared = &s->stat_shared[entry];
	struct dm_stat_percpu *p;

	/*
	 * Strictly speaking local_irq_save/restore would be needed
	 * everywhere instead of preempt_disable/enable.
	 *
	 * preempt_disable/enable is racy if the driver finishes bios
	 * from non-interrupt context as well as from interrupt context
	 * or from more different interrupts.
	 *
	 * On 64-bit architectures the race only loses a few events, which
	 * is acceptable.  On 32-bit architectures it could throw a counter
	 * off by 2^32, so interrupts really are disabled there.
	 *
	 * part_stat_lock()/part_stat_unlock() have this race too.
	 */
#if BITS_PER_LONG == 32
	unsigned long flags;

	local_irq_save(flags);
#else
	preempt_disable();
#endif
	p = &s->stat_percpu[smp_processor_id()][entry];

	/* bring the busy-time counters up to date before in_flight changes */
	dm_stat_round(s, shared, p);

	if (end) {
		unsigned long long duration;

		atomic_dec(&shared->in_flight[idx]);
		p->sectors[idx] += len;
		p->ios[idx] += 1;
		p->merges[idx] += stats_aux->merged;

		/* ticks: jiffies or ns; histogram input: ms or ns */
		if (s->stat_flags & STAT_PRECISE_TIMESTAMPS) {
			p->ticks[idx] += stats_aux->duration_ns;
			duration = stats_aux->duration_ns;
		} else {
			p->ticks[idx] += duration_jiffies;
			duration = jiffies_to_msecs(duration_jiffies);
		}

		if (s->n_histogram_entries) {
			/*
			 * Binary search for the bucket: the result is the
			 * number of boundaries that are <= duration.
			 */
			unsigned int low = 0, high = s->n_histogram_entries + 1;

			while (low + 1 < high) {
				unsigned int middle = (low + high) / 2;

				if (s->histogram_boundaries[middle - 1] > duration)
					high = middle;
				else
					low = middle;
			}
			p->histogram[low]++;
		}
	} else {
		atomic_inc(&shared->in_flight[idx]);
	}

#if BITS_PER_LONG == 32
	local_irq_restore(flags);
#else
	preempt_enable();
#endif
}
624fd2ed4d2SMikulas Patocka 
/*
 * Account one bio against region @s.
 *
 * The sector range [bi_sector, end_sector) is clipped to the region's
 * [s->start, s->end) window and then walked one area (s->step sectors) at a
 * time.  dm_stat_for_entry() is called once per area touched, with the number
 * of sectors of the bio that fall into that area.
 *
 * @bi_rw, @end, @duration_jiffies and @stats_aux are passed through to
 * dm_stat_for_entry() unchanged.
 */
static void __dm_stat_bio(struct dm_stat *s, int bi_rw,
			  sector_t bi_sector, sector_t end_sector,
			  bool end, unsigned long duration_jiffies,
			  struct dm_stats_aux *stats_aux)
{
	sector_t clipped_start, area, in_area, remaining;
	size_t idx;

	/* The bio does not overlap this region at all. */
	if (end_sector <= s->start || bi_sector >= s->end)
		return;

	/* Drop whatever lies in front of the region ... */
	clipped_start = bi_sector;
	if (unlikely(bi_sector < s->start))
		clipped_start = s->start;
	area = clipped_start - s->start;
	remaining = end_sector - clipped_start;

	/* ... and whatever lies behind it. */
	if (unlikely(end_sector > s->end))
		remaining -= end_sector - s->end;

	/*
	 * The value left in 'area' after dm_sector_div64() is used as the
	 * index of the first area and the returned value as the offset inside
	 * it (presumably quotient and remainder of the division by s->step).
	 */
	in_area = dm_sector_div64(area, s->step);
	idx = area;

	for (;;) {
		sector_t chunk;

		if (WARN_ON_ONCE(idx >= s->n_entries)) {
			DMCRIT("Invalid area access in region id %d", s->id);
			return;
		}

		/* Sectors left in this area, capped by what is left of the bio. */
		chunk = s->step - in_area;
		if (chunk > remaining)
			chunk = remaining;

		dm_stat_for_entry(s, idx, bi_rw, chunk,
				  stats_aux, end, duration_jiffies);

		remaining -= chunk;
		if (!unlikely(remaining != 0))
			break;

		/* Continue at the beginning of the next area. */
		idx++;
		in_area = 0;
	}
}
662fd2ed4d2SMikulas Patocka 
/*
 * Account the start (@end == false) or the completion (@end == true) of an
 * I/O against every statistics region on @stats->list.
 *
 * @stats:       per-device statistics state
 * @bi_rw:       direction of the I/O; compared against WRITE
 * @bi_sector:   first sector of the I/O
 * @bi_sectors:  length of the I/O in sectors; zero-length I/O is ignored
 * @end:         false when the I/O is started, true when it completes
 * @start_time:  jiffies value taken when the I/O was started; only used on
 *               completion to compute the duration
 * @stats_aux:   per-I/O auxiliary data (merged flag, precise duration)
 */
void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
			 sector_t bi_sector, unsigned int bi_sectors, bool end,
			 unsigned long start_time,
			 struct dm_stats_aux *stats_aux)
{
	struct dm_stat *s;
	sector_t end_sector;
	struct dm_stats_last_position *last;
	bool got_precise_time;
	unsigned long duration_jiffies = 0;

	if (unlikely(!bi_sectors))
		return;

	end_sector = bi_sector + bi_sectors;

	if (!end) {
		/*
		 * A race condition can at worst result in the merged flag being
		 * misrepresented, so we don't have to disable preemption here.
		 */
		last = raw_cpu_ptr(stats->last);

		/*
		 * The I/O counts as merged when it starts exactly where the
		 * previous I/O seen on this CPU ended and goes in the same
		 * direction.  The sector comparison must be its own term:
		 * comparing bi_sector against the result of
		 * "last_sector && <direction match>" would compare it with
		 * 0 or 1.
		 */
		stats_aux->merged =
			(bi_sector == READ_ONCE(last->last_sector)) &&
			((bi_rw == WRITE) ==
			 (READ_ONCE(last->last_rw) == WRITE));

		WRITE_ONCE(last->last_sector, end_sector);
		WRITE_ONCE(last->last_rw, bi_rw);
	} else
		duration_jiffies = jiffies - start_time;

	rcu_read_lock();

	got_precise_time = false;
	list_for_each_entry_rcu(s, &stats->list, list_entry) {
		/*
		 * Take the precise timestamp at most once per call, and only
		 * if at least one region asked for precise timestamps.
		 */
		if (s->stat_flags & STAT_PRECISE_TIMESTAMPS && !got_precise_time) {
			/* start (!end) duration_ns is set by DM core's alloc_io() */
			if (end)
				stats_aux->duration_ns = ktime_to_ns(ktime_get()) - stats_aux->duration_ns;
			got_precise_time = true;
		}
		__dm_stat_bio(s, bi_rw, bi_sector, end_sector, end, duration_jiffies, stats_aux);
	}

	rcu_read_unlock();
}
710fd2ed4d2SMikulas Patocka 
/*
 * Recompute @shared->tmp as the sum of the per-CPU counters of area @x in
 * region @s.
 *
 * dm_stat_round() is first run for the current CPU's counters with
 * interrupts disabled.  After that the totals are zeroed and the counters of
 * every possible CPU are added up.  The per-CPU values are read with
 * READ_ONCE() and without any further locking.
 */
static void __dm_stat_init_temporary_percpu_totals(struct dm_stat_shared *shared,
						   struct dm_stat *s, size_t x)
{
	struct dm_stat_percpu *tot = &shared->tmp;
	struct dm_stat_percpu *src;
	int cpu;

	local_irq_disable();
	src = &s->stat_percpu[smp_processor_id()][x];
	dm_stat_round(s, shared, src);
	local_irq_enable();

	/* Start from a clean slate; the histogram buffer itself is kept. */
	tot->sectors[READ] = 0;
	tot->ios[READ] = 0;
	tot->merges[READ] = 0;
	tot->ticks[READ] = 0;
	tot->io_ticks[READ] = 0;

	tot->sectors[WRITE] = 0;
	tot->ios[WRITE] = 0;
	tot->merges[WRITE] = 0;
	tot->ticks[WRITE] = 0;
	tot->io_ticks[WRITE] = 0;

	tot->io_ticks_total = 0;
	tot->time_in_queue = 0;

	if (s->n_histogram_entries)
		memset(tot->histogram, 0,
		       (s->n_histogram_entries + 1) * sizeof(unsigned long long));

	for_each_possible_cpu(cpu) {
		src = &s->stat_percpu[cpu][x];

		tot->sectors[READ] += READ_ONCE(src->sectors[READ]);
		tot->ios[READ] += READ_ONCE(src->ios[READ]);
		tot->merges[READ] += READ_ONCE(src->merges[READ]);
		tot->ticks[READ] += READ_ONCE(src->ticks[READ]);
		tot->io_ticks[READ] += READ_ONCE(src->io_ticks[READ]);

		tot->sectors[WRITE] += READ_ONCE(src->sectors[WRITE]);
		tot->ios[WRITE] += READ_ONCE(src->ios[WRITE]);
		tot->merges[WRITE] += READ_ONCE(src->merges[WRITE]);
		tot->ticks[WRITE] += READ_ONCE(src->ticks[WRITE]);
		tot->io_ticks[WRITE] += READ_ONCE(src->io_ticks[WRITE]);

		tot->io_ticks_total += READ_ONCE(src->io_ticks_total);
		tot->time_in_queue += READ_ONCE(src->time_in_queue);

		if (s->n_histogram_entries) {
			unsigned int bin;

			for (bin = 0; bin < s->n_histogram_entries + 1; bin++)
				tot->histogram[bin] += READ_ONCE(src->histogram[bin]);
		}
	}
}
760fd2ed4d2SMikulas Patocka 
/*
 * Clear the counters of areas [idx_start, idx_end) of region @s.
 *
 * Nothing is zeroed directly.  The totals held in the area's shared->tmp are
 * subtracted from the counters of whichever CPU this code happens to run on,
 * with interrupts disabled around each update.
 *
 * @init_tmp_percpu_totals: recompute shared->tmp first; callers pass false
 *                          when shared->tmp already holds the totals they
 *                          want removed.
 */
static void __dm_stat_clear(struct dm_stat *s, size_t idx_start, size_t idx_end,
			    bool init_tmp_percpu_totals)
{
	size_t area;

	for (area = idx_start; area < idx_end; area++) {
		struct dm_stat_shared *shared = &s->stat_shared[area];
		struct dm_stat_percpu *tot = &shared->tmp;
		struct dm_stat_percpu *cur;

		if (init_tmp_percpu_totals)
			__dm_stat_init_temporary_percpu_totals(shared, s, area);

		local_irq_disable();
		cur = &s->stat_percpu[smp_processor_id()][area];

		cur->sectors[READ] -= tot->sectors[READ];
		cur->ios[READ] -= tot->ios[READ];
		cur->merges[READ] -= tot->merges[READ];
		cur->ticks[READ] -= tot->ticks[READ];
		cur->io_ticks[READ] -= tot->io_ticks[READ];

		cur->sectors[WRITE] -= tot->sectors[WRITE];
		cur->ios[WRITE] -= tot->ios[WRITE];
		cur->merges[WRITE] -= tot->merges[WRITE];
		cur->ticks[WRITE] -= tot->ticks[WRITE];
		cur->io_ticks[WRITE] -= tot->io_ticks[WRITE];

		cur->io_ticks_total -= tot->io_ticks_total;
		cur->time_in_queue -= tot->time_in_queue;
		local_irq_enable();

		if (s->n_histogram_entries) {
			unsigned int bin;

			/*
			 * Interrupts are re-enabled between bins, so the
			 * current CPU is looked up again for every bin.
			 */
			for (bin = 0; bin < s->n_histogram_entries + 1; bin++) {
				local_irq_disable();
				cur = &s->stat_percpu[smp_processor_id()][area];
				cur->histogram[bin] -= tot->histogram[bin];
				local_irq_enable();
			}
		}

		cond_resched();
	}
}
800fd2ed4d2SMikulas Patocka 
dm_stats_clear(struct dm_stats * stats,int id)801fd2ed4d2SMikulas Patocka static int dm_stats_clear(struct dm_stats *stats, int id)
802fd2ed4d2SMikulas Patocka {
803fd2ed4d2SMikulas Patocka 	struct dm_stat *s;
804fd2ed4d2SMikulas Patocka 
805fd2ed4d2SMikulas Patocka 	mutex_lock(&stats->mutex);
806fd2ed4d2SMikulas Patocka 
807fd2ed4d2SMikulas Patocka 	s = __dm_stats_find(stats, id);
808fd2ed4d2SMikulas Patocka 	if (!s) {
809fd2ed4d2SMikulas Patocka 		mutex_unlock(&stats->mutex);
810fd2ed4d2SMikulas Patocka 		return -ENOENT;
811fd2ed4d2SMikulas Patocka 	}
812fd2ed4d2SMikulas Patocka 
813fd2ed4d2SMikulas Patocka 	__dm_stat_clear(s, 0, s->n_entries, true);
814fd2ed4d2SMikulas Patocka 
815fd2ed4d2SMikulas Patocka 	mutex_unlock(&stats->mutex);
816fd2ed4d2SMikulas Patocka 
817fd2ed4d2SMikulas Patocka 	return 1;
818fd2ed4d2SMikulas Patocka }
819fd2ed4d2SMikulas Patocka 
820fd2ed4d2SMikulas Patocka /*
821fd2ed4d2SMikulas Patocka  * This is like jiffies_to_msec, but works for 64-bit values.
822fd2ed4d2SMikulas Patocka  */
dm_jiffies_to_msec64(struct dm_stat * s,unsigned long long j)823c96aec34SMikulas Patocka static unsigned long long dm_jiffies_to_msec64(struct dm_stat *s, unsigned long long j)
824fd2ed4d2SMikulas Patocka {
825c96aec34SMikulas Patocka 	unsigned long long result;
82686a3238cSHeinz Mauelshagen 	unsigned int mult;
827fd2ed4d2SMikulas Patocka 
828c96aec34SMikulas Patocka 	if (s->stat_flags & STAT_PRECISE_TIMESTAMPS)
829c96aec34SMikulas Patocka 		return j;
830c96aec34SMikulas Patocka 
831c96aec34SMikulas Patocka 	result = 0;
832fd2ed4d2SMikulas Patocka 	if (j)
833fd2ed4d2SMikulas Patocka 		result = jiffies_to_msecs(j & 0x3fffff);
834fd2ed4d2SMikulas Patocka 	if (j >= 1 << 22) {
835fd2ed4d2SMikulas Patocka 		mult = jiffies_to_msecs(1 << 22);
836fd2ed4d2SMikulas Patocka 		result += (unsigned long long)mult * (unsigned long long)jiffies_to_msecs((j >> 22) & 0x3fffff);
837fd2ed4d2SMikulas Patocka 	}
838fd2ed4d2SMikulas Patocka 	if (j >= 1ULL << 44)
839fd2ed4d2SMikulas Patocka 		result += (unsigned long long)mult * (unsigned long long)mult * (unsigned long long)jiffies_to_msecs(j >> 44);
840fd2ed4d2SMikulas Patocka 
841fd2ed4d2SMikulas Patocka 	return result;
842fd2ed4d2SMikulas Patocka }
843fd2ed4d2SMikulas Patocka 
/*
 * Print the counters of areas [idx_start, idx_start + idx_len) of region @id
 * into @result, one line per area, and optionally clear them afterwards.
 *
 * Output format of each line:
 *   <start_sector>+<length> counters
 * followed, for regions with a histogram, by the histogram bins separated
 * by ':'.
 *
 * An out-of-range or overflowing index window is clamped to the region's
 * n_entries.  When the output does not fit into @maxlen bytes, printing
 * stops and the counters are NOT cleared.
 *
 * Returns 1 if the region exists (even when the output was cut short),
 * -ENOENT otherwise.
 */
static int dm_stats_print(struct dm_stats *stats, int id,
			  size_t idx_start, size_t idx_len,
			  bool clear, char *result, unsigned int maxlen)
{
	/*
	 * DMEMIT() is given no buffer argument; it presumably works on the
	 * local names 'sz', 'result' and 'maxlen', so they keep these names.
	 */
	unsigned int sz = 0;
	struct dm_stat *region;
	sector_t area_start, step;
	size_t idx, idx_end;
	int ret = 1;

	mutex_lock(&stats->mutex);

	region = __dm_stats_find(stats, id);
	if (!region) {
		ret = -ENOENT;
		goto out_unlock;
	}

	/* Clamp the requested window, guarding against wrap-around. */
	idx_end = idx_start + idx_len;
	if (idx_end < idx_start || idx_end > region->n_entries)
		idx_end = region->n_entries;
	if (idx_start > idx_end)
		idx_start = idx_end;

	step = region->step;
	area_start = region->start + (step * idx_start);

	for (idx = idx_start; idx < idx_end; idx++) {
		struct dm_stat_shared *shared = &region->stat_shared[idx];
		struct dm_stat_percpu *tot = &shared->tmp;
		sector_t area_end;

		area_end = area_start + step;
		if (unlikely(area_end > region->end))
			area_end = region->end;

		__dm_stat_init_temporary_percpu_totals(shared, region, idx);

		DMEMIT("%llu+%llu %llu %llu %llu %llu %llu %llu %llu %llu %d %llu %llu %llu %llu",
		       (unsigned long long)area_start,
		       (unsigned long long)step,
		       tot->ios[READ],
		       tot->merges[READ],
		       tot->sectors[READ],
		       dm_jiffies_to_msec64(region, tot->ticks[READ]),
		       tot->ios[WRITE],
		       tot->merges[WRITE],
		       tot->sectors[WRITE],
		       dm_jiffies_to_msec64(region, tot->ticks[WRITE]),
		       dm_stat_in_flight(shared),
		       dm_jiffies_to_msec64(region, tot->io_ticks_total),
		       dm_jiffies_to_msec64(region, tot->time_in_queue),
		       dm_jiffies_to_msec64(region, tot->io_ticks[READ]),
		       dm_jiffies_to_msec64(region, tot->io_ticks[WRITE]));

		if (region->n_histogram_entries) {
			unsigned int bin;

			/* First bin is introduced by a space, the rest by ':'. */
			for (bin = 0; bin < region->n_histogram_entries + 1; bin++)
				DMEMIT("%s%llu", bin ? ":" : " ", tot->histogram[bin]);
		}
		DMEMIT("\n");

		/* Buffer exhausted: stop and skip the clear below. */
		if (unlikely(sz + 1 >= maxlen))
			goto out_unlock;

		cond_resched();

		area_start = area_end;
	}

	/* shared->tmp still holds exactly what was printed for each area. */
	if (clear)
		__dm_stat_clear(region, idx_start, idx_end, false);

out_unlock:
	mutex_unlock(&stats->mutex);

	return ret;
}
925fd2ed4d2SMikulas Patocka 
dm_stats_set_aux(struct dm_stats * stats,int id,const char * aux_data)926fd2ed4d2SMikulas Patocka static int dm_stats_set_aux(struct dm_stats *stats, int id, const char *aux_data)
927fd2ed4d2SMikulas Patocka {
928fd2ed4d2SMikulas Patocka 	struct dm_stat *s;
929fd2ed4d2SMikulas Patocka 	const char *new_aux_data;
930fd2ed4d2SMikulas Patocka 
931fd2ed4d2SMikulas Patocka 	mutex_lock(&stats->mutex);
932fd2ed4d2SMikulas Patocka 
933fd2ed4d2SMikulas Patocka 	s = __dm_stats_find(stats, id);
934fd2ed4d2SMikulas Patocka 	if (!s) {
935fd2ed4d2SMikulas Patocka 		mutex_unlock(&stats->mutex);
936fd2ed4d2SMikulas Patocka 		return -ENOENT;
937fd2ed4d2SMikulas Patocka 	}
938fd2ed4d2SMikulas Patocka 
939fd2ed4d2SMikulas Patocka 	new_aux_data = kstrdup(aux_data, GFP_KERNEL);
940fd2ed4d2SMikulas Patocka 	if (!new_aux_data) {
941fd2ed4d2SMikulas Patocka 		mutex_unlock(&stats->mutex);
942fd2ed4d2SMikulas Patocka 		return -ENOMEM;
943fd2ed4d2SMikulas Patocka 	}
944fd2ed4d2SMikulas Patocka 
945fd2ed4d2SMikulas Patocka 	kfree(s->aux_data);
946fd2ed4d2SMikulas Patocka 	s->aux_data = new_aux_data;
947fd2ed4d2SMikulas Patocka 
948fd2ed4d2SMikulas Patocka 	mutex_unlock(&stats->mutex);
949fd2ed4d2SMikulas Patocka 
950fd2ed4d2SMikulas Patocka 	return 0;
951fd2ed4d2SMikulas Patocka }
952fd2ed4d2SMikulas Patocka 
parse_histogram(const char * h,unsigned int * n_histogram_entries,unsigned long long ** histogram_boundaries)95386a3238cSHeinz Mauelshagen static int parse_histogram(const char *h, unsigned int *n_histogram_entries,
954dfcfac3eSMikulas Patocka 			   unsigned long long **histogram_boundaries)
955dfcfac3eSMikulas Patocka {
956dfcfac3eSMikulas Patocka 	const char *q;
95786a3238cSHeinz Mauelshagen 	unsigned int n;
958dfcfac3eSMikulas Patocka 	unsigned long long last;
959dfcfac3eSMikulas Patocka 
960dfcfac3eSMikulas Patocka 	*n_histogram_entries = 1;
961dfcfac3eSMikulas Patocka 	for (q = h; *q; q++)
962dfcfac3eSMikulas Patocka 		if (*q == ',')
963dfcfac3eSMikulas Patocka 			(*n_histogram_entries)++;
964dfcfac3eSMikulas Patocka 
9656da2ec56SKees Cook 	*histogram_boundaries = kmalloc_array(*n_histogram_entries,
9666da2ec56SKees Cook 					      sizeof(unsigned long long),
9676da2ec56SKees Cook 					      GFP_KERNEL);
968dfcfac3eSMikulas Patocka 	if (!*histogram_boundaries)
969dfcfac3eSMikulas Patocka 		return -ENOMEM;
970dfcfac3eSMikulas Patocka 
971dfcfac3eSMikulas Patocka 	n = 0;
972dfcfac3eSMikulas Patocka 	last = 0;
973dfcfac3eSMikulas Patocka 	while (1) {
974dfcfac3eSMikulas Patocka 		unsigned long long hi;
975dfcfac3eSMikulas Patocka 		int s;
976dfcfac3eSMikulas Patocka 		char ch;
9770ef0b471SHeinz Mauelshagen 
978dfcfac3eSMikulas Patocka 		s = sscanf(h, "%llu%c", &hi, &ch);
979dfcfac3eSMikulas Patocka 		if (!s || (s == 2 && ch != ','))
980dfcfac3eSMikulas Patocka 			return -EINVAL;
981dfcfac3eSMikulas Patocka 		if (hi <= last)
982dfcfac3eSMikulas Patocka 			return -EINVAL;
983dfcfac3eSMikulas Patocka 		last = hi;
984dfcfac3eSMikulas Patocka 		(*histogram_boundaries)[n] = hi;
985dfcfac3eSMikulas Patocka 		if (s == 1)
986dfcfac3eSMikulas Patocka 			return 0;
987dfcfac3eSMikulas Patocka 		h = strchr(h, ',') + 1;
988dfcfac3eSMikulas Patocka 		n++;
989dfcfac3eSMikulas Patocka 	}
990dfcfac3eSMikulas Patocka }
991dfcfac3eSMikulas Patocka 
message_stats_create(struct mapped_device * md,unsigned int argc,char ** argv,char * result,unsigned int maxlen)992fd2ed4d2SMikulas Patocka static int message_stats_create(struct mapped_device *md,
99386a3238cSHeinz Mauelshagen 				unsigned int argc, char **argv,
99486a3238cSHeinz Mauelshagen 				char *result, unsigned int maxlen)
995fd2ed4d2SMikulas Patocka {
996dfcfac3eSMikulas Patocka 	int r;
997fd2ed4d2SMikulas Patocka 	int id;
998fd2ed4d2SMikulas Patocka 	char dummy;
999fd2ed4d2SMikulas Patocka 	unsigned long long start, end, len, step;
100086a3238cSHeinz Mauelshagen 	unsigned int divisor;
1001fd2ed4d2SMikulas Patocka 	const char *program_id, *aux_data;
100286a3238cSHeinz Mauelshagen 	unsigned int stat_flags = 0;
100386a3238cSHeinz Mauelshagen 	unsigned int n_histogram_entries = 0;
1004dfcfac3eSMikulas Patocka 	unsigned long long *histogram_boundaries = NULL;
1005c96aec34SMikulas Patocka 	struct dm_arg_set as, as_backup;
1006c96aec34SMikulas Patocka 	const char *a;
100786a3238cSHeinz Mauelshagen 	unsigned int feature_args;
1008fd2ed4d2SMikulas Patocka 
1009fd2ed4d2SMikulas Patocka 	/*
1010fd2ed4d2SMikulas Patocka 	 * Input format:
1011c96aec34SMikulas Patocka 	 *   <range> <step> [<extra_parameters> <parameters>] [<program_id> [<aux_data>]]
1012fd2ed4d2SMikulas Patocka 	 */
1013fd2ed4d2SMikulas Patocka 
1014c96aec34SMikulas Patocka 	if (argc < 3)
1015dfcfac3eSMikulas Patocka 		goto ret_einval;
1016fd2ed4d2SMikulas Patocka 
1017c96aec34SMikulas Patocka 	as.argc = argc;
1018c96aec34SMikulas Patocka 	as.argv = argv;
1019c96aec34SMikulas Patocka 	dm_consume_args(&as, 1);
1020c96aec34SMikulas Patocka 
1021c96aec34SMikulas Patocka 	a = dm_shift_arg(&as);
1022c96aec34SMikulas Patocka 	if (!strcmp(a, "-")) {
1023fd2ed4d2SMikulas Patocka 		start = 0;
1024fd2ed4d2SMikulas Patocka 		len = dm_get_size(md);
1025fd2ed4d2SMikulas Patocka 		if (!len)
1026fd2ed4d2SMikulas Patocka 			len = 1;
1027c96aec34SMikulas Patocka 	} else if (sscanf(a, "%llu+%llu%c", &start, &len, &dummy) != 2 ||
1028fd2ed4d2SMikulas Patocka 		   start != (sector_t)start || len != (sector_t)len)
1029dfcfac3eSMikulas Patocka 		goto ret_einval;
1030fd2ed4d2SMikulas Patocka 
1031fd2ed4d2SMikulas Patocka 	end = start + len;
1032fd2ed4d2SMikulas Patocka 	if (start >= end)
1033dfcfac3eSMikulas Patocka 		goto ret_einval;
1034fd2ed4d2SMikulas Patocka 
1035c96aec34SMikulas Patocka 	a = dm_shift_arg(&as);
1036c96aec34SMikulas Patocka 	if (sscanf(a, "/%u%c", &divisor, &dummy) == 1) {
1037dd4c1b7dSMikulas Patocka 		if (!divisor)
1038dd4c1b7dSMikulas Patocka 			return -EINVAL;
1039fd2ed4d2SMikulas Patocka 		step = end - start;
1040fd2ed4d2SMikulas Patocka 		if (do_div(step, divisor))
1041fd2ed4d2SMikulas Patocka 			step++;
1042fd2ed4d2SMikulas Patocka 		if (!step)
1043fd2ed4d2SMikulas Patocka 			step = 1;
1044c96aec34SMikulas Patocka 	} else if (sscanf(a, "%llu%c", &step, &dummy) != 1 ||
1045fd2ed4d2SMikulas Patocka 		   step != (sector_t)step || !step)
1046dfcfac3eSMikulas Patocka 		goto ret_einval;
1047fd2ed4d2SMikulas Patocka 
1048c96aec34SMikulas Patocka 	as_backup = as;
1049c96aec34SMikulas Patocka 	a = dm_shift_arg(&as);
1050c96aec34SMikulas Patocka 	if (a && sscanf(a, "%u%c", &feature_args, &dummy) == 1) {
1051c96aec34SMikulas Patocka 		while (feature_args--) {
1052c96aec34SMikulas Patocka 			a = dm_shift_arg(&as);
1053c96aec34SMikulas Patocka 			if (!a)
1054dfcfac3eSMikulas Patocka 				goto ret_einval;
1055c96aec34SMikulas Patocka 			if (!strcasecmp(a, "precise_timestamps"))
1056c96aec34SMikulas Patocka 				stat_flags |= STAT_PRECISE_TIMESTAMPS;
1057dfcfac3eSMikulas Patocka 			else if (!strncasecmp(a, "histogram:", 10)) {
1058dfcfac3eSMikulas Patocka 				if (n_histogram_entries)
1059dfcfac3eSMikulas Patocka 					goto ret_einval;
1060d715fa23SHeinz Mauelshagen 				r = parse_histogram(a + 10, &n_histogram_entries, &histogram_boundaries);
1061d715fa23SHeinz Mauelshagen 				if (r)
1062dfcfac3eSMikulas Patocka 					goto ret;
1063dfcfac3eSMikulas Patocka 			} else
1064dfcfac3eSMikulas Patocka 				goto ret_einval;
1065c96aec34SMikulas Patocka 		}
1066c96aec34SMikulas Patocka 	} else {
1067c96aec34SMikulas Patocka 		as = as_backup;
1068c96aec34SMikulas Patocka 	}
1069c96aec34SMikulas Patocka 
1070fd2ed4d2SMikulas Patocka 	program_id = "-";
1071fd2ed4d2SMikulas Patocka 	aux_data = "-";
1072fd2ed4d2SMikulas Patocka 
1073c96aec34SMikulas Patocka 	a = dm_shift_arg(&as);
1074c96aec34SMikulas Patocka 	if (a)
1075c96aec34SMikulas Patocka 		program_id = a;
1076fd2ed4d2SMikulas Patocka 
1077c96aec34SMikulas Patocka 	a = dm_shift_arg(&as);
1078c96aec34SMikulas Patocka 	if (a)
1079c96aec34SMikulas Patocka 		aux_data = a;
1080c96aec34SMikulas Patocka 
1081c96aec34SMikulas Patocka 	if (as.argc)
1082dfcfac3eSMikulas Patocka 		goto ret_einval;
1083fd2ed4d2SMikulas Patocka 
1084fd2ed4d2SMikulas Patocka 	/*
1085fd2ed4d2SMikulas Patocka 	 * If a buffer overflow happens after we created the region,
1086fd2ed4d2SMikulas Patocka 	 * it's too late (the userspace would retry with a larger
1087fd2ed4d2SMikulas Patocka 	 * buffer, but the region id that caused the overflow is already
1088fd2ed4d2SMikulas Patocka 	 * leaked).  So we must detect buffer overflow in advance.
1089fd2ed4d2SMikulas Patocka 	 */
1090fd2ed4d2SMikulas Patocka 	snprintf(result, maxlen, "%d", INT_MAX);
1091dfcfac3eSMikulas Patocka 	if (dm_message_test_buffer_overflow(result, maxlen)) {
1092dfcfac3eSMikulas Patocka 		r = 1;
1093dfcfac3eSMikulas Patocka 		goto ret;
1094dfcfac3eSMikulas Patocka 	}
1095fd2ed4d2SMikulas Patocka 
1096dfcfac3eSMikulas Patocka 	id = dm_stats_create(dm_get_stats(md), start, end, step, stat_flags,
1097dfcfac3eSMikulas Patocka 			     n_histogram_entries, histogram_boundaries, program_id, aux_data,
1098ffcc3936SMike Snitzer 			     dm_internal_suspend_fast, dm_internal_resume_fast, md);
1099dfcfac3eSMikulas Patocka 	if (id < 0) {
1100dfcfac3eSMikulas Patocka 		r = id;
1101dfcfac3eSMikulas Patocka 		goto ret;
1102dfcfac3eSMikulas Patocka 	}
1103fd2ed4d2SMikulas Patocka 
1104fd2ed4d2SMikulas Patocka 	snprintf(result, maxlen, "%d", id);
1105fd2ed4d2SMikulas Patocka 
1106dfcfac3eSMikulas Patocka 	r = 1;
1107dfcfac3eSMikulas Patocka 	goto ret;
1108dfcfac3eSMikulas Patocka 
1109dfcfac3eSMikulas Patocka ret_einval:
1110dfcfac3eSMikulas Patocka 	r = -EINVAL;
1111dfcfac3eSMikulas Patocka ret:
1112dfcfac3eSMikulas Patocka 	kfree(histogram_boundaries);
1113dfcfac3eSMikulas Patocka 	return r;
1114fd2ed4d2SMikulas Patocka }
1115fd2ed4d2SMikulas Patocka 
message_stats_delete(struct mapped_device * md,unsigned int argc,char ** argv)1116fd2ed4d2SMikulas Patocka static int message_stats_delete(struct mapped_device *md,
111786a3238cSHeinz Mauelshagen 				unsigned int argc, char **argv)
1118fd2ed4d2SMikulas Patocka {
1119fd2ed4d2SMikulas Patocka 	int id;
1120fd2ed4d2SMikulas Patocka 	char dummy;
1121fd2ed4d2SMikulas Patocka 
1122fd2ed4d2SMikulas Patocka 	if (argc != 2)
1123fd2ed4d2SMikulas Patocka 		return -EINVAL;
1124fd2ed4d2SMikulas Patocka 
1125fd2ed4d2SMikulas Patocka 	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
1126fd2ed4d2SMikulas Patocka 		return -EINVAL;
1127fd2ed4d2SMikulas Patocka 
1128fd2ed4d2SMikulas Patocka 	return dm_stats_delete(dm_get_stats(md), id);
1129fd2ed4d2SMikulas Patocka }
1130fd2ed4d2SMikulas Patocka 
message_stats_clear(struct mapped_device * md,unsigned int argc,char ** argv)1131fd2ed4d2SMikulas Patocka static int message_stats_clear(struct mapped_device *md,
113286a3238cSHeinz Mauelshagen 			       unsigned int argc, char **argv)
1133fd2ed4d2SMikulas Patocka {
1134fd2ed4d2SMikulas Patocka 	int id;
1135fd2ed4d2SMikulas Patocka 	char dummy;
1136fd2ed4d2SMikulas Patocka 
1137fd2ed4d2SMikulas Patocka 	if (argc != 2)
1138fd2ed4d2SMikulas Patocka 		return -EINVAL;
1139fd2ed4d2SMikulas Patocka 
1140fd2ed4d2SMikulas Patocka 	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
1141fd2ed4d2SMikulas Patocka 		return -EINVAL;
1142fd2ed4d2SMikulas Patocka 
1143fd2ed4d2SMikulas Patocka 	return dm_stats_clear(dm_get_stats(md), id);
1144fd2ed4d2SMikulas Patocka }
1145fd2ed4d2SMikulas Patocka 
message_stats_list(struct mapped_device * md,unsigned int argc,char ** argv,char * result,unsigned int maxlen)1146fd2ed4d2SMikulas Patocka static int message_stats_list(struct mapped_device *md,
114786a3238cSHeinz Mauelshagen 			      unsigned int argc, char **argv,
114886a3238cSHeinz Mauelshagen 			      char *result, unsigned int maxlen)
1149fd2ed4d2SMikulas Patocka {
1150fd2ed4d2SMikulas Patocka 	int r;
1151fd2ed4d2SMikulas Patocka 	const char *program = NULL;
1152fd2ed4d2SMikulas Patocka 
1153fd2ed4d2SMikulas Patocka 	if (argc < 1 || argc > 2)
1154fd2ed4d2SMikulas Patocka 		return -EINVAL;
1155fd2ed4d2SMikulas Patocka 
1156fd2ed4d2SMikulas Patocka 	if (argc > 1) {
1157fd2ed4d2SMikulas Patocka 		program = kstrdup(argv[1], GFP_KERNEL);
1158fd2ed4d2SMikulas Patocka 		if (!program)
1159fd2ed4d2SMikulas Patocka 			return -ENOMEM;
1160fd2ed4d2SMikulas Patocka 	}
1161fd2ed4d2SMikulas Patocka 
1162fd2ed4d2SMikulas Patocka 	r = dm_stats_list(dm_get_stats(md), program, result, maxlen);
1163fd2ed4d2SMikulas Patocka 
1164fd2ed4d2SMikulas Patocka 	kfree(program);
1165fd2ed4d2SMikulas Patocka 
1166fd2ed4d2SMikulas Patocka 	return r;
1167fd2ed4d2SMikulas Patocka }
1168fd2ed4d2SMikulas Patocka 
/*
 * Handle the messages that print a region (and clear it when @clear is set).
 *
 * argv[1] is the region id.  With four arguments, argv[2] and argv[3] give
 * the first area index and the number of areas; "-" in either position keeps
 * the default (0 and ULONG_MAX respectively).
 *
 * Returns -EINVAL on malformed arguments, otherwise the result of
 * dm_stats_print().
 */
static int message_stats_print(struct mapped_device *md,
			       unsigned int argc, char **argv, bool clear,
			       char *result, unsigned int maxlen)
{
	unsigned long first_area = 0;
	unsigned long n_areas = ULONG_MAX;
	char trail;
	int region_id;

	if (argc != 2 && argc != 4)
		return -EINVAL;

	if (sscanf(argv[1], "%d%c", &region_id, &trail) != 1)
		return -EINVAL;
	if (region_id < 0)
		return -EINVAL;

	/* argc is either 2 or 4 at this point. */
	if (argc == 4) {
		if (strcmp(argv[2], "-") != 0 &&
		    sscanf(argv[2], "%lu%c", &first_area, &trail) != 1)
			return -EINVAL;
		if (strcmp(argv[3], "-") != 0 &&
		    sscanf(argv[3], "%lu%c", &n_areas, &trail) != 1)
			return -EINVAL;
	}

	return dm_stats_print(dm_get_stats(md), region_id, first_area, n_areas,
			      clear, result, maxlen);
}
1195fd2ed4d2SMikulas Patocka 
/*
 * Handler for the "set aux" stats message: <msg> <id> <aux_data>.
 *
 * <id> must be a non-negative decimal integer with nothing trailing it;
 * argv[2] is passed unchanged to dm_stats_set_aux().
 *
 * Returns -EINVAL on malformed arguments, otherwise the result of
 * dm_stats_set_aux().
 */
static int message_stats_set_aux(struct mapped_device *md,
				 unsigned int argc, char **argv)
{
	char trailing;
	int stat_id;

	if (argc != 3)
		return -EINVAL;

	/* Exactly one conversion means a clean integer with no suffix. */
	if (sscanf(argv[1], "%d%c", &stat_id, &trailing) != 1)
		return -EINVAL;
	if (stat_id < 0)
		return -EINVAL;

	return dm_stats_set_aux(dm_get_stats(md), stat_id, argv[2]);
}
1210fd2ed4d2SMikulas Patocka 
/*
 * Dispatch a statistics message to its handler.
 *
 * argv[0] is compared case-insensitively against the known "@stats_*"
 * message names and the matching handler is invoked with the full
 * argument vector.
 *
 * Returns 2 when argv[0] is not a stats message; otherwise returns the
 * handler's result.  A handler result of -EINVAL is additionally logged.
 */
int dm_stats_message(struct mapped_device *md, unsigned int argc, char **argv,
		     char *result, unsigned int maxlen)
{
	const char *cmd = argv[0];
	int ret;

	/* All messages here must start with '@' */
	if (!strcasecmp(cmd, "@stats_create")) {
		ret = message_stats_create(md, argc, argv, result, maxlen);
	} else if (!strcasecmp(cmd, "@stats_delete")) {
		ret = message_stats_delete(md, argc, argv);
	} else if (!strcasecmp(cmd, "@stats_clear")) {
		ret = message_stats_clear(md, argc, argv);
	} else if (!strcasecmp(cmd, "@stats_list")) {
		ret = message_stats_list(md, argc, argv, result, maxlen);
	} else if (!strcasecmp(cmd, "@stats_print")) {
		ret = message_stats_print(md, argc, argv, false, result, maxlen);
	} else if (!strcasecmp(cmd, "@stats_print_clear")) {
		ret = message_stats_print(md, argc, argv, true, result, maxlen);
	} else if (!strcasecmp(cmd, "@stats_set_aux")) {
		ret = message_stats_set_aux(md, argc, argv);
	} else {
		/* this wasn't a stats message */
		return 2;
	}

	if (ret == -EINVAL)
		DMCRIT("Invalid parameters for message %s", cmd);

	return ret;
}
1239fd2ed4d2SMikulas Patocka 
dm_statistics_init(void)1240fd2ed4d2SMikulas Patocka int __init dm_statistics_init(void)
1241fd2ed4d2SMikulas Patocka {
124276f5bee5SMikulas Patocka 	shared_memory_amount = 0;
1243fd2ed4d2SMikulas Patocka 	dm_stat_need_rcu_barrier = 0;
1244fd2ed4d2SMikulas Patocka 	return 0;
1245fd2ed4d2SMikulas Patocka }
1246fd2ed4d2SMikulas Patocka 
dm_statistics_exit(void)1247fd2ed4d2SMikulas Patocka void dm_statistics_exit(void)
1248fd2ed4d2SMikulas Patocka {
1249fd2ed4d2SMikulas Patocka 	if (dm_stat_need_rcu_barrier)
1250fd2ed4d2SMikulas Patocka 		rcu_barrier();
1251fd2ed4d2SMikulas Patocka 	if (WARN_ON(shared_memory_amount))
1252fd2ed4d2SMikulas Patocka 		DMCRIT("shared_memory_amount leaked: %lu", shared_memory_amount);
1253fd2ed4d2SMikulas Patocka }
1254fd2ed4d2SMikulas Patocka 
12556a808034SHeinz Mauelshagen module_param_named(stats_current_allocated_bytes, shared_memory_amount, ulong, 0444);
1256fd2ed4d2SMikulas Patocka MODULE_PARM_DESC(stats_current_allocated_bytes, "Memory currently used by statistics");
1257