xref: /openbmc/linux/fs/ceph/metric.c (revision 35f752be)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #include <linux/ceph/ceph_debug.h>
3 
4 #include <linux/types.h>
5 #include <linux/percpu_counter.h>
6 #include <linux/math64.h>
7 
8 #include "metric.h"
9 #include "mds_client.h"
10 
11 static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
12 				   struct ceph_mds_session *s)
13 {
14 	struct ceph_metric_head *head;
15 	struct ceph_metric_cap *cap;
16 	struct ceph_metric_read_latency *read;
17 	struct ceph_metric_write_latency *write;
18 	struct ceph_metric_metadata_latency *meta;
19 	struct ceph_metric_dlease *dlease;
20 	struct ceph_opened_files *files;
21 	struct ceph_pinned_icaps *icaps;
22 	struct ceph_opened_inodes *inodes;
23 	struct ceph_client_metric *m = &mdsc->metric;
24 	u64 nr_caps = atomic64_read(&m->total_caps);
25 	struct ceph_msg *msg;
26 	struct timespec64 ts;
27 	s64 sum;
28 	s32 items = 0;
29 	s32 len;
30 
31 	len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
32 	      + sizeof(*meta) + sizeof(*dlease) + sizeof(*files)
33 	      + sizeof(*icaps) + sizeof(*inodes);
34 
35 	msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
36 	if (!msg) {
37 		pr_err("send metrics to mds%d, failed to allocate message\n",
38 		       s->s_mds);
39 		return false;
40 	}
41 
42 	head = msg->front.iov_base;
43 
44 	/* encode the cap metric */
45 	cap = (struct ceph_metric_cap *)(head + 1);
46 	cap->type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
47 	cap->ver = 1;
48 	cap->compat = 1;
49 	cap->data_len = cpu_to_le32(sizeof(*cap) - 10);
50 	cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
51 	cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
52 	cap->total = cpu_to_le64(nr_caps);
53 	items++;
54 
55 	/* encode the read latency metric */
56 	read = (struct ceph_metric_read_latency *)(cap + 1);
57 	read->type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
58 	read->ver = 1;
59 	read->compat = 1;
60 	read->data_len = cpu_to_le32(sizeof(*read) - 10);
61 	sum = m->read_latency_sum;
62 	jiffies_to_timespec64(sum, &ts);
63 	read->sec = cpu_to_le32(ts.tv_sec);
64 	read->nsec = cpu_to_le32(ts.tv_nsec);
65 	items++;
66 
67 	/* encode the write latency metric */
68 	write = (struct ceph_metric_write_latency *)(read + 1);
69 	write->type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
70 	write->ver = 1;
71 	write->compat = 1;
72 	write->data_len = cpu_to_le32(sizeof(*write) - 10);
73 	sum = m->write_latency_sum;
74 	jiffies_to_timespec64(sum, &ts);
75 	write->sec = cpu_to_le32(ts.tv_sec);
76 	write->nsec = cpu_to_le32(ts.tv_nsec);
77 	items++;
78 
79 	/* encode the metadata latency metric */
80 	meta = (struct ceph_metric_metadata_latency *)(write + 1);
81 	meta->type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
82 	meta->ver = 1;
83 	meta->compat = 1;
84 	meta->data_len = cpu_to_le32(sizeof(*meta) - 10);
85 	sum = m->metadata_latency_sum;
86 	jiffies_to_timespec64(sum, &ts);
87 	meta->sec = cpu_to_le32(ts.tv_sec);
88 	meta->nsec = cpu_to_le32(ts.tv_nsec);
89 	items++;
90 
91 	/* encode the dentry lease metric */
92 	dlease = (struct ceph_metric_dlease *)(meta + 1);
93 	dlease->type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
94 	dlease->ver = 1;
95 	dlease->compat = 1;
96 	dlease->data_len = cpu_to_le32(sizeof(*dlease) - 10);
97 	dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
98 	dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
99 	dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
100 	items++;
101 
102 	sum = percpu_counter_sum(&m->total_inodes);
103 
104 	/* encode the opened files metric */
105 	files = (struct ceph_opened_files *)(dlease + 1);
106 	files->type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
107 	files->ver = 1;
108 	files->compat = 1;
109 	files->data_len = cpu_to_le32(sizeof(*files) - 10);
110 	files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
111 	files->total = cpu_to_le64(sum);
112 	items++;
113 
114 	/* encode the pinned icaps metric */
115 	icaps = (struct ceph_pinned_icaps *)(files + 1);
116 	icaps->type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
117 	icaps->ver = 1;
118 	icaps->compat = 1;
119 	icaps->data_len = cpu_to_le32(sizeof(*icaps) - 10);
120 	icaps->pinned_icaps = cpu_to_le64(nr_caps);
121 	icaps->total = cpu_to_le64(sum);
122 	items++;
123 
124 	/* encode the opened inodes metric */
125 	inodes = (struct ceph_opened_inodes *)(icaps + 1);
126 	inodes->type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
127 	inodes->ver = 1;
128 	inodes->compat = 1;
129 	inodes->data_len = cpu_to_le32(sizeof(*inodes) - 10);
130 	inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
131 	inodes->total = cpu_to_le64(sum);
132 	items++;
133 
134 	put_unaligned_le32(items, &head->num);
135 	msg->front.iov_len = len;
136 	msg->hdr.version = cpu_to_le16(1);
137 	msg->hdr.compat_version = cpu_to_le16(1);
138 	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
139 	dout("client%llu send metrics to mds%d\n",
140 	     ceph_client_gid(mdsc->fsc->client), s->s_mds);
141 	ceph_con_send(&s->s_con, msg);
142 
143 	return true;
144 }
145 
146 
147 static void metric_get_session(struct ceph_mds_client *mdsc)
148 {
149 	struct ceph_mds_session *s;
150 	int i;
151 
152 	mutex_lock(&mdsc->mutex);
153 	for (i = 0; i < mdsc->max_sessions; i++) {
154 		s = __ceph_lookup_mds_session(mdsc, i);
155 		if (!s)
156 			continue;
157 
158 		/*
159 		 * Skip it if MDS doesn't support the metric collection,
160 		 * or the MDS will close the session's socket connection
161 		 * directly when it get this message.
162 		 */
163 		if (check_session_state(s) &&
164 		    test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &s->s_features)) {
165 			mdsc->metric.session = s;
166 			break;
167 		}
168 
169 		ceph_put_mds_session(s);
170 	}
171 	mutex_unlock(&mdsc->mutex);
172 }
173 
174 static void metric_delayed_work(struct work_struct *work)
175 {
176 	struct ceph_client_metric *m =
177 		container_of(work, struct ceph_client_metric, delayed_work.work);
178 	struct ceph_mds_client *mdsc =
179 		container_of(m, struct ceph_mds_client, metric);
180 
181 	if (mdsc->stopping)
182 		return;
183 
184 	if (!m->session || !check_session_state(m->session)) {
185 		if (m->session) {
186 			ceph_put_mds_session(m->session);
187 			m->session = NULL;
188 		}
189 		metric_get_session(mdsc);
190 	}
191 	if (m->session) {
192 		ceph_mdsc_send_metrics(mdsc, m->session);
193 		metric_schedule_delayed(m);
194 	}
195 }
196 
197 int ceph_metric_init(struct ceph_client_metric *m)
198 {
199 	int ret;
200 
201 	if (!m)
202 		return -EINVAL;
203 
204 	atomic64_set(&m->total_dentries, 0);
205 	ret = percpu_counter_init(&m->d_lease_hit, 0, GFP_KERNEL);
206 	if (ret)
207 		return ret;
208 
209 	ret = percpu_counter_init(&m->d_lease_mis, 0, GFP_KERNEL);
210 	if (ret)
211 		goto err_d_lease_mis;
212 
213 	atomic64_set(&m->total_caps, 0);
214 	ret = percpu_counter_init(&m->i_caps_hit, 0, GFP_KERNEL);
215 	if (ret)
216 		goto err_i_caps_hit;
217 
218 	ret = percpu_counter_init(&m->i_caps_mis, 0, GFP_KERNEL);
219 	if (ret)
220 		goto err_i_caps_mis;
221 
222 	spin_lock_init(&m->read_metric_lock);
223 	m->read_latency_sq_sum = 0;
224 	m->read_latency_min = KTIME_MAX;
225 	m->read_latency_max = 0;
226 	m->total_reads = 0;
227 	m->read_latency_sum = 0;
228 
229 	spin_lock_init(&m->write_metric_lock);
230 	m->write_latency_sq_sum = 0;
231 	m->write_latency_min = KTIME_MAX;
232 	m->write_latency_max = 0;
233 	m->total_writes = 0;
234 	m->write_latency_sum = 0;
235 
236 	spin_lock_init(&m->metadata_metric_lock);
237 	m->metadata_latency_sq_sum = 0;
238 	m->metadata_latency_min = KTIME_MAX;
239 	m->metadata_latency_max = 0;
240 	m->total_metadatas = 0;
241 	m->metadata_latency_sum = 0;
242 
243 	atomic64_set(&m->opened_files, 0);
244 	ret = percpu_counter_init(&m->opened_inodes, 0, GFP_KERNEL);
245 	if (ret)
246 		goto err_opened_inodes;
247 	ret = percpu_counter_init(&m->total_inodes, 0, GFP_KERNEL);
248 	if (ret)
249 		goto err_total_inodes;
250 
251 	m->session = NULL;
252 	INIT_DELAYED_WORK(&m->delayed_work, metric_delayed_work);
253 
254 	return 0;
255 
256 err_total_inodes:
257 	percpu_counter_destroy(&m->opened_inodes);
258 err_opened_inodes:
259 	percpu_counter_destroy(&m->i_caps_mis);
260 err_i_caps_mis:
261 	percpu_counter_destroy(&m->i_caps_hit);
262 err_i_caps_hit:
263 	percpu_counter_destroy(&m->d_lease_mis);
264 err_d_lease_mis:
265 	percpu_counter_destroy(&m->d_lease_hit);
266 
267 	return ret;
268 }
269 
270 void ceph_metric_destroy(struct ceph_client_metric *m)
271 {
272 	if (!m)
273 		return;
274 
275 	percpu_counter_destroy(&m->total_inodes);
276 	percpu_counter_destroy(&m->opened_inodes);
277 	percpu_counter_destroy(&m->i_caps_mis);
278 	percpu_counter_destroy(&m->i_caps_hit);
279 	percpu_counter_destroy(&m->d_lease_mis);
280 	percpu_counter_destroy(&m->d_lease_hit);
281 
282 	cancel_delayed_work_sync(&m->delayed_work);
283 
284 	if (m->session)
285 		ceph_put_mds_session(m->session);
286 }
287 
288 static inline void __update_latency(ktime_t *totalp, ktime_t *lsump,
289 				    ktime_t *min, ktime_t *max,
290 				    ktime_t *sq_sump, ktime_t lat)
291 {
292 	ktime_t total, avg, sq, lsum;
293 
294 	total = ++(*totalp);
295 	lsum = (*lsump += lat);
296 
297 	if (unlikely(lat < *min))
298 		*min = lat;
299 	if (unlikely(lat > *max))
300 		*max = lat;
301 
302 	if (unlikely(total == 1))
303 		return;
304 
305 	/* the sq is (lat - old_avg) * (lat - new_avg) */
306 	avg = DIV64_U64_ROUND_CLOSEST((lsum - lat), (total - 1));
307 	sq = lat - avg;
308 	avg = DIV64_U64_ROUND_CLOSEST(lsum, total);
309 	sq = sq * (lat - avg);
310 	*sq_sump += sq;
311 }
312 
313 void ceph_update_read_metrics(struct ceph_client_metric *m,
314 			      ktime_t r_start, ktime_t r_end,
315 			      int rc)
316 {
317 	ktime_t lat = ktime_sub(r_end, r_start);
318 
319 	if (unlikely(rc < 0 && rc != -ENOENT && rc != -ETIMEDOUT))
320 		return;
321 
322 	spin_lock(&m->read_metric_lock);
323 	__update_latency(&m->total_reads, &m->read_latency_sum,
324 			 &m->read_latency_min, &m->read_latency_max,
325 			 &m->read_latency_sq_sum, lat);
326 	spin_unlock(&m->read_metric_lock);
327 }
328 
329 void ceph_update_write_metrics(struct ceph_client_metric *m,
330 			       ktime_t r_start, ktime_t r_end,
331 			       int rc)
332 {
333 	ktime_t lat = ktime_sub(r_end, r_start);
334 
335 	if (unlikely(rc && rc != -ETIMEDOUT))
336 		return;
337 
338 	spin_lock(&m->write_metric_lock);
339 	__update_latency(&m->total_writes, &m->write_latency_sum,
340 			 &m->write_latency_min, &m->write_latency_max,
341 			 &m->write_latency_sq_sum, lat);
342 	spin_unlock(&m->write_metric_lock);
343 }
344 
345 void ceph_update_metadata_metrics(struct ceph_client_metric *m,
346 				  ktime_t r_start, ktime_t r_end,
347 				  int rc)
348 {
349 	ktime_t lat = ktime_sub(r_end, r_start);
350 
351 	if (unlikely(rc && rc != -ENOENT))
352 		return;
353 
354 	spin_lock(&m->metadata_metric_lock);
355 	__update_latency(&m->total_metadatas, &m->metadata_latency_sum,
356 			 &m->metadata_latency_min, &m->metadata_latency_max,
357 			 &m->metadata_latency_sq_sum, lat);
358 	spin_unlock(&m->metadata_metric_lock);
359 }
360