xref: /openbmc/linux/fs/dlm/dir.c (revision c04fecb4d9f7753e0cbff7edd03ec68f8721cdce)
1e7fd4179SDavid Teigland /******************************************************************************
2e7fd4179SDavid Teigland *******************************************************************************
3e7fd4179SDavid Teigland **
4e7fd4179SDavid Teigland **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
5e7fd4179SDavid Teigland **  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
6e7fd4179SDavid Teigland **
7e7fd4179SDavid Teigland **  This copyrighted material is made available to anyone wishing to use,
8e7fd4179SDavid Teigland **  modify, copy, or redistribute it subject to the terms and conditions
9e7fd4179SDavid Teigland **  of the GNU General Public License v.2.
10e7fd4179SDavid Teigland **
11e7fd4179SDavid Teigland *******************************************************************************
12e7fd4179SDavid Teigland ******************************************************************************/
13e7fd4179SDavid Teigland 
14e7fd4179SDavid Teigland #include "dlm_internal.h"
15e7fd4179SDavid Teigland #include "lockspace.h"
16e7fd4179SDavid Teigland #include "member.h"
17e7fd4179SDavid Teigland #include "lowcomms.h"
18e7fd4179SDavid Teigland #include "rcom.h"
19e7fd4179SDavid Teigland #include "config.h"
20e7fd4179SDavid Teigland #include "memory.h"
21e7fd4179SDavid Teigland #include "recover.h"
22e7fd4179SDavid Teigland #include "util.h"
23e7fd4179SDavid Teigland #include "lock.h"
24e7fd4179SDavid Teigland #include "dir.h"
25e7fd4179SDavid Teigland 
26e7fd4179SDavid Teigland /*
27e7fd4179SDavid Teigland  * We use the upper 16 bits of the hash value to select the directory node.
28e7fd4179SDavid Teigland  * Low bits are used for distribution of rsb's among hash buckets on each node.
29e7fd4179SDavid Teigland  *
30e7fd4179SDavid Teigland  * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of
31e7fd4179SDavid Teigland  * num_nodes to the hash value.  This value in the desired range is used as an
32e7fd4179SDavid Teigland  * offset into the sorted list of nodeid's to give the particular nodeid.
33e7fd4179SDavid Teigland  */
34e7fd4179SDavid Teigland 
35e7fd4179SDavid Teigland int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash)
36e7fd4179SDavid Teigland {
37*c04fecb4SDavid Teigland 	uint32_t node;
38e7fd4179SDavid Teigland 
39*c04fecb4SDavid Teigland 	if (ls->ls_num_nodes == 1)
40*c04fecb4SDavid Teigland 		return dlm_our_nodeid();
41*c04fecb4SDavid Teigland 	else {
42e7fd4179SDavid Teigland 		node = (hash >> 16) % ls->ls_total_weight;
43*c04fecb4SDavid Teigland 		return ls->ls_node_array[node];
44e7fd4179SDavid Teigland 	}
45e7fd4179SDavid Teigland }
46e7fd4179SDavid Teigland 
47e7fd4179SDavid Teigland int dlm_dir_nodeid(struct dlm_rsb *r)
48e7fd4179SDavid Teigland {
49*c04fecb4SDavid Teigland 	return r->res_dir_nodeid;
50e7fd4179SDavid Teigland }
51e7fd4179SDavid Teigland 
52*c04fecb4SDavid Teigland void dlm_recover_dir_nodeid(struct dlm_ls *ls)
53e7fd4179SDavid Teigland {
54*c04fecb4SDavid Teigland 	struct dlm_rsb *r;
55e7fd4179SDavid Teigland 
56*c04fecb4SDavid Teigland 	down_read(&ls->ls_root_sem);
57*c04fecb4SDavid Teigland 	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
58*c04fecb4SDavid Teigland 		r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash);
59e7fd4179SDavid Teigland 	}
60*c04fecb4SDavid Teigland 	up_read(&ls->ls_root_sem);
61e7fd4179SDavid Teigland }
62e7fd4179SDavid Teigland 
63e7fd4179SDavid Teigland int dlm_recover_directory(struct dlm_ls *ls)
64e7fd4179SDavid Teigland {
65e7fd4179SDavid Teigland 	struct dlm_member *memb;
66e7fd4179SDavid Teigland 	char *b, *last_name = NULL;
67*c04fecb4SDavid Teigland 	int error = -ENOMEM, last_len, nodeid, result;
68e7fd4179SDavid Teigland 	uint16_t namelen;
69*c04fecb4SDavid Teigland 	unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0;
70e7fd4179SDavid Teigland 
71e7fd4179SDavid Teigland 	log_debug(ls, "dlm_recover_directory");
72e7fd4179SDavid Teigland 
73e7fd4179SDavid Teigland 	if (dlm_no_directory(ls))
74e7fd4179SDavid Teigland 		goto out_status;
75e7fd4179SDavid Teigland 
76573c24c4SDavid Teigland 	last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS);
77e7fd4179SDavid Teigland 	if (!last_name)
78e7fd4179SDavid Teigland 		goto out;
79e7fd4179SDavid Teigland 
80e7fd4179SDavid Teigland 	list_for_each_entry(memb, &ls->ls_nodes, list) {
81*c04fecb4SDavid Teigland 		if (memb->nodeid == dlm_our_nodeid())
82*c04fecb4SDavid Teigland 			continue;
83*c04fecb4SDavid Teigland 
84e7fd4179SDavid Teigland 		memset(last_name, 0, DLM_RESNAME_MAXLEN);
85e7fd4179SDavid Teigland 		last_len = 0;
86e7fd4179SDavid Teigland 
87e7fd4179SDavid Teigland 		for (;;) {
88cd9df1aaSAl Viro 			int left;
89e7fd4179SDavid Teigland 			error = dlm_recovery_stopped(ls);
90e7fd4179SDavid Teigland 			if (error)
91e7fd4179SDavid Teigland 				goto out_free;
92e7fd4179SDavid Teigland 
93e7fd4179SDavid Teigland 			error = dlm_rcom_names(ls, memb->nodeid,
94e7fd4179SDavid Teigland 					       last_name, last_len);
95e7fd4179SDavid Teigland 			if (error)
96e7fd4179SDavid Teigland 				goto out_free;
97e7fd4179SDavid Teigland 
98*c04fecb4SDavid Teigland 			cond_resched();
99e7fd4179SDavid Teigland 
100e7fd4179SDavid Teigland 			/*
101e7fd4179SDavid Teigland 			 * pick namelen/name pairs out of received buffer
102e7fd4179SDavid Teigland 			 */
103e7fd4179SDavid Teigland 
1044007685cSAl Viro 			b = ls->ls_recover_buf->rc_buf;
105cd9df1aaSAl Viro 			left = ls->ls_recover_buf->rc_header.h_length;
106cd9df1aaSAl Viro 			left -= sizeof(struct dlm_rcom);
107e7fd4179SDavid Teigland 
108e7fd4179SDavid Teigland 			for (;;) {
109cd9df1aaSAl Viro 				__be16 v;
110cd9df1aaSAl Viro 
111cd9df1aaSAl Viro 				error = -EINVAL;
112cd9df1aaSAl Viro 				if (left < sizeof(__be16))
113cd9df1aaSAl Viro 					goto out_free;
114cd9df1aaSAl Viro 
115cd9df1aaSAl Viro 				memcpy(&v, b, sizeof(__be16));
116cd9df1aaSAl Viro 				namelen = be16_to_cpu(v);
117cd9df1aaSAl Viro 				b += sizeof(__be16);
118cd9df1aaSAl Viro 				left -= sizeof(__be16);
119e7fd4179SDavid Teigland 
120e7fd4179SDavid Teigland 				/* namelen of 0xFFFFF marks end of names for
121e7fd4179SDavid Teigland 				   this node; namelen of 0 marks end of the
122e7fd4179SDavid Teigland 				   buffer */
123e7fd4179SDavid Teigland 
124e7fd4179SDavid Teigland 				if (namelen == 0xFFFF)
125e7fd4179SDavid Teigland 					goto done;
126e7fd4179SDavid Teigland 				if (!namelen)
127e7fd4179SDavid Teigland 					break;
128e7fd4179SDavid Teigland 
129cd9df1aaSAl Viro 				if (namelen > left)
130cd9df1aaSAl Viro 					goto out_free;
131cd9df1aaSAl Viro 
132cd9df1aaSAl Viro 				if (namelen > DLM_RESNAME_MAXLEN)
133cd9df1aaSAl Viro 					goto out_free;
134cd9df1aaSAl Viro 
135*c04fecb4SDavid Teigland 				error = dlm_master_lookup(ls, memb->nodeid,
136*c04fecb4SDavid Teigland 							  b, namelen,
137*c04fecb4SDavid Teigland 							  DLM_LU_RECOVER_DIR,
138*c04fecb4SDavid Teigland 							  &nodeid, &result);
139*c04fecb4SDavid Teigland 				if (error) {
140*c04fecb4SDavid Teigland 					log_error(ls, "recover_dir lookup %d",
141*c04fecb4SDavid Teigland 						  error);
142e7fd4179SDavid Teigland 					goto out_free;
143*c04fecb4SDavid Teigland 				}
144e7fd4179SDavid Teigland 
145*c04fecb4SDavid Teigland 				/* The name was found in rsbtbl, but the
146*c04fecb4SDavid Teigland 				 * master nodeid is different from
147*c04fecb4SDavid Teigland 				 * memb->nodeid which says it is the master.
148*c04fecb4SDavid Teigland 				 * This should not happen. */
149*c04fecb4SDavid Teigland 
150*c04fecb4SDavid Teigland 				if (result == DLM_LU_MATCH &&
151*c04fecb4SDavid Teigland 				    nodeid != memb->nodeid) {
152*c04fecb4SDavid Teigland 					count_bad++;
153*c04fecb4SDavid Teigland 					log_error(ls, "recover_dir lookup %d "
154*c04fecb4SDavid Teigland 						  "nodeid %d memb %d bad %u",
155*c04fecb4SDavid Teigland 						  result, nodeid, memb->nodeid,
156*c04fecb4SDavid Teigland 						  count_bad);
157*c04fecb4SDavid Teigland 					print_hex_dump_bytes("dlm_recover_dir ",
158*c04fecb4SDavid Teigland 							     DUMP_PREFIX_NONE,
159*c04fecb4SDavid Teigland 							     b, namelen);
160*c04fecb4SDavid Teigland 				}
161*c04fecb4SDavid Teigland 
162*c04fecb4SDavid Teigland 				/* The name was found in rsbtbl, and the
163*c04fecb4SDavid Teigland 				 * master nodeid matches memb->nodeid. */
164*c04fecb4SDavid Teigland 
165*c04fecb4SDavid Teigland 				if (result == DLM_LU_MATCH &&
166*c04fecb4SDavid Teigland 				    nodeid == memb->nodeid) {
167*c04fecb4SDavid Teigland 					count_match++;
168*c04fecb4SDavid Teigland 				}
169*c04fecb4SDavid Teigland 
170*c04fecb4SDavid Teigland 				/* The name was not found in rsbtbl and was
171*c04fecb4SDavid Teigland 				 * added with memb->nodeid as the master. */
172*c04fecb4SDavid Teigland 
173*c04fecb4SDavid Teigland 				if (result == DLM_LU_ADD) {
174*c04fecb4SDavid Teigland 					count_add++;
175*c04fecb4SDavid Teigland 				}
176*c04fecb4SDavid Teigland 
177e7fd4179SDavid Teigland 				last_len = namelen;
178e7fd4179SDavid Teigland 				memcpy(last_name, b, namelen);
179e7fd4179SDavid Teigland 				b += namelen;
180cd9df1aaSAl Viro 				left -= namelen;
181e7fd4179SDavid Teigland 				count++;
182e7fd4179SDavid Teigland 			}
183e7fd4179SDavid Teigland 		}
184e7fd4179SDavid Teigland 	 done:
185e7fd4179SDavid Teigland 		;
186e7fd4179SDavid Teigland 	}
187e7fd4179SDavid Teigland 
188e7fd4179SDavid Teigland  out_status:
189e7fd4179SDavid Teigland 	error = 0;
190*c04fecb4SDavid Teigland 	dlm_set_recover_status(ls, DLM_RS_DIR);
191*c04fecb4SDavid Teigland 
192*c04fecb4SDavid Teigland 	log_debug(ls, "dlm_recover_directory %u in %u new",
193*c04fecb4SDavid Teigland 		  count, count_add);
194e7fd4179SDavid Teigland  out_free:
195e7fd4179SDavid Teigland 	kfree(last_name);
196e7fd4179SDavid Teigland  out:
197e7fd4179SDavid Teigland 	return error;
198e7fd4179SDavid Teigland }
199e7fd4179SDavid Teigland 
20085f0379aSDavid Teigland static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len)
20185f0379aSDavid Teigland {
20285f0379aSDavid Teigland 	struct dlm_rsb *r;
2037210cb7aSDavid Teigland 	uint32_t hash, bucket;
2047210cb7aSDavid Teigland 	int rv;
2057210cb7aSDavid Teigland 
2067210cb7aSDavid Teigland 	hash = jhash(name, len, 0);
2077210cb7aSDavid Teigland 	bucket = hash & (ls->ls_rsbtbl_size - 1);
2087210cb7aSDavid Teigland 
2097210cb7aSDavid Teigland 	spin_lock(&ls->ls_rsbtbl[bucket].lock);
210*c04fecb4SDavid Teigland 	rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r);
2117210cb7aSDavid Teigland 	if (rv)
2127210cb7aSDavid Teigland 		rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss,
213*c04fecb4SDavid Teigland 					 name, len, &r);
2147210cb7aSDavid Teigland 	spin_unlock(&ls->ls_rsbtbl[bucket].lock);
2157210cb7aSDavid Teigland 
2167210cb7aSDavid Teigland 	if (!rv)
2177210cb7aSDavid Teigland 		return r;
21885f0379aSDavid Teigland 
21985f0379aSDavid Teigland 	down_read(&ls->ls_root_sem);
22085f0379aSDavid Teigland 	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
22185f0379aSDavid Teigland 		if (len == r->res_length && !memcmp(name, r->res_name, len)) {
22285f0379aSDavid Teigland 			up_read(&ls->ls_root_sem);
223*c04fecb4SDavid Teigland 			log_debug(ls, "find_rsb_root revert to root_list %s",
2247210cb7aSDavid Teigland 				  r->res_name);
22585f0379aSDavid Teigland 			return r;
22685f0379aSDavid Teigland 		}
22785f0379aSDavid Teigland 	}
22885f0379aSDavid Teigland 	up_read(&ls->ls_root_sem);
22985f0379aSDavid Teigland 	return NULL;
23085f0379aSDavid Teigland }
23185f0379aSDavid Teigland 
23285f0379aSDavid Teigland /* Find the rsb where we left off (or start again), then send rsb names
23385f0379aSDavid Teigland    for rsb's we're master of and whose directory node matches the requesting
23485f0379aSDavid Teigland    node.  inbuf is the rsb name last sent, inlen is the name's length */
235e7fd4179SDavid Teigland 
236e7fd4179SDavid Teigland void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
237e7fd4179SDavid Teigland  			   char *outbuf, int outlen, int nodeid)
238e7fd4179SDavid Teigland {
239e7fd4179SDavid Teigland 	struct list_head *list;
24085f0379aSDavid Teigland 	struct dlm_rsb *r;
24185f0379aSDavid Teigland 	int offset = 0, dir_nodeid;
242cd8e4679SHarvey Harrison 	__be16 be_namelen;
243e7fd4179SDavid Teigland 
244e7fd4179SDavid Teigland 	down_read(&ls->ls_root_sem);
24585f0379aSDavid Teigland 
24685f0379aSDavid Teigland 	if (inlen > 1) {
24785f0379aSDavid Teigland 		r = find_rsb_root(ls, inbuf, inlen);
24885f0379aSDavid Teigland 		if (!r) {
24985f0379aSDavid Teigland 			inbuf[inlen - 1] = '\0';
25085f0379aSDavid Teigland 			log_error(ls, "copy_master_names from %d start %d %s",
25185f0379aSDavid Teigland 				  nodeid, inlen, inbuf);
25285f0379aSDavid Teigland 			goto out;
25385f0379aSDavid Teigland 		}
25485f0379aSDavid Teigland 		list = r->res_root_list.next;
25585f0379aSDavid Teigland 	} else {
256e7fd4179SDavid Teigland 		list = ls->ls_root_list.next;
25785f0379aSDavid Teigland 	}
258e7fd4179SDavid Teigland 
259e7fd4179SDavid Teigland 	for (offset = 0; list != &ls->ls_root_list; list = list->next) {
260e7fd4179SDavid Teigland 		r = list_entry(list, struct dlm_rsb, res_root_list);
261e7fd4179SDavid Teigland 		if (r->res_nodeid)
262e7fd4179SDavid Teigland 			continue;
263e7fd4179SDavid Teigland 
264e7fd4179SDavid Teigland 		dir_nodeid = dlm_dir_nodeid(r);
265e7fd4179SDavid Teigland 		if (dir_nodeid != nodeid)
266e7fd4179SDavid Teigland 			continue;
267e7fd4179SDavid Teigland 
268e7fd4179SDavid Teigland 		/*
269e7fd4179SDavid Teigland 		 * The block ends when we can't fit the following in the
270e7fd4179SDavid Teigland 		 * remaining buffer space:
271e7fd4179SDavid Teigland 		 * namelen (uint16_t) +
272e7fd4179SDavid Teigland 		 * name (r->res_length) +
273e7fd4179SDavid Teigland 		 * end-of-block record 0x0000 (uint16_t)
274e7fd4179SDavid Teigland 		 */
275e7fd4179SDavid Teigland 
276e7fd4179SDavid Teigland 		if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) {
277e7fd4179SDavid Teigland 			/* Write end-of-block record */
278cd8e4679SHarvey Harrison 			be_namelen = cpu_to_be16(0);
279cd8e4679SHarvey Harrison 			memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
280cd8e4679SHarvey Harrison 			offset += sizeof(__be16);
281*c04fecb4SDavid Teigland 			ls->ls_recover_dir_sent_msg++;
282e7fd4179SDavid Teigland 			goto out;
283e7fd4179SDavid Teigland 		}
284e7fd4179SDavid Teigland 
285e7fd4179SDavid Teigland 		be_namelen = cpu_to_be16(r->res_length);
286cd8e4679SHarvey Harrison 		memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
287cd8e4679SHarvey Harrison 		offset += sizeof(__be16);
288e7fd4179SDavid Teigland 		memcpy(outbuf + offset, r->res_name, r->res_length);
289e7fd4179SDavid Teigland 		offset += r->res_length;
290*c04fecb4SDavid Teigland 		ls->ls_recover_dir_sent_res++;
291e7fd4179SDavid Teigland 	}
292e7fd4179SDavid Teigland 
293e7fd4179SDavid Teigland 	/*
294e7fd4179SDavid Teigland 	 * If we've reached the end of the list (and there's room) write a
295e7fd4179SDavid Teigland 	 * terminating record.
296e7fd4179SDavid Teigland 	 */
297e7fd4179SDavid Teigland 
298e7fd4179SDavid Teigland 	if ((list == &ls->ls_root_list) &&
299e7fd4179SDavid Teigland 	    (offset + sizeof(uint16_t) <= outlen)) {
300cd8e4679SHarvey Harrison 		be_namelen = cpu_to_be16(0xFFFF);
301cd8e4679SHarvey Harrison 		memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
302cd8e4679SHarvey Harrison 		offset += sizeof(__be16);
303*c04fecb4SDavid Teigland 		ls->ls_recover_dir_sent_msg++;
304e7fd4179SDavid Teigland 	}
305e7fd4179SDavid Teigland  out:
306e7fd4179SDavid Teigland 	up_read(&ls->ls_root_sem);
307e7fd4179SDavid Teigland }
308e7fd4179SDavid Teigland 
309