xref: /openbmc/linux/fs/dlm/midcomms.c (revision b97d6790d03b763eca08847a9a5869a4291b9f9a)
12522fe45SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2e7fd4179SDavid Teigland /******************************************************************************
3e7fd4179SDavid Teigland *******************************************************************************
4e7fd4179SDavid Teigland **
5e7fd4179SDavid Teigland **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
6489d8e55SAlexander Aring **  Copyright (C) 2004-2021 Red Hat, Inc.  All rights reserved.
7e7fd4179SDavid Teigland **
8e7fd4179SDavid Teigland **
9e7fd4179SDavid Teigland *******************************************************************************
10e7fd4179SDavid Teigland ******************************************************************************/
11e7fd4179SDavid Teigland 
12e7fd4179SDavid Teigland /*
13e7fd4179SDavid Teigland  * midcomms.c
14e7fd4179SDavid Teigland  *
 * This is the appallingly named "mid-level" comms layer. It takes care of
 * delivering "reliable" communication at the application layer, on top of
 * the lowcomms transport layer in use.
18e7fd4179SDavid Teigland  *
19489d8e55SAlexander Aring  * How it works:
20e7fd4179SDavid Teigland  *
 * Each node keeps track of all sent DLM messages in send_queue with a
 * sequence number. The receiver will send a DLM_ACK message back for every
 * DLM message received at the other side. If a reconnect happens in lowcomms
 * we will send all unacknowledged dlm messages again. The receiving side
 * might drop any already received message by comparing sequence numbers.
26489d8e55SAlexander Aring  *
27489d8e55SAlexander Aring  * How version detection works:
28489d8e55SAlexander Aring  *
 * Because dlm has pre-configured node addresses on every side, it is in
 * its nature that every side connects and starts to transmit dlm messages
 * at startup, which ends in a race. However DLM_RCOM_NAMES, DLM_RCOM_STATUS
 * and their replies are the first messages which are exchanged. For
 * backwards compatibility these messages are not covered by the midcomms
 * re-transmission layer. These messages have their own re-transmission
 * handling in the dlm application layer. The version field of every node is
 * set from these RCOM messages as soon as they arrive and the node isn't
 * yet part of the nodes hash. There is also logic to detect a version
 * mismatch if something weird is going on or the first message isn't an
 * expected one.
39489d8e55SAlexander Aring  *
40489d8e55SAlexander Aring  * Termination:
41489d8e55SAlexander Aring  *
 * The midcomms layer does a 4-way handshake for termination at the DLM
 * protocol level, like TCP does with its half-closed socket support. SCTP
 * doesn't support half-closed sockets, so we do it in the DLM layer. Also a
 * socket shutdown() can be interrupted, e.g. by a tcp reset itself.
 * Additionally there exists the othercon paradigm in lowcomms, which cannot
 * easily be changed without breaking backwards compatibility. A node cannot
 * send anything to another node once a DLM_FIN message was sent. There is
 * additional logic to print a warning if DLM wants to do it. The state
 * handling is like RFC 793 but reduced to termination only. The "member
 * removal event" means the cluster manager removed the node from its
 * internal lists; at this point DLM does not send any message to the other
 * node. There exist two cases:
53489d8e55SAlexander Aring  *
54489d8e55SAlexander Aring  * 1. The cluster member was removed and we received a FIN
55489d8e55SAlexander Aring  * OR
56489d8e55SAlexander Aring  * 2. We received a FIN but the member was not removed yet
57489d8e55SAlexander Aring  *
58489d8e55SAlexander Aring  * One of these cases will do the CLOSE_WAIT to LAST_ACK change.
59489d8e55SAlexander Aring  *
60489d8e55SAlexander Aring  *
61489d8e55SAlexander Aring  *                              +---------+
62489d8e55SAlexander Aring  *                              | CLOSED  |
63489d8e55SAlexander Aring  *                              +---------+
64489d8e55SAlexander Aring  *                                   | add member/receive RCOM version
65489d8e55SAlexander Aring  *                                   |            detection msg
66489d8e55SAlexander Aring  *                                   V
67489d8e55SAlexander Aring  *                              +---------+
68489d8e55SAlexander Aring  *                              |  ESTAB  |
69489d8e55SAlexander Aring  *                              +---------+
70489d8e55SAlexander Aring  *                       CLOSE    |     |    rcv FIN
71489d8e55SAlexander Aring  *                      -------   |     |    -------
72489d8e55SAlexander Aring  * +---------+          snd FIN  /       \   snd ACK          +---------+
73489d8e55SAlexander Aring  * |  FIN    |<-----------------           ------------------>|  CLOSE  |
74489d8e55SAlexander Aring  * | WAIT-1  |------------------                              |   WAIT  |
75489d8e55SAlexander Aring  * +---------+          rcv FIN  \                            +---------+
76489d8e55SAlexander Aring  * | rcv ACK of FIN   -------   |                            CLOSE  | member
77489d8e55SAlexander Aring  * | --------------   snd ACK   |                           ------- | removal
78489d8e55SAlexander Aring  * V        x                   V                           snd FIN V event
79489d8e55SAlexander Aring  * +---------+                  +---------+                   +---------+
80489d8e55SAlexander Aring  * |FINWAIT-2|                  | CLOSING |                   | LAST-ACK|
81489d8e55SAlexander Aring  * +---------+                  +---------+                   +---------+
82489d8e55SAlexander Aring  * |                rcv ACK of FIN |                 rcv ACK of FIN |
83489d8e55SAlexander Aring  * |  rcv FIN       -------------- |                 -------------- |
84489d8e55SAlexander Aring  * |  -------              x       V                        x       V
85489d8e55SAlexander Aring  *  \ snd ACK                 +---------+                   +---------+
86489d8e55SAlexander Aring  *   ------------------------>| CLOSED  |                   | CLOSED  |
87489d8e55SAlexander Aring  *                            +---------+                   +---------+
88489d8e55SAlexander Aring  *
 * NOTE: any state can be interrupted by midcomms_close() and the state will
 * be switched to CLOSED in case of fencing. There is also some timeout
 * handling when we receive the version detection RCOM messages, which was
 * derived from observation.
93489d8e55SAlexander Aring  *
94489d8e55SAlexander Aring  * Future improvements:
95489d8e55SAlexander Aring  *
96489d8e55SAlexander Aring  * There exists some known issues/improvements of the dlm handling. Some
97489d8e55SAlexander Aring  * of them should be done in a next major dlm version bump which makes
98489d8e55SAlexander Aring  * it incompatible with previous versions.
99489d8e55SAlexander Aring  *
100489d8e55SAlexander Aring  * Unaligned memory access:
101489d8e55SAlexander Aring  *
 * There exist cases where the dlm message buffer length is not aligned
 * to 8 bytes. However, it seems nobody has detected any problem with it.
 * This can be fixed in the next major version bump of dlm.
105489d8e55SAlexander Aring  *
106489d8e55SAlexander Aring  * Version detection:
107489d8e55SAlexander Aring  *
108489d8e55SAlexander Aring  * The version detection and how it's done is related to backwards
109489d8e55SAlexander Aring  * compatibility. There exists better ways to make a better handling.
110489d8e55SAlexander Aring  * However this should be changed in the next major version bump of dlm.
111489d8e55SAlexander Aring  *
112489d8e55SAlexander Aring  * Tail Size checking:
113489d8e55SAlexander Aring  *
 * There exists a message tail payload in e.g. DLM_MSG; however, we don't
 * yet check it against the message length with regard to the receive
 * buffer length. That needs to be validated.
117489d8e55SAlexander Aring  *
118489d8e55SAlexander Aring  * Fencing bad nodes:
119489d8e55SAlexander Aring  *
120489d8e55SAlexander Aring  * At timeout places or weird sequence number behaviours we should send
121489d8e55SAlexander Aring  * a fencing request to the cluster manager.
122e7fd4179SDavid Teigland  */
123e7fd4179SDavid Teigland 
/* Debug switch to enable a 5 second sleep while waiting for a termination.
 * This can be useful to test fencing while a termination is running.
 * It requires a setup with gfs2 as the only dlm user, so that the last
 * umount terminates the connection.
 *
 * It proved useful for testing: while umount blocks for the 5 seconds,
 * just press the reset button. When a lot is being dropped the termination
 * process could take several seconds.
 */
#define DLM_DEBUG_FENCE_TERMINATION	0
134489d8e55SAlexander Aring 
135e01c4b7bSAlexander Aring #include <trace/events/dlm.h>
136489d8e55SAlexander Aring #include <net/tcp.h>
137489d8e55SAlexander Aring 
138e7fd4179SDavid Teigland #include "dlm_internal.h"
139e7fd4179SDavid Teigland #include "lowcomms.h"
140e7fd4179SDavid Teigland #include "config.h"
1416c547f26SAlexander Aring #include "memory.h"
142e7fd4179SDavid Teigland #include "lock.h"
143489d8e55SAlexander Aring #include "util.h"
144e7fd4179SDavid Teigland #include "midcomms.h"
145e7fd4179SDavid Teigland 
/* Start value of the per-node sequence counters (see midcomms_node_reset());
 * only meant to be changed for testing, e.g. to exercise overflows.
 */
#define DLM_SEQ_INIT			0

/* 5 seconds wait to sync the ending of dlm */
#define DLM_SHUTDOWN_TIMEOUT		msecs_to_jiffies(5000)

/* Node version value stored by midcomms_node_reset() */
#define DLM_VERSION_NOT_SET		0

/* Thresholds on delivered-message counts; presumably handed to
 * dlm_send_ack_threshold() by the send and receive paths - the users are
 * outside this part of the file, TODO confirm.
 */
#define DLM_SEND_ACK_BACK_MSG_THRESHOLD	32
#define DLM_RECV_ACK_BACK_MSG_THRESHOLD	(DLM_SEND_ACK_BACK_MSG_THRESHOLD * 8)
153489d8e55SAlexander Aring 
154489d8e55SAlexander Aring struct midcomms_node {
155489d8e55SAlexander Aring 	int nodeid;
156489d8e55SAlexander Aring 	uint32_t version;
157d00725caSAlexander Aring 	atomic_t seq_send;
158d00725caSAlexander Aring 	atomic_t seq_next;
159489d8e55SAlexander Aring 	/* These queues are unbound because we cannot drop any message in dlm.
160489d8e55SAlexander Aring 	 * We could send a fence signal for a specific node to the cluster
161489d8e55SAlexander Aring 	 * manager if queues hits some maximum value, however this handling
162489d8e55SAlexander Aring 	 * not supported yet.
163489d8e55SAlexander Aring 	 */
164489d8e55SAlexander Aring 	struct list_head send_queue;
165489d8e55SAlexander Aring 	spinlock_t send_queue_lock;
166489d8e55SAlexander Aring 	atomic_t send_queue_cnt;
167489d8e55SAlexander Aring #define DLM_NODE_FLAG_CLOSE	1
168489d8e55SAlexander Aring #define DLM_NODE_FLAG_STOP_TX	2
169489d8e55SAlexander Aring #define DLM_NODE_FLAG_STOP_RX	3
1701696c75fSAlexander Aring 	atomic_t ulp_delivered;
171489d8e55SAlexander Aring 	unsigned long flags;
172489d8e55SAlexander Aring 	wait_queue_head_t shutdown_wait;
173489d8e55SAlexander Aring 
174489d8e55SAlexander Aring 	/* dlm tcp termination state */
175489d8e55SAlexander Aring #define DLM_CLOSED	1
176489d8e55SAlexander Aring #define DLM_ESTABLISHED	2
177489d8e55SAlexander Aring #define DLM_FIN_WAIT1	3
178489d8e55SAlexander Aring #define DLM_FIN_WAIT2	4
179489d8e55SAlexander Aring #define DLM_CLOSE_WAIT	5
180489d8e55SAlexander Aring #define DLM_LAST_ACK	6
181489d8e55SAlexander Aring #define DLM_CLOSING	7
182489d8e55SAlexander Aring 	int state;
183489d8e55SAlexander Aring 	spinlock_t state_lock;
184489d8e55SAlexander Aring 
185489d8e55SAlexander Aring 	/* counts how many lockspaces are using this node
186489d8e55SAlexander Aring 	 * this refcount is necessary to determine if the
187489d8e55SAlexander Aring 	 * node wants to disconnect.
188489d8e55SAlexander Aring 	 */
189489d8e55SAlexander Aring 	int users;
190489d8e55SAlexander Aring 
1915b2f981fSAlexander Aring 	/* not protected by srcu, node_hash lifetime */
1925b2f981fSAlexander Aring 	void *debugfs;
1935b2f981fSAlexander Aring 
194489d8e55SAlexander Aring 	struct hlist_node hlist;
195489d8e55SAlexander Aring 	struct rcu_head rcu;
196489d8e55SAlexander Aring };
197489d8e55SAlexander Aring 
198489d8e55SAlexander Aring struct dlm_mhandle {
1995b787667SAlexander Aring 	const union dlm_packet *inner_p;
200489d8e55SAlexander Aring 	struct midcomms_node *node;
201489d8e55SAlexander Aring 	struct dlm_opts *opts;
202489d8e55SAlexander Aring 	struct dlm_msg *msg;
203489d8e55SAlexander Aring 	bool committed;
204489d8e55SAlexander Aring 	uint32_t seq;
205489d8e55SAlexander Aring 
206489d8e55SAlexander Aring 	void (*ack_rcv)(struct midcomms_node *node);
207489d8e55SAlexander Aring 
208489d8e55SAlexander Aring 	/* get_mhandle/commit srcu idx exchange */
209489d8e55SAlexander Aring 	int idx;
210489d8e55SAlexander Aring 
211489d8e55SAlexander Aring 	struct list_head list;
212489d8e55SAlexander Aring 	struct rcu_head rcu;
213489d8e55SAlexander Aring };
214489d8e55SAlexander Aring 
215489d8e55SAlexander Aring static struct hlist_head node_hash[CONN_HASH_SIZE];
216489d8e55SAlexander Aring static DEFINE_SPINLOCK(nodes_lock);
217489d8e55SAlexander Aring DEFINE_STATIC_SRCU(nodes_srcu);
218489d8e55SAlexander Aring 
219489d8e55SAlexander Aring /* This mutex prevents that midcomms_close() is running while
220489d8e55SAlexander Aring  * stop() or remove(). As I experienced invalid memory access
221489d8e55SAlexander Aring  * behaviours when DLM_DEBUG_FENCE_TERMINATION is enabled and
222489d8e55SAlexander Aring  * resetting machines. I will end in some double deletion in nodes
223489d8e55SAlexander Aring  * datastructure.
224489d8e55SAlexander Aring  */
225489d8e55SAlexander Aring static DEFINE_MUTEX(close_lock);
226489d8e55SAlexander Aring 
dlm_midcomms_cache_create(void)2276c547f26SAlexander Aring struct kmem_cache *dlm_midcomms_cache_create(void)
2286c547f26SAlexander Aring {
2296c547f26SAlexander Aring 	return kmem_cache_create("dlm_mhandle", sizeof(struct dlm_mhandle),
2306c547f26SAlexander Aring 				 0, 0, NULL);
2316c547f26SAlexander Aring }
2326c547f26SAlexander Aring 
dlm_state_str(int state)233489d8e55SAlexander Aring static inline const char *dlm_state_str(int state)
234a070a91cSAlexander Aring {
235489d8e55SAlexander Aring 	switch (state) {
236489d8e55SAlexander Aring 	case DLM_CLOSED:
237489d8e55SAlexander Aring 		return "CLOSED";
238489d8e55SAlexander Aring 	case DLM_ESTABLISHED:
239489d8e55SAlexander Aring 		return "ESTABLISHED";
240489d8e55SAlexander Aring 	case DLM_FIN_WAIT1:
241489d8e55SAlexander Aring 		return "FIN_WAIT1";
242489d8e55SAlexander Aring 	case DLM_FIN_WAIT2:
243489d8e55SAlexander Aring 		return "FIN_WAIT2";
244489d8e55SAlexander Aring 	case DLM_CLOSE_WAIT:
245489d8e55SAlexander Aring 		return "CLOSE_WAIT";
246489d8e55SAlexander Aring 	case DLM_LAST_ACK:
247489d8e55SAlexander Aring 		return "LAST_ACK";
248489d8e55SAlexander Aring 	case DLM_CLOSING:
249489d8e55SAlexander Aring 		return "CLOSING";
250489d8e55SAlexander Aring 	default:
251489d8e55SAlexander Aring 		return "UNKNOWN";
252489d8e55SAlexander Aring 	}
253489d8e55SAlexander Aring }
254489d8e55SAlexander Aring 
dlm_midcomms_state(struct midcomms_node * node)2555b2f981fSAlexander Aring const char *dlm_midcomms_state(struct midcomms_node *node)
2565b2f981fSAlexander Aring {
2575b2f981fSAlexander Aring 	return dlm_state_str(node->state);
2585b2f981fSAlexander Aring }
2595b2f981fSAlexander Aring 
dlm_midcomms_flags(struct midcomms_node * node)2605b2f981fSAlexander Aring unsigned long dlm_midcomms_flags(struct midcomms_node *node)
2615b2f981fSAlexander Aring {
2625b2f981fSAlexander Aring 	return node->flags;
2635b2f981fSAlexander Aring }
2645b2f981fSAlexander Aring 
dlm_midcomms_send_queue_cnt(struct midcomms_node * node)2655b2f981fSAlexander Aring int dlm_midcomms_send_queue_cnt(struct midcomms_node *node)
2665b2f981fSAlexander Aring {
2675b2f981fSAlexander Aring 	return atomic_read(&node->send_queue_cnt);
2685b2f981fSAlexander Aring }
2695b2f981fSAlexander Aring 
dlm_midcomms_version(struct midcomms_node * node)2705b2f981fSAlexander Aring uint32_t dlm_midcomms_version(struct midcomms_node *node)
2715b2f981fSAlexander Aring {
2725b2f981fSAlexander Aring 	return node->version;
2735b2f981fSAlexander Aring }
2745b2f981fSAlexander Aring 
__find_node(int nodeid,int r)275489d8e55SAlexander Aring static struct midcomms_node *__find_node(int nodeid, int r)
276489d8e55SAlexander Aring {
277489d8e55SAlexander Aring 	struct midcomms_node *node;
278489d8e55SAlexander Aring 
279489d8e55SAlexander Aring 	hlist_for_each_entry_rcu(node, &node_hash[r], hlist) {
280489d8e55SAlexander Aring 		if (node->nodeid == nodeid)
281489d8e55SAlexander Aring 			return node;
282489d8e55SAlexander Aring 	}
283489d8e55SAlexander Aring 
284489d8e55SAlexander Aring 	return NULL;
285489d8e55SAlexander Aring }
286489d8e55SAlexander Aring 
dlm_mhandle_release(struct rcu_head * rcu)287489d8e55SAlexander Aring static void dlm_mhandle_release(struct rcu_head *rcu)
288489d8e55SAlexander Aring {
289489d8e55SAlexander Aring 	struct dlm_mhandle *mh = container_of(rcu, struct dlm_mhandle, rcu);
290489d8e55SAlexander Aring 
291489d8e55SAlexander Aring 	dlm_lowcomms_put_msg(mh->msg);
2926c547f26SAlexander Aring 	dlm_free_mhandle(mh);
293489d8e55SAlexander Aring }
294489d8e55SAlexander Aring 
dlm_mhandle_delete(struct midcomms_node * node,struct dlm_mhandle * mh)295f5fe8d51SAlexander Aring static void dlm_mhandle_delete(struct midcomms_node *node,
296f5fe8d51SAlexander Aring 			       struct dlm_mhandle *mh)
297f5fe8d51SAlexander Aring {
298f5fe8d51SAlexander Aring 	list_del_rcu(&mh->list);
299f5fe8d51SAlexander Aring 	atomic_dec(&node->send_queue_cnt);
300f5fe8d51SAlexander Aring 	call_rcu(&mh->rcu, dlm_mhandle_release);
301f5fe8d51SAlexander Aring }
302f5fe8d51SAlexander Aring 
dlm_send_queue_flush(struct midcomms_node * node)303489d8e55SAlexander Aring static void dlm_send_queue_flush(struct midcomms_node *node)
304489d8e55SAlexander Aring {
305489d8e55SAlexander Aring 	struct dlm_mhandle *mh;
306489d8e55SAlexander Aring 
307489d8e55SAlexander Aring 	pr_debug("flush midcomms send queue of node %d\n", node->nodeid);
308489d8e55SAlexander Aring 
309489d8e55SAlexander Aring 	rcu_read_lock();
310dbb751ffSAlexander Aring 	spin_lock_bh(&node->send_queue_lock);
311f5fe8d51SAlexander Aring 	list_for_each_entry_rcu(mh, &node->send_queue, list) {
312f5fe8d51SAlexander Aring 		dlm_mhandle_delete(node, mh);
313489d8e55SAlexander Aring 	}
314dbb751ffSAlexander Aring 	spin_unlock_bh(&node->send_queue_lock);
315489d8e55SAlexander Aring 	rcu_read_unlock();
316489d8e55SAlexander Aring }
317489d8e55SAlexander Aring 
midcomms_node_reset(struct midcomms_node * node)318489d8e55SAlexander Aring static void midcomms_node_reset(struct midcomms_node *node)
319489d8e55SAlexander Aring {
320489d8e55SAlexander Aring 	pr_debug("reset node %d\n", node->nodeid);
321489d8e55SAlexander Aring 
322d00725caSAlexander Aring 	atomic_set(&node->seq_next, DLM_SEQ_INIT);
323d00725caSAlexander Aring 	atomic_set(&node->seq_send, DLM_SEQ_INIT);
3241696c75fSAlexander Aring 	atomic_set(&node->ulp_delivered, 0);
325489d8e55SAlexander Aring 	node->version = DLM_VERSION_NOT_SET;
326489d8e55SAlexander Aring 	node->flags = 0;
327489d8e55SAlexander Aring 
328489d8e55SAlexander Aring 	dlm_send_queue_flush(node);
329489d8e55SAlexander Aring 	node->state = DLM_CLOSED;
330489d8e55SAlexander Aring 	wake_up(&node->shutdown_wait);
331489d8e55SAlexander Aring }
332489d8e55SAlexander Aring 
/* Find the node for @nodeid in its hash bucket; NULL if it is unknown. */
static struct midcomms_node *nodeid2node(int nodeid)
{
	const int bucket = nodeid_hash(nodeid);

	return __find_node(nodeid, bucket);
}
337489d8e55SAlexander Aring 
dlm_midcomms_addr(int nodeid,struct sockaddr_storage * addr,int len)33863e711b0SAlexander Aring int dlm_midcomms_addr(int nodeid, struct sockaddr_storage *addr, int len)
33963e711b0SAlexander Aring {
340c0c2b346SAlexander Aring 	int ret, idx, r = nodeid_hash(nodeid);
34163e711b0SAlexander Aring 	struct midcomms_node *node;
342489d8e55SAlexander Aring 
34363e711b0SAlexander Aring 	ret = dlm_lowcomms_addr(nodeid, addr, len);
34463e711b0SAlexander Aring 	if (ret)
34563e711b0SAlexander Aring 		return ret;
34663e711b0SAlexander Aring 
347c0c2b346SAlexander Aring 	idx = srcu_read_lock(&nodes_srcu);
348c0c2b346SAlexander Aring 	node = __find_node(nodeid, r);
349c0c2b346SAlexander Aring 	if (node) {
350c0c2b346SAlexander Aring 		srcu_read_unlock(&nodes_srcu, idx);
351c0c2b346SAlexander Aring 		return 0;
352c0c2b346SAlexander Aring 	}
353c0c2b346SAlexander Aring 	srcu_read_unlock(&nodes_srcu, idx);
354c0c2b346SAlexander Aring 
35563e711b0SAlexander Aring 	node = kmalloc(sizeof(*node), GFP_NOFS);
356489d8e55SAlexander Aring 	if (!node)
35763e711b0SAlexander Aring 		return -ENOMEM;
358489d8e55SAlexander Aring 
359489d8e55SAlexander Aring 	node->nodeid = nodeid;
360489d8e55SAlexander Aring 	spin_lock_init(&node->state_lock);
361489d8e55SAlexander Aring 	spin_lock_init(&node->send_queue_lock);
362489d8e55SAlexander Aring 	atomic_set(&node->send_queue_cnt, 0);
363489d8e55SAlexander Aring 	INIT_LIST_HEAD(&node->send_queue);
364489d8e55SAlexander Aring 	init_waitqueue_head(&node->shutdown_wait);
365489d8e55SAlexander Aring 	node->users = 0;
366489d8e55SAlexander Aring 	midcomms_node_reset(node);
367489d8e55SAlexander Aring 
368489d8e55SAlexander Aring 	spin_lock(&nodes_lock);
369489d8e55SAlexander Aring 	hlist_add_head_rcu(&node->hlist, &node_hash[r]);
370489d8e55SAlexander Aring 	spin_unlock(&nodes_lock);
3715b2f981fSAlexander Aring 
3725b2f981fSAlexander Aring 	node->debugfs = dlm_create_debug_comms_file(nodeid, node);
37363e711b0SAlexander Aring 	return 0;
374489d8e55SAlexander Aring }
375489d8e55SAlexander Aring 
/* Build and send a header-only DLM_ACK message carrying @seq to @nodeid.
 *
 * Returns 0 on success or -ENOMEM if lowcomms cannot provide a message.
 */
static int dlm_send_ack(int nodeid, uint32_t seq)
{
	int len = sizeof(struct dlm_header);
	struct dlm_header *hdr;
	struct dlm_msg *msg;
	char *buf;

	msg = dlm_lowcomms_new_msg(nodeid, len, GFP_ATOMIC, &buf, NULL, NULL);
	if (!msg)
		return -ENOMEM;

	/* the ack consists of the plain dlm header only */
	hdr = (struct dlm_header *)buf;
	hdr->h_version = cpu_to_le32(DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
	hdr->h_nodeid = cpu_to_le32(dlm_our_nodeid());
	hdr->h_length = cpu_to_le16(len);
	hdr->h_cmd = DLM_ACK;
	hdr->u.h_seq = cpu_to_le32(seq);

	/* commit the message, then drop our reference on it */
	dlm_lowcomms_commit_msg(msg);
	dlm_lowcomms_put_msg(msg);

	return 0;
}
401a070a91cSAlexander Aring 
/* Send an ack with the current seq_next to @node if its ulp_delivered
 * counter exceeds @threshold.
 *
 * The counter is reset to 0 with a compare-and-exchange, so of several
 * concurrent callers only the one that wins the exchange sends the ack.
 */
static void dlm_send_ack_threshold(struct midcomms_node *node,
				   uint32_t threshold)
{
	uint32_t seen;

	for (;;) {
		seen = atomic_read(&node->ulp_delivered);
		/* threshold not reached, nothing to send */
		if (seen <= threshold)
			return;

		/* try to reset the counter; retry if it changed meanwhile */
		if (atomic_cmpxchg(&node->ulp_delivered, seen, 0) == seen)
			break;
	}

	dlm_send_ack(node->nodeid, atomic_read(&node->seq_next));
}
4231696c75fSAlexander Aring 
dlm_send_fin(struct midcomms_node * node,void (* ack_rcv)(struct midcomms_node * node))424489d8e55SAlexander Aring static int dlm_send_fin(struct midcomms_node *node,
425489d8e55SAlexander Aring 			void (*ack_rcv)(struct midcomms_node *node))
426a070a91cSAlexander Aring {
427489d8e55SAlexander Aring 	int mb_len = sizeof(struct dlm_header);
428489d8e55SAlexander Aring 	struct dlm_header *m_header;
429489d8e55SAlexander Aring 	struct dlm_mhandle *mh;
430489d8e55SAlexander Aring 	char *ppc;
431489d8e55SAlexander Aring 
432a5849636SAlexander Aring 	mh = dlm_midcomms_get_mhandle(node->nodeid, mb_len, GFP_ATOMIC, &ppc);
433489d8e55SAlexander Aring 	if (!mh)
434489d8e55SAlexander Aring 		return -ENOMEM;
435489d8e55SAlexander Aring 
43616427211SAlexander Aring 	set_bit(DLM_NODE_FLAG_STOP_TX, &node->flags);
437489d8e55SAlexander Aring 	mh->ack_rcv = ack_rcv;
438489d8e55SAlexander Aring 
439489d8e55SAlexander Aring 	m_header = (struct dlm_header *)ppc;
440489d8e55SAlexander Aring 
4413428785aSAlexander Aring 	m_header->h_version = cpu_to_le32(DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
4423428785aSAlexander Aring 	m_header->h_nodeid = cpu_to_le32(dlm_our_nodeid());
4433428785aSAlexander Aring 	m_header->h_length = cpu_to_le16(mb_len);
444489d8e55SAlexander Aring 	m_header->h_cmd = DLM_FIN;
445489d8e55SAlexander Aring 
446489d8e55SAlexander Aring 	pr_debug("sending fin msg to node %d\n", node->nodeid);
447e01c4b7bSAlexander Aring 	dlm_midcomms_commit_mhandle(mh, NULL, 0);
448489d8e55SAlexander Aring 
449489d8e55SAlexander Aring 	return 0;
450a070a91cSAlexander Aring }
451a070a91cSAlexander Aring 
/* Process an ack with sequence number @seq received from @node.
 *
 * Every queued message whose sequence number is before @seq counts as
 * acknowledged. A first pass runs the optional ack_rcv callbacks without
 * send_queue_lock held, a second pass removes the acknowledged entries
 * under the lock. Both passes stop at the first entry that is not covered,
 * as the send queue should be ordered.
 */
static void dlm_receive_ack(struct midcomms_node *node, uint32_t seq)
{
	struct dlm_mhandle *entry;

	rcu_read_lock();

	/* pass 1: notify */
	list_for_each_entry_rcu(entry, &node->send_queue, list) {
		if (!before(entry->seq, seq))
			break;

		if (entry->ack_rcv)
			entry->ack_rcv(node);
	}

	/* pass 2: remove */
	spin_lock_bh(&node->send_queue_lock);
	list_for_each_entry_rcu(entry, &node->send_queue, list) {
		if (!before(entry->seq, seq))
			break;

		dlm_mhandle_delete(node, entry);
	}
	spin_unlock_bh(&node->send_queue_lock);

	rcu_read_unlock();
}
479a070a91cSAlexander Aring 
dlm_pas_fin_ack_rcv(struct midcomms_node * node)480489d8e55SAlexander Aring static void dlm_pas_fin_ack_rcv(struct midcomms_node *node)
481a070a91cSAlexander Aring {
482489d8e55SAlexander Aring 	spin_lock(&node->state_lock);
483489d8e55SAlexander Aring 	pr_debug("receive passive fin ack from node %d with state %s\n",
484489d8e55SAlexander Aring 		 node->nodeid, dlm_state_str(node->state));
485489d8e55SAlexander Aring 
486489d8e55SAlexander Aring 	switch (node->state) {
487489d8e55SAlexander Aring 	case DLM_LAST_ACK:
488489d8e55SAlexander Aring 		/* DLM_CLOSED */
489489d8e55SAlexander Aring 		midcomms_node_reset(node);
490489d8e55SAlexander Aring 		break;
491489d8e55SAlexander Aring 	case DLM_CLOSED:
492489d8e55SAlexander Aring 		/* not valid but somehow we got what we want */
493489d8e55SAlexander Aring 		wake_up(&node->shutdown_wait);
494489d8e55SAlexander Aring 		break;
495489d8e55SAlexander Aring 	default:
496489d8e55SAlexander Aring 		spin_unlock(&node->state_lock);
49731864097SAlexander Aring 		log_print("%s: unexpected state: %d",
498489d8e55SAlexander Aring 			  __func__, node->state);
499775af207SAlexander Aring 		WARN_ON_ONCE(1);
500489d8e55SAlexander Aring 		return;
501489d8e55SAlexander Aring 	}
502489d8e55SAlexander Aring 	spin_unlock(&node->state_lock);
503a070a91cSAlexander Aring }
504a070a91cSAlexander Aring 
/* Emit the receive tracepoint that matches the command of packet @p.
 *
 * Only DLM_MSG and DLM_RCOM packets are traced, every other command is
 * ignored.
 */
static void dlm_receive_buffer_3_2_trace(uint32_t seq,
					 const union dlm_packet *p)
{
	if (p->header.h_cmd == DLM_MSG)
		trace_dlm_recv_message(dlm_our_nodeid(), seq, &p->message);
	else if (p->header.h_cmd == DLM_RCOM)
		trace_dlm_recv_rcom(dlm_our_nodeid(), seq, &p->rcom);
}
519e01c4b7bSAlexander Aring 
dlm_midcomms_receive_buffer(const union dlm_packet * p,struct midcomms_node * node,uint32_t seq)52011519351SAlexander Aring static void dlm_midcomms_receive_buffer(const union dlm_packet *p,
521489d8e55SAlexander Aring 					struct midcomms_node *node,
522489d8e55SAlexander Aring 					uint32_t seq)
523a070a91cSAlexander Aring {
524d00725caSAlexander Aring 	bool is_expected_seq;
525d00725caSAlexander Aring 	uint32_t oval, nval;
526489d8e55SAlexander Aring 
527d00725caSAlexander Aring 	do {
528d00725caSAlexander Aring 		oval = atomic_read(&node->seq_next);
529d00725caSAlexander Aring 		is_expected_seq = (oval == seq);
530d00725caSAlexander Aring 		if (!is_expected_seq)
531d00725caSAlexander Aring 			break;
532d00725caSAlexander Aring 
533d00725caSAlexander Aring 		nval = oval + 1;
534d00725caSAlexander Aring 	} while (atomic_cmpxchg(&node->seq_next, oval, nval) != oval);
535d00725caSAlexander Aring 
536d00725caSAlexander Aring 	if (is_expected_seq) {
537489d8e55SAlexander Aring 		switch (p->header.h_cmd) {
538489d8e55SAlexander Aring 		case DLM_FIN:
539489d8e55SAlexander Aring 			spin_lock(&node->state_lock);
540489d8e55SAlexander Aring 			pr_debug("receive fin msg from node %d with state %s\n",
541489d8e55SAlexander Aring 				 node->nodeid, dlm_state_str(node->state));
542489d8e55SAlexander Aring 
543489d8e55SAlexander Aring 			switch (node->state) {
544489d8e55SAlexander Aring 			case DLM_ESTABLISHED:
545d00725caSAlexander Aring 				dlm_send_ack(node->nodeid, nval);
54600908b33SAlexander Aring 
547489d8e55SAlexander Aring 				/* passive shutdown DLM_LAST_ACK case 1
548489d8e55SAlexander Aring 				 * additional we check if the node is used by
549489d8e55SAlexander Aring 				 * cluster manager events at all.
550489d8e55SAlexander Aring 				 */
551489d8e55SAlexander Aring 				if (node->users == 0) {
552489d8e55SAlexander Aring 					node->state = DLM_LAST_ACK;
553489d8e55SAlexander Aring 					pr_debug("switch node %d to state %s case 1\n",
554489d8e55SAlexander Aring 						 node->nodeid, dlm_state_str(node->state));
555a5849636SAlexander Aring 					set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
556a5849636SAlexander Aring 					dlm_send_fin(node, dlm_pas_fin_ack_rcv);
557ef7ef015SAlexander Aring 				} else {
558ef7ef015SAlexander Aring 					node->state = DLM_CLOSE_WAIT;
559ef7ef015SAlexander Aring 					pr_debug("switch node %d to state %s\n",
560ef7ef015SAlexander Aring 						 node->nodeid, dlm_state_str(node->state));
561489d8e55SAlexander Aring 				}
562489d8e55SAlexander Aring 				break;
563489d8e55SAlexander Aring 			case DLM_FIN_WAIT1:
564d00725caSAlexander Aring 				dlm_send_ack(node->nodeid, nval);
565489d8e55SAlexander Aring 				node->state = DLM_CLOSING;
56615c63db8SAlexander Aring 				set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
567489d8e55SAlexander Aring 				pr_debug("switch node %d to state %s\n",
568489d8e55SAlexander Aring 					 node->nodeid, dlm_state_str(node->state));
569489d8e55SAlexander Aring 				break;
570489d8e55SAlexander Aring 			case DLM_FIN_WAIT2:
571d00725caSAlexander Aring 				dlm_send_ack(node->nodeid, nval);
572489d8e55SAlexander Aring 				midcomms_node_reset(node);
573489d8e55SAlexander Aring 				pr_debug("switch node %d to state %s\n",
574489d8e55SAlexander Aring 					 node->nodeid, dlm_state_str(node->state));
575489d8e55SAlexander Aring 				break;
576489d8e55SAlexander Aring 			case DLM_LAST_ACK:
577489d8e55SAlexander Aring 				/* probably remove_member caught it, do nothing */
578489d8e55SAlexander Aring 				break;
579489d8e55SAlexander Aring 			default:
580489d8e55SAlexander Aring 				spin_unlock(&node->state_lock);
58131864097SAlexander Aring 				log_print("%s: unexpected state: %d",
582489d8e55SAlexander Aring 					  __func__, node->state);
583775af207SAlexander Aring 				WARN_ON_ONCE(1);
584489d8e55SAlexander Aring 				return;
585489d8e55SAlexander Aring 			}
586489d8e55SAlexander Aring 			spin_unlock(&node->state_lock);
587489d8e55SAlexander Aring 			break;
588489d8e55SAlexander Aring 		default:
589775af207SAlexander Aring 			WARN_ON_ONCE(test_bit(DLM_NODE_FLAG_STOP_RX, &node->flags));
590e01c4b7bSAlexander Aring 			dlm_receive_buffer_3_2_trace(seq, p);
591489d8e55SAlexander Aring 			dlm_receive_buffer(p, node->nodeid);
5921696c75fSAlexander Aring 			atomic_inc(&node->ulp_delivered);
5931696c75fSAlexander Aring 			/* unlikely case to send ack back when we don't transmit */
5941696c75fSAlexander Aring 			dlm_send_ack_threshold(node, DLM_RECV_ACK_BACK_MSG_THRESHOLD);
595489d8e55SAlexander Aring 			break;
596489d8e55SAlexander Aring 		}
597489d8e55SAlexander Aring 	} else {
598489d8e55SAlexander Aring 		/* retry to ack message which we already have by sending back
599489d8e55SAlexander Aring 		 * current node->seq_next number as ack.
600489d8e55SAlexander Aring 		 */
601d00725caSAlexander Aring 		if (seq < oval)
602d00725caSAlexander Aring 			dlm_send_ack(node->nodeid, oval);
603489d8e55SAlexander Aring 
604489d8e55SAlexander Aring 		log_print_ratelimited("ignore dlm msg because seq mismatch, seq: %u, expected: %u, nodeid: %d",
605d00725caSAlexander Aring 				      seq, oval, node->nodeid);
606489d8e55SAlexander Aring 	}
607489d8e55SAlexander Aring }
608489d8e55SAlexander Aring 
dlm_opts_check_msglen(const union dlm_packet * p,uint16_t msglen,int nodeid)60911519351SAlexander Aring static int dlm_opts_check_msglen(const union dlm_packet *p, uint16_t msglen,
61011519351SAlexander Aring 				 int nodeid)
611489d8e55SAlexander Aring {
612489d8e55SAlexander Aring 	int len = msglen;
613489d8e55SAlexander Aring 
614489d8e55SAlexander Aring 	/* we only trust outer header msglen because
615489d8e55SAlexander Aring 	 * it's checked against receive buffer length.
616489d8e55SAlexander Aring 	 */
617489d8e55SAlexander Aring 	if (len < sizeof(struct dlm_opts))
618489d8e55SAlexander Aring 		return -1;
619489d8e55SAlexander Aring 	len -= sizeof(struct dlm_opts);
620489d8e55SAlexander Aring 
621489d8e55SAlexander Aring 	if (len < le16_to_cpu(p->opts.o_optlen))
622489d8e55SAlexander Aring 		return -1;
623489d8e55SAlexander Aring 	len -= le16_to_cpu(p->opts.o_optlen);
624489d8e55SAlexander Aring 
625489d8e55SAlexander Aring 	switch (p->opts.o_nextcmd) {
626489d8e55SAlexander Aring 	case DLM_FIN:
627489d8e55SAlexander Aring 		if (len < sizeof(struct dlm_header)) {
628489d8e55SAlexander Aring 			log_print("fin too small: %d, will skip this message from node %d",
629489d8e55SAlexander Aring 				  len, nodeid);
630489d8e55SAlexander Aring 			return -1;
631489d8e55SAlexander Aring 		}
632489d8e55SAlexander Aring 
633489d8e55SAlexander Aring 		break;
634489d8e55SAlexander Aring 	case DLM_MSG:
635489d8e55SAlexander Aring 		if (len < sizeof(struct dlm_message)) {
636489d8e55SAlexander Aring 			log_print("msg too small: %d, will skip this message from node %d",
637489d8e55SAlexander Aring 				  msglen, nodeid);
638489d8e55SAlexander Aring 			return -1;
639489d8e55SAlexander Aring 		}
640489d8e55SAlexander Aring 
641489d8e55SAlexander Aring 		break;
642489d8e55SAlexander Aring 	case DLM_RCOM:
643489d8e55SAlexander Aring 		if (len < sizeof(struct dlm_rcom)) {
644489d8e55SAlexander Aring 			log_print("rcom msg too small: %d, will skip this message from node %d",
645489d8e55SAlexander Aring 				  len, nodeid);
646489d8e55SAlexander Aring 			return -1;
647489d8e55SAlexander Aring 		}
648489d8e55SAlexander Aring 
649489d8e55SAlexander Aring 		break;
650489d8e55SAlexander Aring 	default:
651489d8e55SAlexander Aring 		log_print("unsupported o_nextcmd received: %u, will skip this message from node %d",
652489d8e55SAlexander Aring 			  p->opts.o_nextcmd, nodeid);
653489d8e55SAlexander Aring 		return -1;
654489d8e55SAlexander Aring 	}
655489d8e55SAlexander Aring 
656489d8e55SAlexander Aring 	return 0;
657489d8e55SAlexander Aring }
658489d8e55SAlexander Aring 
dlm_midcomms_receive_buffer_3_2(const union dlm_packet * p,int nodeid)65911519351SAlexander Aring static void dlm_midcomms_receive_buffer_3_2(const union dlm_packet *p, int nodeid)
660489d8e55SAlexander Aring {
661489d8e55SAlexander Aring 	uint16_t msglen = le16_to_cpu(p->header.h_length);
662489d8e55SAlexander Aring 	struct midcomms_node *node;
663489d8e55SAlexander Aring 	uint32_t seq;
664489d8e55SAlexander Aring 	int ret, idx;
665489d8e55SAlexander Aring 
666489d8e55SAlexander Aring 	idx = srcu_read_lock(&nodes_srcu);
66763e711b0SAlexander Aring 	node = nodeid2node(nodeid);
66863e711b0SAlexander Aring 	if (WARN_ON_ONCE(!node))
669489d8e55SAlexander Aring 		goto out;
670489d8e55SAlexander Aring 
67163e711b0SAlexander Aring 	switch (node->version) {
67263e711b0SAlexander Aring 	case DLM_VERSION_NOT_SET:
67363e711b0SAlexander Aring 		node->version = DLM_VERSION_3_2;
67463e711b0SAlexander Aring 		wake_up(&node->shutdown_wait);
67563e711b0SAlexander Aring 		log_print("version 0x%08x for node %d detected", DLM_VERSION_3_2,
67663e711b0SAlexander Aring 			  node->nodeid);
67763e711b0SAlexander Aring 
67863e711b0SAlexander Aring 		spin_lock(&node->state_lock);
67963e711b0SAlexander Aring 		switch (node->state) {
68063e711b0SAlexander Aring 		case DLM_CLOSED:
68163e711b0SAlexander Aring 			node->state = DLM_ESTABLISHED;
68263e711b0SAlexander Aring 			pr_debug("switch node %d to state %s\n",
68363e711b0SAlexander Aring 				 node->nodeid, dlm_state_str(node->state));
68463e711b0SAlexander Aring 			break;
68563e711b0SAlexander Aring 		default:
68663e711b0SAlexander Aring 			break;
68763e711b0SAlexander Aring 		}
68863e711b0SAlexander Aring 		spin_unlock(&node->state_lock);
68963e711b0SAlexander Aring 
69063e711b0SAlexander Aring 		break;
69163e711b0SAlexander Aring 	case DLM_VERSION_3_2:
69263e711b0SAlexander Aring 		break;
69363e711b0SAlexander Aring 	default:
69463e711b0SAlexander Aring 		log_print_ratelimited("version mismatch detected, assumed 0x%08x but node %d has 0x%08x",
69563e711b0SAlexander Aring 				      DLM_VERSION_3_2, node->nodeid, node->version);
69663e711b0SAlexander Aring 		goto out;
69763e711b0SAlexander Aring 	}
69863e711b0SAlexander Aring 
699489d8e55SAlexander Aring 	switch (p->header.h_cmd) {
700489d8e55SAlexander Aring 	case DLM_RCOM:
701489d8e55SAlexander Aring 		/* these rcom message we use to determine version.
702489d8e55SAlexander Aring 		 * they have their own retransmission handling and
703489d8e55SAlexander Aring 		 * are the first messages of dlm.
704489d8e55SAlexander Aring 		 *
705489d8e55SAlexander Aring 		 * length already checked.
706489d8e55SAlexander Aring 		 */
70714a92fd7SAlexander Aring 		switch (p->rcom.rc_type) {
70814a92fd7SAlexander Aring 		case cpu_to_le32(DLM_RCOM_NAMES):
709489d8e55SAlexander Aring 			fallthrough;
71014a92fd7SAlexander Aring 		case cpu_to_le32(DLM_RCOM_NAMES_REPLY):
711489d8e55SAlexander Aring 			fallthrough;
71214a92fd7SAlexander Aring 		case cpu_to_le32(DLM_RCOM_STATUS):
713489d8e55SAlexander Aring 			fallthrough;
71414a92fd7SAlexander Aring 		case cpu_to_le32(DLM_RCOM_STATUS_REPLY):
715489d8e55SAlexander Aring 			break;
716489d8e55SAlexander Aring 		default:
717489d8e55SAlexander Aring 			log_print("unsupported rcom type received: %u, will skip this message from node %d",
718489d8e55SAlexander Aring 				  le32_to_cpu(p->rcom.rc_type), nodeid);
719489d8e55SAlexander Aring 			goto out;
720489d8e55SAlexander Aring 		}
721489d8e55SAlexander Aring 
722775af207SAlexander Aring 		WARN_ON_ONCE(test_bit(DLM_NODE_FLAG_STOP_RX, &node->flags));
723489d8e55SAlexander Aring 		dlm_receive_buffer(p, nodeid);
724489d8e55SAlexander Aring 		break;
725489d8e55SAlexander Aring 	case DLM_OPTS:
726489d8e55SAlexander Aring 		seq = le32_to_cpu(p->header.u.h_seq);
727489d8e55SAlexander Aring 
728489d8e55SAlexander Aring 		ret = dlm_opts_check_msglen(p, msglen, nodeid);
729489d8e55SAlexander Aring 		if (ret < 0) {
730489d8e55SAlexander Aring 			log_print("opts msg too small: %u, will skip this message from node %d",
731489d8e55SAlexander Aring 				  msglen, nodeid);
732489d8e55SAlexander Aring 			goto out;
733489d8e55SAlexander Aring 		}
734489d8e55SAlexander Aring 
735489d8e55SAlexander Aring 		p = (union dlm_packet *)((unsigned char *)p->opts.o_opts +
736489d8e55SAlexander Aring 					 le16_to_cpu(p->opts.o_optlen));
737489d8e55SAlexander Aring 
738489d8e55SAlexander Aring 		/* recheck inner msglen just if it's not garbage */
739489d8e55SAlexander Aring 		msglen = le16_to_cpu(p->header.h_length);
740489d8e55SAlexander Aring 		switch (p->header.h_cmd) {
741489d8e55SAlexander Aring 		case DLM_RCOM:
742489d8e55SAlexander Aring 			if (msglen < sizeof(struct dlm_rcom)) {
743489d8e55SAlexander Aring 				log_print("inner rcom msg too small: %u, will skip this message from node %d",
744489d8e55SAlexander Aring 					  msglen, nodeid);
745489d8e55SAlexander Aring 				goto out;
746489d8e55SAlexander Aring 			}
747489d8e55SAlexander Aring 
748489d8e55SAlexander Aring 			break;
749489d8e55SAlexander Aring 		case DLM_MSG:
750489d8e55SAlexander Aring 			if (msglen < sizeof(struct dlm_message)) {
751489d8e55SAlexander Aring 				log_print("inner msg too small: %u, will skip this message from node %d",
752489d8e55SAlexander Aring 					  msglen, nodeid);
753489d8e55SAlexander Aring 				goto out;
754489d8e55SAlexander Aring 			}
755489d8e55SAlexander Aring 
756489d8e55SAlexander Aring 			break;
757489d8e55SAlexander Aring 		case DLM_FIN:
758489d8e55SAlexander Aring 			if (msglen < sizeof(struct dlm_header)) {
759489d8e55SAlexander Aring 				log_print("inner fin too small: %u, will skip this message from node %d",
760489d8e55SAlexander Aring 					  msglen, nodeid);
761489d8e55SAlexander Aring 				goto out;
762489d8e55SAlexander Aring 			}
763489d8e55SAlexander Aring 
764489d8e55SAlexander Aring 			break;
765489d8e55SAlexander Aring 		default:
766489d8e55SAlexander Aring 			log_print("unsupported inner h_cmd received: %u, will skip this message from node %d",
767489d8e55SAlexander Aring 				  msglen, nodeid);
768489d8e55SAlexander Aring 			goto out;
769489d8e55SAlexander Aring 		}
770489d8e55SAlexander Aring 
771489d8e55SAlexander Aring 		dlm_midcomms_receive_buffer(p, node, seq);
772489d8e55SAlexander Aring 		break;
773489d8e55SAlexander Aring 	case DLM_ACK:
774489d8e55SAlexander Aring 		seq = le32_to_cpu(p->header.u.h_seq);
775489d8e55SAlexander Aring 		dlm_receive_ack(node, seq);
776489d8e55SAlexander Aring 		break;
777489d8e55SAlexander Aring 	default:
778489d8e55SAlexander Aring 		log_print("unsupported h_cmd received: %u, will skip this message from node %d",
779489d8e55SAlexander Aring 			  p->header.h_cmd, nodeid);
780489d8e55SAlexander Aring 		break;
781489d8e55SAlexander Aring 	}
782489d8e55SAlexander Aring 
783489d8e55SAlexander Aring out:
784489d8e55SAlexander Aring 	srcu_read_unlock(&nodes_srcu, idx);
785489d8e55SAlexander Aring }
786489d8e55SAlexander Aring 
dlm_midcomms_receive_buffer_3_1(const union dlm_packet * p,int nodeid)78763e711b0SAlexander Aring static void dlm_midcomms_receive_buffer_3_1(const union dlm_packet *p, int nodeid)
788489d8e55SAlexander Aring {
78963e711b0SAlexander Aring 	uint16_t msglen = le16_to_cpu(p->header.h_length);
79063e711b0SAlexander Aring 	struct midcomms_node *node;
79163e711b0SAlexander Aring 	int idx;
79263e711b0SAlexander Aring 
79363e711b0SAlexander Aring 	idx = srcu_read_lock(&nodes_srcu);
79463e711b0SAlexander Aring 	node = nodeid2node(nodeid);
79563e711b0SAlexander Aring 	if (WARN_ON_ONCE(!node)) {
79663e711b0SAlexander Aring 		srcu_read_unlock(&nodes_srcu, idx);
79763e711b0SAlexander Aring 		return;
79863e711b0SAlexander Aring 	}
79963e711b0SAlexander Aring 
800489d8e55SAlexander Aring 	switch (node->version) {
801489d8e55SAlexander Aring 	case DLM_VERSION_NOT_SET:
802489d8e55SAlexander Aring 		node->version = DLM_VERSION_3_1;
803b8b750e0SAlexander Aring 		wake_up(&node->shutdown_wait);
804489d8e55SAlexander Aring 		log_print("version 0x%08x for node %d detected", DLM_VERSION_3_1,
805489d8e55SAlexander Aring 			  node->nodeid);
806489d8e55SAlexander Aring 		break;
807489d8e55SAlexander Aring 	case DLM_VERSION_3_1:
808489d8e55SAlexander Aring 		break;
809489d8e55SAlexander Aring 	default:
810489d8e55SAlexander Aring 		log_print_ratelimited("version mismatch detected, assumed 0x%08x but node %d has 0x%08x",
811489d8e55SAlexander Aring 				      DLM_VERSION_3_1, node->nodeid, node->version);
812489d8e55SAlexander Aring 		srcu_read_unlock(&nodes_srcu, idx);
813489d8e55SAlexander Aring 		return;
814489d8e55SAlexander Aring 	}
815489d8e55SAlexander Aring 	srcu_read_unlock(&nodes_srcu, idx);
816489d8e55SAlexander Aring 
817489d8e55SAlexander Aring 	switch (p->header.h_cmd) {
818489d8e55SAlexander Aring 	case DLM_RCOM:
819489d8e55SAlexander Aring 		/* length already checked */
820489d8e55SAlexander Aring 		break;
821489d8e55SAlexander Aring 	case DLM_MSG:
822489d8e55SAlexander Aring 		if (msglen < sizeof(struct dlm_message)) {
823489d8e55SAlexander Aring 			log_print("msg too small: %u, will skip this message from node %d",
824489d8e55SAlexander Aring 				  msglen, nodeid);
825489d8e55SAlexander Aring 			return;
826489d8e55SAlexander Aring 		}
827489d8e55SAlexander Aring 
828489d8e55SAlexander Aring 		break;
829489d8e55SAlexander Aring 	default:
830489d8e55SAlexander Aring 		log_print("unsupported h_cmd received: %u, will skip this message from node %d",
831489d8e55SAlexander Aring 			  p->header.h_cmd, nodeid);
832489d8e55SAlexander Aring 		return;
833489d8e55SAlexander Aring 	}
834489d8e55SAlexander Aring 
835489d8e55SAlexander Aring 	dlm_receive_buffer(p, nodeid);
836a070a91cSAlexander Aring }
837a070a91cSAlexander Aring 
dlm_validate_incoming_buffer(int nodeid,unsigned char * buf,int len)838dbb751ffSAlexander Aring int dlm_validate_incoming_buffer(int nodeid, unsigned char *buf, int len)
839e7fd4179SDavid Teigland {
8404798cbbfSAlexander Aring 	const unsigned char *ptr = buf;
8414798cbbfSAlexander Aring 	const struct dlm_header *hd;
842e7fd4179SDavid Teigland 	uint16_t msglen;
8434798cbbfSAlexander Aring 	int ret = 0;
844e7fd4179SDavid Teigland 
8454798cbbfSAlexander Aring 	while (len >= sizeof(struct dlm_header)) {
8464798cbbfSAlexander Aring 		hd = (struct dlm_header *)ptr;
847e7fd4179SDavid Teigland 
848d10a0b88SAlexander Aring 		/* no message should be more than DLM_MAX_SOCKET_BUFSIZE or
849df9e06b8SAlexander Aring 		 * less than dlm_header size.
850df9e06b8SAlexander Aring 		 *
851df9e06b8SAlexander Aring 		 * Some messages does not have a 8 byte length boundary yet
852df9e06b8SAlexander Aring 		 * which can occur in a unaligned memory access of some dlm
853df9e06b8SAlexander Aring 		 * messages. However this problem need to be fixed at the
854df9e06b8SAlexander Aring 		 * sending side, for now it seems nobody run into architecture
855df9e06b8SAlexander Aring 		 * related issues yet but it slows down some processing.
856df9e06b8SAlexander Aring 		 * Fixing this issue should be scheduled in future by doing
857df9e06b8SAlexander Aring 		 * the next major version bump.
8584798cbbfSAlexander Aring 		 */
859df9e06b8SAlexander Aring 		msglen = le16_to_cpu(hd->h_length);
860d10a0b88SAlexander Aring 		if (msglen > DLM_MAX_SOCKET_BUFSIZE ||
861710176e8SAlexander Aring 		    msglen < sizeof(struct dlm_header)) {
862710176e8SAlexander Aring 			log_print("received invalid length header: %u from node %d, will abort message parsing",
863710176e8SAlexander Aring 				  msglen, nodeid);
8644798cbbfSAlexander Aring 			return -EBADMSG;
865eef7d739SAl Viro 		}
866e7fd4179SDavid Teigland 
8674798cbbfSAlexander Aring 		/* caller will take care that leftover
8684798cbbfSAlexander Aring 		 * will be parsed next call with more data
8694798cbbfSAlexander Aring 		 */
870e7fd4179SDavid Teigland 		if (msglen > len)
871e7fd4179SDavid Teigland 			break;
872e7fd4179SDavid Teigland 
873dbb751ffSAlexander Aring 		ret += msglen;
874dbb751ffSAlexander Aring 		len -= msglen;
875dbb751ffSAlexander Aring 		ptr += msglen;
876dbb751ffSAlexander Aring 	}
877dbb751ffSAlexander Aring 
878dbb751ffSAlexander Aring 	return ret;
879dbb751ffSAlexander Aring }
880dbb751ffSAlexander Aring 
881dbb751ffSAlexander Aring /*
882dbb751ffSAlexander Aring  * Called from the low-level comms layer to process a buffer of
883dbb751ffSAlexander Aring  * commands.
884dbb751ffSAlexander Aring  */
dlm_process_incoming_buffer(int nodeid,unsigned char * buf,int len)885dbb751ffSAlexander Aring int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int len)
886dbb751ffSAlexander Aring {
887dbb751ffSAlexander Aring 	const unsigned char *ptr = buf;
888dbb751ffSAlexander Aring 	const struct dlm_header *hd;
889dbb751ffSAlexander Aring 	uint16_t msglen;
890dbb751ffSAlexander Aring 	int ret = 0;
891dbb751ffSAlexander Aring 
892dbb751ffSAlexander Aring 	while (len >= sizeof(struct dlm_header)) {
893dbb751ffSAlexander Aring 		hd = (struct dlm_header *)ptr;
894dbb751ffSAlexander Aring 
895dbb751ffSAlexander Aring 		msglen = le16_to_cpu(hd->h_length);
896dbb751ffSAlexander Aring 		if (msglen > len)
897dbb751ffSAlexander Aring 			break;
898dbb751ffSAlexander Aring 
899658bd576SAlexander Aring 		switch (hd->h_version) {
900658bd576SAlexander Aring 		case cpu_to_le32(DLM_VERSION_3_1):
90111519351SAlexander Aring 			dlm_midcomms_receive_buffer_3_1((const union dlm_packet *)ptr, nodeid);
9024798cbbfSAlexander Aring 			break;
903658bd576SAlexander Aring 		case cpu_to_le32(DLM_VERSION_3_2):
90411519351SAlexander Aring 			dlm_midcomms_receive_buffer_3_2((const union dlm_packet *)ptr, nodeid);
9054798cbbfSAlexander Aring 			break;
9064798cbbfSAlexander Aring 		default:
907489d8e55SAlexander Aring 			log_print("received invalid version header: %u from node %d, will skip this message",
908489d8e55SAlexander Aring 				  le32_to_cpu(hd->h_version), nodeid);
909489d8e55SAlexander Aring 			break;
9104798cbbfSAlexander Aring 		}
911e7fd4179SDavid Teigland 
912e7fd4179SDavid Teigland 		ret += msglen;
913e7fd4179SDavid Teigland 		len -= msglen;
9144798cbbfSAlexander Aring 		ptr += msglen;
915e7fd4179SDavid Teigland 	}
916e7fd4179SDavid Teigland 
9174798cbbfSAlexander Aring 	return ret;
918e7fd4179SDavid Teigland }
919489d8e55SAlexander Aring 
dlm_midcomms_unack_msg_resend(int nodeid)920489d8e55SAlexander Aring void dlm_midcomms_unack_msg_resend(int nodeid)
921489d8e55SAlexander Aring {
922489d8e55SAlexander Aring 	struct midcomms_node *node;
923489d8e55SAlexander Aring 	struct dlm_mhandle *mh;
924489d8e55SAlexander Aring 	int idx, ret;
925489d8e55SAlexander Aring 
926489d8e55SAlexander Aring 	idx = srcu_read_lock(&nodes_srcu);
92763e711b0SAlexander Aring 	node = nodeid2node(nodeid);
92863e711b0SAlexander Aring 	if (WARN_ON_ONCE(!node)) {
929489d8e55SAlexander Aring 		srcu_read_unlock(&nodes_srcu, idx);
930489d8e55SAlexander Aring 		return;
931489d8e55SAlexander Aring 	}
932489d8e55SAlexander Aring 
933489d8e55SAlexander Aring 	/* old protocol, we don't support to retransmit on failure */
934489d8e55SAlexander Aring 	switch (node->version) {
935489d8e55SAlexander Aring 	case DLM_VERSION_3_2:
936489d8e55SAlexander Aring 		break;
937489d8e55SAlexander Aring 	default:
938489d8e55SAlexander Aring 		srcu_read_unlock(&nodes_srcu, idx);
939489d8e55SAlexander Aring 		return;
940489d8e55SAlexander Aring 	}
941489d8e55SAlexander Aring 
942489d8e55SAlexander Aring 	rcu_read_lock();
943489d8e55SAlexander Aring 	list_for_each_entry_rcu(mh, &node->send_queue, list) {
944489d8e55SAlexander Aring 		if (!mh->committed)
945489d8e55SAlexander Aring 			continue;
946489d8e55SAlexander Aring 
947489d8e55SAlexander Aring 		ret = dlm_lowcomms_resend_msg(mh->msg);
948489d8e55SAlexander Aring 		if (!ret)
949489d8e55SAlexander Aring 			log_print_ratelimited("retransmit dlm msg, seq %u, nodeid %d",
950489d8e55SAlexander Aring 					      mh->seq, node->nodeid);
951489d8e55SAlexander Aring 	}
952489d8e55SAlexander Aring 	rcu_read_unlock();
953489d8e55SAlexander Aring 	srcu_read_unlock(&nodes_srcu, idx);
954489d8e55SAlexander Aring }
955489d8e55SAlexander Aring 
dlm_fill_opts_header(struct dlm_opts * opts,uint16_t inner_len,uint32_t seq)956489d8e55SAlexander Aring static void dlm_fill_opts_header(struct dlm_opts *opts, uint16_t inner_len,
957489d8e55SAlexander Aring 				 uint32_t seq)
958489d8e55SAlexander Aring {
959489d8e55SAlexander Aring 	opts->o_header.h_cmd = DLM_OPTS;
9603428785aSAlexander Aring 	opts->o_header.h_version = cpu_to_le32(DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
9613428785aSAlexander Aring 	opts->o_header.h_nodeid = cpu_to_le32(dlm_our_nodeid());
9623428785aSAlexander Aring 	opts->o_header.h_length = cpu_to_le16(DLM_MIDCOMMS_OPT_LEN + inner_len);
9633428785aSAlexander Aring 	opts->o_header.u.h_seq = cpu_to_le32(seq);
964489d8e55SAlexander Aring }
965489d8e55SAlexander Aring 
midcomms_new_msg_cb(void * data)9665c16febbSAlexander Aring static void midcomms_new_msg_cb(void *data)
967489d8e55SAlexander Aring {
9685c16febbSAlexander Aring 	struct dlm_mhandle *mh = data;
9695c16febbSAlexander Aring 
970489d8e55SAlexander Aring 	atomic_inc(&mh->node->send_queue_cnt);
971489d8e55SAlexander Aring 
972dbb751ffSAlexander Aring 	spin_lock_bh(&mh->node->send_queue_lock);
973489d8e55SAlexander Aring 	list_add_tail_rcu(&mh->list, &mh->node->send_queue);
974dbb751ffSAlexander Aring 	spin_unlock_bh(&mh->node->send_queue_lock);
975489d8e55SAlexander Aring 
976d00725caSAlexander Aring 	mh->seq = atomic_fetch_inc(&mh->node->seq_send);
977489d8e55SAlexander Aring }
978489d8e55SAlexander Aring 
/* Allocate a lowcomms message for a 3.2 node and prepend the opts header.
 *
 * @mh:         message handle; its opts and inner_p fields are set here
 * @nodeid:     destination node
 * @len:        length of the inner message
 * @allocation: gfp flags passed to dlm_lowcomms_new_msg()
 * @ppc:        set to the start of the inner message on success
 *
 * The buffer is requested DLM_MIDCOMMS_OPT_LEN bytes larger than @len.
 * midcomms_new_msg_cb() is passed as callback with @mh as its argument.
 *
 * Returns the lowcomms message, or NULL if the allocation failed.
 */
static struct dlm_msg *dlm_midcomms_get_msg_3_2(struct dlm_mhandle *mh, int nodeid,
						int len, gfp_t allocation, char **ppc)
{
	struct dlm_msg *msg;

	msg = dlm_lowcomms_new_msg(nodeid, len + DLM_MIDCOMMS_OPT_LEN,
				   allocation, ppc, midcomms_new_msg_cb, mh);
	if (!msg)
		return NULL;

	/* the opts header sits at the very start of the buffer */
	mh->opts = (struct dlm_opts *)*ppc;

	/* add possible options here */
	dlm_fill_opts_header(mh->opts, len, mh->seq);

	/* the inner message starts right behind the opts header */
	*ppc += sizeof(struct dlm_opts);
	mh->inner_p = (const union dlm_packet *)*ppc;

	return msg;
}
1000489d8e55SAlexander Aring 
/* Avoid a checker false positive for nodes_srcu: on success the read lock
 * stays held and is released in dlm_midcomms_commit_mhandle(), which the
 * caller must call.
 */
1004a8449f23SAlexander Aring #ifndef __CHECKER__
/* Allocate a message handle and a send buffer for node @nodeid.
 *
 * @nodeid:     destination node
 * @len:        length of the message the caller wants to send
 * @allocation: gfp flags for the allocations
 * @ppc:        set to where the caller has to write its message
 *
 * Returns the handle, or NULL on failure (unknown node, allocation
 * failure, or a node version other than 3.1/3.2). On success the
 * nodes_srcu read lock is still held; its index is kept in mh->idx.
 */
struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
					     gfp_t allocation, char **ppc)
{
	struct midcomms_node *node;
	struct dlm_mhandle *mh;
	struct dlm_msg *msg;
	int idx;

	idx = srcu_read_lock(&nodes_srcu);
	node = nodeid2node(nodeid);
	if (WARN_ON_ONCE(!node))
		goto err;

	/* this is a bug, however we going on and hope it will be resolved */
	WARN_ON_ONCE(test_bit(DLM_NODE_FLAG_STOP_TX, &node->flags));

	mh = dlm_allocate_mhandle(allocation);
	if (!mh)
		goto err;

	mh->node = node;
	mh->idx = idx;
	mh->ack_rcv = NULL;
	mh->committed = false;

	switch (node->version) {
	case DLM_VERSION_3_1:
		/* plain message, no opts header and no callback */
		msg = dlm_lowcomms_new_msg(nodeid, len, allocation, ppc,
					   NULL, NULL);
		break;
	case DLM_VERSION_3_2:
		/* send ack back if necessary */
		dlm_send_ack_threshold(node, DLM_SEND_ACK_BACK_MSG_THRESHOLD);

		msg = dlm_midcomms_get_msg_3_2(mh, nodeid, len, allocation,
					       ppc);
		break;
	default:
		WARN_ON_ONCE(1);
		msg = NULL;
		break;
	}

	if (!msg) {
		dlm_free_mhandle(mh);
		goto err;
	}

	mh->msg = msg;

	/* keep in mind that is a must to call
	 * dlm_midcomms_commit_msg() which releases
	 * nodes_srcu using mh->idx which is assumed
	 * here that the application will call it.
	 */
	return mh;

err:
	srcu_read_unlock(&nodes_srcu, idx);
	return NULL;
}
1070a8449f23SAlexander Aring #endif
1071489d8e55SAlexander Aring 
dlm_midcomms_commit_msg_3_2_trace(const struct dlm_mhandle * mh,const void * name,int namelen)1072e01c4b7bSAlexander Aring static void dlm_midcomms_commit_msg_3_2_trace(const struct dlm_mhandle *mh,
1073e01c4b7bSAlexander Aring 					      const void *name, int namelen)
1074e01c4b7bSAlexander Aring {
1075e01c4b7bSAlexander Aring 	switch (mh->inner_p->header.h_cmd) {
1076e01c4b7bSAlexander Aring 	case DLM_MSG:
107717827754SAlexander Aring 		trace_dlm_send_message(mh->node->nodeid, mh->seq,
107817827754SAlexander Aring 				       &mh->inner_p->message,
1079e01c4b7bSAlexander Aring 				       name, namelen);
1080e01c4b7bSAlexander Aring 		break;
1081e01c4b7bSAlexander Aring 	case DLM_RCOM:
108217827754SAlexander Aring 		trace_dlm_send_rcom(mh->node->nodeid, mh->seq,
108317827754SAlexander Aring 				    &mh->inner_p->rcom);
1084e01c4b7bSAlexander Aring 		break;
1085e01c4b7bSAlexander Aring 	default:
1086e01c4b7bSAlexander Aring 		/* nothing to trace */
1087e01c4b7bSAlexander Aring 		break;
1088e01c4b7bSAlexander Aring 	}
1089e01c4b7bSAlexander Aring }
1090e01c4b7bSAlexander Aring 
dlm_midcomms_commit_msg_3_2(struct dlm_mhandle * mh,const void * name,int namelen)1091e01c4b7bSAlexander Aring static void dlm_midcomms_commit_msg_3_2(struct dlm_mhandle *mh,
1092e01c4b7bSAlexander Aring 					const void *name, int namelen)
1093489d8e55SAlexander Aring {
1094489d8e55SAlexander Aring 	/* nexthdr chain for fast lookup */
10955b787667SAlexander Aring 	mh->opts->o_nextcmd = mh->inner_p->header.h_cmd;
1096489d8e55SAlexander Aring 	mh->committed = true;
1097e01c4b7bSAlexander Aring 	dlm_midcomms_commit_msg_3_2_trace(mh, name, namelen);
1098489d8e55SAlexander Aring 	dlm_lowcomms_commit_msg(mh->msg);
1099489d8e55SAlexander Aring }
1100489d8e55SAlexander Aring 
/* Avoid a checker false positive for nodes_srcu: the read lock released
 * here was taken in dlm_midcomms_get_mhandle().
 */
1104a8449f23SAlexander Aring #ifndef __CHECKER__
dlm_midcomms_commit_mhandle(struct dlm_mhandle * mh,const void * name,int namelen)1105e01c4b7bSAlexander Aring void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh,
1106e01c4b7bSAlexander Aring 				 const void *name, int namelen)
1107489d8e55SAlexander Aring {
1108e01c4b7bSAlexander Aring 
1109489d8e55SAlexander Aring 	switch (mh->node->version) {
1110489d8e55SAlexander Aring 	case DLM_VERSION_3_1:
1111489d8e55SAlexander Aring 		srcu_read_unlock(&nodes_srcu, mh->idx);
1112489d8e55SAlexander Aring 
1113489d8e55SAlexander Aring 		dlm_lowcomms_commit_msg(mh->msg);
1114489d8e55SAlexander Aring 		dlm_lowcomms_put_msg(mh->msg);
1115489d8e55SAlexander Aring 		/* mh is not part of rcu list in this case */
11166c547f26SAlexander Aring 		dlm_free_mhandle(mh);
1117489d8e55SAlexander Aring 		break;
1118489d8e55SAlexander Aring 	case DLM_VERSION_3_2:
1119724b6babSAlexander Aring 		/* held rcu read lock here, because we sending the
1120724b6babSAlexander Aring 		 * dlm message out, when we do that we could receive
1121724b6babSAlexander Aring 		 * an ack back which releases the mhandle and we
1122724b6babSAlexander Aring 		 * get a use after free.
1123724b6babSAlexander Aring 		 */
1124724b6babSAlexander Aring 		rcu_read_lock();
1125e01c4b7bSAlexander Aring 		dlm_midcomms_commit_msg_3_2(mh, name, namelen);
1126489d8e55SAlexander Aring 		srcu_read_unlock(&nodes_srcu, mh->idx);
1127724b6babSAlexander Aring 		rcu_read_unlock();
1128489d8e55SAlexander Aring 		break;
1129489d8e55SAlexander Aring 	default:
1130489d8e55SAlexander Aring 		srcu_read_unlock(&nodes_srcu, mh->idx);
1131775af207SAlexander Aring 		WARN_ON_ONCE(1);
1132489d8e55SAlexander Aring 		break;
1133489d8e55SAlexander Aring 	}
1134489d8e55SAlexander Aring }
1135a8449f23SAlexander Aring #endif
1136489d8e55SAlexander Aring 
/* Start midcomms by starting lowcomms.
 *
 * Returns whatever dlm_lowcomms_start() returns.
 */
int dlm_midcomms_start(void)
{
	return dlm_lowcomms_start();
}
11418b0188b0SAlexander Aring 
/* Stop midcomms; this only forwards to dlm_lowcomms_stop(). */
void dlm_midcomms_stop(void)
{
	dlm_lowcomms_stop();
}
11468b0188b0SAlexander Aring 
dlm_midcomms_init(void)11478b0188b0SAlexander Aring void dlm_midcomms_init(void)
11488b0188b0SAlexander Aring {
1149489d8e55SAlexander Aring 	int i;
1150489d8e55SAlexander Aring 
1151489d8e55SAlexander Aring 	for (i = 0; i < CONN_HASH_SIZE; i++)
1152489d8e55SAlexander Aring 		INIT_HLIST_HEAD(&node_hash[i]);
1153489d8e55SAlexander Aring 
11548b0188b0SAlexander Aring 	dlm_lowcomms_init();
11558b0188b0SAlexander Aring }
11568b0188b0SAlexander Aring 
midcomms_node_release(struct rcu_head * rcu)115763e711b0SAlexander Aring static void midcomms_node_release(struct rcu_head *rcu)
115863e711b0SAlexander Aring {
115963e711b0SAlexander Aring 	struct midcomms_node *node = container_of(rcu, struct midcomms_node, rcu);
116063e711b0SAlexander Aring 
116163e711b0SAlexander Aring 	WARN_ON_ONCE(atomic_read(&node->send_queue_cnt));
116263e711b0SAlexander Aring 	dlm_send_queue_flush(node);
116363e711b0SAlexander Aring 	kfree(node);
116463e711b0SAlexander Aring }
116563e711b0SAlexander Aring 
dlm_midcomms_exit(void)11668b0188b0SAlexander Aring void dlm_midcomms_exit(void)
11678b0188b0SAlexander Aring {
116863e711b0SAlexander Aring 	struct midcomms_node *node;
116963e711b0SAlexander Aring 	int i, idx;
117063e711b0SAlexander Aring 
117163e711b0SAlexander Aring 	idx = srcu_read_lock(&nodes_srcu);
117263e711b0SAlexander Aring 	for (i = 0; i < CONN_HASH_SIZE; i++) {
117363e711b0SAlexander Aring 		hlist_for_each_entry_rcu(node, &node_hash[i], hlist) {
117463e711b0SAlexander Aring 			dlm_delete_debug_comms_file(node->debugfs);
117563e711b0SAlexander Aring 
117663e711b0SAlexander Aring 			spin_lock(&nodes_lock);
117763e711b0SAlexander Aring 			hlist_del_rcu(&node->hlist);
117863e711b0SAlexander Aring 			spin_unlock(&nodes_lock);
117963e711b0SAlexander Aring 
118063e711b0SAlexander Aring 			call_srcu(&nodes_srcu, &node->rcu, midcomms_node_release);
118163e711b0SAlexander Aring 		}
118263e711b0SAlexander Aring 	}
118363e711b0SAlexander Aring 	srcu_read_unlock(&nodes_srcu, idx);
118463e711b0SAlexander Aring 
11858b0188b0SAlexander Aring 	dlm_lowcomms_exit();
1186489d8e55SAlexander Aring }
1187489d8e55SAlexander Aring 
dlm_act_fin_ack_rcv(struct midcomms_node * node)1188489d8e55SAlexander Aring static void dlm_act_fin_ack_rcv(struct midcomms_node *node)
1189489d8e55SAlexander Aring {
1190489d8e55SAlexander Aring 	spin_lock(&node->state_lock);
1191489d8e55SAlexander Aring 	pr_debug("receive active fin ack from node %d with state %s\n",
1192489d8e55SAlexander Aring 		 node->nodeid, dlm_state_str(node->state));
1193489d8e55SAlexander Aring 
1194489d8e55SAlexander Aring 	switch (node->state) {
1195489d8e55SAlexander Aring 	case DLM_FIN_WAIT1:
1196489d8e55SAlexander Aring 		node->state = DLM_FIN_WAIT2;
1197489d8e55SAlexander Aring 		pr_debug("switch node %d to state %s\n",
1198489d8e55SAlexander Aring 			 node->nodeid, dlm_state_str(node->state));
1199489d8e55SAlexander Aring 		break;
1200489d8e55SAlexander Aring 	case DLM_CLOSING:
1201489d8e55SAlexander Aring 		midcomms_node_reset(node);
1202489d8e55SAlexander Aring 		pr_debug("switch node %d to state %s\n",
1203489d8e55SAlexander Aring 			 node->nodeid, dlm_state_str(node->state));
1204489d8e55SAlexander Aring 		break;
1205489d8e55SAlexander Aring 	case DLM_CLOSED:
1206489d8e55SAlexander Aring 		/* not valid but somehow we got what we want */
1207489d8e55SAlexander Aring 		wake_up(&node->shutdown_wait);
1208489d8e55SAlexander Aring 		break;
1209489d8e55SAlexander Aring 	default:
1210489d8e55SAlexander Aring 		spin_unlock(&node->state_lock);
121131864097SAlexander Aring 		log_print("%s: unexpected state: %d",
1212489d8e55SAlexander Aring 			  __func__, node->state);
1213775af207SAlexander Aring 		WARN_ON_ONCE(1);
1214489d8e55SAlexander Aring 		return;
1215489d8e55SAlexander Aring 	}
1216489d8e55SAlexander Aring 	spin_unlock(&node->state_lock);
1217489d8e55SAlexander Aring }
1218489d8e55SAlexander Aring 
dlm_midcomms_add_member(int nodeid)1219489d8e55SAlexander Aring void dlm_midcomms_add_member(int nodeid)
1220489d8e55SAlexander Aring {
1221489d8e55SAlexander Aring 	struct midcomms_node *node;
1222489d8e55SAlexander Aring 	int idx;
1223489d8e55SAlexander Aring 
1224489d8e55SAlexander Aring 	idx = srcu_read_lock(&nodes_srcu);
122563e711b0SAlexander Aring 	node = nodeid2node(nodeid);
122663e711b0SAlexander Aring 	if (WARN_ON_ONCE(!node)) {
1227489d8e55SAlexander Aring 		srcu_read_unlock(&nodes_srcu, idx);
1228489d8e55SAlexander Aring 		return;
1229489d8e55SAlexander Aring 	}
1230489d8e55SAlexander Aring 
1231489d8e55SAlexander Aring 	spin_lock(&node->state_lock);
1232489d8e55SAlexander Aring 	if (!node->users) {
1233489d8e55SAlexander Aring 		pr_debug("receive add member from node %d with state %s\n",
1234489d8e55SAlexander Aring 			 node->nodeid, dlm_state_str(node->state));
1235489d8e55SAlexander Aring 		switch (node->state) {
1236489d8e55SAlexander Aring 		case DLM_ESTABLISHED:
1237489d8e55SAlexander Aring 			break;
1238489d8e55SAlexander Aring 		case DLM_CLOSED:
1239489d8e55SAlexander Aring 			node->state = DLM_ESTABLISHED;
1240489d8e55SAlexander Aring 			pr_debug("switch node %d to state %s\n",
1241489d8e55SAlexander Aring 				 node->nodeid, dlm_state_str(node->state));
1242489d8e55SAlexander Aring 			break;
1243489d8e55SAlexander Aring 		default:
1244489d8e55SAlexander Aring 			/* some invalid state passive shutdown
1245489d8e55SAlexander Aring 			 * was failed, we try to reset and
1246489d8e55SAlexander Aring 			 * hope it will go on.
1247489d8e55SAlexander Aring 			 */
12487d3848c0SColin Ian King 			log_print("reset node %d because shutdown stuck",
1249489d8e55SAlexander Aring 				  node->nodeid);
1250489d8e55SAlexander Aring 
1251489d8e55SAlexander Aring 			midcomms_node_reset(node);
1252489d8e55SAlexander Aring 			node->state = DLM_ESTABLISHED;
1253489d8e55SAlexander Aring 			break;
1254489d8e55SAlexander Aring 		}
1255489d8e55SAlexander Aring 	}
1256489d8e55SAlexander Aring 
1257489d8e55SAlexander Aring 	node->users++;
12581aafd9c2SAlexander Aring 	pr_debug("node %d users inc count %d\n", nodeid, node->users);
1259489d8e55SAlexander Aring 	spin_unlock(&node->state_lock);
1260489d8e55SAlexander Aring 
1261489d8e55SAlexander Aring 	srcu_read_unlock(&nodes_srcu, idx);
1262489d8e55SAlexander Aring }
1263489d8e55SAlexander Aring 
dlm_midcomms_remove_member(int nodeid)1264489d8e55SAlexander Aring void dlm_midcomms_remove_member(int nodeid)
1265489d8e55SAlexander Aring {
1266489d8e55SAlexander Aring 	struct midcomms_node *node;
1267489d8e55SAlexander Aring 	int idx;
1268489d8e55SAlexander Aring 
1269489d8e55SAlexander Aring 	idx = srcu_read_lock(&nodes_srcu);
127063e711b0SAlexander Aring 	node = nodeid2node(nodeid);
1271e4393e9eSAlexander Aring 	/* in case of dlm_midcomms_close() removes node */
1272e4393e9eSAlexander Aring 	if (!node) {
1273489d8e55SAlexander Aring 		srcu_read_unlock(&nodes_srcu, idx);
1274489d8e55SAlexander Aring 		return;
1275489d8e55SAlexander Aring 	}
1276489d8e55SAlexander Aring 
1277489d8e55SAlexander Aring 	spin_lock(&node->state_lock);
1278e4393e9eSAlexander Aring 	/* case of dlm_midcomms_addr() created node but
1279e4393e9eSAlexander Aring 	 * was not added before because dlm_midcomms_close()
1280e4393e9eSAlexander Aring 	 * removed the node
1281e4393e9eSAlexander Aring 	 */
1282e4393e9eSAlexander Aring 	if (!node->users) {
1283e4393e9eSAlexander Aring 		spin_unlock(&node->state_lock);
1284e4393e9eSAlexander Aring 		srcu_read_unlock(&nodes_srcu, idx);
1285e4393e9eSAlexander Aring 		return;
1286e4393e9eSAlexander Aring 	}
1287e4393e9eSAlexander Aring 
1288489d8e55SAlexander Aring 	node->users--;
12891aafd9c2SAlexander Aring 	pr_debug("node %d users dec count %d\n", nodeid, node->users);
1290489d8e55SAlexander Aring 
1291489d8e55SAlexander Aring 	/* hitting users count to zero means the
1292489d8e55SAlexander Aring 	 * other side is running dlm_midcomms_stop()
1293489d8e55SAlexander Aring 	 * we meet us to have a clean disconnect.
1294489d8e55SAlexander Aring 	 */
1295489d8e55SAlexander Aring 	if (node->users == 0) {
1296489d8e55SAlexander Aring 		pr_debug("receive remove member from node %d with state %s\n",
1297489d8e55SAlexander Aring 			 node->nodeid, dlm_state_str(node->state));
1298489d8e55SAlexander Aring 		switch (node->state) {
1299489d8e55SAlexander Aring 		case DLM_ESTABLISHED:
1300489d8e55SAlexander Aring 			break;
1301489d8e55SAlexander Aring 		case DLM_CLOSE_WAIT:
1302489d8e55SAlexander Aring 			/* passive shutdown DLM_LAST_ACK case 2 */
1303489d8e55SAlexander Aring 			node->state = DLM_LAST_ACK;
1304489d8e55SAlexander Aring 			pr_debug("switch node %d to state %s case 2\n",
1305489d8e55SAlexander Aring 				 node->nodeid, dlm_state_str(node->state));
1306a5849636SAlexander Aring 			set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
1307a5849636SAlexander Aring 			dlm_send_fin(node, dlm_pas_fin_ack_rcv);
1308a5849636SAlexander Aring 			break;
1309489d8e55SAlexander Aring 		case DLM_LAST_ACK:
1310489d8e55SAlexander Aring 			/* probably receive fin caught it, do nothing */
1311489d8e55SAlexander Aring 			break;
1312489d8e55SAlexander Aring 		case DLM_CLOSED:
1313489d8e55SAlexander Aring 			/* already gone, do nothing */
1314489d8e55SAlexander Aring 			break;
1315489d8e55SAlexander Aring 		default:
131631864097SAlexander Aring 			log_print("%s: unexpected state: %d",
1317489d8e55SAlexander Aring 				  __func__, node->state);
1318489d8e55SAlexander Aring 			break;
1319489d8e55SAlexander Aring 		}
1320489d8e55SAlexander Aring 	}
1321489d8e55SAlexander Aring 	spin_unlock(&node->state_lock);
1322489d8e55SAlexander Aring 
1323489d8e55SAlexander Aring 	srcu_read_unlock(&nodes_srcu, idx);
1324489d8e55SAlexander Aring }
1325489d8e55SAlexander Aring 
dlm_midcomms_version_wait(void)1326b8b750e0SAlexander Aring void dlm_midcomms_version_wait(void)
1327b8b750e0SAlexander Aring {
1328b8b750e0SAlexander Aring 	struct midcomms_node *node;
1329b8b750e0SAlexander Aring 	int i, idx, ret;
1330b8b750e0SAlexander Aring 
1331b8b750e0SAlexander Aring 	idx = srcu_read_lock(&nodes_srcu);
1332b8b750e0SAlexander Aring 	for (i = 0; i < CONN_HASH_SIZE; i++) {
1333b8b750e0SAlexander Aring 		hlist_for_each_entry_rcu(node, &node_hash[i], hlist) {
1334b8b750e0SAlexander Aring 			ret = wait_event_timeout(node->shutdown_wait,
1335b8b750e0SAlexander Aring 						 node->version != DLM_VERSION_NOT_SET ||
1336b8b750e0SAlexander Aring 						 node->state == DLM_CLOSED ||
1337b8b750e0SAlexander Aring 						 test_bit(DLM_NODE_FLAG_CLOSE, &node->flags),
1338b8b750e0SAlexander Aring 						 DLM_SHUTDOWN_TIMEOUT);
1339b8b750e0SAlexander Aring 			if (!ret || test_bit(DLM_NODE_FLAG_CLOSE, &node->flags))
1340b8b750e0SAlexander Aring 				pr_debug("version wait timed out for node %d with state %s\n",
1341b8b750e0SAlexander Aring 					 node->nodeid, dlm_state_str(node->state));
1342b8b750e0SAlexander Aring 		}
1343b8b750e0SAlexander Aring 	}
1344b8b750e0SAlexander Aring 	srcu_read_unlock(&nodes_srcu, idx);
1345b8b750e0SAlexander Aring }
1346b8b750e0SAlexander Aring 
midcomms_shutdown(struct midcomms_node * node)1347489d8e55SAlexander Aring static void midcomms_shutdown(struct midcomms_node *node)
1348489d8e55SAlexander Aring {
1349489d8e55SAlexander Aring 	int ret;
1350489d8e55SAlexander Aring 
1351489d8e55SAlexander Aring 	/* old protocol, we don't wait for pending operations */
1352489d8e55SAlexander Aring 	switch (node->version) {
1353489d8e55SAlexander Aring 	case DLM_VERSION_3_2:
1354489d8e55SAlexander Aring 		break;
1355489d8e55SAlexander Aring 	default:
1356489d8e55SAlexander Aring 		return;
1357489d8e55SAlexander Aring 	}
1358489d8e55SAlexander Aring 
1359489d8e55SAlexander Aring 	spin_lock(&node->state_lock);
1360489d8e55SAlexander Aring 	pr_debug("receive active shutdown for node %d with state %s\n",
1361489d8e55SAlexander Aring 		 node->nodeid, dlm_state_str(node->state));
1362489d8e55SAlexander Aring 	switch (node->state) {
1363489d8e55SAlexander Aring 	case DLM_ESTABLISHED:
1364489d8e55SAlexander Aring 		node->state = DLM_FIN_WAIT1;
1365489d8e55SAlexander Aring 		pr_debug("switch node %d to state %s case 2\n",
1366489d8e55SAlexander Aring 			 node->nodeid, dlm_state_str(node->state));
1367a5849636SAlexander Aring 		dlm_send_fin(node, dlm_act_fin_ack_rcv);
1368489d8e55SAlexander Aring 		break;
1369489d8e55SAlexander Aring 	case DLM_CLOSED:
1370489d8e55SAlexander Aring 		/* we have what we want */
137154fbe0c1SAlexander Aring 		break;
1372489d8e55SAlexander Aring 	default:
1373489d8e55SAlexander Aring 		/* busy to enter DLM_FIN_WAIT1, wait until passive
1374489d8e55SAlexander Aring 		 * done in shutdown_wait to enter DLM_CLOSED.
1375489d8e55SAlexander Aring 		 */
1376489d8e55SAlexander Aring 		break;
1377489d8e55SAlexander Aring 	}
1378489d8e55SAlexander Aring 	spin_unlock(&node->state_lock);
1379489d8e55SAlexander Aring 
1380489d8e55SAlexander Aring 	if (DLM_DEBUG_FENCE_TERMINATION)
1381489d8e55SAlexander Aring 		msleep(5000);
1382489d8e55SAlexander Aring 
1383489d8e55SAlexander Aring 	/* wait for other side dlm + fin */
1384489d8e55SAlexander Aring 	ret = wait_event_timeout(node->shutdown_wait,
1385489d8e55SAlexander Aring 				 node->state == DLM_CLOSED ||
1386489d8e55SAlexander Aring 				 test_bit(DLM_NODE_FLAG_CLOSE, &node->flags),
1387489d8e55SAlexander Aring 				 DLM_SHUTDOWN_TIMEOUT);
138863e711b0SAlexander Aring 	if (!ret)
1389489d8e55SAlexander Aring 		pr_debug("active shutdown timed out for node %d with state %s\n",
1390489d8e55SAlexander Aring 			 node->nodeid, dlm_state_str(node->state));
139154fbe0c1SAlexander Aring 	else
1392489d8e55SAlexander Aring 		pr_debug("active shutdown done for node %d with state %s\n",
1393489d8e55SAlexander Aring 			 node->nodeid, dlm_state_str(node->state));
1394489d8e55SAlexander Aring }
1395489d8e55SAlexander Aring 
dlm_midcomms_shutdown(void)1396489d8e55SAlexander Aring void dlm_midcomms_shutdown(void)
1397489d8e55SAlexander Aring {
1398489d8e55SAlexander Aring 	struct midcomms_node *node;
1399489d8e55SAlexander Aring 	int i, idx;
1400489d8e55SAlexander Aring 
1401489d8e55SAlexander Aring 	mutex_lock(&close_lock);
1402489d8e55SAlexander Aring 	idx = srcu_read_lock(&nodes_srcu);
1403489d8e55SAlexander Aring 	for (i = 0; i < CONN_HASH_SIZE; i++) {
1404489d8e55SAlexander Aring 		hlist_for_each_entry_rcu(node, &node_hash[i], hlist) {
1405489d8e55SAlexander Aring 			midcomms_shutdown(node);
1406489d8e55SAlexander Aring 		}
1407489d8e55SAlexander Aring 	}
140854fbe0c1SAlexander Aring 
140954fbe0c1SAlexander Aring 	dlm_lowcomms_shutdown();
1410fd508e08SAlexander Aring 
1411fd508e08SAlexander Aring 	for (i = 0; i < CONN_HASH_SIZE; i++) {
1412fd508e08SAlexander Aring 		hlist_for_each_entry_rcu(node, &node_hash[i], hlist) {
1413fd508e08SAlexander Aring 			midcomms_node_reset(node);
1414fd508e08SAlexander Aring 		}
1415fd508e08SAlexander Aring 	}
1416fd508e08SAlexander Aring 	srcu_read_unlock(&nodes_srcu, idx);
1417fd508e08SAlexander Aring 	mutex_unlock(&close_lock);
1418489d8e55SAlexander Aring }
1419489d8e55SAlexander Aring 
dlm_midcomms_close(int nodeid)1420489d8e55SAlexander Aring int dlm_midcomms_close(int nodeid)
1421489d8e55SAlexander Aring {
1422489d8e55SAlexander Aring 	struct midcomms_node *node;
1423489d8e55SAlexander Aring 	int idx, ret;
1424489d8e55SAlexander Aring 
1425489d8e55SAlexander Aring 	idx = srcu_read_lock(&nodes_srcu);
1426489d8e55SAlexander Aring 	/* Abort pending close/remove operation */
142763e711b0SAlexander Aring 	node = nodeid2node(nodeid);
1428489d8e55SAlexander Aring 	if (node) {
1429489d8e55SAlexander Aring 		/* let shutdown waiters leave */
1430489d8e55SAlexander Aring 		set_bit(DLM_NODE_FLAG_CLOSE, &node->flags);
1431489d8e55SAlexander Aring 		wake_up(&node->shutdown_wait);
1432489d8e55SAlexander Aring 	}
1433489d8e55SAlexander Aring 	srcu_read_unlock(&nodes_srcu, idx);
1434489d8e55SAlexander Aring 
1435489d8e55SAlexander Aring 	synchronize_srcu(&nodes_srcu);
1436489d8e55SAlexander Aring 
1437489d8e55SAlexander Aring 	mutex_lock(&close_lock);
1438643f5cfaSAlexander Aring 	idx = srcu_read_lock(&nodes_srcu);
143963e711b0SAlexander Aring 	node = nodeid2node(nodeid);
1440489d8e55SAlexander Aring 	if (!node) {
1441489d8e55SAlexander Aring 		srcu_read_unlock(&nodes_srcu, idx);
1442643f5cfaSAlexander Aring 		mutex_unlock(&close_lock);
1443489d8e55SAlexander Aring 		return dlm_lowcomms_close(nodeid);
1444489d8e55SAlexander Aring 	}
1445489d8e55SAlexander Aring 
1446489d8e55SAlexander Aring 	ret = dlm_lowcomms_close(nodeid);
144763e711b0SAlexander Aring 	dlm_delete_debug_comms_file(node->debugfs);
144863e711b0SAlexander Aring 
144963e711b0SAlexander Aring 	spin_lock(&nodes_lock);
145063e711b0SAlexander Aring 	hlist_del_rcu(&node->hlist);
145163e711b0SAlexander Aring 	spin_unlock(&nodes_lock);
1452489d8e55SAlexander Aring 	srcu_read_unlock(&nodes_srcu, idx);
145363e711b0SAlexander Aring 
145463e711b0SAlexander Aring 	/* wait that all readers left until flush send queue */
145563e711b0SAlexander Aring 	synchronize_srcu(&nodes_srcu);
145663e711b0SAlexander Aring 
145763e711b0SAlexander Aring 	/* drop all pending dlm messages, this is fine as
145863e711b0SAlexander Aring 	 * this function get called when the node is fenced
145963e711b0SAlexander Aring 	 */
146063e711b0SAlexander Aring 	dlm_send_queue_flush(node);
146163e711b0SAlexander Aring 
146263e711b0SAlexander Aring 	call_srcu(&nodes_srcu, &node->rcu, midcomms_node_release);
1463489d8e55SAlexander Aring 	mutex_unlock(&close_lock);
1464489d8e55SAlexander Aring 
1465489d8e55SAlexander Aring 	return ret;
1466489d8e55SAlexander Aring }
14679af5b8f0SAlexander Aring 
/* Debug functionality: send a raw dlm message supplied by user space.
 *
 * Context handed to midcomms_new_rawmsg_cb() while the lowcomms message
 * is created.
 */
struct dlm_rawmsg_data {
	/* node whose seq_send counter supplies a missing sequence number */
	struct midcomms_node *node;
	/* raw message; the callback interprets it as a struct dlm_header */
	void *buf;
};
14739af5b8f0SAlexander Aring 
midcomms_new_rawmsg_cb(void * data)14749af5b8f0SAlexander Aring static void midcomms_new_rawmsg_cb(void *data)
14759af5b8f0SAlexander Aring {
14769af5b8f0SAlexander Aring 	struct dlm_rawmsg_data *rd = data;
14779af5b8f0SAlexander Aring 	struct dlm_header *h = rd->buf;
14789af5b8f0SAlexander Aring 
14799af5b8f0SAlexander Aring 	switch (h->h_version) {
14809af5b8f0SAlexander Aring 	case cpu_to_le32(DLM_VERSION_3_1):
14819af5b8f0SAlexander Aring 		break;
14829af5b8f0SAlexander Aring 	default:
14839af5b8f0SAlexander Aring 		switch (h->h_cmd) {
14849af5b8f0SAlexander Aring 		case DLM_OPTS:
14859af5b8f0SAlexander Aring 			if (!h->u.h_seq)
1486d00725caSAlexander Aring 				h->u.h_seq = cpu_to_le32(atomic_fetch_inc(&rd->node->seq_send));
14879af5b8f0SAlexander Aring 			break;
14889af5b8f0SAlexander Aring 		default:
14899af5b8f0SAlexander Aring 			break;
14909af5b8f0SAlexander Aring 		}
14919af5b8f0SAlexander Aring 		break;
14929af5b8f0SAlexander Aring 	}
14939af5b8f0SAlexander Aring }
14949af5b8f0SAlexander Aring 
dlm_midcomms_rawmsg_send(struct midcomms_node * node,void * buf,int buflen)14959af5b8f0SAlexander Aring int dlm_midcomms_rawmsg_send(struct midcomms_node *node, void *buf,
14969af5b8f0SAlexander Aring 			     int buflen)
14979af5b8f0SAlexander Aring {
14989af5b8f0SAlexander Aring 	struct dlm_rawmsg_data rd;
14999af5b8f0SAlexander Aring 	struct dlm_msg *msg;
15009af5b8f0SAlexander Aring 	char *msgbuf;
15019af5b8f0SAlexander Aring 
15029af5b8f0SAlexander Aring 	rd.node = node;
15039af5b8f0SAlexander Aring 	rd.buf = buf;
15049af5b8f0SAlexander Aring 
15059af5b8f0SAlexander Aring 	msg = dlm_lowcomms_new_msg(node->nodeid, buflen, GFP_NOFS,
15069af5b8f0SAlexander Aring 				   &msgbuf, midcomms_new_rawmsg_cb, &rd);
15079af5b8f0SAlexander Aring 	if (!msg)
15089af5b8f0SAlexander Aring 		return -ENOMEM;
15099af5b8f0SAlexander Aring 
15109af5b8f0SAlexander Aring 	memcpy(msgbuf, buf, buflen);
15119af5b8f0SAlexander Aring 	dlm_lowcomms_commit_msg(msg);
15129af5b8f0SAlexander Aring 	return 0;
15139af5b8f0SAlexander Aring }
15149af5b8f0SAlexander Aring 
1515