xref: /openbmc/linux/fs/dlm/lowcomms.c (revision 6f0b0b5d)
12522fe45SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
26ed7257bSPatrick Caulfield /******************************************************************************
36ed7257bSPatrick Caulfield *******************************************************************************
46ed7257bSPatrick Caulfield **
56ed7257bSPatrick Caulfield **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
65e9ccc37SChristine Caulfield **  Copyright (C) 2004-2009 Red Hat, Inc.  All rights reserved.
76ed7257bSPatrick Caulfield **
86ed7257bSPatrick Caulfield **
96ed7257bSPatrick Caulfield *******************************************************************************
106ed7257bSPatrick Caulfield ******************************************************************************/
116ed7257bSPatrick Caulfield 
126ed7257bSPatrick Caulfield /*
136ed7257bSPatrick Caulfield  * lowcomms.c
146ed7257bSPatrick Caulfield  *
156ed7257bSPatrick Caulfield  * This is the "low-level" comms layer.
166ed7257bSPatrick Caulfield  *
176ed7257bSPatrick Caulfield  * It is responsible for sending/receiving messages
186ed7257bSPatrick Caulfield  * from other nodes in the cluster.
196ed7257bSPatrick Caulfield  *
206ed7257bSPatrick Caulfield  * Cluster nodes are referred to by their nodeids. nodeids are
216ed7257bSPatrick Caulfield  * simply 32 bit numbers to the locking module - if they need to
222cf12c0bSJoe Perches  * be expanded for the cluster infrastructure then that is its
236ed7257bSPatrick Caulfield  * responsibility. It is this layer's
246ed7257bSPatrick Caulfield  * responsibility to resolve these into IP addresses or
256ed7257bSPatrick Caulfield  * whatever it needs for inter-node communication.
266ed7257bSPatrick Caulfield  *
276ed7257bSPatrick Caulfield  * The comms level is two kernel threads that deal mainly with
286ed7257bSPatrick Caulfield  * the receiving of messages from other nodes and passing them
296ed7257bSPatrick Caulfield  * up to the mid-level comms layer (which understands the
306ed7257bSPatrick Caulfield  * message format) for execution by the locking core, and
316ed7257bSPatrick Caulfield  * a send thread which does all the setting up of connections
326ed7257bSPatrick Caulfield  * to remote nodes and the sending of data. Threads are not allowed
336ed7257bSPatrick Caulfield  * to send their own data because it may cause them to wait in times
346ed7257bSPatrick Caulfield  * of high load. Also, this way, the sending thread can collect together
356ed7257bSPatrick Caulfield  * messages bound for one node and send them in one block.
366ed7257bSPatrick Caulfield  *
372cf12c0bSJoe Perches  * lowcomms will choose to use either TCP or SCTP as its transport layer
386ed7257bSPatrick Caulfield  * depending on the configuration variable 'protocol'. This should be set
396ed7257bSPatrick Caulfield  * to 0 (default) for TCP or 1 for SCTP. It should be configured using a
406ed7257bSPatrick Caulfield  * cluster-wide mechanism as it must be the same on all nodes of the cluster
416ed7257bSPatrick Caulfield  * for the DLM to function.
426ed7257bSPatrick Caulfield  *
436ed7257bSPatrick Caulfield  */
446ed7257bSPatrick Caulfield 
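/* Illustrative note: the 'protocol' value is normally written by userspace
 * (e.g. dlm_controld) through configfs before any lockspace is created,
 * along the lines of:
 *
 *   echo 1 > /sys/kernel/config/dlm/cluster/protocol   (1 = SCTP, 0 = TCP)
 *
 * The exact path depends on the configfs mount point; see fs/dlm/config.c.
 */
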
456ed7257bSPatrick Caulfield #include <asm/ioctls.h>
466ed7257bSPatrick Caulfield #include <net/sock.h>
476ed7257bSPatrick Caulfield #include <net/tcp.h>
486ed7257bSPatrick Caulfield #include <linux/pagemap.h>
496ed7257bSPatrick Caulfield #include <linux/file.h>
507a936ce7SMatthias Kaehlcke #include <linux/mutex.h>
516ed7257bSPatrick Caulfield #include <linux/sctp.h>
525a0e3ad6STejun Heo #include <linux/slab.h>
532f2d76ccSBenjamin Poirier #include <net/sctp/sctp.h>
5444ad532bSJoe Perches #include <net/ipv6.h>
556ed7257bSPatrick Caulfield 
5692732376SAlexander Aring #include <trace/events/dlm.h>
5792732376SAlexander Aring 
586ed7257bSPatrick Caulfield #include "dlm_internal.h"
596ed7257bSPatrick Caulfield #include "lowcomms.h"
606ed7257bSPatrick Caulfield #include "midcomms.h"
613af2326cSAlexander Aring #include "memory.h"
626ed7257bSPatrick Caulfield #include "config.h"
636ed7257bSPatrick Caulfield 
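/* Amount of socket receive buffer space (4 MiB) that lowcomms asks the
 * networking layer to reserve for DLM sockets.
 */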
646ed7257bSPatrick Caulfield #define NEEDED_RMEM (4*1024*1024)
656ed7257bSPatrick Caulfield 
66f92c8dd7SBob Peterson /* Number of messages to send before rescheduling */
67f92c8dd7SBob Peterson #define MAX_SEND_MSG_COUNT 25
68f92c8dd7SBob Peterson 
696ed7257bSPatrick Caulfield struct connection {
706ed7257bSPatrick Caulfield 	struct socket *sock;	/* NULL if not connected */
716ed7257bSPatrick Caulfield 	uint32_t nodeid;	/* So we know who we are in the list */
726ed7257bSPatrick Caulfield 	struct mutex sock_mutex;
736ed7257bSPatrick Caulfield 	unsigned long flags;
746ed7257bSPatrick Caulfield #define CF_READ_PENDING 1
758a4abb08Stsutomu.owa@toshiba.co.jp #define CF_WRITE_PENDING 2
766ed7257bSPatrick Caulfield #define CF_INIT_PENDING 4
776ed7257bSPatrick Caulfield #define CF_IS_OTHERCON 5
78063c4c99SLars Marowsky-Bree #define CF_CLOSE 6
79b36930ddSDavid Miller #define CF_APP_LIMITED 7
80b2a66629Stsutomu.owa@toshiba.co.jp #define CF_CLOSING 8
814f567acbSAlexander Aring #define CF_CONNECTED 9
824f567acbSAlexander Aring #define CF_RECONNECT 10
834f567acbSAlexander Aring #define CF_DELAY_CONNECT 11
846ed7257bSPatrick Caulfield 	struct list_head writequeue;  /* List of outgoing writequeue_entries */
856ed7257bSPatrick Caulfield 	spinlock_t writequeue_lock;
866ed7257bSPatrick Caulfield 	int retries;
876ed7257bSPatrick Caulfield #define MAX_CONNECT_RETRIES 3
885e9ccc37SChristine Caulfield 	struct hlist_node list;
896ed7257bSPatrick Caulfield 	struct connection *othercon;
90ba868d9dSAlexander Aring 	struct connection *sendcon;
916ed7257bSPatrick Caulfield 	struct work_struct rwork; /* Receive workqueue */
926ed7257bSPatrick Caulfield 	struct work_struct swork; /* Send workqueue */
934798cbbfSAlexander Aring 	unsigned char *rx_buf;	/* receive buffer */
944798cbbfSAlexander Aring 	int rx_buflen;		/* size of rx_buf */
954798cbbfSAlexander Aring 	int rx_leftover;	/* bytes of a partial message kept for the next receive */
96*6f0b0b5dSAlexander Aring 	int mark;		/* skb mark (SO_MARK) used towards this node */
97*6f0b0b5dSAlexander Aring 	int addr_count;		/* number of valid entries in addr[] */
98*6f0b0b5dSAlexander Aring 	int curr_addr_index;	/* next address to try when (re)connecting */
99*6f0b0b5dSAlexander Aring 	struct sockaddr_storage addr[DLM_MAX_ADDR_COUNT];
100*6f0b0b5dSAlexander Aring 	spinlock_t addrs_lock;	/* protects addr[], addr_count, curr_addr_index and mark */
101a47666ebSAlexander Aring 	struct rcu_head rcu;
1026ed7257bSPatrick Caulfield };
1036ed7257bSPatrick Caulfield #define sock2con(x) ((struct connection *)(x)->sk_user_data)
1046ed7257bSPatrick Caulfield 
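/* State for the single listening socket used to accept connections from
 * other cluster nodes; incoming connects are handled from its rwork.
 */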
105d11ccd45SAlexander Aring struct listen_connection {
106d11ccd45SAlexander Aring 	struct socket *sock;
107d11ccd45SAlexander Aring 	struct work_struct rwork;
108d11ccd45SAlexander Aring };
109d11ccd45SAlexander Aring 
110f0747ebfSAlexander Aring #define DLM_WQ_REMAIN_BYTES(e) (PAGE_SIZE - (e)->end)
111f0747ebfSAlexander Aring #define DLM_WQ_LENGTH_BYTES(e) ((e)->end - (e)->offset)
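/* Each writequeue_entry wraps one page: the data still to be sent lives at
 * [offset, offset + len) and new messages are appended at 'end', so the
 * macros above give the free tail space of the page and the used length.
 */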
112f0747ebfSAlexander Aring 
1136ed7257bSPatrick Caulfield /* An entry waiting to be sent */
1146ed7257bSPatrick Caulfield struct writequeue_entry {
1156ed7257bSPatrick Caulfield 	struct list_head list;
1166ed7257bSPatrick Caulfield 	struct page *page;
1176ed7257bSPatrick Caulfield 	int offset;
1186ed7257bSPatrick Caulfield 	int len;
1196ed7257bSPatrick Caulfield 	int end;
1206ed7257bSPatrick Caulfield 	int users;
121706474fbSAlexander Aring 	bool dirty;
1226ed7257bSPatrick Caulfield 	struct connection *con;
1238f2dc78dSAlexander Aring 	struct list_head msgs;
1248f2dc78dSAlexander Aring 	struct kref ref;
1258f2dc78dSAlexander Aring };
1268f2dc78dSAlexander Aring 
1278f2dc78dSAlexander Aring struct dlm_msg {
1288f2dc78dSAlexander Aring 	struct writequeue_entry *entry;
1292874d1a6SAlexander Aring 	struct dlm_msg *orig_msg;
1302874d1a6SAlexander Aring 	bool retransmit;
1318f2dc78dSAlexander Aring 	void *ppc;
1328f2dc78dSAlexander Aring 	int len;
1338f2dc78dSAlexander Aring 	int idx; /* new()/commit() idx exchange */
1348f2dc78dSAlexander Aring 
1358f2dc78dSAlexander Aring 	struct list_head list;
1368f2dc78dSAlexander Aring 	struct kref ref;
1376ed7257bSPatrick Caulfield };
1386ed7257bSPatrick Caulfield 
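/* Per-transport (TCP or SCTP) operations; the active set is chosen at
 * startup and reached through the dlm_proto_ops pointer below.
 */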
139a66c008cSAlexander Aring struct dlm_proto_ops {
1408728a455SAlexander Aring 	bool try_new_addr;
1412dc6b115SAlexander Aring 	const char *name;
1422dc6b115SAlexander Aring 	int proto;
1432dc6b115SAlexander Aring 
1448728a455SAlexander Aring 	int (*connect)(struct connection *con, struct socket *sock,
1458728a455SAlexander Aring 		       struct sockaddr *addr, int addr_len);
1468728a455SAlexander Aring 	void (*sockopts)(struct socket *sock);
1478728a455SAlexander Aring 	int (*bind)(struct socket *sock);
1482dc6b115SAlexander Aring 	int (*listen_validate)(void);
1492dc6b115SAlexander Aring 	void (*listen_sockopts)(struct socket *sock);
1502dc6b115SAlexander Aring 	int (*listen_bind)(struct socket *sock);
151a66c008cSAlexander Aring };
152a66c008cSAlexander Aring 
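/* Original socket callbacks, saved so restore_callbacks() can put a socket
 * back into its default state when a connection is torn down.
 */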
153cc661fc9SBob Peterson static struct listen_sock_callbacks {
154cc661fc9SBob Peterson 	void (*sk_error_report)(struct sock *);
155cc661fc9SBob Peterson 	void (*sk_data_ready)(struct sock *);
156cc661fc9SBob Peterson 	void (*sk_state_change)(struct sock *);
157cc661fc9SBob Peterson 	void (*sk_write_space)(struct sock *);
158cc661fc9SBob Peterson } listen_sock;
159cc661fc9SBob Peterson 
160d11ccd45SAlexander Aring static struct listen_connection listen_con;
161c51c9cd8SAlexander Aring static struct sockaddr_storage dlm_local_addr[DLM_MAX_ADDR_COUNT];
1626ed7257bSPatrick Caulfield static int dlm_local_count;
1636ed7257bSPatrick Caulfield 
1646ed7257bSPatrick Caulfield /* Work queues */
1656ed7257bSPatrick Caulfield static struct workqueue_struct *recv_workqueue;
1666ed7257bSPatrick Caulfield static struct workqueue_struct *send_workqueue;
1676ed7257bSPatrick Caulfield 
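/* Hash of nodeid -> struct connection. Writers add entries under
 * connections_lock, readers walk the chains under connections_srcu.
 */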
1685e9ccc37SChristine Caulfield static struct hlist_head connection_hash[CONN_HASH_SIZE];
169a47666ebSAlexander Aring static DEFINE_SPINLOCK(connections_lock);
170a47666ebSAlexander Aring DEFINE_STATIC_SRCU(connections_srcu);
1716ed7257bSPatrick Caulfield 
172a66c008cSAlexander Aring static const struct dlm_proto_ops *dlm_proto_ops;
173a66c008cSAlexander Aring 
1746ed7257bSPatrick Caulfield static void process_recv_sockets(struct work_struct *work);
1756ed7257bSPatrick Caulfield static void process_send_sockets(struct work_struct *work);
1766ed7257bSPatrick Caulfield 
1771037c2a9SAlexander Aring bool dlm_lowcomms_is_running(void)
1781037c2a9SAlexander Aring {
1791037c2a9SAlexander Aring 	return !!listen_con.sock;
1801037c2a9SAlexander Aring }
1811037c2a9SAlexander Aring 
1823af2326cSAlexander Aring static void writequeue_entry_ctor(void *data)
1833af2326cSAlexander Aring {
1843af2326cSAlexander Aring 	struct writequeue_entry *entry = data;
1853af2326cSAlexander Aring 
1863af2326cSAlexander Aring 	INIT_LIST_HEAD(&entry->msgs);
1873af2326cSAlexander Aring }
1883af2326cSAlexander Aring 
1893af2326cSAlexander Aring struct kmem_cache *dlm_lowcomms_writequeue_cache_create(void)
1903af2326cSAlexander Aring {
1913af2326cSAlexander Aring 	return kmem_cache_create("dlm_writequeue", sizeof(struct writequeue_entry),
1923af2326cSAlexander Aring 				 0, 0, writequeue_entry_ctor);
1933af2326cSAlexander Aring }
1943af2326cSAlexander Aring 
195e4dc81edSAlexander Aring struct kmem_cache *dlm_lowcomms_msg_cache_create(void)
196e4dc81edSAlexander Aring {
197e4dc81edSAlexander Aring 	return kmem_cache_create("dlm_msg", sizeof(struct dlm_msg), 0, 0, NULL);
198e4dc81edSAlexander Aring }
199e4dc81edSAlexander Aring 
20066d5955aSAlexander Aring /* must be called with writequeue_lock held */
20166d5955aSAlexander Aring static struct writequeue_entry *con_next_wq(struct connection *con)
20266d5955aSAlexander Aring {
20366d5955aSAlexander Aring 	struct writequeue_entry *e;
20466d5955aSAlexander Aring 
205dd070a56SAlexander Aring 	e = list_first_entry_or_null(&con->writequeue, struct writequeue_entry,
20666d5955aSAlexander Aring 				     list);
207bcbfea41SAlexander Aring 	/* if len is zero there is nothing to send; if there are users still
208bcbfea41SAlexander Aring 	 * filling the buffer we wait until they are done so we can send more.
209bcbfea41SAlexander Aring 	 */
210dd070a56SAlexander Aring 	if (!e || e->users || e->len == 0)
21166d5955aSAlexander Aring 		return NULL;
21266d5955aSAlexander Aring 
21366d5955aSAlexander Aring 	return e;
21466d5955aSAlexander Aring }
21566d5955aSAlexander Aring 
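/* Look up the connection for @nodeid in hash bucket @r; callers must hold
 * an srcu read lock on connections_srcu (or connections_lock).
 */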
216b38bc9c2SAlexander Aring static struct connection *__find_con(int nodeid, int r)
2175e9ccc37SChristine Caulfield {
2185e9ccc37SChristine Caulfield 	struct connection *con;
2195e9ccc37SChristine Caulfield 
220a47666ebSAlexander Aring 	hlist_for_each_entry_rcu(con, &connection_hash[r], list) {
221b38bc9c2SAlexander Aring 		if (con->nodeid == nodeid)
2225e9ccc37SChristine Caulfield 			return con;
2235e9ccc37SChristine Caulfield 	}
224a47666ebSAlexander Aring 
2255e9ccc37SChristine Caulfield 	return NULL;
2265e9ccc37SChristine Caulfield }
2275e9ccc37SChristine Caulfield 
2286cde210aSAlexander Aring static int dlm_con_init(struct connection *con, int nodeid)
2296ed7257bSPatrick Caulfield {
2304798cbbfSAlexander Aring 	con->rx_buflen = dlm_config.ci_buffer_size;
2314798cbbfSAlexander Aring 	con->rx_buf = kmalloc(con->rx_buflen, GFP_NOFS);
2326cde210aSAlexander Aring 	if (!con->rx_buf)
2336cde210aSAlexander Aring 		return -ENOMEM;
2344798cbbfSAlexander Aring 
2356ed7257bSPatrick Caulfield 	con->nodeid = nodeid;
2366ed7257bSPatrick Caulfield 	mutex_init(&con->sock_mutex);
2376ed7257bSPatrick Caulfield 	INIT_LIST_HEAD(&con->writequeue);
2386ed7257bSPatrick Caulfield 	spin_lock_init(&con->writequeue_lock);
2396ed7257bSPatrick Caulfield 	INIT_WORK(&con->swork, process_send_sockets);
2406ed7257bSPatrick Caulfield 	INIT_WORK(&con->rwork, process_recv_sockets);
2416ed7257bSPatrick Caulfield 
2426cde210aSAlexander Aring 	return 0;
2436cde210aSAlexander Aring }
2446cde210aSAlexander Aring 
2456cde210aSAlexander Aring /*
2466cde210aSAlexander Aring  * If 'alloc' is zero then we don't attempt to create a new
2476cde210aSAlexander Aring  * connection structure for this node.
2486cde210aSAlexander Aring  */
2496cde210aSAlexander Aring static struct connection *nodeid2con(int nodeid, gfp_t alloc)
2506cde210aSAlexander Aring {
2516cde210aSAlexander Aring 	struct connection *con, *tmp;
2526cde210aSAlexander Aring 	int r, ret;
2536cde210aSAlexander Aring 
254b38bc9c2SAlexander Aring 	r = nodeid_hash(nodeid);
255b38bc9c2SAlexander Aring 	con = __find_con(nodeid, r);
2566cde210aSAlexander Aring 	if (con || !alloc)
2576cde210aSAlexander Aring 		return con;
2586cde210aSAlexander Aring 
2596cde210aSAlexander Aring 	con = kzalloc(sizeof(*con), alloc);
2606cde210aSAlexander Aring 	if (!con)
2616cde210aSAlexander Aring 		return NULL;
2626cde210aSAlexander Aring 
2636cde210aSAlexander Aring 	ret = dlm_con_init(con, nodeid);
2646cde210aSAlexander Aring 	if (ret) {
2656cde210aSAlexander Aring 		kfree(con);
2666cde210aSAlexander Aring 		return NULL;
2676cde210aSAlexander Aring 	}
2686cde210aSAlexander Aring 
269a47666ebSAlexander Aring 	spin_lock(&connections_lock);
2704f2b30fdSAlexander Aring 	/* Because multiple workqueues/threads call this function it can
2714f2b30fdSAlexander Aring 	 * race on multiple CPUs. Instead of taking a lock in the hot path
2724f2b30fdSAlexander Aring 	 * __find_con(), we recheck for a recently added node here, under
2734f2b30fdSAlexander Aring 	 * the protection of connections_lock. If one is found we abort
2744f2b30fdSAlexander Aring 	 * our connection creation and return the existing connection.
2754f2b30fdSAlexander Aring 	 */
276b38bc9c2SAlexander Aring 	tmp = __find_con(nodeid, r);
2774f2b30fdSAlexander Aring 	if (tmp) {
2784f2b30fdSAlexander Aring 		spin_unlock(&connections_lock);
2794f2b30fdSAlexander Aring 		kfree(con->rx_buf);
2804f2b30fdSAlexander Aring 		kfree(con);
2814f2b30fdSAlexander Aring 		return tmp;
2824f2b30fdSAlexander Aring 	}
2834f2b30fdSAlexander Aring 
284a47666ebSAlexander Aring 	hlist_add_head_rcu(&con->list, &connection_hash[r]);
285a47666ebSAlexander Aring 	spin_unlock(&connections_lock);
286a47666ebSAlexander Aring 
2876ed7257bSPatrick Caulfield 	return con;
2886ed7257bSPatrick Caulfield }
2896ed7257bSPatrick Caulfield 
2905e9ccc37SChristine Caulfield /* Loop round all connections */
2915e9ccc37SChristine Caulfield static void foreach_conn(void (*conn_func)(struct connection *c))
2925e9ccc37SChristine Caulfield {
293b38bc9c2SAlexander Aring 	int i;
2945e9ccc37SChristine Caulfield 	struct connection *con;
2955e9ccc37SChristine Caulfield 
2965e9ccc37SChristine Caulfield 	for (i = 0; i < CONN_HASH_SIZE; i++) {
297a47666ebSAlexander Aring 		hlist_for_each_entry_rcu(con, &connection_hash[i], list)
2985e9ccc37SChristine Caulfield 			conn_func(con);
2995e9ccc37SChristine Caulfield 	}
3006ed7257bSPatrick Caulfield }
3016ed7257bSPatrick Caulfield 
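/* Returns 1 if the two sockaddrs match in family, address and port,
 * 0 otherwise.
 */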
30240c6b83eSAlexander Aring static int addr_compare(const struct sockaddr_storage *x,
30340c6b83eSAlexander Aring 			const struct sockaddr_storage *y)
30436b71a8bSDavid Teigland {
30536b71a8bSDavid Teigland 	switch (x->ss_family) {
30636b71a8bSDavid Teigland 	case AF_INET: {
30736b71a8bSDavid Teigland 		struct sockaddr_in *sinx = (struct sockaddr_in *)x;
30836b71a8bSDavid Teigland 		struct sockaddr_in *siny = (struct sockaddr_in *)y;
30936b71a8bSDavid Teigland 		if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr)
31036b71a8bSDavid Teigland 			return 0;
31136b71a8bSDavid Teigland 		if (sinx->sin_port != siny->sin_port)
31236b71a8bSDavid Teigland 			return 0;
31336b71a8bSDavid Teigland 		break;
31436b71a8bSDavid Teigland 	}
31536b71a8bSDavid Teigland 	case AF_INET6: {
31636b71a8bSDavid Teigland 		struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x;
31736b71a8bSDavid Teigland 		struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y;
31836b71a8bSDavid Teigland 		if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr))
31936b71a8bSDavid Teigland 			return 0;
32036b71a8bSDavid Teigland 		if (sinx->sin6_port != siny->sin6_port)
32136b71a8bSDavid Teigland 			return 0;
32236b71a8bSDavid Teigland 		break;
32336b71a8bSDavid Teigland 	}
32436b71a8bSDavid Teigland 	default:
32536b71a8bSDavid Teigland 		return 0;
32636b71a8bSDavid Teigland 	}
32736b71a8bSDavid Teigland 	return 1;
32836b71a8bSDavid Teigland }
32936b71a8bSDavid Teigland 
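/* Resolve @nodeid to one of its configured addresses. @sas_out receives the
 * full sockaddr, @sa_out only the address part (no port). When @try_new_addr
 * is set the next stored address is selected for the following call, e.g. to
 * cycle through fallback addresses on reconnect attempts. @mark returns the
 * skb mark configured for this node.
 */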
33036b71a8bSDavid Teigland static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
331e125fbebSAlexander Aring 			  struct sockaddr *sa_out, bool try_new_addr,
332e125fbebSAlexander Aring 			  unsigned int *mark)
33336b71a8bSDavid Teigland {
33436b71a8bSDavid Teigland 	struct sockaddr_storage sas;
335*6f0b0b5dSAlexander Aring 	struct connection *con;
336*6f0b0b5dSAlexander Aring 	int idx;
3376ed7257bSPatrick Caulfield 
3386ed7257bSPatrick Caulfield 	if (!dlm_local_count)
3396ed7257bSPatrick Caulfield 		return -1;
3406ed7257bSPatrick Caulfield 
341*6f0b0b5dSAlexander Aring 	idx = srcu_read_lock(&connections_srcu);
342*6f0b0b5dSAlexander Aring 	con = nodeid2con(nodeid, 0);
343*6f0b0b5dSAlexander Aring 	if (!con) {
344*6f0b0b5dSAlexander Aring 		srcu_read_unlock(&connections_srcu, idx);
345*6f0b0b5dSAlexander Aring 		return -ENOENT;
346*6f0b0b5dSAlexander Aring 	}
347*6f0b0b5dSAlexander Aring 
348*6f0b0b5dSAlexander Aring 	spin_lock(&con->addrs_lock);
349*6f0b0b5dSAlexander Aring 	if (!con->addr_count) {
350*6f0b0b5dSAlexander Aring 		spin_unlock(&con->addrs_lock);
351*6f0b0b5dSAlexander Aring 		srcu_read_unlock(&connections_srcu, idx);
352*6f0b0b5dSAlexander Aring 		return -ENOENT;
353*6f0b0b5dSAlexander Aring 	}
354*6f0b0b5dSAlexander Aring 
355*6f0b0b5dSAlexander Aring 	memcpy(&sas, &con->addr[con->curr_addr_index],
356ee44b4bcSMarcelo Ricardo Leitner 	       sizeof(struct sockaddr_storage));
357ee44b4bcSMarcelo Ricardo Leitner 
35898e1b60eSMike Christie 	if (try_new_addr) {
359*6f0b0b5dSAlexander Aring 		con->curr_addr_index++;
360*6f0b0b5dSAlexander Aring 		if (con->curr_addr_index == con->addr_count)
361*6f0b0b5dSAlexander Aring 			con->curr_addr_index = 0;
36298e1b60eSMike Christie 	}
36336b71a8bSDavid Teigland 
364*6f0b0b5dSAlexander Aring 	*mark = con->mark;
365*6f0b0b5dSAlexander Aring 	spin_unlock(&con->addrs_lock);
366e125fbebSAlexander Aring 
36736b71a8bSDavid Teigland 	if (sas_out)
36836b71a8bSDavid Teigland 		memcpy(sas_out, &sas, sizeof(struct sockaddr_storage));
36936b71a8bSDavid Teigland 
370*6f0b0b5dSAlexander Aring 	if (!sa_out) {
371*6f0b0b5dSAlexander Aring 		srcu_read_unlock(&connections_srcu, idx);
37236b71a8bSDavid Teigland 		return 0;
373*6f0b0b5dSAlexander Aring 	}
3746ed7257bSPatrick Caulfield 
375c51c9cd8SAlexander Aring 	if (dlm_local_addr[0].ss_family == AF_INET) {
37636b71a8bSDavid Teigland 		struct sockaddr_in *in4  = (struct sockaddr_in *) &sas;
37736b71a8bSDavid Teigland 		struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out;
3786ed7257bSPatrick Caulfield 		ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
3796ed7257bSPatrick Caulfield 	} else {
38036b71a8bSDavid Teigland 		struct sockaddr_in6 *in6  = (struct sockaddr_in6 *) &sas;
38136b71a8bSDavid Teigland 		struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) sa_out;
3824e3fd7a0SAlexey Dobriyan 		ret6->sin6_addr = in6->sin6_addr;
3836ed7257bSPatrick Caulfield 	}
3846ed7257bSPatrick Caulfield 
385*6f0b0b5dSAlexander Aring 	srcu_read_unlock(&connections_srcu, idx);
3866ed7257bSPatrick Caulfield 	return 0;
3876ed7257bSPatrick Caulfield }
3886ed7257bSPatrick Caulfield 
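/* Reverse lookup used when accepting a connection: scan all known
 * connections for one that has @addr configured and return its nodeid
 * and skb mark.
 */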
389e125fbebSAlexander Aring static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid,
390e125fbebSAlexander Aring 			  unsigned int *mark)
39136b71a8bSDavid Teigland {
392*6f0b0b5dSAlexander Aring 	struct connection *con;
393*6f0b0b5dSAlexander Aring 	int i, idx, addr_i;
39436b71a8bSDavid Teigland 
395*6f0b0b5dSAlexander Aring 	idx = srcu_read_lock(&connections_srcu);
396*6f0b0b5dSAlexander Aring 	for (i = 0; i < CONN_HASH_SIZE; i++) {
397*6f0b0b5dSAlexander Aring 		hlist_for_each_entry_rcu(con, &connection_hash[i], list) {
398*6f0b0b5dSAlexander Aring 			WARN_ON_ONCE(!con->addr_count);
39936b71a8bSDavid Teigland 
400*6f0b0b5dSAlexander Aring 			spin_lock(&con->addrs_lock);
401*6f0b0b5dSAlexander Aring 			for (addr_i = 0; addr_i < con->addr_count; addr_i++) {
402*6f0b0b5dSAlexander Aring 				if (addr_compare(&con->addr[addr_i], addr)) {
403*6f0b0b5dSAlexander Aring 					*nodeid = con->nodeid;
404*6f0b0b5dSAlexander Aring 					*mark = con->mark;
405*6f0b0b5dSAlexander Aring 					spin_unlock(&con->addrs_lock);
406*6f0b0b5dSAlexander Aring 					srcu_read_unlock(&connections_srcu, idx);
407*6f0b0b5dSAlexander Aring 					return 0;
40836b71a8bSDavid Teigland 				}
40998e1b60eSMike Christie 			}
410*6f0b0b5dSAlexander Aring 			spin_unlock(&con->addrs_lock);
41198e1b60eSMike Christie 		}
412*6f0b0b5dSAlexander Aring 	}
413*6f0b0b5dSAlexander Aring 	srcu_read_unlock(&connections_srcu, idx);
414*6f0b0b5dSAlexander Aring 
415*6f0b0b5dSAlexander Aring 	return -ENOENT;
41636b71a8bSDavid Teigland }
41736b71a8bSDavid Teigland 
418*6f0b0b5dSAlexander Aring static bool dlm_lowcomms_con_has_addr(const struct connection *con,
4194f19d071SAlexander Aring 				      const struct sockaddr_storage *addr)
4204f19d071SAlexander Aring {
4214f19d071SAlexander Aring 	int i;
4224f19d071SAlexander Aring 
423*6f0b0b5dSAlexander Aring 	for (i = 0; i < con->addr_count; i++) {
424*6f0b0b5dSAlexander Aring 		if (addr_compare(&con->addr[i], addr))
4254f19d071SAlexander Aring 			return true;
4264f19d071SAlexander Aring 	}
4274f19d071SAlexander Aring 
4284f19d071SAlexander Aring 	return false;
4294f19d071SAlexander Aring }
4304f19d071SAlexander Aring 
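/* Register an address for @nodeid. The first call allocates the connection
 * and stores the address plus the default skb mark; later calls add extra
 * addresses (up to DLM_MAX_ADDR_COUNT) which can be used as fallbacks, e.g.
 * for SCTP multi-homing.
 */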
43136b71a8bSDavid Teigland int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len)
43236b71a8bSDavid Teigland {
433*6f0b0b5dSAlexander Aring 	struct connection *con;
434*6f0b0b5dSAlexander Aring 	bool ret;
434*6f0b0b5dSAlexander Aring 	int idx;
43536b71a8bSDavid Teigland 
436*6f0b0b5dSAlexander Aring 	idx = srcu_read_lock(&connections_srcu);
437*6f0b0b5dSAlexander Aring 	con = nodeid2con(nodeid, GFP_NOFS);
438*6f0b0b5dSAlexander Aring 	if (!con) {
439*6f0b0b5dSAlexander Aring 		srcu_read_unlock(&connections_srcu, idx);
44036b71a8bSDavid Teigland 		return -ENOMEM;
44136b71a8bSDavid Teigland 	}
44236b71a8bSDavid Teigland 
443*6f0b0b5dSAlexander Aring 	spin_lock(&con->addrs_lock);
444*6f0b0b5dSAlexander Aring 	if (!con->addr_count) {
445*6f0b0b5dSAlexander Aring 		memcpy(&con->addr[0], addr, sizeof(*addr));
446*6f0b0b5dSAlexander Aring 		con->addr_count = 1;
447*6f0b0b5dSAlexander Aring 		con->mark = dlm_config.ci_mark;
448*6f0b0b5dSAlexander Aring 		spin_unlock(&con->addrs_lock);
449*6f0b0b5dSAlexander Aring 		srcu_read_unlock(&connections_srcu, idx);
45036b71a8bSDavid Teigland 		return 0;
45136b71a8bSDavid Teigland 	}
45236b71a8bSDavid Teigland 
453*6f0b0b5dSAlexander Aring 	ret = dlm_lowcomms_con_has_addr(con, addr);
4544f19d071SAlexander Aring 	if (ret) {
455*6f0b0b5dSAlexander Aring 		spin_unlock(&con->addrs_lock);
456*6f0b0b5dSAlexander Aring 		srcu_read_unlock(&connections_srcu, idx);
4574f19d071SAlexander Aring 		return -EEXIST;
4584f19d071SAlexander Aring 	}
4594f19d071SAlexander Aring 
460*6f0b0b5dSAlexander Aring 	if (con->addr_count >= DLM_MAX_ADDR_COUNT) {
461*6f0b0b5dSAlexander Aring 		spin_unlock(&con->addrs_lock);
462*6f0b0b5dSAlexander Aring 		srcu_read_unlock(&connections_srcu, idx);
46336b71a8bSDavid Teigland 		return -ENOSPC;
46436b71a8bSDavid Teigland 	}
46536b71a8bSDavid Teigland 
466*6f0b0b5dSAlexander Aring 	memcpy(&con->addr[con->addr_count++], addr, sizeof(*addr));
467*6f0b0b5dSAlexander Aring 	spin_unlock(&con->addrs_lock);
468*6f0b0b5dSAlexander Aring 	srcu_read_unlock(&connections_srcu, idx);
46936b71a8bSDavid Teigland 	return 0;
47036b71a8bSDavid Teigland }
47136b71a8bSDavid Teigland 
4726ed7257bSPatrick Caulfield /* Data available on a connection socket */
473676d2369SDavid S. Miller static void lowcomms_data_ready(struct sock *sk)
4746ed7257bSPatrick Caulfield {
47593eaadebStsutomu.owa@toshiba.co.jp 	struct connection *con;
47693eaadebStsutomu.owa@toshiba.co.jp 
47793eaadebStsutomu.owa@toshiba.co.jp 	con = sock2con(sk);
478afb853fbSPatrick Caulfield 	if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
4796ed7257bSPatrick Caulfield 		queue_work(recv_workqueue, &con->rwork);
4806ed7257bSPatrick Caulfield }
4816ed7257bSPatrick Caulfield 
482d11ccd45SAlexander Aring static void lowcomms_listen_data_ready(struct sock *sk)
483d11ccd45SAlexander Aring {
484d11ccd45SAlexander Aring 	queue_work(recv_workqueue, &listen_con.rwork);
485d11ccd45SAlexander Aring }
486d11ccd45SAlexander Aring 
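/* Socket has write space again. The first write_space after a connect also
 * tells us the connection is established (CF_CONNECTED); otherwise clear the
 * application-limited state and kick the send worker.
 */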
4876ed7257bSPatrick Caulfield static void lowcomms_write_space(struct sock *sk)
4886ed7257bSPatrick Caulfield {
48993eaadebStsutomu.owa@toshiba.co.jp 	struct connection *con;
4906ed7257bSPatrick Caulfield 
49193eaadebStsutomu.owa@toshiba.co.jp 	con = sock2con(sk);
492b36930ddSDavid Miller 	if (!con)
49392c44605SAlexander Aring 		return;
494b36930ddSDavid Miller 
49519633c7eSAlexander Aring 	if (!test_and_set_bit(CF_CONNECTED, &con->flags)) {
496dfc020f3SAlexander Aring 		log_print("connected to node %d", con->nodeid);
49719633c7eSAlexander Aring 		queue_work(send_workqueue, &con->swork);
49892c44605SAlexander Aring 		return;
49919633c7eSAlexander Aring 	}
50019633c7eSAlexander Aring 
501b36930ddSDavid Miller 	clear_bit(SOCK_NOSPACE, &con->sock->flags);
502b36930ddSDavid Miller 
503b36930ddSDavid Miller 	if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) {
504b36930ddSDavid Miller 		con->sock->sk->sk_write_pending--;
5059cd3e072SEric Dumazet 		clear_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags);
506b36930ddSDavid Miller 	}
507b36930ddSDavid Miller 
5086ed7257bSPatrick Caulfield 	queue_work(send_workqueue, &con->swork);
5096ed7257bSPatrick Caulfield }
5106ed7257bSPatrick Caulfield 
5116ed7257bSPatrick Caulfield static inline void lowcomms_connect_sock(struct connection *con)
5126ed7257bSPatrick Caulfield {
513063c4c99SLars Marowsky-Bree 	if (test_bit(CF_CLOSE, &con->flags))
514063c4c99SLars Marowsky-Bree 		return;
5156ed7257bSPatrick Caulfield 	queue_work(send_workqueue, &con->swork);
51661d9102bSBob Peterson 	cond_resched();
5176ed7257bSPatrick Caulfield }
5186ed7257bSPatrick Caulfield 
5196ed7257bSPatrick Caulfield static void lowcomms_state_change(struct sock *sk)
5206ed7257bSPatrick Caulfield {
521ee44b4bcSMarcelo Ricardo Leitner 	/* The SCTP layer does not call sk_data_ready when the connection
522ee44b4bcSMarcelo Ricardo Leitner 	 * is done, so we catch that signal here instead. Also, it does not
523ee44b4bcSMarcelo Ricardo Leitner 	 * switch the socket state when entering shutdown, so we skip the
524ee44b4bcSMarcelo Ricardo Leitner 	 * write_space handling in that case.
525ee44b4bcSMarcelo Ricardo Leitner 	 */
526ee44b4bcSMarcelo Ricardo Leitner 	if (sk->sk_shutdown) {
527ee44b4bcSMarcelo Ricardo Leitner 		if (sk->sk_shutdown == RCV_SHUTDOWN)
528ee44b4bcSMarcelo Ricardo Leitner 			lowcomms_data_ready(sk);
529ee44b4bcSMarcelo Ricardo Leitner 	} else if (sk->sk_state == TCP_ESTABLISHED) {
5306ed7257bSPatrick Caulfield 		lowcomms_write_space(sk);
5316ed7257bSPatrick Caulfield 	}
532ee44b4bcSMarcelo Ricardo Leitner }
5336ed7257bSPatrick Caulfield 
534391fbdc5SChristine Caulfield int dlm_lowcomms_connect_node(int nodeid)
535391fbdc5SChristine Caulfield {
536391fbdc5SChristine Caulfield 	struct connection *con;
537b38bc9c2SAlexander Aring 	int idx;
538391fbdc5SChristine Caulfield 
539391fbdc5SChristine Caulfield 	if (nodeid == dlm_our_nodeid())
540391fbdc5SChristine Caulfield 		return 0;
541391fbdc5SChristine Caulfield 
542b38bc9c2SAlexander Aring 	idx = srcu_read_lock(&connections_srcu);
543*6f0b0b5dSAlexander Aring 	con = nodeid2con(nodeid, 0);
544*6f0b0b5dSAlexander Aring 	if (WARN_ON_ONCE(!con)) {
545b38bc9c2SAlexander Aring 		srcu_read_unlock(&connections_srcu, idx);
546*6f0b0b5dSAlexander Aring 		return -ENOENT;
547b38bc9c2SAlexander Aring 	}
548b38bc9c2SAlexander Aring 
549391fbdc5SChristine Caulfield 	lowcomms_connect_sock(con);
550b38bc9c2SAlexander Aring 	srcu_read_unlock(&connections_srcu, idx);
551b38bc9c2SAlexander Aring 
552391fbdc5SChristine Caulfield 	return 0;
553391fbdc5SChristine Caulfield }
554391fbdc5SChristine Caulfield 
555e125fbebSAlexander Aring int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark)
556e125fbebSAlexander Aring {
557*6f0b0b5dSAlexander Aring 	struct connection *con;
558*6f0b0b5dSAlexander Aring 	int idx;
559e125fbebSAlexander Aring 
560*6f0b0b5dSAlexander Aring 	idx = srcu_read_lock(&connections_srcu);
561*6f0b0b5dSAlexander Aring 	con = nodeid2con(nodeid, 0);
562*6f0b0b5dSAlexander Aring 	if (!con) {
563*6f0b0b5dSAlexander Aring 		srcu_read_unlock(&connections_srcu, idx);
564e125fbebSAlexander Aring 		return -ENOENT;
565e125fbebSAlexander Aring 	}
566e125fbebSAlexander Aring 
567*6f0b0b5dSAlexander Aring 	spin_lock(&con->addrs_lock);
568*6f0b0b5dSAlexander Aring 	con->mark = mark;
569*6f0b0b5dSAlexander Aring 	spin_unlock(&con->addrs_lock);
570*6f0b0b5dSAlexander Aring 	srcu_read_unlock(&connections_srcu, idx);
571e125fbebSAlexander Aring 	return 0;
572e125fbebSAlexander Aring }
573e125fbebSAlexander Aring 
574b3a5bbfdSBob Peterson static void lowcomms_error_report(struct sock *sk)
575b3a5bbfdSBob Peterson {
576b81171cbSBob Peterson 	struct connection *con;
577b81171cbSBob Peterson 	void (*orig_report)(struct sock *) = NULL;
5784c3d9057SAlexander Aring 	struct inet_sock *inet;
579b3a5bbfdSBob Peterson 
580b81171cbSBob Peterson 	con = sock2con(sk);
581b81171cbSBob Peterson 	if (con == NULL)
582b81171cbSBob Peterson 		goto out;
583b81171cbSBob Peterson 
584cc661fc9SBob Peterson 	orig_report = listen_sock.sk_error_report;
585b3a5bbfdSBob Peterson 
5864c3d9057SAlexander Aring 	inet = inet_sk(sk);
5874c3d9057SAlexander Aring 	switch (sk->sk_family) {
5884c3d9057SAlexander Aring 	case AF_INET:
589b3a5bbfdSBob Peterson 		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
5904c3d9057SAlexander Aring 				   "sending to node %d at %pI4, dport %d, "
591b3a5bbfdSBob Peterson 				   "sk_err=%d/%d\n", dlm_our_nodeid(),
5924c3d9057SAlexander Aring 				   con->nodeid, &inet->inet_daddr,
5934c3d9057SAlexander Aring 				   ntohs(inet->inet_dport), sk->sk_err,
594b3a5bbfdSBob Peterson 				   sk->sk_err_soft);
5954c3d9057SAlexander Aring 		break;
5961b9beda8SAlexander Aring #if IS_ENABLED(CONFIG_IPV6)
5974c3d9057SAlexander Aring 	case AF_INET6:
598b3a5bbfdSBob Peterson 		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
5994c3d9057SAlexander Aring 				   "sending to node %d at %pI6c, "
6004c3d9057SAlexander Aring 				   "dport %d, sk_err=%d/%d\n", dlm_our_nodeid(),
6014c3d9057SAlexander Aring 				   con->nodeid, &sk->sk_v6_daddr,
6024c3d9057SAlexander Aring 				   ntohs(inet->inet_dport), sk->sk_err,
603b3a5bbfdSBob Peterson 				   sk->sk_err_soft);
6044c3d9057SAlexander Aring 		break;
6051b9beda8SAlexander Aring #endif
6064c3d9057SAlexander Aring 	default:
6074c3d9057SAlexander Aring 		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
6084c3d9057SAlexander Aring 				   "invalid socket family %d set, "
6094c3d9057SAlexander Aring 				   "sk_err=%d/%d\n", dlm_our_nodeid(),
6104c3d9057SAlexander Aring 				   sk->sk_family, sk->sk_err, sk->sk_err_soft);
6114c3d9057SAlexander Aring 		goto out;
612b3a5bbfdSBob Peterson 	}
613ba868d9dSAlexander Aring 
614ba868d9dSAlexander Aring 	/* from here on we only deal with the sendcon */
615ba868d9dSAlexander Aring 	if (test_bit(CF_IS_OTHERCON, &con->flags))
616ba868d9dSAlexander Aring 		con = con->sendcon;
617ba868d9dSAlexander Aring 
618ba868d9dSAlexander Aring 	switch (sk->sk_err) {
619ba868d9dSAlexander Aring 	case ECONNREFUSED:
620ba868d9dSAlexander Aring 		set_bit(CF_DELAY_CONNECT, &con->flags);
621ba868d9dSAlexander Aring 		break;
622ba868d9dSAlexander Aring 	default:
623ba868d9dSAlexander Aring 		break;
624ba868d9dSAlexander Aring 	}
625ba868d9dSAlexander Aring 
626ba868d9dSAlexander Aring 	if (!test_and_set_bit(CF_RECONNECT, &con->flags))
627ba868d9dSAlexander Aring 		queue_work(send_workqueue, &con->swork);
628ba868d9dSAlexander Aring 
629b81171cbSBob Peterson out:
630b81171cbSBob Peterson 	if (orig_report)
631b81171cbSBob Peterson 		orig_report(sk);
632b81171cbSBob Peterson }
633b81171cbSBob Peterson 
634cc661fc9SBob Peterson static void restore_callbacks(struct socket *sock)
635b81171cbSBob Peterson {
636cc661fc9SBob Peterson 	struct sock *sk = sock->sk;
637cc661fc9SBob Peterson 
63892c44605SAlexander Aring 	lock_sock(sk);
639b81171cbSBob Peterson 	sk->sk_user_data = NULL;
640cc661fc9SBob Peterson 	sk->sk_data_ready = listen_sock.sk_data_ready;
641cc661fc9SBob Peterson 	sk->sk_state_change = listen_sock.sk_state_change;
642cc661fc9SBob Peterson 	sk->sk_write_space = listen_sock.sk_write_space;
643cc661fc9SBob Peterson 	sk->sk_error_report = listen_sock.sk_error_report;
64492c44605SAlexander Aring 	release_sock(sk);
645b3a5bbfdSBob Peterson }
646b3a5bbfdSBob Peterson 
6476ed7257bSPatrick Caulfield /* Make a socket active */
648988419a9Stsutomu.owa@toshiba.co.jp static void add_sock(struct socket *sock, struct connection *con)
6496ed7257bSPatrick Caulfield {
650b81171cbSBob Peterson 	struct sock *sk = sock->sk;
651b81171cbSBob Peterson 
65292c44605SAlexander Aring 	lock_sock(sk);
6536ed7257bSPatrick Caulfield 	con->sock = sock;
6546ed7257bSPatrick Caulfield 
655b81171cbSBob Peterson 	sk->sk_user_data = con;
6566ed7257bSPatrick Caulfield 	/* Install a data_ready callback */
657b81171cbSBob Peterson 	sk->sk_data_ready = lowcomms_data_ready;
658b81171cbSBob Peterson 	sk->sk_write_space = lowcomms_write_space;
659b81171cbSBob Peterson 	sk->sk_state_change = lowcomms_state_change;
660b81171cbSBob Peterson 	sk->sk_allocation = GFP_NOFS;
661b81171cbSBob Peterson 	sk->sk_error_report = lowcomms_error_report;
66292c44605SAlexander Aring 	release_sock(sk);
6636ed7257bSPatrick Caulfield }
6646ed7257bSPatrick Caulfield 
6656ed7257bSPatrick Caulfield /* Add the port number to an IPv6 or 4 sockaddr and return the address
6666ed7257bSPatrick Caulfield    length */
6676ed7257bSPatrick Caulfield static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
6686ed7257bSPatrick Caulfield 			  int *addr_len)
6696ed7257bSPatrick Caulfield {
670c51c9cd8SAlexander Aring 	saddr->ss_family =  dlm_local_addr[0].ss_family;
6716ed7257bSPatrick Caulfield 	if (saddr->ss_family == AF_INET) {
6726ed7257bSPatrick Caulfield 		struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
6736ed7257bSPatrick Caulfield 		in4_addr->sin_port = cpu_to_be16(port);
6746ed7257bSPatrick Caulfield 		*addr_len = sizeof(struct sockaddr_in);
6756ed7257bSPatrick Caulfield 		memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero));
6766ed7257bSPatrick Caulfield 	} else {
6776ed7257bSPatrick Caulfield 		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
6786ed7257bSPatrick Caulfield 		in6_addr->sin6_port = cpu_to_be16(port);
6796ed7257bSPatrick Caulfield 		*addr_len = sizeof(struct sockaddr_in6);
6806ed7257bSPatrick Caulfield 	}
68101c8cab2SPatrick Caulfield 	memset((char *)saddr + *addr_len, 0, sizeof(struct sockaddr_storage) - *addr_len);
6826ed7257bSPatrick Caulfield }
6836ed7257bSPatrick Caulfield 
684706474fbSAlexander Aring static void dlm_page_release(struct kref *kref)
685706474fbSAlexander Aring {
686706474fbSAlexander Aring 	struct writequeue_entry *e = container_of(kref, struct writequeue_entry,
687706474fbSAlexander Aring 						  ref);
688706474fbSAlexander Aring 
689706474fbSAlexander Aring 	__free_page(e->page);
6903af2326cSAlexander Aring 	dlm_free_writequeue(e);
691706474fbSAlexander Aring }
692706474fbSAlexander Aring 
693706474fbSAlexander Aring static void dlm_msg_release(struct kref *kref)
694706474fbSAlexander Aring {
695706474fbSAlexander Aring 	struct dlm_msg *msg = container_of(kref, struct dlm_msg, ref);
696706474fbSAlexander Aring 
697706474fbSAlexander Aring 	kref_put(&msg->entry->ref, dlm_page_release);
698e4dc81edSAlexander Aring 	dlm_free_msg(msg);
699706474fbSAlexander Aring }
700706474fbSAlexander Aring 
701706474fbSAlexander Aring static void free_entry(struct writequeue_entry *e)
702706474fbSAlexander Aring {
703706474fbSAlexander Aring 	struct dlm_msg *msg, *tmp;
704706474fbSAlexander Aring 
705706474fbSAlexander Aring 	list_for_each_entry_safe(msg, tmp, &e->msgs, list) {
706706474fbSAlexander Aring 		if (msg->orig_msg) {
707706474fbSAlexander Aring 			msg->orig_msg->retransmit = false;
708706474fbSAlexander Aring 			kref_put(&msg->orig_msg->ref, dlm_msg_release);
709706474fbSAlexander Aring 		}
710706474fbSAlexander Aring 
711706474fbSAlexander Aring 		list_del(&msg->list);
712706474fbSAlexander Aring 		kref_put(&msg->ref, dlm_msg_release);
713706474fbSAlexander Aring 	}
714706474fbSAlexander Aring 
715706474fbSAlexander Aring 	list_del(&e->list);
716706474fbSAlexander Aring 	kref_put(&e->ref, dlm_page_release);
717706474fbSAlexander Aring }
718706474fbSAlexander Aring 
719d11ccd45SAlexander Aring static void dlm_close_sock(struct socket **sock)
720d11ccd45SAlexander Aring {
721d11ccd45SAlexander Aring 	if (*sock) {
722d11ccd45SAlexander Aring 		restore_callbacks(*sock);
723d11ccd45SAlexander Aring 		sock_release(*sock);
724d11ccd45SAlexander Aring 		*sock = NULL;
725d11ccd45SAlexander Aring 	}
726d11ccd45SAlexander Aring }
727d11ccd45SAlexander Aring 
7286ed7257bSPatrick Caulfield /* Close a remote connection and tidy up */
7290d737a8cSMarcelo Ricardo Leitner static void close_connection(struct connection *con, bool and_other,
7300d737a8cSMarcelo Ricardo Leitner 			     bool tx, bool rx)
7316ed7257bSPatrick Caulfield {
732b2a66629Stsutomu.owa@toshiba.co.jp 	bool closing = test_and_set_bit(CF_CLOSING, &con->flags);
733706474fbSAlexander Aring 	struct writequeue_entry *e;
734b2a66629Stsutomu.owa@toshiba.co.jp 
7350aa18464Stsutomu.owa@toshiba.co.jp 	if (tx && !closing && cancel_work_sync(&con->swork)) {
7360d737a8cSMarcelo Ricardo Leitner 		log_print("canceled swork for node %d", con->nodeid);
7370aa18464Stsutomu.owa@toshiba.co.jp 		clear_bit(CF_WRITE_PENDING, &con->flags);
7380aa18464Stsutomu.owa@toshiba.co.jp 	}
7390aa18464Stsutomu.owa@toshiba.co.jp 	if (rx && !closing && cancel_work_sync(&con->rwork)) {
7400d737a8cSMarcelo Ricardo Leitner 		log_print("canceled rwork for node %d", con->nodeid);
7410aa18464Stsutomu.owa@toshiba.co.jp 		clear_bit(CF_READ_PENDING, &con->flags);
7420aa18464Stsutomu.owa@toshiba.co.jp 	}
7436ed7257bSPatrick Caulfield 
7440d737a8cSMarcelo Ricardo Leitner 	mutex_lock(&con->sock_mutex);
745d11ccd45SAlexander Aring 	dlm_close_sock(&con->sock);
746d11ccd45SAlexander Aring 
7476ed7257bSPatrick Caulfield 	if (con->othercon && and_other) {
7486ed7257bSPatrick Caulfield 		/* Will only re-enter once. */
749c6aa00e3SAlexander Aring 		close_connection(con->othercon, false, tx, rx);
7506ed7257bSPatrick Caulfield 	}
7519e5f2825SPatrick Caulfield 
752706474fbSAlexander Aring 	/* If a writequeue entry was only sent half way, drop the whole
753706474fbSAlexander Aring 	 * entry on reconnection so that we never restart in the middle of
754706474fbSAlexander Aring 	 * a msg, which would confuse the other end.
755706474fbSAlexander Aring 	 *
756706474fbSAlexander Aring 	 * We can always drop whole messages because of retransmits, but what
757706474fbSAlexander Aring 	 * we cannot allow is transmitting half a message which may be
758706474fbSAlexander Aring 	 * processed at the other side.
759706474fbSAlexander Aring 	 *
760706474fbSAlexander Aring 	 * Our policy is to start from a clean state after a disconnect; we
761706474fbSAlexander Aring 	 * don't know what was sent/received on the transport layer then.
762706474fbSAlexander Aring 	 */
763706474fbSAlexander Aring 	spin_lock(&con->writequeue_lock);
764706474fbSAlexander Aring 	if (!list_empty(&con->writequeue)) {
765706474fbSAlexander Aring 		e = list_first_entry(&con->writequeue, struct writequeue_entry,
766706474fbSAlexander Aring 				     list);
767706474fbSAlexander Aring 		if (e->dirty)
768706474fbSAlexander Aring 			free_entry(e);
769706474fbSAlexander Aring 	}
770706474fbSAlexander Aring 	spin_unlock(&con->writequeue_lock);
771706474fbSAlexander Aring 
7724798cbbfSAlexander Aring 	con->rx_leftover = 0;
7736ed7257bSPatrick Caulfield 	con->retries = 0;
774052849beSAlexander Aring 	clear_bit(CF_APP_LIMITED, &con->flags);
77519633c7eSAlexander Aring 	clear_bit(CF_CONNECTED, &con->flags);
776ba868d9dSAlexander Aring 	clear_bit(CF_DELAY_CONNECT, &con->flags);
777ba868d9dSAlexander Aring 	clear_bit(CF_RECONNECT, &con->flags);
7786ed7257bSPatrick Caulfield 	mutex_unlock(&con->sock_mutex);
779b2a66629Stsutomu.owa@toshiba.co.jp 	clear_bit(CF_CLOSING, &con->flags);
7806ed7257bSPatrick Caulfield }
7816ed7257bSPatrick Caulfield 
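/* Resize the per-connection receive buffer to a newly configured size,
 * preserving any leftover bytes of a partially received message.
 */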
7824798cbbfSAlexander Aring static int con_realloc_receive_buf(struct connection *con, int newlen)
7834798cbbfSAlexander Aring {
7844798cbbfSAlexander Aring 	unsigned char *newbuf;
7854798cbbfSAlexander Aring 
7864798cbbfSAlexander Aring 	newbuf = kmalloc(newlen, GFP_NOFS);
7874798cbbfSAlexander Aring 	if (!newbuf)
7884798cbbfSAlexander Aring 		return -ENOMEM;
7894798cbbfSAlexander Aring 
7904798cbbfSAlexander Aring 	/* copy any leftover from last receive */
7914798cbbfSAlexander Aring 	if (con->rx_leftover)
7924798cbbfSAlexander Aring 		memmove(newbuf, con->rx_buf, con->rx_leftover);
7934798cbbfSAlexander Aring 
7944798cbbfSAlexander Aring 	/* swap to new buffer space */
7954798cbbfSAlexander Aring 	kfree(con->rx_buf);
7964798cbbfSAlexander Aring 	con->rx_buflen = newlen;
7974798cbbfSAlexander Aring 	con->rx_buf = newbuf;
7984798cbbfSAlexander Aring 
7994798cbbfSAlexander Aring 	return 0;
8004798cbbfSAlexander Aring }
8014798cbbfSAlexander Aring 
8026ed7257bSPatrick Caulfield /* Data received from remote end */
8036ed7257bSPatrick Caulfield static int receive_from_sock(struct connection *con)
8046ed7257bSPatrick Caulfield {
8054798cbbfSAlexander Aring 	struct msghdr msg;
8064798cbbfSAlexander Aring 	struct kvec iov;
8074798cbbfSAlexander Aring 	int ret, buflen;
8086ed7257bSPatrick Caulfield 
8096ed7257bSPatrick Caulfield 	mutex_lock(&con->sock_mutex);
8106ed7257bSPatrick Caulfield 
8116ed7257bSPatrick Caulfield 	if (con->sock == NULL) {
8126ed7257bSPatrick Caulfield 		ret = -EAGAIN;
8136ed7257bSPatrick Caulfield 		goto out_close;
8146ed7257bSPatrick Caulfield 	}
8154798cbbfSAlexander Aring 
8164798cbbfSAlexander Aring 	/* realloc if the configured receive buffer size has changed */
8174798cbbfSAlexander Aring 	buflen = dlm_config.ci_buffer_size;
8184798cbbfSAlexander Aring 	if (con->rx_buflen != buflen && con->rx_leftover <= buflen) {
8194798cbbfSAlexander Aring 		ret = con_realloc_receive_buf(con, buflen);
8204798cbbfSAlexander Aring 		if (ret < 0)
8216ed7257bSPatrick Caulfield 			goto out_resched;
8226ed7257bSPatrick Caulfield 	}
8236ed7257bSPatrick Caulfield 
82462699b3fSAlexander Aring 	for (;;) {
8254798cbbfSAlexander Aring 		/* calculate the new buffer parameters, taking leftover bytes
8264798cbbfSAlexander Aring 		 * from the last receive into account
8276ed7257bSPatrick Caulfield 		 */
8284798cbbfSAlexander Aring 		iov.iov_base = con->rx_buf + con->rx_leftover;
8294798cbbfSAlexander Aring 		iov.iov_len = con->rx_buflen - con->rx_leftover;
8306ed7257bSPatrick Caulfield 
8314798cbbfSAlexander Aring 		memset(&msg, 0, sizeof(msg));
8324798cbbfSAlexander Aring 		msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
8334798cbbfSAlexander Aring 		ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
8344798cbbfSAlexander Aring 				     msg.msg_flags);
83592732376SAlexander Aring 		trace_dlm_recv(con->nodeid, ret);
83662699b3fSAlexander Aring 		if (ret == -EAGAIN)
83762699b3fSAlexander Aring 			break;
83862699b3fSAlexander Aring 		else if (ret <= 0)
8396ed7257bSPatrick Caulfield 			goto out_close;
8406ed7257bSPatrick Caulfield 
8414798cbbfSAlexander Aring 		/* new buflen according to the received bytes and leftover from last receive */
8424798cbbfSAlexander Aring 		buflen = ret + con->rx_leftover;
8434798cbbfSAlexander Aring 		ret = dlm_process_incoming_buffer(con->nodeid, con->rx_buf, buflen);
8444798cbbfSAlexander Aring 		if (ret < 0)
8454798cbbfSAlexander Aring 			goto out_close;
8466ed7257bSPatrick Caulfield 
8474798cbbfSAlexander Aring 		/* calculate the leftover bytes from processing and move them to
8484798cbbfSAlexander Aring 		 * the beginning of the receive buffer, so the next receive sees
8494798cbbfSAlexander Aring 		 * the full message at the start of the buffer.
8504798cbbfSAlexander Aring 		 */
8514798cbbfSAlexander Aring 		con->rx_leftover = buflen - ret;
8524798cbbfSAlexander Aring 		if (con->rx_leftover) {
8534798cbbfSAlexander Aring 			memmove(con->rx_buf, con->rx_buf + ret,
8544798cbbfSAlexander Aring 				con->rx_leftover);
8556ed7257bSPatrick Caulfield 		}
85662699b3fSAlexander Aring 	}
8574798cbbfSAlexander Aring 
858b97f8525SAlexander Aring 	dlm_midcomms_receive_done(con->nodeid);
8596ed7257bSPatrick Caulfield 	mutex_unlock(&con->sock_mutex);
8606ed7257bSPatrick Caulfield 	return 0;
8616ed7257bSPatrick Caulfield 
8626ed7257bSPatrick Caulfield out_resched:
8636ed7257bSPatrick Caulfield 	if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
8646ed7257bSPatrick Caulfield 		queue_work(recv_workqueue, &con->rwork);
8656ed7257bSPatrick Caulfield 	mutex_unlock(&con->sock_mutex);
8666ed7257bSPatrick Caulfield 	return -EAGAIN;
8676ed7257bSPatrick Caulfield 
8686ed7257bSPatrick Caulfield out_close:
869055923bfSAlexander Aring 	if (ret == 0) {
870055923bfSAlexander Aring 		log_print("connection %p got EOF from %d",
871055923bfSAlexander Aring 			  con, con->nodeid);
8728aa31cbfSAlexander Aring 
8738aa31cbfSAlexander Aring 		mutex_unlock(&con->sock_mutex);
8748aa31cbfSAlexander Aring 		close_connection(con, false, true, false);
875055923bfSAlexander Aring 		/* signal the receive worker to break out */
876055923bfSAlexander Aring 		ret = -1;
8778aa31cbfSAlexander Aring 	} else {
8788aa31cbfSAlexander Aring 		mutex_unlock(&con->sock_mutex);
8796ed7257bSPatrick Caulfield 	}
8806ed7257bSPatrick Caulfield 	return ret;
8816ed7257bSPatrick Caulfield }
8826ed7257bSPatrick Caulfield 
8836ed7257bSPatrick Caulfield /* Listening socket is busy, accept a connection */
884d11ccd45SAlexander Aring static int accept_from_sock(struct listen_connection *con)
8856ed7257bSPatrick Caulfield {
8866ed7257bSPatrick Caulfield 	int result;
8876ed7257bSPatrick Caulfield 	struct sockaddr_storage peeraddr;
8886ed7257bSPatrick Caulfield 	struct socket *newsock;
889b38bc9c2SAlexander Aring 	int len, idx;
8906ed7257bSPatrick Caulfield 	int nodeid;
8916ed7257bSPatrick Caulfield 	struct connection *newcon;
8926ed7257bSPatrick Caulfield 	struct connection *addcon;
8933f78cd7dSAlexander Aring 	unsigned int mark;
8946ed7257bSPatrick Caulfield 
895d11ccd45SAlexander Aring 	if (!con->sock)
8963421fb15Stsutomu.owa@toshiba.co.jp 		return -ENOTCONN;
8976ed7257bSPatrick Caulfield 
8983421fb15Stsutomu.owa@toshiba.co.jp 	result = kernel_accept(con->sock, &newsock, O_NONBLOCK);
8996ed7257bSPatrick Caulfield 	if (result < 0)
9006ed7257bSPatrick Caulfield 		goto accept_err;
9016ed7257bSPatrick Caulfield 
9026ed7257bSPatrick Caulfield 	/* Get the connected socket's peer */
9036ed7257bSPatrick Caulfield 	memset(&peeraddr, 0, sizeof(peeraddr));
9049b2c45d4SDenys Vlasenko 	len = newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, 2);
9059b2c45d4SDenys Vlasenko 	if (len < 0) {
9066ed7257bSPatrick Caulfield 		result = -ECONNABORTED;
9076ed7257bSPatrick Caulfield 		goto accept_err;
9086ed7257bSPatrick Caulfield 	}
9096ed7257bSPatrick Caulfield 
9106ed7257bSPatrick Caulfield 	/* Get the new node's NODEID */
9116ed7257bSPatrick Caulfield 	make_sockaddr(&peeraddr, 0, &len);
912e125fbebSAlexander Aring 	if (addr_to_nodeid(&peeraddr, &nodeid, &mark)) {
913feae43f8SAlexander Aring 		switch (peeraddr.ss_family) {
914feae43f8SAlexander Aring 		case AF_INET: {
915feae43f8SAlexander Aring 			struct sockaddr_in *sin = (struct sockaddr_in *)&peeraddr;
916feae43f8SAlexander Aring 
917feae43f8SAlexander Aring 			log_print("connect from non cluster IPv4 node %pI4",
918feae43f8SAlexander Aring 				  &sin->sin_addr);
919feae43f8SAlexander Aring 			break;
920feae43f8SAlexander Aring 		}
921feae43f8SAlexander Aring #if IS_ENABLED(CONFIG_IPV6)
922feae43f8SAlexander Aring 		case AF_INET6: {
923feae43f8SAlexander Aring 			struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&peeraddr;
924feae43f8SAlexander Aring 
925feae43f8SAlexander Aring 			log_print("connect from non cluster IPv6 node %pI6c",
926feae43f8SAlexander Aring 				  &sin6->sin6_addr);
927feae43f8SAlexander Aring 			break;
928feae43f8SAlexander Aring 		}
929feae43f8SAlexander Aring #endif
930feae43f8SAlexander Aring 		default:
931feae43f8SAlexander Aring 			log_print("invalid family from non cluster node");
932feae43f8SAlexander Aring 			break;
933feae43f8SAlexander Aring 		}
934feae43f8SAlexander Aring 
9356ed7257bSPatrick Caulfield 		sock_release(newsock);
9366ed7257bSPatrick Caulfield 		return -1;
9376ed7257bSPatrick Caulfield 	}
9386ed7257bSPatrick Caulfield 
9396ed7257bSPatrick Caulfield 	log_print("got connection from %d", nodeid);
9406ed7257bSPatrick Caulfield 
9416ed7257bSPatrick Caulfield 	/*  Check to see if we already have a connection to this node. This
9426ed7257bSPatrick Caulfield 	 *  could happen if the two nodes initiate a connection at roughly
9436ed7257bSPatrick Caulfield 	 *  the same time and the connections cross on the wire.
9446ed7257bSPatrick Caulfield 	 *  In this case we store the incoming one in "othercon"
9456ed7257bSPatrick Caulfield 	 */
946b38bc9c2SAlexander Aring 	idx = srcu_read_lock(&connections_srcu);
947*6f0b0b5dSAlexander Aring 	newcon = nodeid2con(nodeid, 0);
948*6f0b0b5dSAlexander Aring 	if (WARN_ON_ONCE(!newcon)) {
949b38bc9c2SAlexander Aring 		srcu_read_unlock(&connections_srcu, idx);
950*6f0b0b5dSAlexander Aring 		result = -ENOENT;
9516ed7257bSPatrick Caulfield 		goto accept_err;
9526ed7257bSPatrick Caulfield 	}
953d11ccd45SAlexander Aring 
954e125fbebSAlexander Aring 	sock_set_mark(newsock->sk, mark);
955e125fbebSAlexander Aring 
956d11ccd45SAlexander Aring 	mutex_lock(&newcon->sock_mutex);
9576ed7257bSPatrick Caulfield 	if (newcon->sock) {
9586ed7257bSPatrick Caulfield 		struct connection *othercon = newcon->othercon;
9596ed7257bSPatrick Caulfield 
9606ed7257bSPatrick Caulfield 		if (!othercon) {
961a47666ebSAlexander Aring 			othercon = kzalloc(sizeof(*othercon), GFP_NOFS);
9626ed7257bSPatrick Caulfield 			if (!othercon) {
963617e82e1SDavid Teigland 				log_print("failed to allocate incoming socket");
9646ed7257bSPatrick Caulfield 				mutex_unlock(&newcon->sock_mutex);
965b38bc9c2SAlexander Aring 				srcu_read_unlock(&connections_srcu, idx);
9666ed7257bSPatrick Caulfield 				result = -ENOMEM;
9676ed7257bSPatrick Caulfield 				goto accept_err;
9686ed7257bSPatrick Caulfield 			}
9694798cbbfSAlexander Aring 
9706cde210aSAlexander Aring 			result = dlm_con_init(othercon, nodeid);
9716cde210aSAlexander Aring 			if (result < 0) {
9724798cbbfSAlexander Aring 				kfree(othercon);
9732fd8db2dSYang Yingliang 				mutex_unlock(&newcon->sock_mutex);
974b38bc9c2SAlexander Aring 				srcu_read_unlock(&connections_srcu, idx);
9754798cbbfSAlexander Aring 				goto accept_err;
9764798cbbfSAlexander Aring 			}
9774798cbbfSAlexander Aring 
978e9a470acSAlexander Aring 			lockdep_set_subclass(&othercon->sock_mutex, 1);
9797443bc96SAlexander Aring 			set_bit(CF_IS_OTHERCON, &othercon->flags);
9806cde210aSAlexander Aring 			newcon->othercon = othercon;
981ba868d9dSAlexander Aring 			othercon->sendcon = newcon;
982ba3ab3caSAlexander Aring 		} else {
983ba3ab3caSAlexander Aring 			/* close other sock con if we have something new */
984ba3ab3caSAlexander Aring 			close_connection(othercon, false, true, false);
98561d96be0SPatrick Caulfield 		}
986ba3ab3caSAlexander Aring 
987e9a470acSAlexander Aring 		mutex_lock(&othercon->sock_mutex);
988988419a9Stsutomu.owa@toshiba.co.jp 		add_sock(newsock, othercon);
9896ed7257bSPatrick Caulfield 		addcon = othercon;
990c7355827Stsutomu.owa@toshiba.co.jp 		mutex_unlock(&othercon->sock_mutex);
9916ed7257bSPatrick Caulfield 	}
9926ed7257bSPatrick Caulfield 	else {
9933735b4b9SBob Peterson 		/* accept copies the sk after we've saved the callbacks, so we
9943735b4b9SBob Peterson 		   don't want to save them a second time or comm errors will
9953735b4b9SBob Peterson 		   result in calling sk_error_report recursively. */
996988419a9Stsutomu.owa@toshiba.co.jp 		add_sock(newsock, newcon);
9976ed7257bSPatrick Caulfield 		addcon = newcon;
9986ed7257bSPatrick Caulfield 	}
9996ed7257bSPatrick Caulfield 
1000b30a624fSAlexander Aring 	set_bit(CF_CONNECTED, &addcon->flags);
10016ed7257bSPatrick Caulfield 	mutex_unlock(&newcon->sock_mutex);
10026ed7257bSPatrick Caulfield 
10036ed7257bSPatrick Caulfield 	/*
10046ed7257bSPatrick Caulfield 	 * Add it to the active queue in case we got data between
100525985edcSLucas De Marchi 	 * processing the accept and adding the socket to the
10066ed7257bSPatrick Caulfield 	 * read_sockets list
10076ed7257bSPatrick Caulfield 	 */
10086ed7257bSPatrick Caulfield 	if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
10096ed7257bSPatrick Caulfield 		queue_work(recv_workqueue, &addcon->rwork);
10106ed7257bSPatrick Caulfield 
1011b38bc9c2SAlexander Aring 	srcu_read_unlock(&connections_srcu, idx);
1012b38bc9c2SAlexander Aring 
10136ed7257bSPatrick Caulfield 	return 0;
10146ed7257bSPatrick Caulfield 
10156ed7257bSPatrick Caulfield accept_err:
10163421fb15Stsutomu.owa@toshiba.co.jp 	if (newsock)
10176ed7257bSPatrick Caulfield 		sock_release(newsock);
10186ed7257bSPatrick Caulfield 
10196ed7257bSPatrick Caulfield 	if (result != -EAGAIN)
1020617e82e1SDavid Teigland 		log_print("error accepting connection from node: %d", result);
10216ed7257bSPatrick Caulfield 	return result;
10226ed7257bSPatrick Caulfield }
10236ed7257bSPatrick Caulfield 
10245d689871SMike Christie /*
10255d689871SMike Christie  * writequeue_entry_complete - try to delete and free write queue entry
10265d689871SMike Christie  * @e: write queue entry to try to delete
10275d689871SMike Christie  * @completed: bytes completed
10285d689871SMike Christie  *
10295d689871SMike Christie  * writequeue_lock must be held.
10305d689871SMike Christie  */
10315d689871SMike Christie static void writequeue_entry_complete(struct writequeue_entry *e, int completed)
10325d689871SMike Christie {
10335d689871SMike Christie 	e->offset += completed;
10345d689871SMike Christie 	e->len -= completed;
1035706474fbSAlexander Aring 	/* signal that page was half way transmitted */
1036706474fbSAlexander Aring 	e->dirty = true;
10375d689871SMike Christie 
10388f2dc78dSAlexander Aring 	if (e->len == 0 && e->users == 0)
10395d689871SMike Christie 		free_entry(e);
10405d689871SMike Christie }
10415d689871SMike Christie 
1042ee44b4bcSMarcelo Ricardo Leitner /*
1043ee44b4bcSMarcelo Ricardo Leitner  * sctp_bind_addrs - bind a SCTP socket to all our addresses
1044ee44b4bcSMarcelo Ricardo Leitner  */
104513004e8aSAlexander Aring static int sctp_bind_addrs(struct socket *sock, uint16_t port)
1046ee44b4bcSMarcelo Ricardo Leitner {
1047ee44b4bcSMarcelo Ricardo Leitner 	struct sockaddr_storage localaddr;
1048c0425a42SChristoph Hellwig 	struct sockaddr *addr = (struct sockaddr *)&localaddr;
1049ee44b4bcSMarcelo Ricardo Leitner 	int i, addr_len, result = 0;
1050ee44b4bcSMarcelo Ricardo Leitner 
1051ee44b4bcSMarcelo Ricardo Leitner 	for (i = 0; i < dlm_local_count; i++) {
1052c51c9cd8SAlexander Aring 		memcpy(&localaddr, &dlm_local_addr[i], sizeof(localaddr));
1053ee44b4bcSMarcelo Ricardo Leitner 		make_sockaddr(&localaddr, port, &addr_len);
1054ee44b4bcSMarcelo Ricardo Leitner 
1055ee44b4bcSMarcelo Ricardo Leitner 		if (!i)
105613004e8aSAlexander Aring 			result = kernel_bind(sock, addr, addr_len);
1057ee44b4bcSMarcelo Ricardo Leitner 		else
105813004e8aSAlexander Aring 			result = sock_bind_add(sock->sk, addr, addr_len);
1059ee44b4bcSMarcelo Ricardo Leitner 
1060ee44b4bcSMarcelo Ricardo Leitner 		if (result < 0) {
1061ee44b4bcSMarcelo Ricardo Leitner 			log_print("Can't bind to %d addr number %d, %d.\n",
1062ee44b4bcSMarcelo Ricardo Leitner 				  port, i + 1, result);
1063ee44b4bcSMarcelo Ricardo Leitner 			break;
1064ee44b4bcSMarcelo Ricardo Leitner 		}
1065ee44b4bcSMarcelo Ricardo Leitner 	}
1066ee44b4bcSMarcelo Ricardo Leitner 	return result;
1067ee44b4bcSMarcelo Ricardo Leitner }
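/* Editorial note: the first local address is attached with kernel_bind();
 * every further address is added to the same socket with sock_bind_add().
 * This per-address loop is what gives the SCTP transport its multi-homing.
 */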
1068ee44b4bcSMarcelo Ricardo Leitner 
10696ed7257bSPatrick Caulfield /* Get local addresses */
10706ed7257bSPatrick Caulfield static void init_local(void)
10716ed7257bSPatrick Caulfield {
1072c51c9cd8SAlexander Aring 	struct sockaddr_storage sas;
10736ed7257bSPatrick Caulfield 	int i;
10746ed7257bSPatrick Caulfield 
107530d3a237SPatrick Caulfield 	dlm_local_count = 0;
10761b189b88SDavid Teigland 	for (i = 0; i < DLM_MAX_ADDR_COUNT; i++) {
10776ed7257bSPatrick Caulfield 		if (dlm_our_addr(&sas, i))
10786ed7257bSPatrick Caulfield 			break;
10796ed7257bSPatrick Caulfield 
1080c51c9cd8SAlexander Aring 		memcpy(&dlm_local_addr[dlm_local_count++], &sas, sizeof(sas));
10816ed7257bSPatrick Caulfield 	}
10826ed7257bSPatrick Caulfield }
10836ed7257bSPatrick Caulfield 
1084be3b0400SAlexander Aring static struct writequeue_entry *new_writequeue_entry(struct connection *con)
10856ed7257bSPatrick Caulfield {
10866ed7257bSPatrick Caulfield 	struct writequeue_entry *entry;
10876ed7257bSPatrick Caulfield 
10883af2326cSAlexander Aring 	entry = dlm_allocate_writequeue();
10896ed7257bSPatrick Caulfield 	if (!entry)
10906ed7257bSPatrick Caulfield 		return NULL;
10916ed7257bSPatrick Caulfield 
1092be3b0400SAlexander Aring 	entry->page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
10936ed7257bSPatrick Caulfield 	if (!entry->page) {
10943af2326cSAlexander Aring 		dlm_free_writequeue(entry);
10956ed7257bSPatrick Caulfield 		return NULL;
10966ed7257bSPatrick Caulfield 	}
10976ed7257bSPatrick Caulfield 
10983af2326cSAlexander Aring 	entry->offset = 0;
10993af2326cSAlexander Aring 	entry->len = 0;
11003af2326cSAlexander Aring 	entry->end = 0;
11013af2326cSAlexander Aring 	entry->dirty = false;
11026ed7257bSPatrick Caulfield 	entry->con = con;
1103f0747ebfSAlexander Aring 	entry->users = 1;
11048f2dc78dSAlexander Aring 	kref_init(&entry->ref);
11056ed7257bSPatrick Caulfield 	return entry;
11066ed7257bSPatrick Caulfield }
11076ed7257bSPatrick Caulfield 
1108f0747ebfSAlexander Aring static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
1109be3b0400SAlexander Aring 					     char **ppc, void (*cb)(void *data),
1110be3b0400SAlexander Aring 					     void *data)
1111f0747ebfSAlexander Aring {
1112f0747ebfSAlexander Aring 	struct writequeue_entry *e;
1113f0747ebfSAlexander Aring 
1114f0747ebfSAlexander Aring 	spin_lock(&con->writequeue_lock);
1115f0747ebfSAlexander Aring 	if (!list_empty(&con->writequeue)) {
1116f0747ebfSAlexander Aring 		e = list_last_entry(&con->writequeue, struct writequeue_entry, list);
1117f0747ebfSAlexander Aring 		if (DLM_WQ_REMAIN_BYTES(e) >= len) {
11188f2dc78dSAlexander Aring 			kref_get(&e->ref);
11198f2dc78dSAlexander Aring 
1120f0747ebfSAlexander Aring 			*ppc = page_address(e->page) + e->end;
11218f2dc78dSAlexander Aring 			if (cb)
11225c16febbSAlexander Aring 				cb(data);
11238f2dc78dSAlexander Aring 
1124f0747ebfSAlexander Aring 			e->end += len;
1125f0747ebfSAlexander Aring 			e->users++;
1126be3b0400SAlexander Aring 			goto out;
1127f0747ebfSAlexander Aring 		}
1128f0747ebfSAlexander Aring 	}
1129f0747ebfSAlexander Aring 
1130be3b0400SAlexander Aring 	e = new_writequeue_entry(con);
1131f0747ebfSAlexander Aring 	if (!e)
1132be3b0400SAlexander Aring 		goto out;
1133f0747ebfSAlexander Aring 
11348f2dc78dSAlexander Aring 	kref_get(&e->ref);
1135f0747ebfSAlexander Aring 	*ppc = page_address(e->page);
1136f0747ebfSAlexander Aring 	e->end += len;
11378f2dc78dSAlexander Aring 	if (cb)
11385c16febbSAlexander Aring 		cb(data);
11398f2dc78dSAlexander Aring 
1140f0747ebfSAlexander Aring 	list_add_tail(&e->list, &con->writequeue);
1141f0747ebfSAlexander Aring 
1142be3b0400SAlexander Aring out:
1143be3b0400SAlexander Aring 	spin_unlock(&con->writequeue_lock);
1144f0747ebfSAlexander Aring 	return e;
1145f0747ebfSAlexander Aring };
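/* Editorial illustration (assumes PAGE_SIZE == 4096): if the last queued
 * entry already carries a 120-byte message (e->end == 120), a request for up
 * to DLM_WQ_REMAIN_BYTES(e) more bytes is satisfied from the same page; only
 * when that tail entry has too little room left is a fresh page-backed entry
 * allocated and appended to con->writequeue.
 */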
1146f0747ebfSAlexander Aring 
11472874d1a6SAlexander Aring static struct dlm_msg *dlm_lowcomms_new_msg_con(struct connection *con, int len,
11482874d1a6SAlexander Aring 						gfp_t allocation, char **ppc,
11495c16febbSAlexander Aring 						void (*cb)(void *data),
11505c16febbSAlexander Aring 						void *data)
11512874d1a6SAlexander Aring {
11522874d1a6SAlexander Aring 	struct writequeue_entry *e;
11532874d1a6SAlexander Aring 	struct dlm_msg *msg;
11542874d1a6SAlexander Aring 
1155e4dc81edSAlexander Aring 	msg = dlm_allocate_msg(allocation);
11562874d1a6SAlexander Aring 	if (!msg)
11572874d1a6SAlexander Aring 		return NULL;
11582874d1a6SAlexander Aring 
11592874d1a6SAlexander Aring 	kref_init(&msg->ref);
11602874d1a6SAlexander Aring 
1161be3b0400SAlexander Aring 	e = new_wq_entry(con, len, ppc, cb, data);
11622874d1a6SAlexander Aring 	if (!e) {
1163e4dc81edSAlexander Aring 		dlm_free_msg(msg);
11642874d1a6SAlexander Aring 		return NULL;
11652874d1a6SAlexander Aring 	}
11662874d1a6SAlexander Aring 
1167e4dc81edSAlexander Aring 	msg->retransmit = false;
1168e4dc81edSAlexander Aring 	msg->orig_msg = NULL;
11692874d1a6SAlexander Aring 	msg->ppc = *ppc;
11702874d1a6SAlexander Aring 	msg->len = len;
11712874d1a6SAlexander Aring 	msg->entry = e;
11722874d1a6SAlexander Aring 
11732874d1a6SAlexander Aring 	return msg;
11742874d1a6SAlexander Aring }
11752874d1a6SAlexander Aring 
1176a8449f23SAlexander Aring /* avoid a false positive for connections_srcu: the matching unlock happens
1177a8449f23SAlexander Aring  * in dlm_lowcomms_commit_msg(), which must be called on success
1178a8449f23SAlexander Aring  */
1179a8449f23SAlexander Aring #ifndef __CHECKER__
11808f2dc78dSAlexander Aring struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
11815c16febbSAlexander Aring 				     char **ppc, void (*cb)(void *data),
11825c16febbSAlexander Aring 				     void *data)
11836ed7257bSPatrick Caulfield {
11846ed7257bSPatrick Caulfield 	struct connection *con;
11858f2dc78dSAlexander Aring 	struct dlm_msg *msg;
1186b38bc9c2SAlexander Aring 	int idx;
11876ed7257bSPatrick Caulfield 
1188d10a0b88SAlexander Aring 	if (len > DLM_MAX_SOCKET_BUFSIZE ||
1189c45674fbSAlexander Aring 	    len < sizeof(struct dlm_header)) {
1190d10a0b88SAlexander Aring 		BUILD_BUG_ON(PAGE_SIZE < DLM_MAX_SOCKET_BUFSIZE);
1191692f51c8SAlexander Aring 		log_print("failed to allocate a buffer of size %d", len);
1192c45674fbSAlexander Aring 		WARN_ON(1);
1193692f51c8SAlexander Aring 		return NULL;
1194692f51c8SAlexander Aring 	}
1195692f51c8SAlexander Aring 
1196b38bc9c2SAlexander Aring 	idx = srcu_read_lock(&connections_srcu);
1197*6f0b0b5dSAlexander Aring 	con = nodeid2con(nodeid, 0);
1198*6f0b0b5dSAlexander Aring 	if (WARN_ON_ONCE(!con)) {
1199b38bc9c2SAlexander Aring 		srcu_read_unlock(&connections_srcu, idx);
12006ed7257bSPatrick Caulfield 		return NULL;
1201b38bc9c2SAlexander Aring 	}
12026ed7257bSPatrick Caulfield 
12035c16febbSAlexander Aring 	msg = dlm_lowcomms_new_msg_con(con, len, allocation, ppc, cb, data);
12048f2dc78dSAlexander Aring 	if (!msg) {
1205b38bc9c2SAlexander Aring 		srcu_read_unlock(&connections_srcu, idx);
1206b38bc9c2SAlexander Aring 		return NULL;
1207b38bc9c2SAlexander Aring 	}
1208b38bc9c2SAlexander Aring 
120930ea3257SAlexander Aring 	/* hold an extra reference for dlm_lowcomms_commit_msg() */
121030ea3257SAlexander Aring 	kref_get(&msg->ref);
12118f2dc78dSAlexander Aring 	/* we assume that on success dlm_lowcomms_commit_msg() will be called */
12128f2dc78dSAlexander Aring 	msg->idx = idx;
12138f2dc78dSAlexander Aring 	return msg;
12148f2dc78dSAlexander Aring }
1215a8449f23SAlexander Aring #endif
12168f2dc78dSAlexander Aring 
12172874d1a6SAlexander Aring static void _dlm_lowcomms_commit_msg(struct dlm_msg *msg)
12186ed7257bSPatrick Caulfield {
12198f2dc78dSAlexander Aring 	struct writequeue_entry *e = msg->entry;
12206ed7257bSPatrick Caulfield 	struct connection *con = e->con;
12216ed7257bSPatrick Caulfield 	int users;
12226ed7257bSPatrick Caulfield 
12236ed7257bSPatrick Caulfield 	spin_lock(&con->writequeue_lock);
12248f2dc78dSAlexander Aring 	kref_get(&msg->ref);
12258f2dc78dSAlexander Aring 	list_add(&msg->list, &e->msgs);
12268f2dc78dSAlexander Aring 
12276ed7257bSPatrick Caulfield 	users = --e->users;
12286ed7257bSPatrick Caulfield 	if (users)
12296ed7257bSPatrick Caulfield 		goto out;
1230f0747ebfSAlexander Aring 
1231f0747ebfSAlexander Aring 	e->len = DLM_WQ_LENGTH_BYTES(e);
12326ed7257bSPatrick Caulfield 	spin_unlock(&con->writequeue_lock);
12336ed7257bSPatrick Caulfield 
12346ed7257bSPatrick Caulfield 	queue_work(send_workqueue, &con->swork);
12356ed7257bSPatrick Caulfield 	return;
12366ed7257bSPatrick Caulfield 
12376ed7257bSPatrick Caulfield out:
12386ed7257bSPatrick Caulfield 	spin_unlock(&con->writequeue_lock);
12396ed7257bSPatrick Caulfield 	return;
12406ed7257bSPatrick Caulfield }
12416ed7257bSPatrick Caulfield 
1242a8449f23SAlexander Aring /* avoid a false positive for connections_srcu: the read lock was taken in
1243a8449f23SAlexander Aring  * dlm_lowcomms_new_msg()
1244a8449f23SAlexander Aring  */
1245a8449f23SAlexander Aring #ifndef __CHECKER__
12462874d1a6SAlexander Aring void dlm_lowcomms_commit_msg(struct dlm_msg *msg)
12472874d1a6SAlexander Aring {
12482874d1a6SAlexander Aring 	_dlm_lowcomms_commit_msg(msg);
12492874d1a6SAlexander Aring 	srcu_read_unlock(&connections_srcu, msg->idx);
125030ea3257SAlexander Aring 	/* drop the extra reference taken in dlm_lowcomms_new_msg() */
125130ea3257SAlexander Aring 	kref_put(&msg->ref, dlm_msg_release);
12522874d1a6SAlexander Aring }
1253a8449f23SAlexander Aring #endif
12542874d1a6SAlexander Aring 
12558f2dc78dSAlexander Aring void dlm_lowcomms_put_msg(struct dlm_msg *msg)
12568f2dc78dSAlexander Aring {
12578f2dc78dSAlexander Aring 	kref_put(&msg->ref, dlm_msg_release);
12588f2dc78dSAlexander Aring }
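/* Editorial sketch of the expected caller pattern (illustrative only; the
 * real caller is the midcomms layer, and the gfp flag below is an
 * assumption):
 *
 *	char *ppc;
 *	struct dlm_msg *msg;
 *
 *	msg = dlm_lowcomms_new_msg(nodeid, len, GFP_NOFS, &ppc, NULL, NULL);
 *	if (!msg)
 *		return -ENOMEM;
 *
 *	memcpy(ppc, payload, len);     fill the reserved writequeue space
 *	dlm_lowcomms_commit_msg(msg);  make it eligible for sending
 *	dlm_lowcomms_put_msg(msg);     drop the caller's reference
 *
 * len must be at least sizeof(struct dlm_header) and at most
 * DLM_MAX_SOCKET_BUFSIZE, as checked in dlm_lowcomms_new_msg() above.
 */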
12598f2dc78dSAlexander Aring 
12602874d1a6SAlexander Aring /* does not hold connections_srcu, usable from workqueue context only */
12612874d1a6SAlexander Aring int dlm_lowcomms_resend_msg(struct dlm_msg *msg)
12622874d1a6SAlexander Aring {
12632874d1a6SAlexander Aring 	struct dlm_msg *msg_resend;
12642874d1a6SAlexander Aring 	char *ppc;
12652874d1a6SAlexander Aring 
12662874d1a6SAlexander Aring 	if (msg->retransmit)
12672874d1a6SAlexander Aring 		return 1;
12682874d1a6SAlexander Aring 
12692874d1a6SAlexander Aring 	msg_resend = dlm_lowcomms_new_msg_con(msg->entry->con, msg->len,
12702874d1a6SAlexander Aring 					      GFP_ATOMIC, &ppc, NULL, NULL);
12712874d1a6SAlexander Aring 	if (!msg_resend)
12722874d1a6SAlexander Aring 		return -ENOMEM;
12732874d1a6SAlexander Aring 
12742874d1a6SAlexander Aring 	msg->retransmit = true;
12752874d1a6SAlexander Aring 	kref_get(&msg->ref);
12762874d1a6SAlexander Aring 	msg_resend->orig_msg = msg;
12772874d1a6SAlexander Aring 
12782874d1a6SAlexander Aring 	memcpy(ppc, msg->ppc, msg->len);
12792874d1a6SAlexander Aring 	_dlm_lowcomms_commit_msg(msg_resend);
12802874d1a6SAlexander Aring 	dlm_lowcomms_put_msg(msg_resend);
12812874d1a6SAlexander Aring 
12822874d1a6SAlexander Aring 	return 0;
12832874d1a6SAlexander Aring }
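/* Editorial note: a resend copies the original payload into a fresh
 * writequeue slot and pins the original via msg_resend->orig_msg plus an
 * extra reference, so the original dlm_msg stays valid at least until the
 * retransmitted copy has been released.
 */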
12842874d1a6SAlexander Aring 
12856ed7257bSPatrick Caulfield /* Send a message */
12866ed7257bSPatrick Caulfield static void send_to_sock(struct connection *con)
12876ed7257bSPatrick Caulfield {
12886ed7257bSPatrick Caulfield 	const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
12896ed7257bSPatrick Caulfield 	struct writequeue_entry *e;
129066d5955aSAlexander Aring 	int len, offset, ret;
1291194a3fb4SAlexander Aring 	int count;
1292194a3fb4SAlexander Aring 
1293194a3fb4SAlexander Aring again:
1294194a3fb4SAlexander Aring 	count = 0;
12956ed7257bSPatrick Caulfield 
12966ed7257bSPatrick Caulfield 	mutex_lock(&con->sock_mutex);
12976ed7257bSPatrick Caulfield 	if (con->sock == NULL)
12986ed7257bSPatrick Caulfield 		goto out_connect;
12996ed7257bSPatrick Caulfield 
13006ed7257bSPatrick Caulfield 	spin_lock(&con->writequeue_lock);
13016ed7257bSPatrick Caulfield 	for (;;) {
130266d5955aSAlexander Aring 		e = con_next_wq(con);
130366d5955aSAlexander Aring 		if (!e)
13046ed7257bSPatrick Caulfield 			break;
13056ed7257bSPatrick Caulfield 
13066ed7257bSPatrick Caulfield 		len = e->len;
13076ed7257bSPatrick Caulfield 		offset = e->offset;
13086ed7257bSPatrick Caulfield 		BUG_ON(len == 0 && e->users == 0);
13096ed7257bSPatrick Caulfield 		spin_unlock(&con->writequeue_lock);
13106ed7257bSPatrick Caulfield 
13111329e3f2SPaolo Bonzini 		ret = kernel_sendpage(con->sock, e->page, offset, len,
13126ed7257bSPatrick Caulfield 				      msg_flags);
131392732376SAlexander Aring 		trace_dlm_send(con->nodeid, ret);
1314d66f8277SPatrick Caulfield 		if (ret == -EAGAIN || ret == 0) {
1315b36930ddSDavid Miller 			if (ret == -EAGAIN &&
13169cd3e072SEric Dumazet 			    test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) &&
1317b36930ddSDavid Miller 			    !test_and_set_bit(CF_APP_LIMITED, &con->flags)) {
1318b36930ddSDavid Miller 				/* Notify TCP that we're limited by the
1319b36930ddSDavid Miller 				 * application window size.
1320b36930ddSDavid Miller 				 */
1321b36930ddSDavid Miller 				set_bit(SOCK_NOSPACE, &con->sock->flags);
1322b36930ddSDavid Miller 				con->sock->sk->sk_write_pending++;
1323b36930ddSDavid Miller 			}
1324d66f8277SPatrick Caulfield 			cond_resched();
13256ed7257bSPatrick Caulfield 			goto out;
13269c5bef58SYing Xue 		} else if (ret < 0)
1327ba868d9dSAlexander Aring 			goto out;
1328f92c8dd7SBob Peterson 
13296ed7257bSPatrick Caulfield 		spin_lock(&con->writequeue_lock);
13305d689871SMike Christie 		writequeue_entry_complete(e, ret);
1331194a3fb4SAlexander Aring 
1332194a3fb4SAlexander Aring 		/* Don't starve processes that are filling the write queues */
1333194a3fb4SAlexander Aring 		if (++count >= MAX_SEND_MSG_COUNT) {
1334194a3fb4SAlexander Aring 			spin_unlock(&con->writequeue_lock);
1335194a3fb4SAlexander Aring 			mutex_unlock(&con->sock_mutex);
1336194a3fb4SAlexander Aring 			cond_resched();
1337194a3fb4SAlexander Aring 			goto again;
1338194a3fb4SAlexander Aring 		}
13396ed7257bSPatrick Caulfield 	}
13406ed7257bSPatrick Caulfield 	spin_unlock(&con->writequeue_lock);
13418aa31cbfSAlexander Aring 
13426ed7257bSPatrick Caulfield out:
13436ed7257bSPatrick Caulfield 	mutex_unlock(&con->sock_mutex);
13446ed7257bSPatrick Caulfield 	return;
13456ed7257bSPatrick Caulfield 
13466ed7257bSPatrick Caulfield out_connect:
13476ed7257bSPatrick Caulfield 	mutex_unlock(&con->sock_mutex);
134801da24d3SBob Peterson 	queue_work(send_workqueue, &con->swork);
134901da24d3SBob Peterson 	cond_resched();
13506ed7257bSPatrick Caulfield }
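/* Editorial note: on -EAGAIN the partially sent entry is left on the
 * writequeue untouched; the CF_APP_LIMITED/SOCK_NOSPACE bits set above let
 * the socket's write-space callback requeue the send work once there is room
 * in the send buffer again.
 */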
13516ed7257bSPatrick Caulfield 
13526ed7257bSPatrick Caulfield static void clean_one_writequeue(struct connection *con)
13536ed7257bSPatrick Caulfield {
13545e9ccc37SChristine Caulfield 	struct writequeue_entry *e, *safe;
13556ed7257bSPatrick Caulfield 
13566ed7257bSPatrick Caulfield 	spin_lock(&con->writequeue_lock);
13575e9ccc37SChristine Caulfield 	list_for_each_entry_safe(e, safe, &con->writequeue, list) {
13586ed7257bSPatrick Caulfield 		free_entry(e);
13596ed7257bSPatrick Caulfield 	}
13606ed7257bSPatrick Caulfield 	spin_unlock(&con->writequeue_lock);
13616ed7257bSPatrick Caulfield }
13626ed7257bSPatrick Caulfield 
1363*6f0b0b5dSAlexander Aring static void connection_release(struct rcu_head *rcu)
1364*6f0b0b5dSAlexander Aring {
1365*6f0b0b5dSAlexander Aring 	struct connection *con = container_of(rcu, struct connection, rcu);
1366*6f0b0b5dSAlexander Aring 
1367*6f0b0b5dSAlexander Aring 	kfree(con->rx_buf);
1368*6f0b0b5dSAlexander Aring 	kfree(con);
1369*6f0b0b5dSAlexander Aring }
1370*6f0b0b5dSAlexander Aring 
13716ed7257bSPatrick Caulfield /* Called from recovery when it knows that a node has
13726ed7257bSPatrick Caulfield    left the cluster */
13736ed7257bSPatrick Caulfield int dlm_lowcomms_close(int nodeid)
13746ed7257bSPatrick Caulfield {
13756ed7257bSPatrick Caulfield 	struct connection *con;
1376b38bc9c2SAlexander Aring 	int idx;
13776ed7257bSPatrick Caulfield 
13786ed7257bSPatrick Caulfield 	log_print("closing connection to node %d", nodeid);
1379*6f0b0b5dSAlexander Aring 
1380b38bc9c2SAlexander Aring 	idx = srcu_read_lock(&connections_srcu);
13816ed7257bSPatrick Caulfield 	con = nodeid2con(nodeid, 0);
1382*6f0b0b5dSAlexander Aring 	if (WARN_ON_ONCE(!con)) {
1383*6f0b0b5dSAlexander Aring 		srcu_read_unlock(&connections_srcu, idx);
1384*6f0b0b5dSAlexander Aring 		return -ENOENT;
1385*6f0b0b5dSAlexander Aring 	}
1386*6f0b0b5dSAlexander Aring 
1387*6f0b0b5dSAlexander Aring 	spin_lock(&connections_lock);
1388*6f0b0b5dSAlexander Aring 	hlist_del_rcu(&con->list);
1389*6f0b0b5dSAlexander Aring 	spin_unlock(&connections_lock);
1390*6f0b0b5dSAlexander Aring 
13910d737a8cSMarcelo Ricardo Leitner 	close_connection(con, true, true, true);
1392*6f0b0b5dSAlexander Aring 
13936ed7257bSPatrick Caulfield 	clean_one_writequeue(con);
1394*6f0b0b5dSAlexander Aring 	call_srcu(&connections_srcu, &con->rcu, connection_release);
1395*6f0b0b5dSAlexander Aring 	if (con->othercon) {
139653a5edaaSAlexander Aring 		clean_one_writequeue(con->othercon);
1398*6f0b0b5dSAlexander Aring 		call_srcu(&connections_srcu, &con->othercon->rcu, connection_release);
13996ed7257bSPatrick Caulfield 	}
1400b38bc9c2SAlexander Aring 	srcu_read_unlock(&connections_srcu, idx);
140136b71a8bSDavid Teigland 
14026ed7257bSPatrick Caulfield 	return 0;
14036ed7257bSPatrick Caulfield }
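/* Editorial note: the connection is unhashed under connections_lock, but its
 * memory is only released through call_srcu() once every SRCU reader that
 * might still see it has finished; this is why the lookups above run inside
 * srcu_read_lock(&connections_srcu).
 */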
14046ed7257bSPatrick Caulfield 
14056ed7257bSPatrick Caulfield /* Receive workqueue function */
14066ed7257bSPatrick Caulfield static void process_recv_sockets(struct work_struct *work)
14076ed7257bSPatrick Caulfield {
14086ed7257bSPatrick Caulfield 	struct connection *con = container_of(work, struct connection, rwork);
14096ed7257bSPatrick Caulfield 
14106ed7257bSPatrick Caulfield 	clear_bit(CF_READ_PENDING, &con->flags);
141162699b3fSAlexander Aring 	receive_from_sock(con);
14126ed7257bSPatrick Caulfield }
14136ed7257bSPatrick Caulfield 
1414d11ccd45SAlexander Aring static void process_listen_recv_socket(struct work_struct *work)
1415d11ccd45SAlexander Aring {
1416f0f4bb43SAlexander Aring 	int ret;
1417f0f4bb43SAlexander Aring 
1418f0f4bb43SAlexander Aring 	do {
1419f0f4bb43SAlexander Aring 		ret = accept_from_sock(&listen_con);
1420f0f4bb43SAlexander Aring 	} while (!ret);
1421d11ccd45SAlexander Aring }
1422d11ccd45SAlexander Aring 
14238728a455SAlexander Aring static void dlm_connect(struct connection *con)
14248728a455SAlexander Aring {
14258728a455SAlexander Aring 	struct sockaddr_storage addr;
14268728a455SAlexander Aring 	int result, addr_len;
14278728a455SAlexander Aring 	struct socket *sock;
14288728a455SAlexander Aring 	unsigned int mark;
14298728a455SAlexander Aring 
14308728a455SAlexander Aring 	/* Some odd races can cause double-connects, ignore them */
14318728a455SAlexander Aring 	if (con->retries++ > MAX_CONNECT_RETRIES)
14328728a455SAlexander Aring 		return;
14338728a455SAlexander Aring 
14348728a455SAlexander Aring 	if (con->sock) {
14358728a455SAlexander Aring 		log_print("node %d already connected.", con->nodeid);
14368728a455SAlexander Aring 		return;
14378728a455SAlexander Aring 	}
14388728a455SAlexander Aring 
14398728a455SAlexander Aring 	memset(&addr, 0, sizeof(addr));
14408728a455SAlexander Aring 	result = nodeid_to_addr(con->nodeid, &addr, NULL,
14418728a455SAlexander Aring 				dlm_proto_ops->try_new_addr, &mark);
14428728a455SAlexander Aring 	if (result < 0) {
14438728a455SAlexander Aring 		log_print("no address for nodeid %d", con->nodeid);
14448728a455SAlexander Aring 		return;
14458728a455SAlexander Aring 	}
14468728a455SAlexander Aring 
14478728a455SAlexander Aring 	/* Create a socket to communicate with */
1448c51c9cd8SAlexander Aring 	result = sock_create_kern(&init_net, dlm_local_addr[0].ss_family,
14498728a455SAlexander Aring 				  SOCK_STREAM, dlm_proto_ops->proto, &sock);
14508728a455SAlexander Aring 	if (result < 0)
14518728a455SAlexander Aring 		goto socket_err;
14528728a455SAlexander Aring 
14538728a455SAlexander Aring 	sock_set_mark(sock->sk, mark);
14548728a455SAlexander Aring 	dlm_proto_ops->sockopts(sock);
14558728a455SAlexander Aring 
14568728a455SAlexander Aring 	add_sock(sock, con);
14578728a455SAlexander Aring 
14588728a455SAlexander Aring 	result = dlm_proto_ops->bind(sock);
14598728a455SAlexander Aring 	if (result < 0)
14608728a455SAlexander Aring 		goto add_sock_err;
14618728a455SAlexander Aring 
14628728a455SAlexander Aring 	log_print_ratelimited("connecting to %d", con->nodeid);
14638728a455SAlexander Aring 	make_sockaddr(&addr, dlm_config.ci_tcp_port, &addr_len);
14648728a455SAlexander Aring 	result = dlm_proto_ops->connect(con, sock, (struct sockaddr *)&addr,
14658728a455SAlexander Aring 					addr_len);
14668728a455SAlexander Aring 	if (result < 0)
14678728a455SAlexander Aring 		goto add_sock_err;
14688728a455SAlexander Aring 
14698728a455SAlexander Aring 	return;
14708728a455SAlexander Aring 
14718728a455SAlexander Aring add_sock_err:
14728728a455SAlexander Aring 	dlm_close_sock(&con->sock);
14738728a455SAlexander Aring 
14748728a455SAlexander Aring socket_err:
14758728a455SAlexander Aring 	/*
14768728a455SAlexander Aring 	 * Some errors are fatal and this list might need adjusting. For other
14778728a455SAlexander Aring 	 * errors we try again until the max number of retries is reached.
14788728a455SAlexander Aring 	 */
14798728a455SAlexander Aring 	if (result != -EHOSTUNREACH &&
14808728a455SAlexander Aring 	    result != -ENETUNREACH &&
14818728a455SAlexander Aring 	    result != -ENETDOWN &&
14828728a455SAlexander Aring 	    result != -EINVAL &&
14838728a455SAlexander Aring 	    result != -EPROTONOSUPPORT) {
14848728a455SAlexander Aring 		log_print("connect %d try %d error %d", con->nodeid,
14858728a455SAlexander Aring 			  con->retries, result);
14868728a455SAlexander Aring 		msleep(1000);
14878728a455SAlexander Aring 		lowcomms_connect_sock(con);
14888728a455SAlexander Aring 	}
14898728a455SAlexander Aring }
14908728a455SAlexander Aring 
14916ed7257bSPatrick Caulfield /* Send workqueue function */
14926ed7257bSPatrick Caulfield static void process_send_sockets(struct work_struct *work)
14936ed7257bSPatrick Caulfield {
14946ed7257bSPatrick Caulfield 	struct connection *con = container_of(work, struct connection, swork);
14956ed7257bSPatrick Caulfield 
14967443bc96SAlexander Aring 	WARN_ON(test_bit(CF_IS_OTHERCON, &con->flags));
14977443bc96SAlexander Aring 
14988a4abb08Stsutomu.owa@toshiba.co.jp 	clear_bit(CF_WRITE_PENDING, &con->flags);
1499ba868d9dSAlexander Aring 
1500489d8e55SAlexander Aring 	if (test_and_clear_bit(CF_RECONNECT, &con->flags)) {
1501ba868d9dSAlexander Aring 		close_connection(con, false, false, true);
1502489d8e55SAlexander Aring 		dlm_midcomms_unack_msg_resend(con->nodeid);
1503489d8e55SAlexander Aring 	}
1504ba868d9dSAlexander Aring 
15058728a455SAlexander Aring 	if (con->sock == NULL) {
1506ba868d9dSAlexander Aring 		if (test_and_clear_bit(CF_DELAY_CONNECT, &con->flags))
1507ba868d9dSAlexander Aring 			msleep(1000);
15088728a455SAlexander Aring 
15098728a455SAlexander Aring 		mutex_lock(&con->sock_mutex);
15108728a455SAlexander Aring 		dlm_connect(con);
15118728a455SAlexander Aring 		mutex_unlock(&con->sock_mutex);
1512ba868d9dSAlexander Aring 	}
15138728a455SAlexander Aring 
151401da24d3SBob Peterson 	if (!list_empty(&con->writequeue))
15156ed7257bSPatrick Caulfield 		send_to_sock(con);
15166ed7257bSPatrick Caulfield }
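/* Editorial note: CF_RECONNECT makes the send worker close the stale socket
 * and ask midcomms to resend any unacked messages before reconnecting, while
 * CF_DELAY_CONNECT inserts a one second back-off so a failing peer does not
 * cause a tight reconnect loop.
 */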
15176ed7257bSPatrick Caulfield 
15186ed7257bSPatrick Caulfield static void work_stop(void)
15196ed7257bSPatrick Caulfield {
1520fcef0e6cSAlexander Aring 	if (recv_workqueue) {
15216ed7257bSPatrick Caulfield 		destroy_workqueue(recv_workqueue);
1522fcef0e6cSAlexander Aring 		recv_workqueue = NULL;
1523fcef0e6cSAlexander Aring 	}
1524fcef0e6cSAlexander Aring 
1525fcef0e6cSAlexander Aring 	if (send_workqueue) {
15266ed7257bSPatrick Caulfield 		destroy_workqueue(send_workqueue);
1527fcef0e6cSAlexander Aring 		send_workqueue = NULL;
1528fcef0e6cSAlexander Aring 	}
15296ed7257bSPatrick Caulfield }
15306ed7257bSPatrick Caulfield 
15316ed7257bSPatrick Caulfield static int work_start(void)
15326ed7257bSPatrick Caulfield {
15336c6a1cc6SAlexander Aring 	recv_workqueue = alloc_ordered_workqueue("dlm_recv", WQ_MEM_RECLAIM);
1534b9d41052SNamhyung Kim 	if (!recv_workqueue) {
1535b9d41052SNamhyung Kim 		log_print("can't start dlm_recv");
1536b9d41052SNamhyung Kim 		return -ENOMEM;
15376ed7257bSPatrick Caulfield 	}
15386ed7257bSPatrick Caulfield 
15396c6a1cc6SAlexander Aring 	send_workqueue = alloc_ordered_workqueue("dlm_send", WQ_MEM_RECLAIM);
1540b9d41052SNamhyung Kim 	if (!send_workqueue) {
1541b9d41052SNamhyung Kim 		log_print("can't start dlm_send");
15426ed7257bSPatrick Caulfield 		destroy_workqueue(recv_workqueue);
1543fcef0e6cSAlexander Aring 		recv_workqueue = NULL;
1544b9d41052SNamhyung Kim 		return -ENOMEM;
15456ed7257bSPatrick Caulfield 	}
15466ed7257bSPatrick Caulfield 
15476ed7257bSPatrick Caulfield 	return 0;
15486ed7257bSPatrick Caulfield }
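/* Editorial note: both workqueues are ordered, so at most one receive and one
 * send work item run at a time across all connections; WQ_MEM_RECLAIM gives
 * them a rescuer thread so cluster traffic keeps flowing even when the system
 * is under memory pressure.
 */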
15496ed7257bSPatrick Caulfield 
15509d232469SAlexander Aring void dlm_lowcomms_shutdown(void)
15519d232469SAlexander Aring {
1552c3d88dfdSAlexander Aring 	/* stop lowcomms_listen_data_ready calls */
1553c3d88dfdSAlexander Aring 	lock_sock(listen_con.sock->sk);
1554c3d88dfdSAlexander Aring 	listen_con.sock->sk->sk_data_ready = listen_sock.sk_data_ready;
1555c3d88dfdSAlexander Aring 	release_sock(listen_con.sock->sk);
15569d232469SAlexander Aring 
1557c3d88dfdSAlexander Aring 	cancel_work_sync(&listen_con.rwork);
15589d232469SAlexander Aring 	dlm_close_sock(&listen_con.sock);
15594f567acbSAlexander Aring }
15604f567acbSAlexander Aring 
15614f567acbSAlexander Aring void dlm_lowcomms_shutdown_node(int nodeid, bool force)
15624f567acbSAlexander Aring {
15634f567acbSAlexander Aring 	struct connection *con;
15644f567acbSAlexander Aring 	int idx;
15659d232469SAlexander Aring 
1566b38bc9c2SAlexander Aring 	idx = srcu_read_lock(&connections_srcu);
15674f567acbSAlexander Aring 	con = nodeid2con(nodeid, 0);
15684f567acbSAlexander Aring 	if (WARN_ON_ONCE(!con)) {
15694f567acbSAlexander Aring 		srcu_read_unlock(&connections_srcu, idx);
15704f567acbSAlexander Aring 		return;
15714f567acbSAlexander Aring 	}
15724f567acbSAlexander Aring 
1573c3d88dfdSAlexander Aring 	flush_work(&con->swork);
15744f567acbSAlexander Aring 	WARN_ON_ONCE(!force && !list_empty(&con->writequeue));
15754f567acbSAlexander Aring 	clean_one_writequeue(con);
15764f567acbSAlexander Aring 	if (con->othercon)
15774f567acbSAlexander Aring 		clean_one_writequeue(con->othercon);
15784f567acbSAlexander Aring 	close_connection(con, true, true, true);
1579b38bc9c2SAlexander Aring 	srcu_read_unlock(&connections_srcu, idx);
15809d232469SAlexander Aring }
15819d232469SAlexander Aring 
1582f0fb83cbStsutomu.owa@toshiba.co.jp static void _stop_conn(struct connection *con, bool and_other)
15836ed7257bSPatrick Caulfield {
1584f0fb83cbStsutomu.owa@toshiba.co.jp 	mutex_lock(&con->sock_mutex);
1585173a31feStsutomu.owa@toshiba.co.jp 	set_bit(CF_CLOSE, &con->flags);
1586f0fb83cbStsutomu.owa@toshiba.co.jp 	set_bit(CF_READ_PENDING, &con->flags);
15878a4abb08Stsutomu.owa@toshiba.co.jp 	set_bit(CF_WRITE_PENDING, &con->flags);
158893eaadebStsutomu.owa@toshiba.co.jp 	if (con->sock && con->sock->sk) {
158992c44605SAlexander Aring 		lock_sock(con->sock->sk);
1590afb853fbSPatrick Caulfield 		con->sock->sk->sk_user_data = NULL;
159192c44605SAlexander Aring 		release_sock(con->sock->sk);
159293eaadebStsutomu.owa@toshiba.co.jp 	}
1593f0fb83cbStsutomu.owa@toshiba.co.jp 	if (con->othercon && and_other)
1594f0fb83cbStsutomu.owa@toshiba.co.jp 		_stop_conn(con->othercon, false);
1595f0fb83cbStsutomu.owa@toshiba.co.jp 	mutex_unlock(&con->sock_mutex);
1596f0fb83cbStsutomu.owa@toshiba.co.jp }
1597f0fb83cbStsutomu.owa@toshiba.co.jp 
1598f0fb83cbStsutomu.owa@toshiba.co.jp static void stop_conn(struct connection *con)
1599f0fb83cbStsutomu.owa@toshiba.co.jp {
1600f0fb83cbStsutomu.owa@toshiba.co.jp 	_stop_conn(con, true);
1601afb853fbSPatrick Caulfield }
16025e9ccc37SChristine Caulfield 
16035e9ccc37SChristine Caulfield static void free_conn(struct connection *con)
16045e9ccc37SChristine Caulfield {
16050d737a8cSMarcelo Ricardo Leitner 	close_connection(con, true, true, true);
16066ed7257bSPatrick Caulfield }
16075e9ccc37SChristine Caulfield 
1608f0fb83cbStsutomu.owa@toshiba.co.jp static void work_flush(void)
1609f0fb83cbStsutomu.owa@toshiba.co.jp {
1610b38bc9c2SAlexander Aring 	int ok;
1611f0fb83cbStsutomu.owa@toshiba.co.jp 	int i;
1612f0fb83cbStsutomu.owa@toshiba.co.jp 	struct connection *con;
1613f0fb83cbStsutomu.owa@toshiba.co.jp 
1614f0fb83cbStsutomu.owa@toshiba.co.jp 	do {
1615f0fb83cbStsutomu.owa@toshiba.co.jp 		ok = 1;
1616f0fb83cbStsutomu.owa@toshiba.co.jp 		foreach_conn(stop_conn);
1617b355516fSDavid Windsor 		if (recv_workqueue)
1618f0fb83cbStsutomu.owa@toshiba.co.jp 			flush_workqueue(recv_workqueue);
1619b355516fSDavid Windsor 		if (send_workqueue)
1620f0fb83cbStsutomu.owa@toshiba.co.jp 			flush_workqueue(send_workqueue);
1621f0fb83cbStsutomu.owa@toshiba.co.jp 		for (i = 0; i < CONN_HASH_SIZE && ok; i++) {
1622a47666ebSAlexander Aring 			hlist_for_each_entry_rcu(con, &connection_hash[i],
1623a47666ebSAlexander Aring 						 list) {
1624f0fb83cbStsutomu.owa@toshiba.co.jp 				ok &= test_bit(CF_READ_PENDING, &con->flags);
16258a4abb08Stsutomu.owa@toshiba.co.jp 				ok &= test_bit(CF_WRITE_PENDING, &con->flags);
16268a4abb08Stsutomu.owa@toshiba.co.jp 				if (con->othercon) {
1627f0fb83cbStsutomu.owa@toshiba.co.jp 					ok &= test_bit(CF_READ_PENDING,
1628f0fb83cbStsutomu.owa@toshiba.co.jp 						       &con->othercon->flags);
16298a4abb08Stsutomu.owa@toshiba.co.jp 					ok &= test_bit(CF_WRITE_PENDING,
16308a4abb08Stsutomu.owa@toshiba.co.jp 						       &con->othercon->flags);
16318a4abb08Stsutomu.owa@toshiba.co.jp 				}
1632f0fb83cbStsutomu.owa@toshiba.co.jp 			}
1633f0fb83cbStsutomu.owa@toshiba.co.jp 		}
1634f0fb83cbStsutomu.owa@toshiba.co.jp 	} while (!ok);
1635f0fb83cbStsutomu.owa@toshiba.co.jp }
1636f0fb83cbStsutomu.owa@toshiba.co.jp 
16375e9ccc37SChristine Caulfield void dlm_lowcomms_stop(void)
16385e9ccc37SChristine Caulfield {
1639b38bc9c2SAlexander Aring 	int idx;
1640b38bc9c2SAlexander Aring 
1641b38bc9c2SAlexander Aring 	idx = srcu_read_lock(&connections_srcu);
1642f0fb83cbStsutomu.owa@toshiba.co.jp 	work_flush();
16433a8db798SMarcelo Ricardo Leitner 	foreach_conn(free_conn);
1644b38bc9c2SAlexander Aring 	srcu_read_unlock(&connections_srcu, idx);
16456ed7257bSPatrick Caulfield 	work_stop();
1646a66c008cSAlexander Aring 
1647a66c008cSAlexander Aring 	dlm_proto_ops = NULL;
16486ed7257bSPatrick Caulfield }
16496ed7257bSPatrick Caulfield 
16502dc6b115SAlexander Aring static int dlm_listen_for_all(void)
16512dc6b115SAlexander Aring {
16522dc6b115SAlexander Aring 	struct socket *sock;
16532dc6b115SAlexander Aring 	int result;
16542dc6b115SAlexander Aring 
16552dc6b115SAlexander Aring 	log_print("Using %s for communications",
16562dc6b115SAlexander Aring 		  dlm_proto_ops->name);
16572dc6b115SAlexander Aring 
16582dc6b115SAlexander Aring 	result = dlm_proto_ops->listen_validate();
16592dc6b115SAlexander Aring 	if (result < 0)
16602dc6b115SAlexander Aring 		return result;
16612dc6b115SAlexander Aring 
1662c51c9cd8SAlexander Aring 	result = sock_create_kern(&init_net, dlm_local_addr[0].ss_family,
16632dc6b115SAlexander Aring 				  SOCK_STREAM, dlm_proto_ops->proto, &sock);
16642dc6b115SAlexander Aring 	if (result < 0) {
1665fe933675SAlexander Aring 		log_print("Can't create comms socket: %d", result);
16661f4f1084SDan Carpenter 		return result;
16672dc6b115SAlexander Aring 	}
16682dc6b115SAlexander Aring 
16692dc6b115SAlexander Aring 	sock_set_mark(sock->sk, dlm_config.ci_mark);
16702dc6b115SAlexander Aring 	dlm_proto_ops->listen_sockopts(sock);
16712dc6b115SAlexander Aring 
16722dc6b115SAlexander Aring 	result = dlm_proto_ops->listen_bind(sock);
16732dc6b115SAlexander Aring 	if (result < 0)
16742dc6b115SAlexander Aring 		goto out;
16752dc6b115SAlexander Aring 
1676c3d88dfdSAlexander Aring 	lock_sock(sock->sk);
1677c3d88dfdSAlexander Aring 	listen_sock.sk_data_ready = sock->sk->sk_data_ready;
1678c3d88dfdSAlexander Aring 	listen_sock.sk_write_space = sock->sk->sk_write_space;
1679c3d88dfdSAlexander Aring 	listen_sock.sk_error_report = sock->sk->sk_error_report;
1680c3d88dfdSAlexander Aring 	listen_sock.sk_state_change = sock->sk->sk_state_change;
1681c3d88dfdSAlexander Aring 
1682c3d88dfdSAlexander Aring 	listen_con.sock = sock;
1683c3d88dfdSAlexander Aring 
1684c3d88dfdSAlexander Aring 	sock->sk->sk_allocation = GFP_NOFS;
1685c3d88dfdSAlexander Aring 	sock->sk->sk_data_ready = lowcomms_listen_data_ready;
1686c3d88dfdSAlexander Aring 	release_sock(sock->sk);
16872dc6b115SAlexander Aring 
16882dc6b115SAlexander Aring 	result = sock->ops->listen(sock, 5);
16892dc6b115SAlexander Aring 	if (result < 0) {
16902dc6b115SAlexander Aring 		dlm_close_sock(&listen_con.sock);
169108ae0547SAlexander Aring 		return result;
16922dc6b115SAlexander Aring 	}
16932dc6b115SAlexander Aring 
16942dc6b115SAlexander Aring 	return 0;
16952dc6b115SAlexander Aring 
16962dc6b115SAlexander Aring out:
16972dc6b115SAlexander Aring 	sock_release(sock);
16982dc6b115SAlexander Aring 	return result;
16992dc6b115SAlexander Aring }
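/* Editorial note: the original socket callbacks are saved in listen_sock
 * before sk_data_ready is redirected to lowcomms_listen_data_ready, so
 * dlm_lowcomms_shutdown() can put sk_data_ready back before the listening
 * socket is closed.
 */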
17002dc6b115SAlexander Aring 
17018728a455SAlexander Aring static int dlm_tcp_bind(struct socket *sock)
17028728a455SAlexander Aring {
17038728a455SAlexander Aring 	struct sockaddr_storage src_addr;
17048728a455SAlexander Aring 	int result, addr_len;
17058728a455SAlexander Aring 
17068728a455SAlexander Aring 	/* Bind to our cluster-known address when connecting, to avoid
17078728a455SAlexander Aring 	 * routing problems.
17088728a455SAlexander Aring 	 */
1709c51c9cd8SAlexander Aring 	memcpy(&src_addr, &dlm_local_addr[0], sizeof(src_addr));
17108728a455SAlexander Aring 	make_sockaddr(&src_addr, 0, &addr_len);
17118728a455SAlexander Aring 
17128728a455SAlexander Aring 	result = sock->ops->bind(sock, (struct sockaddr *)&src_addr,
17138728a455SAlexander Aring 				 addr_len);
17148728a455SAlexander Aring 	if (result < 0) {
17158728a455SAlexander Aring 		/* This *may* not indicate a critical error */
17168728a455SAlexander Aring 		log_print("could not bind for connect: %d", result);
17178728a455SAlexander Aring 	}
17188728a455SAlexander Aring 
17198728a455SAlexander Aring 	return 0;
17208728a455SAlexander Aring }
17218728a455SAlexander Aring 
17228728a455SAlexander Aring static int dlm_tcp_connect(struct connection *con, struct socket *sock,
17238728a455SAlexander Aring 			   struct sockaddr *addr, int addr_len)
17248728a455SAlexander Aring {
17258728a455SAlexander Aring 	int ret;
17268728a455SAlexander Aring 
17278728a455SAlexander Aring 	ret = sock->ops->connect(sock, addr, addr_len, O_NONBLOCK);
17288728a455SAlexander Aring 	switch (ret) {
17298728a455SAlexander Aring 	case -EINPROGRESS:
17308728a455SAlexander Aring 		fallthrough;
17318728a455SAlexander Aring 	case 0:
17328728a455SAlexander Aring 		return 0;
17338728a455SAlexander Aring 	}
17348728a455SAlexander Aring 
17358728a455SAlexander Aring 	return ret;
17368728a455SAlexander Aring }
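/* Editorial note: with O_NONBLOCK the connect normally returns -EINPROGRESS,
 * which is treated like immediate success above; completion or failure is
 * then reported asynchronously through the socket callbacks.
 */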
17378728a455SAlexander Aring 
17382dc6b115SAlexander Aring static int dlm_tcp_listen_validate(void)
17392dc6b115SAlexander Aring {
17402dc6b115SAlexander Aring 	/* We don't support multi-homed hosts */
17412dc6b115SAlexander Aring 	if (dlm_local_count > 1) {
17422dc6b115SAlexander Aring 		log_print("TCP protocol can't handle multi-homed hosts, try SCTP");
17432dc6b115SAlexander Aring 		return -EINVAL;
17442dc6b115SAlexander Aring 	}
17452dc6b115SAlexander Aring 
17462dc6b115SAlexander Aring 	return 0;
17472dc6b115SAlexander Aring }
17482dc6b115SAlexander Aring 
17492dc6b115SAlexander Aring static void dlm_tcp_sockopts(struct socket *sock)
17502dc6b115SAlexander Aring {
17512dc6b115SAlexander Aring 	/* Turn off Nagle's algorithm */
17522dc6b115SAlexander Aring 	tcp_sock_set_nodelay(sock->sk);
17532dc6b115SAlexander Aring }
17542dc6b115SAlexander Aring 
17552dc6b115SAlexander Aring static void dlm_tcp_listen_sockopts(struct socket *sock)
17562dc6b115SAlexander Aring {
17572dc6b115SAlexander Aring 	dlm_tcp_sockopts(sock);
17582dc6b115SAlexander Aring 	sock_set_reuseaddr(sock->sk);
17592dc6b115SAlexander Aring }
17602dc6b115SAlexander Aring 
17612dc6b115SAlexander Aring static int dlm_tcp_listen_bind(struct socket *sock)
17622dc6b115SAlexander Aring {
17632dc6b115SAlexander Aring 	int addr_len;
17642dc6b115SAlexander Aring 
17652dc6b115SAlexander Aring 	/* Bind to our port */
1766c51c9cd8SAlexander Aring 	make_sockaddr(&dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len);
1767c51c9cd8SAlexander Aring 	return sock->ops->bind(sock, (struct sockaddr *)&dlm_local_addr[0],
17682dc6b115SAlexander Aring 			       addr_len);
17692dc6b115SAlexander Aring }
17702dc6b115SAlexander Aring 
1771a66c008cSAlexander Aring static const struct dlm_proto_ops dlm_tcp_ops = {
17722dc6b115SAlexander Aring 	.name = "TCP",
17732dc6b115SAlexander Aring 	.proto = IPPROTO_TCP,
17748728a455SAlexander Aring 	.connect = dlm_tcp_connect,
17758728a455SAlexander Aring 	.sockopts = dlm_tcp_sockopts,
17768728a455SAlexander Aring 	.bind = dlm_tcp_bind,
17772dc6b115SAlexander Aring 	.listen_validate = dlm_tcp_listen_validate,
17782dc6b115SAlexander Aring 	.listen_sockopts = dlm_tcp_listen_sockopts,
17792dc6b115SAlexander Aring 	.listen_bind = dlm_tcp_listen_bind,
1780a66c008cSAlexander Aring };
1781a66c008cSAlexander Aring 
17828728a455SAlexander Aring static int dlm_sctp_bind(struct socket *sock)
17838728a455SAlexander Aring {
17848728a455SAlexander Aring 	return sctp_bind_addrs(sock, 0);
17858728a455SAlexander Aring }
17868728a455SAlexander Aring 
17878728a455SAlexander Aring static int dlm_sctp_connect(struct connection *con, struct socket *sock,
17888728a455SAlexander Aring 			    struct sockaddr *addr, int addr_len)
17898728a455SAlexander Aring {
17908728a455SAlexander Aring 	int ret;
17918728a455SAlexander Aring 
17928728a455SAlexander Aring 	/*
17938728a455SAlexander Aring 	 * Make the sock->ops->connect() call return within a bounded time,
17948728a455SAlexander Aring 	 * since the O_NONBLOCK argument to connect() does not work here.
17958728a455SAlexander Aring 	 * Afterwards, restore the default value of the send timeout.
17968728a455SAlexander Aring 	 */
17978728a455SAlexander Aring 	sock_set_sndtimeo(sock->sk, 5);
17988728a455SAlexander Aring 	ret = sock->ops->connect(sock, addr, addr_len, 0);
17998728a455SAlexander Aring 	sock_set_sndtimeo(sock->sk, 0);
18008728a455SAlexander Aring 	if (ret < 0)
18018728a455SAlexander Aring 		return ret;
18028728a455SAlexander Aring 
18038728a455SAlexander Aring 	if (!test_and_set_bit(CF_CONNECTED, &con->flags))
1804dfc020f3SAlexander Aring 		log_print("connected to node %d", con->nodeid);
18058728a455SAlexander Aring 
18068728a455SAlexander Aring 	return 0;
18078728a455SAlexander Aring }
18088728a455SAlexander Aring 
180990d21fc0SAlexander Aring static int dlm_sctp_listen_validate(void)
181090d21fc0SAlexander Aring {
181190d21fc0SAlexander Aring 	if (!IS_ENABLED(CONFIG_IP_SCTP)) {
181290d21fc0SAlexander Aring 		log_print("SCTP is not enabled by this kernel");
181390d21fc0SAlexander Aring 		return -EOPNOTSUPP;
181490d21fc0SAlexander Aring 	}
181590d21fc0SAlexander Aring 
181690d21fc0SAlexander Aring 	request_module("sctp");
181790d21fc0SAlexander Aring 	return 0;
181890d21fc0SAlexander Aring }
181990d21fc0SAlexander Aring 
18202dc6b115SAlexander Aring static int dlm_sctp_bind_listen(struct socket *sock)
18212dc6b115SAlexander Aring {
18222dc6b115SAlexander Aring 	return sctp_bind_addrs(sock, dlm_config.ci_tcp_port);
18232dc6b115SAlexander Aring }
18242dc6b115SAlexander Aring 
18252dc6b115SAlexander Aring static void dlm_sctp_sockopts(struct socket *sock)
18262dc6b115SAlexander Aring {
18272dc6b115SAlexander Aring 	/* Turn off Nagle's algorithm */
18282dc6b115SAlexander Aring 	sctp_sock_set_nodelay(sock->sk);
18292dc6b115SAlexander Aring 	sock_set_rcvbuf(sock->sk, NEEDED_RMEM);
18302dc6b115SAlexander Aring }
18312dc6b115SAlexander Aring 
1832a66c008cSAlexander Aring static const struct dlm_proto_ops dlm_sctp_ops = {
18332dc6b115SAlexander Aring 	.name = "SCTP",
18342dc6b115SAlexander Aring 	.proto = IPPROTO_SCTP,
18358728a455SAlexander Aring 	.try_new_addr = true,
18368728a455SAlexander Aring 	.connect = dlm_sctp_connect,
18378728a455SAlexander Aring 	.sockopts = dlm_sctp_sockopts,
18388728a455SAlexander Aring 	.bind = dlm_sctp_bind,
183990d21fc0SAlexander Aring 	.listen_validate = dlm_sctp_listen_validate,
18402dc6b115SAlexander Aring 	.listen_sockopts = dlm_sctp_sockopts,
18412dc6b115SAlexander Aring 	.listen_bind = dlm_sctp_bind_listen,
1842a66c008cSAlexander Aring };
1843a66c008cSAlexander Aring 
18446ed7257bSPatrick Caulfield int dlm_lowcomms_start(void)
18456ed7257bSPatrick Caulfield {
18466ed7257bSPatrick Caulfield 	int error = -EINVAL;
18476ed7257bSPatrick Caulfield 
18486ed7257bSPatrick Caulfield 	init_local();
18496ed7257bSPatrick Caulfield 	if (!dlm_local_count) {
1850617e82e1SDavid Teigland 		error = -ENOTCONN;
18516ed7257bSPatrick Caulfield 		log_print("no local IP address has been set");
1852513ef596SDavid Teigland 		goto fail;
18536ed7257bSPatrick Caulfield 	}
18546ed7257bSPatrick Caulfield 
1855513ef596SDavid Teigland 	error = work_start();
1856513ef596SDavid Teigland 	if (error)
1857c51c9cd8SAlexander Aring 		goto fail;
1858513ef596SDavid Teigland 
18596ed7257bSPatrick Caulfield 	/* Start listening */
1860ac7d5d03SAlexander Aring 	switch (dlm_config.ci_protocol) {
1861ac7d5d03SAlexander Aring 	case DLM_PROTO_TCP:
1862a66c008cSAlexander Aring 		dlm_proto_ops = &dlm_tcp_ops;
1863ac7d5d03SAlexander Aring 		break;
1864ac7d5d03SAlexander Aring 	case DLM_PROTO_SCTP:
1865a66c008cSAlexander Aring 		dlm_proto_ops = &dlm_sctp_ops;
1866ac7d5d03SAlexander Aring 		break;
1867ac7d5d03SAlexander Aring 	default:
1868ac7d5d03SAlexander Aring 		log_print("Invalid protocol identifier %d set",
1869ac7d5d03SAlexander Aring 			  dlm_config.ci_protocol);
1870ac7d5d03SAlexander Aring 		error = -EINVAL;
18712dc6b115SAlexander Aring 		goto fail_proto_ops;
1872ac7d5d03SAlexander Aring 	}
18732dc6b115SAlexander Aring 
18742dc6b115SAlexander Aring 	error = dlm_listen_for_all();
18756ed7257bSPatrick Caulfield 	if (error)
18762dc6b115SAlexander Aring 		goto fail_listen;
18776ed7257bSPatrick Caulfield 
18786ed7257bSPatrick Caulfield 	return 0;
18796ed7257bSPatrick Caulfield 
18802dc6b115SAlexander Aring fail_listen:
18812dc6b115SAlexander Aring 	dlm_proto_ops = NULL;
18822dc6b115SAlexander Aring fail_proto_ops:
1883fcef0e6cSAlexander Aring 	work_stop();
1884513ef596SDavid Teigland fail:
18856ed7257bSPatrick Caulfield 	return error;
18866ed7257bSPatrick Caulfield }
188736b71a8bSDavid Teigland 
18888b0188b0SAlexander Aring void dlm_lowcomms_init(void)
18898b0188b0SAlexander Aring {
18908b0188b0SAlexander Aring 	int i;
18918b0188b0SAlexander Aring 
18928b0188b0SAlexander Aring 	for (i = 0; i < CONN_HASH_SIZE; i++)
18938b0188b0SAlexander Aring 		INIT_HLIST_HEAD(&connection_hash[i]);
18948b0188b0SAlexander Aring 
18958b0188b0SAlexander Aring 	INIT_WORK(&listen_con.rwork, process_listen_recv_socket);
18968b0188b0SAlexander Aring }
18978b0188b0SAlexander Aring 
189836b71a8bSDavid Teigland void dlm_lowcomms_exit(void)
189936b71a8bSDavid Teigland {
1900*6f0b0b5dSAlexander Aring 	struct connection *con;
1901*6f0b0b5dSAlexander Aring 	int i, idx;
190236b71a8bSDavid Teigland 
1903*6f0b0b5dSAlexander Aring 	idx = srcu_read_lock(&connections_srcu);
1904*6f0b0b5dSAlexander Aring 	for (i = 0; i < CONN_HASH_SIZE; i++) {
1905*6f0b0b5dSAlexander Aring 		hlist_for_each_entry_rcu(con, &connection_hash[i], list) {
1906*6f0b0b5dSAlexander Aring 			spin_lock(&connections_lock);
1907*6f0b0b5dSAlexander Aring 			hlist_del_rcu(&con->list);
1908*6f0b0b5dSAlexander Aring 			spin_unlock(&connections_lock);
1909*6f0b0b5dSAlexander Aring 
1910*6f0b0b5dSAlexander Aring 			if (con->othercon)
1911*6f0b0b5dSAlexander Aring 				call_srcu(&connections_srcu, &con->othercon->rcu,
1912*6f0b0b5dSAlexander Aring 					  connection_release);
1913*6f0b0b5dSAlexander Aring 			call_srcu(&connections_srcu, &con->rcu, connection_release);
191436b71a8bSDavid Teigland 		}
1915*6f0b0b5dSAlexander Aring 	}
1916*6f0b0b5dSAlexander Aring 	srcu_read_unlock(&connections_srcu, idx);
191736b71a8bSDavid Teigland }
1918