12522fe45SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 26ed7257bSPatrick Caulfield /****************************************************************************** 36ed7257bSPatrick Caulfield ******************************************************************************* 46ed7257bSPatrick Caulfield ** 56ed7257bSPatrick Caulfield ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 65e9ccc37SChristine Caulfield ** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. 76ed7257bSPatrick Caulfield ** 86ed7257bSPatrick Caulfield ** 96ed7257bSPatrick Caulfield ******************************************************************************* 106ed7257bSPatrick Caulfield ******************************************************************************/ 116ed7257bSPatrick Caulfield 126ed7257bSPatrick Caulfield /* 136ed7257bSPatrick Caulfield * lowcomms.c 146ed7257bSPatrick Caulfield * 156ed7257bSPatrick Caulfield * This is the "low-level" comms layer. 166ed7257bSPatrick Caulfield * 176ed7257bSPatrick Caulfield * It is responsible for sending/receiving messages 186ed7257bSPatrick Caulfield * from other nodes in the cluster. 196ed7257bSPatrick Caulfield * 206ed7257bSPatrick Caulfield * Cluster nodes are referred to by their nodeids. nodeids are 216ed7257bSPatrick Caulfield * simply 32 bit numbers to the locking module - if they need to 222cf12c0bSJoe Perches * be expanded for the cluster infrastructure then that is its 236ed7257bSPatrick Caulfield * responsibility. It is this layer's 246ed7257bSPatrick Caulfield * responsibility to resolve these into IP address or 256ed7257bSPatrick Caulfield * whatever it needs for inter-node communication. 
266ed7257bSPatrick Caulfield * 276ed7257bSPatrick Caulfield * The comms level is two kernel threads that deal mainly with 286ed7257bSPatrick Caulfield * the receiving of messages from other nodes and passing them 296ed7257bSPatrick Caulfield * up to the mid-level comms layer (which understands the 306ed7257bSPatrick Caulfield * message format) for execution by the locking core, and 316ed7257bSPatrick Caulfield * a send thread which does all the setting up of connections 326ed7257bSPatrick Caulfield * to remote nodes and the sending of data. Threads are not allowed 336ed7257bSPatrick Caulfield * to send their own data because it may cause them to wait in times 346ed7257bSPatrick Caulfield * of high load. Also, this way, the sending thread can collect together 356ed7257bSPatrick Caulfield * messages bound for one node and send them in one block. 366ed7257bSPatrick Caulfield * 372cf12c0bSJoe Perches * lowcomms will choose to use either TCP or SCTP as its transport layer 386ed7257bSPatrick Caulfield * depending on the configuration variable 'protocol'. This should be set 396ed7257bSPatrick Caulfield * to 0 (default) for TCP or 1 for SCTP. It should be configured using a 406ed7257bSPatrick Caulfield * cluster-wide mechanism as it must be the same on all nodes of the cluster 416ed7257bSPatrick Caulfield * for the DLM to function. 
426ed7257bSPatrick Caulfield * 436ed7257bSPatrick Caulfield */ 446ed7257bSPatrick Caulfield 456ed7257bSPatrick Caulfield #include <asm/ioctls.h> 466ed7257bSPatrick Caulfield #include <net/sock.h> 476ed7257bSPatrick Caulfield #include <net/tcp.h> 486ed7257bSPatrick Caulfield #include <linux/pagemap.h> 496ed7257bSPatrick Caulfield #include <linux/file.h> 507a936ce7SMatthias Kaehlcke #include <linux/mutex.h> 516ed7257bSPatrick Caulfield #include <linux/sctp.h> 525a0e3ad6STejun Heo #include <linux/slab.h> 532f2d76ccSBenjamin Poirier #include <net/sctp/sctp.h> 5444ad532bSJoe Perches #include <net/ipv6.h> 556ed7257bSPatrick Caulfield 566ed7257bSPatrick Caulfield #include "dlm_internal.h" 576ed7257bSPatrick Caulfield #include "lowcomms.h" 586ed7257bSPatrick Caulfield #include "midcomms.h" 596ed7257bSPatrick Caulfield #include "config.h" 606ed7257bSPatrick Caulfield 616ed7257bSPatrick Caulfield #define NEEDED_RMEM (4*1024*1024) 625e9ccc37SChristine Caulfield #define CONN_HASH_SIZE 32 636ed7257bSPatrick Caulfield 64f92c8dd7SBob Peterson /* Number of messages to send before rescheduling */ 65f92c8dd7SBob Peterson #define MAX_SEND_MSG_COUNT 25 66055923bfSAlexander Aring #define DLM_SHUTDOWN_WAIT_TIMEOUT msecs_to_jiffies(10000) 67f92c8dd7SBob Peterson 686ed7257bSPatrick Caulfield struct connection { 696ed7257bSPatrick Caulfield struct socket *sock; /* NULL if not connected */ 706ed7257bSPatrick Caulfield uint32_t nodeid; /* So we know who we are in the list */ 716ed7257bSPatrick Caulfield struct mutex sock_mutex; 726ed7257bSPatrick Caulfield unsigned long flags; 736ed7257bSPatrick Caulfield #define CF_READ_PENDING 1 748a4abb08Stsutomu.owa@toshiba.co.jp #define CF_WRITE_PENDING 2 756ed7257bSPatrick Caulfield #define CF_INIT_PENDING 4 766ed7257bSPatrick Caulfield #define CF_IS_OTHERCON 5 77063c4c99SLars Marowsky-Bree #define CF_CLOSE 6 78b36930ddSDavid Miller #define CF_APP_LIMITED 7 79b2a66629Stsutomu.owa@toshiba.co.jp #define CF_CLOSING 8 80055923bfSAlexander Aring #define 
CF_SHUTDOWN 9 8119633c7eSAlexander Aring #define CF_CONNECTED 10 826ed7257bSPatrick Caulfield struct list_head writequeue; /* List of outgoing writequeue_entries */ 836ed7257bSPatrick Caulfield spinlock_t writequeue_lock; 846ed7257bSPatrick Caulfield void (*connect_action) (struct connection *); /* What to do to connect */ 85055923bfSAlexander Aring void (*shutdown_action)(struct connection *con); /* What to do to shutdown */ 866ed7257bSPatrick Caulfield int retries; 876ed7257bSPatrick Caulfield #define MAX_CONNECT_RETRIES 3 885e9ccc37SChristine Caulfield struct hlist_node list; 896ed7257bSPatrick Caulfield struct connection *othercon; 906ed7257bSPatrick Caulfield struct work_struct rwork; /* Receive workqueue */ 916ed7257bSPatrick Caulfield struct work_struct swork; /* Send workqueue */ 92055923bfSAlexander Aring wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */ 934798cbbfSAlexander Aring unsigned char *rx_buf; 944798cbbfSAlexander Aring int rx_buflen; 954798cbbfSAlexander Aring int rx_leftover; 96a47666ebSAlexander Aring struct rcu_head rcu; 976ed7257bSPatrick Caulfield }; 986ed7257bSPatrick Caulfield #define sock2con(x) ((struct connection *)(x)->sk_user_data) 996ed7257bSPatrick Caulfield 100d11ccd45SAlexander Aring struct listen_connection { 101d11ccd45SAlexander Aring struct socket *sock; 102d11ccd45SAlexander Aring struct work_struct rwork; 103d11ccd45SAlexander Aring }; 104d11ccd45SAlexander Aring 105*f0747ebfSAlexander Aring #define DLM_WQ_REMAIN_BYTES(e) (PAGE_SIZE - e->end) 106*f0747ebfSAlexander Aring #define DLM_WQ_LENGTH_BYTES(e) (e->end - e->offset) 107*f0747ebfSAlexander Aring 1086ed7257bSPatrick Caulfield /* An entry waiting to be sent */ 1096ed7257bSPatrick Caulfield struct writequeue_entry { 1106ed7257bSPatrick Caulfield struct list_head list; 1116ed7257bSPatrick Caulfield struct page *page; 1126ed7257bSPatrick Caulfield int offset; 1136ed7257bSPatrick Caulfield int len; 1146ed7257bSPatrick Caulfield int end; 1156ed7257bSPatrick 
Caulfield int users; 1166ed7257bSPatrick Caulfield struct connection *con; 1176ed7257bSPatrick Caulfield }; 1186ed7257bSPatrick Caulfield 11936b71a8bSDavid Teigland struct dlm_node_addr { 12036b71a8bSDavid Teigland struct list_head list; 12136b71a8bSDavid Teigland int nodeid; 122e125fbebSAlexander Aring int mark; 12336b71a8bSDavid Teigland int addr_count; 12498e1b60eSMike Christie int curr_addr_index; 12536b71a8bSDavid Teigland struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; 12636b71a8bSDavid Teigland }; 12736b71a8bSDavid Teigland 128cc661fc9SBob Peterson static struct listen_sock_callbacks { 129cc661fc9SBob Peterson void (*sk_error_report)(struct sock *); 130cc661fc9SBob Peterson void (*sk_data_ready)(struct sock *); 131cc661fc9SBob Peterson void (*sk_state_change)(struct sock *); 132cc661fc9SBob Peterson void (*sk_write_space)(struct sock *); 133cc661fc9SBob Peterson } listen_sock; 134cc661fc9SBob Peterson 13536b71a8bSDavid Teigland static LIST_HEAD(dlm_node_addrs); 13636b71a8bSDavid Teigland static DEFINE_SPINLOCK(dlm_node_addrs_spin); 13736b71a8bSDavid Teigland 138d11ccd45SAlexander Aring static struct listen_connection listen_con; 1396ed7257bSPatrick Caulfield static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; 1406ed7257bSPatrick Caulfield static int dlm_local_count; 14151746163SAlexander Aring int dlm_allow_conn; 1426ed7257bSPatrick Caulfield 1436ed7257bSPatrick Caulfield /* Work queues */ 1446ed7257bSPatrick Caulfield static struct workqueue_struct *recv_workqueue; 1456ed7257bSPatrick Caulfield static struct workqueue_struct *send_workqueue; 1466ed7257bSPatrick Caulfield 1475e9ccc37SChristine Caulfield static struct hlist_head connection_hash[CONN_HASH_SIZE]; 148a47666ebSAlexander Aring static DEFINE_SPINLOCK(connections_lock); 149a47666ebSAlexander Aring DEFINE_STATIC_SRCU(connections_srcu); 1506ed7257bSPatrick Caulfield 1516ed7257bSPatrick Caulfield static void process_recv_sockets(struct work_struct *work); 1526ed7257bSPatrick 
Caulfield static void process_send_sockets(struct work_struct *work); 1536ed7257bSPatrick Caulfield 1540672c3c2SAlexander Aring static void sctp_connect_to_sock(struct connection *con); 1550672c3c2SAlexander Aring static void tcp_connect_to_sock(struct connection *con); 15642873c90SAlexander Aring static void dlm_tcp_shutdown(struct connection *con); 1575e9ccc37SChristine Caulfield 1585e9ccc37SChristine Caulfield /* This is deliberately very simple because most clusters have simple 1595e9ccc37SChristine Caulfield sequential nodeids, so we should be able to go straight to a connection 1605e9ccc37SChristine Caulfield struct in the array */ 1615e9ccc37SChristine Caulfield static inline int nodeid_hash(int nodeid) 1625e9ccc37SChristine Caulfield { 1635e9ccc37SChristine Caulfield return nodeid & (CONN_HASH_SIZE-1); 1645e9ccc37SChristine Caulfield } 1655e9ccc37SChristine Caulfield 1665e9ccc37SChristine Caulfield static struct connection *__find_con(int nodeid) 1675e9ccc37SChristine Caulfield { 168a47666ebSAlexander Aring int r, idx; 1695e9ccc37SChristine Caulfield struct connection *con; 1705e9ccc37SChristine Caulfield 1715e9ccc37SChristine Caulfield r = nodeid_hash(nodeid); 1725e9ccc37SChristine Caulfield 173a47666ebSAlexander Aring idx = srcu_read_lock(&connections_srcu); 174a47666ebSAlexander Aring hlist_for_each_entry_rcu(con, &connection_hash[r], list) { 175a47666ebSAlexander Aring if (con->nodeid == nodeid) { 176a47666ebSAlexander Aring srcu_read_unlock(&connections_srcu, idx); 1775e9ccc37SChristine Caulfield return con; 1785e9ccc37SChristine Caulfield } 179a47666ebSAlexander Aring } 180a47666ebSAlexander Aring srcu_read_unlock(&connections_srcu, idx); 181a47666ebSAlexander Aring 1825e9ccc37SChristine Caulfield return NULL; 1835e9ccc37SChristine Caulfield } 1845e9ccc37SChristine Caulfield 1856cde210aSAlexander Aring static int dlm_con_init(struct connection *con, int nodeid) 1866ed7257bSPatrick Caulfield { 1874798cbbfSAlexander Aring con->rx_buflen = 
dlm_config.ci_buffer_size; 1884798cbbfSAlexander Aring con->rx_buf = kmalloc(con->rx_buflen, GFP_NOFS); 1896cde210aSAlexander Aring if (!con->rx_buf) 1906cde210aSAlexander Aring return -ENOMEM; 1914798cbbfSAlexander Aring 1926ed7257bSPatrick Caulfield con->nodeid = nodeid; 1936ed7257bSPatrick Caulfield mutex_init(&con->sock_mutex); 1946ed7257bSPatrick Caulfield INIT_LIST_HEAD(&con->writequeue); 1956ed7257bSPatrick Caulfield spin_lock_init(&con->writequeue_lock); 1966ed7257bSPatrick Caulfield INIT_WORK(&con->swork, process_send_sockets); 1976ed7257bSPatrick Caulfield INIT_WORK(&con->rwork, process_recv_sockets); 198055923bfSAlexander Aring init_waitqueue_head(&con->shutdown_wait); 1996ed7257bSPatrick Caulfield 20042873c90SAlexander Aring if (dlm_config.ci_protocol == 0) { 2010672c3c2SAlexander Aring con->connect_action = tcp_connect_to_sock; 20242873c90SAlexander Aring con->shutdown_action = dlm_tcp_shutdown; 20342873c90SAlexander Aring } else { 2040672c3c2SAlexander Aring con->connect_action = sctp_connect_to_sock; 20542873c90SAlexander Aring } 2066ed7257bSPatrick Caulfield 2076cde210aSAlexander Aring return 0; 2086cde210aSAlexander Aring } 2096cde210aSAlexander Aring 2106cde210aSAlexander Aring /* 2116cde210aSAlexander Aring * If 'allocation' is zero then we don't attempt to create a new 2126cde210aSAlexander Aring * connection structure for this node. 
2136cde210aSAlexander Aring */ 2146cde210aSAlexander Aring static struct connection *nodeid2con(int nodeid, gfp_t alloc) 2156cde210aSAlexander Aring { 2166cde210aSAlexander Aring struct connection *con, *tmp; 2176cde210aSAlexander Aring int r, ret; 2186cde210aSAlexander Aring 2196cde210aSAlexander Aring con = __find_con(nodeid); 2206cde210aSAlexander Aring if (con || !alloc) 2216cde210aSAlexander Aring return con; 2226cde210aSAlexander Aring 2236cde210aSAlexander Aring con = kzalloc(sizeof(*con), alloc); 2246cde210aSAlexander Aring if (!con) 2256cde210aSAlexander Aring return NULL; 2266cde210aSAlexander Aring 2276cde210aSAlexander Aring ret = dlm_con_init(con, nodeid); 2286cde210aSAlexander Aring if (ret) { 2296cde210aSAlexander Aring kfree(con); 2306cde210aSAlexander Aring return NULL; 2316cde210aSAlexander Aring } 2326cde210aSAlexander Aring 233a47666ebSAlexander Aring r = nodeid_hash(nodeid); 234a47666ebSAlexander Aring 235a47666ebSAlexander Aring spin_lock(&connections_lock); 2364f2b30fdSAlexander Aring /* Because multiple workqueues/threads calls this function it can 2374f2b30fdSAlexander Aring * race on multiple cpu's. Instead of locking hot path __find_con() 2384f2b30fdSAlexander Aring * we just check in rare cases of recently added nodes again 2394f2b30fdSAlexander Aring * under protection of connections_lock. If this is the case we 2404f2b30fdSAlexander Aring * abort our connection creation and return the existing connection. 
2414f2b30fdSAlexander Aring */ 2424f2b30fdSAlexander Aring tmp = __find_con(nodeid); 2434f2b30fdSAlexander Aring if (tmp) { 2444f2b30fdSAlexander Aring spin_unlock(&connections_lock); 2454f2b30fdSAlexander Aring kfree(con->rx_buf); 2464f2b30fdSAlexander Aring kfree(con); 2474f2b30fdSAlexander Aring return tmp; 2484f2b30fdSAlexander Aring } 2494f2b30fdSAlexander Aring 250a47666ebSAlexander Aring hlist_add_head_rcu(&con->list, &connection_hash[r]); 251a47666ebSAlexander Aring spin_unlock(&connections_lock); 252a47666ebSAlexander Aring 2536ed7257bSPatrick Caulfield return con; 2546ed7257bSPatrick Caulfield } 2556ed7257bSPatrick Caulfield 2565e9ccc37SChristine Caulfield /* Loop round all connections */ 2575e9ccc37SChristine Caulfield static void foreach_conn(void (*conn_func)(struct connection *c)) 2585e9ccc37SChristine Caulfield { 259a47666ebSAlexander Aring int i, idx; 2605e9ccc37SChristine Caulfield struct connection *con; 2615e9ccc37SChristine Caulfield 262a47666ebSAlexander Aring idx = srcu_read_lock(&connections_srcu); 2635e9ccc37SChristine Caulfield for (i = 0; i < CONN_HASH_SIZE; i++) { 264a47666ebSAlexander Aring hlist_for_each_entry_rcu(con, &connection_hash[i], list) 2655e9ccc37SChristine Caulfield conn_func(con); 2665e9ccc37SChristine Caulfield } 267a47666ebSAlexander Aring srcu_read_unlock(&connections_srcu, idx); 2686ed7257bSPatrick Caulfield } 2696ed7257bSPatrick Caulfield 27036b71a8bSDavid Teigland static struct dlm_node_addr *find_node_addr(int nodeid) 2716ed7257bSPatrick Caulfield { 27236b71a8bSDavid Teigland struct dlm_node_addr *na; 27336b71a8bSDavid Teigland 27436b71a8bSDavid Teigland list_for_each_entry(na, &dlm_node_addrs, list) { 27536b71a8bSDavid Teigland if (na->nodeid == nodeid) 27636b71a8bSDavid Teigland return na; 27736b71a8bSDavid Teigland } 27836b71a8bSDavid Teigland return NULL; 27936b71a8bSDavid Teigland } 28036b71a8bSDavid Teigland 28140c6b83eSAlexander Aring static int addr_compare(const struct sockaddr_storage *x, 
28240c6b83eSAlexander Aring const struct sockaddr_storage *y) 28336b71a8bSDavid Teigland { 28436b71a8bSDavid Teigland switch (x->ss_family) { 28536b71a8bSDavid Teigland case AF_INET: { 28636b71a8bSDavid Teigland struct sockaddr_in *sinx = (struct sockaddr_in *)x; 28736b71a8bSDavid Teigland struct sockaddr_in *siny = (struct sockaddr_in *)y; 28836b71a8bSDavid Teigland if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) 28936b71a8bSDavid Teigland return 0; 29036b71a8bSDavid Teigland if (sinx->sin_port != siny->sin_port) 29136b71a8bSDavid Teigland return 0; 29236b71a8bSDavid Teigland break; 29336b71a8bSDavid Teigland } 29436b71a8bSDavid Teigland case AF_INET6: { 29536b71a8bSDavid Teigland struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; 29636b71a8bSDavid Teigland struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; 29736b71a8bSDavid Teigland if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) 29836b71a8bSDavid Teigland return 0; 29936b71a8bSDavid Teigland if (sinx->sin6_port != siny->sin6_port) 30036b71a8bSDavid Teigland return 0; 30136b71a8bSDavid Teigland break; 30236b71a8bSDavid Teigland } 30336b71a8bSDavid Teigland default: 30436b71a8bSDavid Teigland return 0; 30536b71a8bSDavid Teigland } 30636b71a8bSDavid Teigland return 1; 30736b71a8bSDavid Teigland } 30836b71a8bSDavid Teigland 30936b71a8bSDavid Teigland static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, 310e125fbebSAlexander Aring struct sockaddr *sa_out, bool try_new_addr, 311e125fbebSAlexander Aring unsigned int *mark) 31236b71a8bSDavid Teigland { 31336b71a8bSDavid Teigland struct sockaddr_storage sas; 31436b71a8bSDavid Teigland struct dlm_node_addr *na; 3156ed7257bSPatrick Caulfield 3166ed7257bSPatrick Caulfield if (!dlm_local_count) 3176ed7257bSPatrick Caulfield return -1; 3186ed7257bSPatrick Caulfield 31936b71a8bSDavid Teigland spin_lock(&dlm_node_addrs_spin); 32036b71a8bSDavid Teigland na = find_node_addr(nodeid); 32198e1b60eSMike Christie if (na && na->addr_count) { 
322ee44b4bcSMarcelo Ricardo Leitner memcpy(&sas, na->addr[na->curr_addr_index], 323ee44b4bcSMarcelo Ricardo Leitner sizeof(struct sockaddr_storage)); 324ee44b4bcSMarcelo Ricardo Leitner 32598e1b60eSMike Christie if (try_new_addr) { 32698e1b60eSMike Christie na->curr_addr_index++; 32798e1b60eSMike Christie if (na->curr_addr_index == na->addr_count) 32898e1b60eSMike Christie na->curr_addr_index = 0; 32998e1b60eSMike Christie } 33098e1b60eSMike Christie } 33136b71a8bSDavid Teigland spin_unlock(&dlm_node_addrs_spin); 33236b71a8bSDavid Teigland 33336b71a8bSDavid Teigland if (!na) 33436b71a8bSDavid Teigland return -EEXIST; 33536b71a8bSDavid Teigland 33636b71a8bSDavid Teigland if (!na->addr_count) 33736b71a8bSDavid Teigland return -ENOENT; 33836b71a8bSDavid Teigland 339e125fbebSAlexander Aring *mark = na->mark; 340e125fbebSAlexander Aring 34136b71a8bSDavid Teigland if (sas_out) 34236b71a8bSDavid Teigland memcpy(sas_out, &sas, sizeof(struct sockaddr_storage)); 34336b71a8bSDavid Teigland 34436b71a8bSDavid Teigland if (!sa_out) 34536b71a8bSDavid Teigland return 0; 3466ed7257bSPatrick Caulfield 3476ed7257bSPatrick Caulfield if (dlm_local_addr[0]->ss_family == AF_INET) { 34836b71a8bSDavid Teigland struct sockaddr_in *in4 = (struct sockaddr_in *) &sas; 34936b71a8bSDavid Teigland struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out; 3506ed7257bSPatrick Caulfield ret4->sin_addr.s_addr = in4->sin_addr.s_addr; 3516ed7257bSPatrick Caulfield } else { 35236b71a8bSDavid Teigland struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &sas; 35336b71a8bSDavid Teigland struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) sa_out; 3544e3fd7a0SAlexey Dobriyan ret6->sin6_addr = in6->sin6_addr; 3556ed7257bSPatrick Caulfield } 3566ed7257bSPatrick Caulfield 3576ed7257bSPatrick Caulfield return 0; 3586ed7257bSPatrick Caulfield } 3596ed7257bSPatrick Caulfield 360e125fbebSAlexander Aring static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid, 361e125fbebSAlexander Aring unsigned int 
*mark) 36236b71a8bSDavid Teigland { 36336b71a8bSDavid Teigland struct dlm_node_addr *na; 36436b71a8bSDavid Teigland int rv = -EEXIST; 36598e1b60eSMike Christie int addr_i; 36636b71a8bSDavid Teigland 36736b71a8bSDavid Teigland spin_lock(&dlm_node_addrs_spin); 36836b71a8bSDavid Teigland list_for_each_entry(na, &dlm_node_addrs, list) { 36936b71a8bSDavid Teigland if (!na->addr_count) 37036b71a8bSDavid Teigland continue; 37136b71a8bSDavid Teigland 37298e1b60eSMike Christie for (addr_i = 0; addr_i < na->addr_count; addr_i++) { 37398e1b60eSMike Christie if (addr_compare(na->addr[addr_i], addr)) { 37436b71a8bSDavid Teigland *nodeid = na->nodeid; 375e125fbebSAlexander Aring *mark = na->mark; 37636b71a8bSDavid Teigland rv = 0; 37798e1b60eSMike Christie goto unlock; 37836b71a8bSDavid Teigland } 37998e1b60eSMike Christie } 38098e1b60eSMike Christie } 38198e1b60eSMike Christie unlock: 38236b71a8bSDavid Teigland spin_unlock(&dlm_node_addrs_spin); 38336b71a8bSDavid Teigland return rv; 38436b71a8bSDavid Teigland } 38536b71a8bSDavid Teigland 3864f19d071SAlexander Aring /* caller need to held dlm_node_addrs_spin lock */ 3874f19d071SAlexander Aring static bool dlm_lowcomms_na_has_addr(const struct dlm_node_addr *na, 3884f19d071SAlexander Aring const struct sockaddr_storage *addr) 3894f19d071SAlexander Aring { 3904f19d071SAlexander Aring int i; 3914f19d071SAlexander Aring 3924f19d071SAlexander Aring for (i = 0; i < na->addr_count; i++) { 3934f19d071SAlexander Aring if (addr_compare(na->addr[i], addr)) 3944f19d071SAlexander Aring return true; 3954f19d071SAlexander Aring } 3964f19d071SAlexander Aring 3974f19d071SAlexander Aring return false; 3984f19d071SAlexander Aring } 3994f19d071SAlexander Aring 40036b71a8bSDavid Teigland int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len) 40136b71a8bSDavid Teigland { 40236b71a8bSDavid Teigland struct sockaddr_storage *new_addr; 40336b71a8bSDavid Teigland struct dlm_node_addr *new_node, *na; 4044f19d071SAlexander Aring bool ret; 
40536b71a8bSDavid Teigland 40636b71a8bSDavid Teigland new_node = kzalloc(sizeof(struct dlm_node_addr), GFP_NOFS); 40736b71a8bSDavid Teigland if (!new_node) 40836b71a8bSDavid Teigland return -ENOMEM; 40936b71a8bSDavid Teigland 41036b71a8bSDavid Teigland new_addr = kzalloc(sizeof(struct sockaddr_storage), GFP_NOFS); 41136b71a8bSDavid Teigland if (!new_addr) { 41236b71a8bSDavid Teigland kfree(new_node); 41336b71a8bSDavid Teigland return -ENOMEM; 41436b71a8bSDavid Teigland } 41536b71a8bSDavid Teigland 41636b71a8bSDavid Teigland memcpy(new_addr, addr, len); 41736b71a8bSDavid Teigland 41836b71a8bSDavid Teigland spin_lock(&dlm_node_addrs_spin); 41936b71a8bSDavid Teigland na = find_node_addr(nodeid); 42036b71a8bSDavid Teigland if (!na) { 42136b71a8bSDavid Teigland new_node->nodeid = nodeid; 42236b71a8bSDavid Teigland new_node->addr[0] = new_addr; 42336b71a8bSDavid Teigland new_node->addr_count = 1; 424e125fbebSAlexander Aring new_node->mark = dlm_config.ci_mark; 42536b71a8bSDavid Teigland list_add(&new_node->list, &dlm_node_addrs); 42636b71a8bSDavid Teigland spin_unlock(&dlm_node_addrs_spin); 42736b71a8bSDavid Teigland return 0; 42836b71a8bSDavid Teigland } 42936b71a8bSDavid Teigland 4304f19d071SAlexander Aring ret = dlm_lowcomms_na_has_addr(na, addr); 4314f19d071SAlexander Aring if (ret) { 4324f19d071SAlexander Aring spin_unlock(&dlm_node_addrs_spin); 4334f19d071SAlexander Aring kfree(new_addr); 4344f19d071SAlexander Aring kfree(new_node); 4354f19d071SAlexander Aring return -EEXIST; 4364f19d071SAlexander Aring } 4374f19d071SAlexander Aring 43836b71a8bSDavid Teigland if (na->addr_count >= DLM_MAX_ADDR_COUNT) { 43936b71a8bSDavid Teigland spin_unlock(&dlm_node_addrs_spin); 44036b71a8bSDavid Teigland kfree(new_addr); 44136b71a8bSDavid Teigland kfree(new_node); 44236b71a8bSDavid Teigland return -ENOSPC; 44336b71a8bSDavid Teigland } 44436b71a8bSDavid Teigland 44536b71a8bSDavid Teigland na->addr[na->addr_count++] = new_addr; 44636b71a8bSDavid Teigland 
spin_unlock(&dlm_node_addrs_spin); 44736b71a8bSDavid Teigland kfree(new_node); 44836b71a8bSDavid Teigland return 0; 44936b71a8bSDavid Teigland } 45036b71a8bSDavid Teigland 4516ed7257bSPatrick Caulfield /* Data available on socket or listen socket received a connect */ 452676d2369SDavid S. Miller static void lowcomms_data_ready(struct sock *sk) 4536ed7257bSPatrick Caulfield { 45493eaadebStsutomu.owa@toshiba.co.jp struct connection *con; 45593eaadebStsutomu.owa@toshiba.co.jp 45693eaadebStsutomu.owa@toshiba.co.jp read_lock_bh(&sk->sk_callback_lock); 45793eaadebStsutomu.owa@toshiba.co.jp con = sock2con(sk); 458afb853fbSPatrick Caulfield if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags)) 4596ed7257bSPatrick Caulfield queue_work(recv_workqueue, &con->rwork); 46093eaadebStsutomu.owa@toshiba.co.jp read_unlock_bh(&sk->sk_callback_lock); 4616ed7257bSPatrick Caulfield } 4626ed7257bSPatrick Caulfield 463d11ccd45SAlexander Aring static void lowcomms_listen_data_ready(struct sock *sk) 464d11ccd45SAlexander Aring { 465d11ccd45SAlexander Aring queue_work(recv_workqueue, &listen_con.rwork); 466d11ccd45SAlexander Aring } 467d11ccd45SAlexander Aring 4686ed7257bSPatrick Caulfield static void lowcomms_write_space(struct sock *sk) 4696ed7257bSPatrick Caulfield { 47093eaadebStsutomu.owa@toshiba.co.jp struct connection *con; 4716ed7257bSPatrick Caulfield 47293eaadebStsutomu.owa@toshiba.co.jp read_lock_bh(&sk->sk_callback_lock); 47393eaadebStsutomu.owa@toshiba.co.jp con = sock2con(sk); 474b36930ddSDavid Miller if (!con) 47593eaadebStsutomu.owa@toshiba.co.jp goto out; 476b36930ddSDavid Miller 47719633c7eSAlexander Aring if (!test_and_set_bit(CF_CONNECTED, &con->flags)) { 47819633c7eSAlexander Aring log_print("successful connected to node %d", con->nodeid); 47919633c7eSAlexander Aring queue_work(send_workqueue, &con->swork); 48019633c7eSAlexander Aring goto out; 48119633c7eSAlexander Aring } 48219633c7eSAlexander Aring 483b36930ddSDavid Miller clear_bit(SOCK_NOSPACE, 
&con->sock->flags); 484b36930ddSDavid Miller 485b36930ddSDavid Miller if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) { 486b36930ddSDavid Miller con->sock->sk->sk_write_pending--; 4879cd3e072SEric Dumazet clear_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags); 488b36930ddSDavid Miller } 489b36930ddSDavid Miller 4906ed7257bSPatrick Caulfield queue_work(send_workqueue, &con->swork); 49193eaadebStsutomu.owa@toshiba.co.jp out: 49293eaadebStsutomu.owa@toshiba.co.jp read_unlock_bh(&sk->sk_callback_lock); 4936ed7257bSPatrick Caulfield } 4946ed7257bSPatrick Caulfield 4956ed7257bSPatrick Caulfield static inline void lowcomms_connect_sock(struct connection *con) 4966ed7257bSPatrick Caulfield { 497063c4c99SLars Marowsky-Bree if (test_bit(CF_CLOSE, &con->flags)) 498063c4c99SLars Marowsky-Bree return; 4996ed7257bSPatrick Caulfield queue_work(send_workqueue, &con->swork); 50061d9102bSBob Peterson cond_resched(); 5016ed7257bSPatrick Caulfield } 5026ed7257bSPatrick Caulfield 5036ed7257bSPatrick Caulfield static void lowcomms_state_change(struct sock *sk) 5046ed7257bSPatrick Caulfield { 505ee44b4bcSMarcelo Ricardo Leitner /* SCTP layer is not calling sk_data_ready when the connection 506ee44b4bcSMarcelo Ricardo Leitner * is done, so we catch the signal through here. Also, it 507ee44b4bcSMarcelo Ricardo Leitner * doesn't switch socket state when entering shutdown, so we 508ee44b4bcSMarcelo Ricardo Leitner * skip the write in that case. 
509ee44b4bcSMarcelo Ricardo Leitner */ 510ee44b4bcSMarcelo Ricardo Leitner if (sk->sk_shutdown) { 511ee44b4bcSMarcelo Ricardo Leitner if (sk->sk_shutdown == RCV_SHUTDOWN) 512ee44b4bcSMarcelo Ricardo Leitner lowcomms_data_ready(sk); 513ee44b4bcSMarcelo Ricardo Leitner } else if (sk->sk_state == TCP_ESTABLISHED) { 5146ed7257bSPatrick Caulfield lowcomms_write_space(sk); 5156ed7257bSPatrick Caulfield } 516ee44b4bcSMarcelo Ricardo Leitner } 5176ed7257bSPatrick Caulfield 518391fbdc5SChristine Caulfield int dlm_lowcomms_connect_node(int nodeid) 519391fbdc5SChristine Caulfield { 520391fbdc5SChristine Caulfield struct connection *con; 521391fbdc5SChristine Caulfield 522391fbdc5SChristine Caulfield if (nodeid == dlm_our_nodeid()) 523391fbdc5SChristine Caulfield return 0; 524391fbdc5SChristine Caulfield 525391fbdc5SChristine Caulfield con = nodeid2con(nodeid, GFP_NOFS); 526391fbdc5SChristine Caulfield if (!con) 527391fbdc5SChristine Caulfield return -ENOMEM; 528391fbdc5SChristine Caulfield lowcomms_connect_sock(con); 529391fbdc5SChristine Caulfield return 0; 530391fbdc5SChristine Caulfield } 531391fbdc5SChristine Caulfield 532e125fbebSAlexander Aring int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark) 533e125fbebSAlexander Aring { 534e125fbebSAlexander Aring struct dlm_node_addr *na; 535e125fbebSAlexander Aring 536e125fbebSAlexander Aring spin_lock(&dlm_node_addrs_spin); 537e125fbebSAlexander Aring na = find_node_addr(nodeid); 538e125fbebSAlexander Aring if (!na) { 539e125fbebSAlexander Aring spin_unlock(&dlm_node_addrs_spin); 540e125fbebSAlexander Aring return -ENOENT; 541e125fbebSAlexander Aring } 542e125fbebSAlexander Aring 543e125fbebSAlexander Aring na->mark = mark; 544e125fbebSAlexander Aring spin_unlock(&dlm_node_addrs_spin); 545e125fbebSAlexander Aring 546e125fbebSAlexander Aring return 0; 547e125fbebSAlexander Aring } 548e125fbebSAlexander Aring 549b3a5bbfdSBob Peterson static void lowcomms_error_report(struct sock *sk) 550b3a5bbfdSBob Peterson { 
551b81171cbSBob Peterson struct connection *con; 552b3a5bbfdSBob Peterson struct sockaddr_storage saddr; 553b81171cbSBob Peterson void (*orig_report)(struct sock *) = NULL; 554b3a5bbfdSBob Peterson 555b81171cbSBob Peterson read_lock_bh(&sk->sk_callback_lock); 556b81171cbSBob Peterson con = sock2con(sk); 557b81171cbSBob Peterson if (con == NULL) 558b81171cbSBob Peterson goto out; 559b81171cbSBob Peterson 560cc661fc9SBob Peterson orig_report = listen_sock.sk_error_report; 5611a31833dSBob Peterson if (con->sock == NULL || 5629b2c45d4SDenys Vlasenko kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) { 563b3a5bbfdSBob Peterson printk_ratelimited(KERN_ERR "dlm: node %d: socket error " 564b3a5bbfdSBob Peterson "sending to node %d, port %d, " 565b3a5bbfdSBob Peterson "sk_err=%d/%d\n", dlm_our_nodeid(), 566b3a5bbfdSBob Peterson con->nodeid, dlm_config.ci_tcp_port, 567b3a5bbfdSBob Peterson sk->sk_err, sk->sk_err_soft); 568b3a5bbfdSBob Peterson } else if (saddr.ss_family == AF_INET) { 569b3a5bbfdSBob Peterson struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr; 570b3a5bbfdSBob Peterson 571b3a5bbfdSBob Peterson printk_ratelimited(KERN_ERR "dlm: node %d: socket error " 572b3a5bbfdSBob Peterson "sending to node %d at %pI4, port %d, " 573b3a5bbfdSBob Peterson "sk_err=%d/%d\n", dlm_our_nodeid(), 574b3a5bbfdSBob Peterson con->nodeid, &sin4->sin_addr.s_addr, 575b3a5bbfdSBob Peterson dlm_config.ci_tcp_port, sk->sk_err, 576b3a5bbfdSBob Peterson sk->sk_err_soft); 577b3a5bbfdSBob Peterson } else { 578b3a5bbfdSBob Peterson struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&saddr; 579b3a5bbfdSBob Peterson 580b3a5bbfdSBob Peterson printk_ratelimited(KERN_ERR "dlm: node %d: socket error " 581b3a5bbfdSBob Peterson "sending to node %d at %u.%u.%u.%u, " 582b3a5bbfdSBob Peterson "port %d, sk_err=%d/%d\n", dlm_our_nodeid(), 583b3a5bbfdSBob Peterson con->nodeid, sin6->sin6_addr.s6_addr32[0], 584b3a5bbfdSBob Peterson sin6->sin6_addr.s6_addr32[1], 585b3a5bbfdSBob Peterson 
		       sin6->sin6_addr.s6_addr32[2],
		       sin6->sin6_addr.s6_addr32[3],
		       dlm_config.ci_tcp_port, sk->sk_err,
		       sk->sk_err_soft);
	}
out:
	read_unlock_bh(&sk->sk_callback_lock);
	if (orig_report)
		orig_report(sk);
}

/* Note: sk_callback_lock must be locked before calling this function. */
static void save_listen_callbacks(struct socket *sock)
{
	struct sock *sk = sock->sk;

	listen_sock.sk_data_ready = sk->sk_data_ready;
	listen_sock.sk_state_change = sk->sk_state_change;
	listen_sock.sk_write_space = sk->sk_write_space;
	listen_sock.sk_error_report = sk->sk_error_report;
}

/* Detach the connection from @sock and put back the callbacks previously
 * stashed in listen_sock by save_listen_callbacks(). Unlike the save side,
 * this takes sk_callback_lock itself.
 */
static void restore_callbacks(struct socket *sock)
{
	struct sock *sk = sock->sk;

	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data = NULL;
	sk->sk_data_ready = listen_sock.sk_data_ready;
	sk->sk_state_change = listen_sock.sk_state_change;
	sk->sk_write_space = listen_sock.sk_write_space;
	sk->sk_error_report = listen_sock.sk_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

/* Attach the listening socket to @con: save the original sk callbacks
 * (so restore_callbacks() can undo this) and hook in our accept-side
 * data_ready handler, all under sk_callback_lock.
 */
static void add_listen_sock(struct socket *sock, struct listen_connection *con)
{
	struct sock *sk = sock->sk;

	write_lock_bh(&sk->sk_callback_lock);
	save_listen_callbacks(sock);
	con->sock = sock;

	sk->sk_user_data = con;
	sk->sk_allocation = GFP_NOFS;	/* avoid recursing into fs reclaim */
	/* Install a data_ready callback */
	sk->sk_data_ready = lowcomms_listen_data_ready;
	write_unlock_bh(&sk->sk_callback_lock);
}

/* Make a socket active */
static void add_sock(struct socket *sock, struct connection *con)
{
	struct sock *sk = sock->sk;

	write_lock_bh(&sk->sk_callback_lock);
	con->sock = sock;

	/* sk_user_data is how the callbacks find their connection (sock2con) */
	sk->sk_user_data = con;
	/* Install a data_ready callback */
	sk->sk_data_ready = lowcomms_data_ready;
	sk->sk_write_space = lowcomms_write_space;
	sk->sk_state_change = lowcomms_state_change;
	sk->sk_allocation = GFP_NOFS;
	sk->sk_error_report = lowcomms_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

/* Add the port number to an IPv6 or 4 sockaddr and return the address
   length */
static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
			  int *addr_len)
{
	saddr->ss_family = dlm_local_addr[0]->ss_family;
	if (saddr->ss_family == AF_INET) {
		struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
		in4_addr->sin_port = cpu_to_be16(port);
		*addr_len = sizeof(struct sockaddr_in);
		memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero));
	} else {
		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
		in6_addr->sin6_port = cpu_to_be16(port);
		*addr_len = sizeof(struct sockaddr_in6);
	}
	/* zero any trailing bytes of the storage beyond the real address */
	memset((char *)saddr + *addr_len, 0, sizeof(struct sockaddr_storage) - *addr_len);
}

/* Restore the saved callbacks on *@sock, release it and clear the pointer.
 * Safe to call when *@sock is already NULL.
 */
static void dlm_close_sock(struct socket **sock)
{
	if (*sock) {
		restore_callbacks(*sock);
		sock_release(*sock);
		*sock = NULL;
	}
}

/* Close a remote connection and tidy up */
static void close_connection(struct connection *con, bool and_other,
			     bool tx, bool rx)
{
	/* only the first closer (CF_CLOSING not yet set) cancels the work items */
	bool closing = test_and_set_bit(CF_CLOSING, &con->flags);

	if (tx && !closing && cancel_work_sync(&con->swork)) {
		log_print("canceled swork for node %d", con->nodeid);
		clear_bit(CF_WRITE_PENDING, &con->flags);
	}
	if (rx && !closing && cancel_work_sync(&con->rwork)) {
		log_print("canceled rwork for node %d", con->nodeid);
		clear_bit(CF_READ_PENDING, &con->flags);
	}

	mutex_lock(&con->sock_mutex);
	dlm_close_sock(&con->sock);

	if (con->othercon && and_other) {
		/* Will only re-enter once. */
		close_connection(con->othercon, false, true, true);
	}

	con->rx_leftover = 0;
	con->retries = 0;
	clear_bit(CF_CONNECTED, &con->flags);
	mutex_unlock(&con->sock_mutex);
	clear_bit(CF_CLOSING, &con->flags);
}

/* Politely shut down the send side of @con's socket and wait (bounded by
 * DLM_SHUTDOWN_WAIT_TIMEOUT) for the peer to finish; CF_SHUTDOWN is cleared
 * by receive_from_sock() when it sees EOF. On error or timeout fall back to
 * a forced close_connection().
 */
static void shutdown_connection(struct connection *con)
{
	int ret;

	if (cancel_work_sync(&con->swork)) {
		log_print("canceled swork for node %d", con->nodeid);
		clear_bit(CF_WRITE_PENDING, &con->flags);
	}

	mutex_lock(&con->sock_mutex);
	/* nothing to shutdown */
	if (!con->sock) {
		mutex_unlock(&con->sock_mutex);
		return;
	}

	set_bit(CF_SHUTDOWN, &con->flags);
	ret = kernel_sock_shutdown(con->sock, SHUT_WR);
	mutex_unlock(&con->sock_mutex);
	if (ret) {
		log_print("Connection %p failed to shutdown: %d will force close",
			  con, ret);
		goto force_close;
	} else {
		ret = wait_event_timeout(con->shutdown_wait,
					 !test_bit(CF_SHUTDOWN, &con->flags),
					 DLM_SHUTDOWN_WAIT_TIMEOUT);
		if (ret == 0) {
			log_print("Connection %p shutdown timed out, will force close",
				  con);
			goto force_close;
		}
	}

	return;

force_close:
	clear_bit(CF_SHUTDOWN, &con->flags);
	close_connection(con, false, true, true);
}

/* Gracefully shut down a TCP connection pair; othercon (the crossed
 * incoming socket, if any) first, then the primary connection.
 */
static void dlm_tcp_shutdown(struct connection *con)
{
	if (con->othercon)
		shutdown_connection(con->othercon);
	shutdown_connection(con);
}

/* Resize @con's receive buffer to @newlen, preserving any unconsumed
 * leftover bytes. Returns 0 or -ENOMEM; the old buffer is kept on failure.
 */
static int con_realloc_receive_buf(struct connection *con, int newlen)
{
	unsigned char *newbuf;

	newbuf = kmalloc(newlen, GFP_NOFS);
	if (!newbuf)
		return -ENOMEM;

	/* copy any leftover from last receive */
	if (con->rx_leftover)
		memmove(newbuf, con->rx_buf, con->rx_leftover);

	/* swap to new buffer space */
	kfree(con->rx_buf);
	con->rx_buflen = newlen;
	con->rx_buf = newbuf;

	return 0;
}

/* Data received from remote end */
static int receive_from_sock(struct connection *con)
{
	int call_again_soon = 0;
	struct msghdr msg;
	struct kvec iov;
	int ret, buflen;

	mutex_lock(&con->sock_mutex);

	if (con->sock == NULL) {
		ret = -EAGAIN;
		goto out_close;
	}

	/* realloc if we get new buffer size to read out */
	buflen = dlm_config.ci_buffer_size;
	if (con->rx_buflen != buflen && con->rx_leftover <= buflen) {
		ret = con_realloc_receive_buf(con, buflen);
		if (ret < 0)
			goto out_resched;
	}

	/* calculate new buffer parameter regarding last receive and
	 * possible leftover bytes
	 */
	iov.iov_base = con->rx_buf + con->rx_leftover;
	iov.iov_len = con->rx_buflen - con->rx_leftover;

	memset(&msg, 0, sizeof(msg));
	msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
	ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
			     msg.msg_flags);
	if (ret <= 0)
		goto out_close;
	else if (ret == iov.iov_len)
		call_again_soon = 1;	/* buffer filled - more data may be queued */

	/* new buflen according readed bytes and leftover from last receive */
	buflen = ret + con->rx_leftover;
	ret = dlm_process_incoming_buffer(con->nodeid, con->rx_buf, buflen);
	if (ret < 0)
		goto out_close;

	/* calculate leftover bytes from process and put it into begin of
	 * the receive buffer, so next receive we have the full message
	 * at the start address of the receive buffer.
	 */
	con->rx_leftover = buflen - ret;
	if (con->rx_leftover) {
		memmove(con->rx_buf, con->rx_buf + ret,
			con->rx_leftover);
		call_again_soon = true;
	}

	if (call_again_soon)
		goto out_resched;

	mutex_unlock(&con->sock_mutex);
	return 0;

out_resched:
	if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
		queue_work(recv_workqueue, &con->rwork);
	mutex_unlock(&con->sock_mutex);
	return -EAGAIN;

out_close:
	mutex_unlock(&con->sock_mutex);
	if (ret != -EAGAIN) {
		/* Reconnect when there is something to send */
		close_connection(con, false, true, false);
		if (ret == 0) {
			/* ret == 0 means orderly EOF from the peer */
			log_print("connection %p got EOF from %d",
				  con, con->nodeid);
			/* handling for tcp shutdown */
			clear_bit(CF_SHUTDOWN, &con->flags);
			wake_up(&con->shutdown_wait);
			/* signal to breaking receive worker */
			ret = -1;
		}
	}
	return ret;
}

/* Listening socket is busy, accept a connection */
static int accept_from_sock(struct listen_connection *con)
{
	int result;
	struct sockaddr_storage peeraddr;
	struct socket *newsock;
	int len;
	int nodeid;
	struct connection *newcon;
	struct connection *addcon;
	unsigned int mark;

	if (!dlm_allow_conn) {
		return -1;
	}

	if (!con->sock)
		return -ENOTCONN;

	result = kernel_accept(con->sock, &newsock, O_NONBLOCK);
	if (result < 0)
		goto accept_err;

	/* Get the connected socket's peer */
	memset(&peeraddr, 0, sizeof(peeraddr));
	/* non-zero last argument requests the remote (peer) address */
	len = newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, 2);
	if (len < 0) {
		result = -ECONNABORTED;
		goto accept_err;
	}

	/* Get the new node's NODEID */
	make_sockaddr(&peeraddr, 0, &len);
	if (addr_to_nodeid(&peeraddr, &nodeid, &mark)) {
		unsigned char *b=(unsigned char *)&peeraddr;
		log_print("connect from non cluster node");
		print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
				     b, sizeof(struct sockaddr_storage));
		sock_release(newsock);
		return -1;
	}

	log_print("got connection from %d", nodeid);

	/* Check to see if we already have a connection to this node. This
	 * could happen if the two nodes initiate a connection at roughly
	 * the same time and the connections cross on the wire.
	 * In this case we store the incoming one in "othercon"
	 */
	newcon = nodeid2con(nodeid, GFP_NOFS);
	if (!newcon) {
		result = -ENOMEM;
		goto accept_err;
	}

	sock_set_mark(newsock->sk, mark);

	mutex_lock(&newcon->sock_mutex);
	if (newcon->sock) {
		struct connection *othercon = newcon->othercon;

		if (!othercon) {
			othercon = kzalloc(sizeof(*othercon), GFP_NOFS);
			if (!othercon) {
				log_print("failed to allocate incoming socket");
				mutex_unlock(&newcon->sock_mutex);
				result = -ENOMEM;
				goto accept_err;
			}

			result = dlm_con_init(othercon, nodeid);
			if (result < 0) {
				/* NOTE(review): this path jumps to accept_err
				 * with newcon->sock_mutex still held, unlike
				 * the kzalloc failure path above - verify
				 * whether a mutex_unlock() is missing here.
				 */
				kfree(othercon);
				goto accept_err;
			}

			/* othercon is nested inside newcon's lock - distinct
			 * lockdep class avoids a false deadlock report */
			lockdep_set_subclass(&othercon->sock_mutex, 1);
			newcon->othercon = othercon;
		} else {
			/* close other sock con if we have something new */
			close_connection(othercon, false, true, false);
		}

		mutex_lock(&othercon->sock_mutex);
		add_sock(newsock, othercon);
		addcon = othercon;
		mutex_unlock(&othercon->sock_mutex);
	}
	else {
		/* accept copies the sk after we've saved the callbacks, so we
		   don't want to save them a second time or comm errors will
		   result in calling sk_error_report recursively. */
		add_sock(newsock, newcon);
		addcon = newcon;
	}

	set_bit(CF_CONNECTED, &addcon->flags);
	mutex_unlock(&newcon->sock_mutex);

	/*
	 * Add it to the active queue in case we got data
	 * between processing the accept adding the socket
	 * to the read_sockets list
	 */
	if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
		queue_work(recv_workqueue, &addcon->rwork);

	return 0;

accept_err:
	if (newsock)
		sock_release(newsock);

	if (result != -EAGAIN)
		log_print("error accepting connection from node: %d", result);
	return result;
}

/* Free a write queue entry and the page backing its payload. */
static void free_entry(struct writequeue_entry *e)
{
	__free_page(e->page);
	kfree(e);
}

/*
 * writequeue_entry_complete - try to delete and free write queue entry
 * @e: write queue entry to try to delete
 * @completed: bytes completed
 *
 * writequeue_lock must be held.
 */
static void writequeue_entry_complete(struct writequeue_entry *e, int completed)
{
	e->offset += completed;
	e->len -= completed;

	/* only free once fully sent and no writer still appending (users) */
	if (e->len == 0 && e->users == 0) {
		list_del(&e->list);
		free_entry(e);
	}
}

/*
 * sctp_bind_addrs - bind a SCTP socket to all our addresses
 */
static int sctp_bind_addrs(struct socket *sock, uint16_t port)
{
	struct sockaddr_storage localaddr;
	struct sockaddr *addr = (struct sockaddr *)&localaddr;
	int i, addr_len, result = 0;

	for (i = 0; i < dlm_local_count; i++) {
		memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
		make_sockaddr(&localaddr, port, &addr_len);

		/* first address is a plain bind; the rest are added to the
		 * SCTP association via sock_bind_add() */
		if (!i)
			result = kernel_bind(sock, addr, addr_len);
		else
			result = sock_bind_add(sock->sk, addr, addr_len);

		if (result < 0) {
			log_print("Can't bind to %d addr number %d, %d.\n",
				  port, i + 1, result);
			break;
		}
	}
	return result;
}

/* Initiate an SCTP association.
   This is a special case of send_to_sock() in that we don't yet have a
   peeled-off socket for this association, so we use the listening socket
   and add the primary IP address of the remote node.
 */
static void sctp_connect_to_sock(struct connection *con)
{
	struct sockaddr_storage daddr;
	int result;
	int addr_len;
	struct socket *sock;
	unsigned int mark;

	mutex_lock(&con->sock_mutex);

	/* Some odd races can cause double-connects, ignore them */
	if (con->retries++ > MAX_CONNECT_RETRIES)
		goto out;

	if (con->sock) {
		log_print("node %d already connected.", con->nodeid);
		goto out;
	}

	memset(&daddr, 0, sizeof(daddr));
	result = nodeid_to_addr(con->nodeid, &daddr, NULL, true, &mark);
	if (result < 0) {
		log_print("no address for nodeid %d", con->nodeid);
		goto out;
	}

	/* Create a socket to communicate with */
	result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
				  SOCK_STREAM, IPPROTO_SCTP, &sock);
	if (result < 0)
		goto socket_err;

	sock_set_mark(sock->sk, mark);

	add_sock(sock, con);

	/* Bind to all addresses. */
	if (sctp_bind_addrs(con->sock, 0))
		goto bind_err;

	make_sockaddr(&daddr, dlm_config.ci_tcp_port, &addr_len);

	log_print("connecting to %d", con->nodeid);

	/* Turn off Nagle's algorithm */
	sctp_sock_set_nodelay(sock->sk);

	/*
	 * Make sock->ops->connect() function return in specified time,
	 * since O_NONBLOCK argument in connect() function does not work here,
	 * then, we should restore the default value of this attribute.
	 */
	sock_set_sndtimeo(sock->sk, 5);
	result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len,
				    0);
	sock_set_sndtimeo(sock->sk, 0);

	if (result == -EINPROGRESS)
		result = 0;
	if (result == 0) {
		if (!test_and_set_bit(CF_CONNECTED, &con->flags))
			log_print("successful connected to node %d", con->nodeid);
		goto out;
	}

bind_err:
	con->sock = NULL;
	sock_release(sock);

socket_err:
	/*
	 * Some errors are fatal and this list might need adjusting. For other
	 * errors we try again until the max number of retries is reached.
	 */
	if (result != -EHOSTUNREACH &&
	    result != -ENETUNREACH &&
	    result != -ENETDOWN &&
	    result != -EINVAL &&
	    result != -EPROTONOSUPPORT) {
		log_print("connect %d try %d error %d", con->nodeid,
			  con->retries, result);
		/* drop the mutex before sleeping; the retry is re-queued */
		mutex_unlock(&con->sock_mutex);
		msleep(1000);
		lowcomms_connect_sock(con);
		return;
	}

out:
	mutex_unlock(&con->sock_mutex);
}

/* Connect a new socket to its peer */
static void tcp_connect_to_sock(struct connection *con)
{
	struct sockaddr_storage saddr, src_addr;
	unsigned int mark;
	int addr_len;
	struct socket *sock = NULL;
	int result;

	mutex_lock(&con->sock_mutex);
	if (con->retries++ > MAX_CONNECT_RETRIES)
		goto out;

	/* Some odd races can cause double-connects, ignore them */
	if (con->sock)
		goto out;

	/* Create a socket to communicate with */
	result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
				  SOCK_STREAM, IPPROTO_TCP, &sock);
	if (result < 0)
		goto out_err;

	memset(&saddr, 0, sizeof(saddr));
	result = nodeid_to_addr(con->nodeid, &saddr, NULL, false, &mark);
	if (result < 0) {
		log_print("no address for nodeid %d", con->nodeid);
		goto out_err;
	}

	sock_set_mark(sock->sk, mark);

	add_sock(sock, con);

	/* Bind to our cluster-known address connecting to avoid
	   routing problems */
	memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr));
	make_sockaddr(&src_addr, 0, &addr_len);
	result = sock->ops->bind(sock, (struct sockaddr *) &src_addr,
				 addr_len);
	if (result < 0) {
		log_print("could not bind for connect: %d", result);
		/* This *may* not indicate a critical error */
	}

	make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);

	log_print("connecting to %d", con->nodeid);

	/* Turn off Nagle's algorithm */
	tcp_sock_set_nodelay(sock->sk);

	result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
				    O_NONBLOCK);
	if (result == -EINPROGRESS)
		result = 0;
	if (result == 0)
		goto out;

out_err:
	/* add_sock() may already have published the socket on con */
	if (con->sock) {
		sock_release(con->sock);
		con->sock = NULL;
	} else if (sock) {
		sock_release(sock);
	}
	/*
	 * Some errors are fatal and this list might need adjusting. For other
	 * errors we try again until the max number of retries is reached.
	 */
	if (result != -EHOSTUNREACH &&
	    result != -ENETUNREACH &&
	    result != -ENETDOWN &&
	    result != -EINVAL &&
	    result != -EPROTONOSUPPORT) {
		log_print("connect %d try %d error %d", con->nodeid,
			  con->retries, result);
		mutex_unlock(&con->sock_mutex);
		msleep(1000);
		lowcomms_connect_sock(con);
		return;
	}
out:
	mutex_unlock(&con->sock_mutex);
	return;
}

/* On error caller must run dlm_close_sock() for the
 * listen connection socket.
 */
static int tcp_create_listen_sock(struct listen_connection *con,
				  struct sockaddr_storage *saddr)
{
	struct socket *sock = NULL;
	int result = 0;
	int addr_len;

	if (dlm_local_addr[0]->ss_family == AF_INET)
		addr_len = sizeof(struct sockaddr_in);
	else
		addr_len = sizeof(struct sockaddr_in6);

	/* Create a socket to communicate with */
	result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
				  SOCK_STREAM, IPPROTO_TCP, &sock);
	if (result < 0) {
		log_print("Can't create listening comms socket");
		goto create_out;
	}

	sock_set_mark(sock->sk, dlm_config.ci_mark);

	/* Turn off Nagle's algorithm */
	tcp_sock_set_nodelay(sock->sk);

	sock_set_reuseaddr(sock->sk);

	add_listen_sock(sock, con);

	/* Bind to our port */
	make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len);
	result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
	if (result < 0) {
		log_print("Can't bind to port %d", dlm_config.ci_tcp_port);
		goto create_out;
	}
	sock_set_keepalive(sock->sk);

	result = sock->ops->listen(sock, 5);
	if (result < 0) {
		log_print("Can't listen on port %d", dlm_config.ci_tcp_port);
		goto create_out;
	}

	return 0;

create_out:
	return result;
}

/* Get local addresses */
static void init_local(void)
{
	struct sockaddr_storage sas, *addr;
	int i;

	dlm_local_count = 0;
	for (i = 0; i < DLM_MAX_ADDR_COUNT; i++) {
		if (dlm_our_addr(&sas, i))
			break;

		addr = kmemdup(&sas, sizeof(*addr), GFP_NOFS);
		if (!addr)
			break;	/* keep whatever we managed to copy so far */
		dlm_local_addr[dlm_local_count++] = addr;
	}
}

/* Free the local address copies made by init_local(). */
static void deinit_local(void)
{
	int i;

	for (i = 0; i < dlm_local_count; i++)
		kfree(dlm_local_addr[i]);
}

/* Initialise SCTP socket and bind to all interfaces
 * On error caller must run dlm_close_sock() for the
 * listen connection socket.
 */
static int sctp_listen_for_all(struct listen_connection *con)
{
	struct socket *sock = NULL;
	int result = -EINVAL;

	log_print("Using SCTP for communications");

Biederman result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, 1307ee44b4bcSMarcelo Ricardo Leitner SOCK_STREAM, IPPROTO_SCTP, &sock); 13086ed7257bSPatrick Caulfield if (result < 0) { 13096ed7257bSPatrick Caulfield log_print("Can't create comms socket, check SCTP is loaded"); 13106ed7257bSPatrick Caulfield goto out; 13116ed7257bSPatrick Caulfield } 13126ed7257bSPatrick Caulfield 131326cfabf9SChristoph Hellwig sock_set_rcvbuf(sock->sk, NEEDED_RMEM); 1314a5b7ab63SAlexander Aring sock_set_mark(sock->sk, dlm_config.ci_mark); 131540ef92c6SChristoph Hellwig sctp_sock_set_nodelay(sock->sk); 131686e92ad2SMike Christie 1317d11ccd45SAlexander Aring add_listen_sock(sock, con); 1318b81171cbSBob Peterson 1319ee44b4bcSMarcelo Ricardo Leitner /* Bind to all addresses. */ 1320d11ccd45SAlexander Aring result = sctp_bind_addrs(con->sock, dlm_config.ci_tcp_port); 1321d11ccd45SAlexander Aring if (result < 0) 1322d11ccd45SAlexander Aring goto out; 13236ed7257bSPatrick Caulfield 13246ed7257bSPatrick Caulfield result = sock->ops->listen(sock, 5); 13256ed7257bSPatrick Caulfield if (result < 0) { 13266ed7257bSPatrick Caulfield log_print("Can't set socket listening"); 1327d11ccd45SAlexander Aring goto out; 13286ed7257bSPatrick Caulfield } 13296ed7257bSPatrick Caulfield 13306ed7257bSPatrick Caulfield return 0; 13316ed7257bSPatrick Caulfield 13326ed7257bSPatrick Caulfield out: 13336ed7257bSPatrick Caulfield return result; 13346ed7257bSPatrick Caulfield } 13356ed7257bSPatrick Caulfield 13366ed7257bSPatrick Caulfield static int tcp_listen_for_all(void) 13376ed7257bSPatrick Caulfield { 13386ed7257bSPatrick Caulfield /* We don't support multi-homed hosts */ 13391a26bfafSAlexander Aring if (dlm_local_count > 1) { 1340617e82e1SDavid Teigland log_print("TCP protocol can't handle multi-homed hosts, " 1341617e82e1SDavid Teigland "try SCTP"); 13426ed7257bSPatrick Caulfield return -EINVAL; 13436ed7257bSPatrick Caulfield } 13446ed7257bSPatrick Caulfield 13456ed7257bSPatrick Caulfield 
	log_print("Using TCP for communications");

	return tcp_create_listen_sock(&listen_con, dlm_local_addr[0]);
}



/* Allocate a fresh writequeue entry backed by one zeroed page.
 * Returns NULL on allocation failure; 'allocation' carries the gfp flags.
 */
static struct writequeue_entry *new_writequeue_entry(struct connection *con,
						     gfp_t allocation)
{
	struct writequeue_entry *entry;

	entry = kzalloc(sizeof(*entry), allocation);
	if (!entry)
		return NULL;

	entry->page = alloc_page(allocation | __GFP_ZERO);
	if (!entry->page) {
		kfree(entry);
		return NULL;
	}

	entry->con = con;
	/* Caller holds the first reference; dropped by commit_buffer */
	entry->users = 1;

	return entry;
}

/* Reserve 'len' bytes of send buffer on 'con', returning the entry and
 * setting *ppc to the caller-writable region.  Reuses the tail entry of
 * the writequeue when it still has room, otherwise allocates a new one
 * and appends it.  Returns NULL on allocation failure.
 */
static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
					     gfp_t allocation, char **ppc)
{
	struct writequeue_entry *e;

	spin_lock(&con->writequeue_lock);
	if (!list_empty(&con->writequeue)) {
		e = list_last_entry(&con->writequeue, struct writequeue_entry, list);
		if (DLM_WQ_REMAIN_BYTES(e) >= len) {
			/* Tail entry has room: hand out its free space and
			 * take an extra reference.
			 */
			*ppc = page_address(e->page) + e->end;
			e->end += len;
			e->users++;
			spin_unlock(&con->writequeue_lock);

			return e;
		}
	}
	spin_unlock(&con->writequeue_lock);

	e = new_writequeue_entry(con, allocation);
	if (!e)
		return NULL;

	*ppc = page_address(e->page);
	e->end += len;

	spin_lock(&con->writequeue_lock);
	list_add_tail(&e->list, &con->writequeue);
	spin_unlock(&con->writequeue_lock);

	return e;
};

/* Reserve a message buffer for 'nodeid'.  Rejects lengths outside
 * [sizeof(struct dlm_header), DEFAULT_BUFFER_SIZE]; returns an opaque
 * handle (the writequeue entry) to pass to dlm_lowcomms_commit_buffer,
 * or NULL on failure.
 */
void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
{
	struct connection *con;

	if (len > DEFAULT_BUFFER_SIZE ||
	    len < sizeof(struct dlm_header)) {
		/* An entry's data must fit in a single page */
		BUILD_BUG_ON(PAGE_SIZE < DEFAULT_BUFFER_SIZE);
		log_print("failed to allocate a buffer of size %d", len);
		WARN_ON(1);
		return NULL;
	}

	con = nodeid2con(nodeid, allocation);
	if (!con)
		return NULL;

	return new_wq_entry(con, len, allocation, ppc);
}

/* Release a buffer obtained from dlm_lowcomms_get_buffer.  When the last
 * reference is dropped, record the entry's final length and kick the send
 * worker for this connection.
 */
void dlm_lowcomms_commit_buffer(void *mh)
{
	struct writequeue_entry *e = (struct writequeue_entry *)mh;
	struct connection *con = e->con;
	int users;

	spin_lock(&con->writequeue_lock);
	users = --e->users;
	if (users)
		goto out;

	e->len = DLM_WQ_LENGTH_BYTES(e);
	spin_unlock(&con->writequeue_lock);

	queue_work(send_workqueue, &con->swork);
	return;

out:
	spin_unlock(&con->writequeue_lock);
	return;
}

/* Send a message */
static void send_to_sock(struct connection *con)
{
	int ret = 0;
	const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
	struct writequeue_entry *e;
	int len, offset;
	int count = 0;

	mutex_lock(&con->sock_mutex);
	if (con->sock == NULL)
		goto out_connect;

	spin_lock(&con->writequeue_lock);
	for (;;) {
		if (list_empty(&con->writequeue))
			break;

		e = list_first_entry(&con->writequeue, struct writequeue_entry, list);
		len = e->len;
		offset = e->offset;
		BUG_ON(len == 0 && e->users == 0);
		/* Drop the spinlock around the (possibly sleeping) send */
		spin_unlock(&con->writequeue_lock);

		ret = 0;
		if (len) {
			ret = kernel_sendpage(con->sock, e->page, offset, len,
					      msg_flags);
			if (ret == -EAGAIN || ret == 0) {
				if (ret == -EAGAIN &&
				    test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) &&
				    !test_and_set_bit(CF_APP_LIMITED, &con->flags)) {
					/* Notify TCP that we're limited by the
					 * application window size.
					 */
					set_bit(SOCK_NOSPACE, &con->sock->flags);
					con->sock->sk->sk_write_pending++;
				}
				cond_resched();
				goto out;
			} else if (ret < 0)
				goto send_error;
		}

		/* Don't starve people filling buffers */
		if (++count >= MAX_SEND_MSG_COUNT) {
			cond_resched();
			count = 0;
		}

		spin_lock(&con->writequeue_lock);
		writequeue_entry_complete(e, ret);
	}
	spin_unlock(&con->writequeue_lock);
out:
	mutex_unlock(&con->sock_mutex);
	return;

send_error:
	mutex_unlock(&con->sock_mutex);
	close_connection(con, false, false, true);
	/* Requeue the send work. When the work daemon runs again, it will try
	   a new connection, then call this function again. */
	queue_work(send_workqueue, &con->swork);
	return;

out_connect:
	/* No socket yet: let the send worker establish the connection */
	mutex_unlock(&con->sock_mutex);
	queue_work(send_workqueue, &con->swork);
	cond_resched();
}

/* Drop and free every pending writequeue entry on this connection */
static void clean_one_writequeue(struct connection *con)
{
	struct writequeue_entry *e, *safe;

	spin_lock(&con->writequeue_lock);
	list_for_each_entry_safe(e, safe, &con->writequeue, list) {
		list_del(&e->list);
		free_entry(e);
	}
	spin_unlock(&con->writequeue_lock);
}

/* Called from recovery when it knows that a node has
   left the cluster */
int dlm_lowcomms_close(int nodeid)
{
	struct connection *con;
	struct dlm_node_addr *na;

	log_print("closing connection to node %d", nodeid);
	con = nodeid2con(nodeid, 0);
	if (con) {
		/* Stop further traffic, tear the socket down and discard any
		 * queued messages on both directions of the connection.
		 */
		set_bit(CF_CLOSE, &con->flags);
		close_connection(con, true, true, true);
		clean_one_writequeue(con);
		if (con->othercon)
			clean_one_writequeue(con->othercon);
	}

	/* Forget the node's addresses as well */
	spin_lock(&dlm_node_addrs_spin);
	na = find_node_addr(nodeid);
	if (na) {
		list_del(&na->list);
		while (na->addr_count--)
			kfree(na->addr[na->addr_count]);
		kfree(na);
	}
	spin_unlock(&dlm_node_addrs_spin);

	return 0;
}

/* Receive workqueue function */
static void process_recv_sockets(struct work_struct *work)
{
	struct connection *con = container_of(work, struct connection, rwork);
	int err;

	clear_bit(CF_READ_PENDING, &con->flags);
	/* Drain the socket until receive_from_sock() reports a non-zero
	 * (error/no-more-data) result.
	 */
	do {
		err = receive_from_sock(con);
	} while (!err);
}

/* Listen-socket workqueue function: accept one incoming connection */
static void process_listen_recv_socket(struct work_struct *work)
{
	accept_from_sock(&listen_con);
}

/* Send workqueue function */
static void process_send_sockets(struct work_struct *work)
{
	struct connection *con = container_of(work, struct connection, swork);

	clear_bit(CF_WRITE_PENDING, &con->flags);
	if (con->sock == NULL) /* not mutex protected so check it inside too */
		con->connect_action(con);
	if (!list_empty(&con->writequeue))
		send_to_sock(con);
}

/* Destroy the receive/send workqueues (if they were created) */
static void work_stop(void)
{
	if (recv_workqueue)
		destroy_workqueue(recv_workqueue);
	if (send_workqueue)
		destroy_workqueue(send_workqueue);
}

/* Create the receive and send workqueues.  Returns 0 or -ENOMEM; on
 * failure nothing is left allocated.
 */
static int work_start(void)
{
	recv_workqueue = alloc_workqueue("dlm_recv",
					 WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
	if (!recv_workqueue) {
		log_print("can't start dlm_recv");
		return -ENOMEM;
	}

	send_workqueue = alloc_workqueue("dlm_send",
					 WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
	if (!send_workqueue) {
		log_print("can't start dlm_send");
		destroy_workqueue(recv_workqueue);
		return -ENOMEM;
	}

	return 0;
}

1618f0fb83cbStsutomu.owa@toshiba.co.jp static void _stop_conn(struct connection *con, bool and_other) 16196ed7257bSPatrick Caulfield { 1620f0fb83cbStsutomu.owa@toshiba.co.jp mutex_lock(&con->sock_mutex); 1621173a31feStsutomu.owa@toshiba.co.jp set_bit(CF_CLOSE, &con->flags); 1622f0fb83cbStsutomu.owa@toshiba.co.jp set_bit(CF_READ_PENDING, &con->flags); 16238a4abb08Stsutomu.owa@toshiba.co.jp set_bit(CF_WRITE_PENDING, &con->flags); 162493eaadebStsutomu.owa@toshiba.co.jp if (con->sock && con->sock->sk) { 162593eaadebStsutomu.owa@toshiba.co.jp write_lock_bh(&con->sock->sk->sk_callback_lock); 1626afb853fbSPatrick Caulfield con->sock->sk->sk_user_data = NULL; 162793eaadebStsutomu.owa@toshiba.co.jp write_unlock_bh(&con->sock->sk->sk_callback_lock); 162893eaadebStsutomu.owa@toshiba.co.jp } 1629f0fb83cbStsutomu.owa@toshiba.co.jp if (con->othercon && and_other) 1630f0fb83cbStsutomu.owa@toshiba.co.jp _stop_conn(con->othercon, false); 1631f0fb83cbStsutomu.owa@toshiba.co.jp mutex_unlock(&con->sock_mutex); 1632f0fb83cbStsutomu.owa@toshiba.co.jp } 1633f0fb83cbStsutomu.owa@toshiba.co.jp 1634f0fb83cbStsutomu.owa@toshiba.co.jp static void stop_conn(struct connection *con) 1635f0fb83cbStsutomu.owa@toshiba.co.jp { 1636f0fb83cbStsutomu.owa@toshiba.co.jp _stop_conn(con, true); 1637afb853fbSPatrick Caulfield } 16385e9ccc37SChristine Caulfield 1639055923bfSAlexander Aring static void shutdown_conn(struct connection *con) 1640055923bfSAlexander Aring { 1641055923bfSAlexander Aring if (con->shutdown_action) 1642055923bfSAlexander Aring con->shutdown_action(con); 1643055923bfSAlexander Aring } 1644055923bfSAlexander Aring 16454798cbbfSAlexander Aring static void connection_release(struct rcu_head *rcu) 16464798cbbfSAlexander Aring { 16474798cbbfSAlexander Aring struct connection *con = container_of(rcu, struct connection, rcu); 16484798cbbfSAlexander Aring 16494798cbbfSAlexander Aring kfree(con->rx_buf); 16504798cbbfSAlexander Aring kfree(con); 16514798cbbfSAlexander Aring } 

/* Close a connection, unhash it, purge its writequeues and schedule the
 * actual frees via SRCU so concurrent hash walkers stay safe.
 */
static void free_conn(struct connection *con)
{
	close_connection(con, true, true, true);
	spin_lock(&connections_lock);
	hlist_del_rcu(&con->list);
	spin_unlock(&connections_lock);
	if (con->othercon) {
		clean_one_writequeue(con->othercon);
		call_srcu(&connections_srcu, &con->othercon->rcu,
			  connection_release);
	}
	clean_one_writequeue(con);
	call_srcu(&connections_srcu, &con->rcu, connection_release);
}

/* Repeatedly stop all connections and flush both workqueues until every
 * connection (and othercon) has its READ/WRITE pending bits still set,
 * i.e. no work item has run and cleared them since the last pass.
 */
static void work_flush(void)
{
	int ok, idx;
	int i;
	struct connection *con;

	do {
		ok = 1;
		foreach_conn(stop_conn);
		if (recv_workqueue)
			flush_workqueue(recv_workqueue);
		if (send_workqueue)
			flush_workqueue(send_workqueue);
		idx = srcu_read_lock(&connections_srcu);
		for (i = 0; i < CONN_HASH_SIZE && ok; i++) {
			hlist_for_each_entry_rcu(con, &connection_hash[i],
						 list) {
				ok &= test_bit(CF_READ_PENDING, &con->flags);
				ok &= test_bit(CF_WRITE_PENDING, &con->flags);
				if (con->othercon) {
					ok &= test_bit(CF_READ_PENDING,
						       &con->othercon->flags);
					ok &= test_bit(CF_WRITE_PENDING,
						       &con->othercon->flags);
				}
			}
		}
		srcu_read_unlock(&connections_srcu, idx);
	} while (!ok);
}

/* Tear down the whole lowcomms layer: forbid new connections, flush all
 * in-flight work, close the listen socket, then shut down and free every
 * connection before destroying the workqueues and local addresses.
 */
void dlm_lowcomms_stop(void)
{
	/* Set all the flags to prevent any
	   socket activity.
	*/
	dlm_allow_conn = 0;

	if (recv_workqueue)
		flush_workqueue(recv_workqueue);
	if (send_workqueue)
		flush_workqueue(send_workqueue);

	dlm_close_sock(&listen_con.sock);

	foreach_conn(shutdown_conn);
	work_flush();
	foreach_conn(free_conn);
	work_stop();
	deinit_local();
}

/* Bring up the lowcomms layer: gather local addresses, start the
 * workqueues, then begin listening with TCP (ci_protocol == 0) or SCTP.
 * Returns 0 on success or a negative errno.
 */
int dlm_lowcomms_start(void)
{
	int error = -EINVAL;
	int i;

	for (i = 0; i < CONN_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&connection_hash[i]);

	init_local();
	if (!dlm_local_count) {
		error = -ENOTCONN;
		log_print("no local IP address has been set");
		goto fail;
	}

	INIT_WORK(&listen_con.rwork, process_listen_recv_socket);

	error = work_start();
	if (error)
		goto fail;

	dlm_allow_conn = 1;

	/* Start listening */
	if (dlm_config.ci_protocol == 0)
		error = tcp_listen_for_all();
	else
		error = sctp_listen_for_all(&listen_con);
	if (error)
		goto fail_unlisten;

	return 0;

fail_unlisten:
	dlm_allow_conn = 0;
	dlm_close_sock(&listen_con.sock);
fail:
	return error;
}

/* Module teardown: free every remembered node address */
void dlm_lowcomms_exit(void)
{
	struct dlm_node_addr *na, *safe;

	spin_lock(&dlm_node_addrs_spin);
	list_for_each_entry_safe(na, safe, &dlm_node_addrs, list) {
		list_del(&na->list);
		while (na->addr_count--)
			kfree(na->addr[na->addr_count]);
		kfree(na);
	}
	spin_unlock(&dlm_node_addrs_spin);
}