1328970deSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 26714d8e8SKurt Hackel /* -*- mode: c; c-basic-offset: 8; -*- 36714d8e8SKurt Hackel * vim: noexpandtab sw=8 ts=8 sts=0: 46714d8e8SKurt Hackel * 56714d8e8SKurt Hackel * dlmdomain.c 66714d8e8SKurt Hackel * 76714d8e8SKurt Hackel * defines domain join / leave apis 86714d8e8SKurt Hackel * 96714d8e8SKurt Hackel * Copyright (C) 2004 Oracle. All rights reserved. 106714d8e8SKurt Hackel */ 116714d8e8SKurt Hackel 126714d8e8SKurt Hackel #include <linux/module.h> 136714d8e8SKurt Hackel #include <linux/types.h> 146714d8e8SKurt Hackel #include <linux/slab.h> 156714d8e8SKurt Hackel #include <linux/highmem.h> 166714d8e8SKurt Hackel #include <linux/init.h> 176714d8e8SKurt Hackel #include <linux/spinlock.h> 186714d8e8SKurt Hackel #include <linux/delay.h> 196714d8e8SKurt Hackel #include <linux/err.h> 206325b4a2SSunil Mushran #include <linux/debugfs.h> 21174cd4b1SIngo Molnar #include <linux/sched/signal.h> 226714d8e8SKurt Hackel 236714d8e8SKurt Hackel #include "cluster/heartbeat.h" 246714d8e8SKurt Hackel #include "cluster/nodemanager.h" 256714d8e8SKurt Hackel #include "cluster/tcp.h" 266714d8e8SKurt Hackel 276714d8e8SKurt Hackel #include "dlmapi.h" 286714d8e8SKurt Hackel #include "dlmcommon.h" 296714d8e8SKurt Hackel #include "dlmdomain.h" 306325b4a2SSunil Mushran #include "dlmdebug.h" 316714d8e8SKurt Hackel 326714d8e8SKurt Hackel #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) 336714d8e8SKurt Hackel #include "cluster/masklog.h" 346714d8e8SKurt Hackel 351faf2894SSrinivas Eeda /* 361faf2894SSrinivas Eeda * ocfs2 node maps are array of long int, which limits to send them freely 371faf2894SSrinivas Eeda * across the wire due to endianness issues. To workaround this, we convert 381faf2894SSrinivas Eeda * long ints to byte arrays. Following 3 routines are helper functions to 391faf2894SSrinivas Eeda * set/test/copy bits within those array of bytes 401faf2894SSrinivas Eeda */ 411faf2894SSrinivas Eeda static inline void byte_set_bit(u8 nr, u8 map[]) 421faf2894SSrinivas Eeda { 431faf2894SSrinivas Eeda map[nr >> 3] |= (1UL << (nr & 7)); 441faf2894SSrinivas Eeda } 451faf2894SSrinivas Eeda 461faf2894SSrinivas Eeda static inline int byte_test_bit(u8 nr, u8 map[]) 471faf2894SSrinivas Eeda { 481faf2894SSrinivas Eeda return ((1UL << (nr & 7)) & (map[nr >> 3])) != 0; 491faf2894SSrinivas Eeda } 501faf2894SSrinivas Eeda 511faf2894SSrinivas Eeda static inline void byte_copymap(u8 dmap[], unsigned long smap[], 521faf2894SSrinivas Eeda unsigned int sz) 531faf2894SSrinivas Eeda { 541faf2894SSrinivas Eeda unsigned int nn; 551faf2894SSrinivas Eeda 561faf2894SSrinivas Eeda if (!sz) 571faf2894SSrinivas Eeda return; 581faf2894SSrinivas Eeda 591faf2894SSrinivas Eeda memset(dmap, 0, ((sz + 7) >> 3)); 601faf2894SSrinivas Eeda for (nn = 0 ; nn < sz; nn++) 611faf2894SSrinivas Eeda if (test_bit(nn, smap)) 621faf2894SSrinivas Eeda byte_set_bit(nn, dmap); 631faf2894SSrinivas Eeda } 641faf2894SSrinivas Eeda 6503d864c0SDaniel Phillips static void dlm_free_pagevec(void **vec, int pages) 6603d864c0SDaniel Phillips { 6703d864c0SDaniel Phillips while (pages--) 6803d864c0SDaniel Phillips free_page((unsigned long)vec[pages]); 6903d864c0SDaniel Phillips kfree(vec); 7003d864c0SDaniel Phillips } 7103d864c0SDaniel Phillips 7203d864c0SDaniel Phillips static void **dlm_alloc_pagevec(int pages) 7303d864c0SDaniel Phillips { 746da2ec56SKees Cook void **vec = kmalloc_array(pages, sizeof(void *), GFP_KERNEL); 7503d864c0SDaniel Phillips int i; 7603d864c0SDaniel Phillips 7703d864c0SDaniel Phillips if (!vec) 7803d864c0SDaniel Phillips return NULL; 7903d864c0SDaniel Phillips 8003d864c0SDaniel Phillips for (i = 0; i < pages; i++) 8103d864c0SDaniel Phillips if (!(vec[i] = (void *)__get_free_page(GFP_KERNEL))) 8203d864c0SDaniel Phillips goto out_free; 83c8f33b6eSJoel Becker 84685f1adbSMark Fasheh mlog(0, "Allocated DLM hash pagevec; %d pages (%lu expected), %lu buckets per page\n", 85f5a923d1SMark Fasheh pages, (unsigned long)DLM_HASH_PAGES, 86f5a923d1SMark Fasheh (unsigned long)DLM_BUCKETS_PER_PAGE); 8703d864c0SDaniel Phillips return vec; 8803d864c0SDaniel Phillips out_free: 8903d864c0SDaniel Phillips dlm_free_pagevec(vec, i); 9003d864c0SDaniel Phillips return NULL; 9103d864c0SDaniel Phillips } 9203d864c0SDaniel Phillips 936714d8e8SKurt Hackel /* 946714d8e8SKurt Hackel * 956714d8e8SKurt Hackel * spinlock lock ordering: if multiple locks are needed, obey this ordering: 966714d8e8SKurt Hackel * dlm_domain_lock 976714d8e8SKurt Hackel * struct dlm_ctxt->spinlock 986714d8e8SKurt Hackel * struct dlm_lock_resource->spinlock 996714d8e8SKurt Hackel * struct dlm_ctxt->master_lock 1006714d8e8SKurt Hackel * struct dlm_ctxt->ast_lock 1016714d8e8SKurt Hackel * dlm_master_list_entry->spinlock 1026714d8e8SKurt Hackel * dlm_lock->spinlock 1036714d8e8SKurt Hackel * 1046714d8e8SKurt Hackel */ 1056714d8e8SKurt Hackel 10634af946aSIngo Molnar DEFINE_SPINLOCK(dlm_domain_lock); 1076714d8e8SKurt Hackel LIST_HEAD(dlm_domains); 1086714d8e8SKurt Hackel static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); 1096714d8e8SKurt Hackel 110d24fbcdaSJoel Becker /* 111d24fbcdaSJoel Becker * The supported protocol version for DLM communication. Running domains 112d24fbcdaSJoel Becker * will have a negotiated version with the same major number and a minor 113d24fbcdaSJoel Becker * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should 114d24fbcdaSJoel Becker * be used to determine what a running domain is actually using. 115ea203441SSunil Mushran * 116ea203441SSunil Mushran * New in version 1.1: 117ea203441SSunil Mushran * - Message DLM_QUERY_REGION added to support global heartbeat 11818cfdf1bSSunil Mushran * - Message DLM_QUERY_NODEINFO added to allow online node removes 119bddefdeeSSunil Mushran * New in version 1.2: 120bddefdeeSSunil Mushran * - Message DLM_BEGIN_EXIT_DOMAIN_MSG added to mark start of exit domain 12160d663cbSxuejiufei * New in version 1.3: 12260d663cbSxuejiufei * - Message DLM_DEREF_LOCKRES_DONE added to inform non-master that the 12360d663cbSxuejiufei * refmap is cleared 124d24fbcdaSJoel Becker */ 125d24fbcdaSJoel Becker static const struct dlm_protocol_version dlm_protocol = { 126d24fbcdaSJoel Becker .pv_major = 1, 12760d663cbSxuejiufei .pv_minor = 3, 128d24fbcdaSJoel Becker }; 129d24fbcdaSJoel Becker 1306714d8e8SKurt Hackel #define DLM_DOMAIN_BACKOFF_MS 200 1316714d8e8SKurt Hackel 132d74c9803SKurt Hackel static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, 133d74c9803SKurt Hackel void **ret_data); 134d74c9803SKurt Hackel static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, 135d74c9803SKurt Hackel void **ret_data); 136d74c9803SKurt Hackel static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, 137d74c9803SKurt Hackel void **ret_data); 138ea203441SSunil Mushran static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, 139ea203441SSunil Mushran void *data, void **ret_data); 140d74c9803SKurt Hackel static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, 141d74c9803SKurt Hackel void **ret_data); 142d24fbcdaSJoel Becker static int dlm_protocol_compare(struct dlm_protocol_version *existing, 143d24fbcdaSJoel Becker struct dlm_protocol_version *request); 1446714d8e8SKurt Hackel 1456714d8e8SKurt Hackel static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm); 1466714d8e8SKurt Hackel 147e9f0b6a6SSunil Mushran void __dlm_unhash_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) 1486714d8e8SKurt Hackel { 149e9f0b6a6SSunil Mushran if (hlist_unhashed(&res->hash_node)) 150e9f0b6a6SSunil Mushran return; 151e9f0b6a6SSunil Mushran 152e9f0b6a6SSunil Mushran mlog(0, "%s: Unhash res %.*s\n", dlm->name, res->lockname.len, 153e9f0b6a6SSunil Mushran res->lockname.name); 154e9f0b6a6SSunil Mushran hlist_del_init(&res->hash_node); 155e9f0b6a6SSunil Mushran dlm_lockres_put(res); 15678062cb2SSunil Mushran } 1576714d8e8SKurt Hackel 158e9f0b6a6SSunil Mushran void __dlm_insert_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) 1596714d8e8SKurt Hackel { 16081f2094aSMark Fasheh struct hlist_head *bucket; 1616714d8e8SKurt Hackel 1626714d8e8SKurt Hackel assert_spin_locked(&dlm->spinlock); 1636714d8e8SKurt Hackel 164612645f7SAl Viro bucket = dlm_lockres_hash(dlm, res->lockname.hash); 1656714d8e8SKurt Hackel 1666714d8e8SKurt Hackel /* get a reference for our hashtable */ 1676714d8e8SKurt Hackel dlm_lockres_get(res); 1686714d8e8SKurt Hackel 16981f2094aSMark Fasheh hlist_add_head(&res->hash_node, bucket); 170e9f0b6a6SSunil Mushran 171e9f0b6a6SSunil Mushran mlog(0, "%s: Hash res %.*s\n", dlm->name, res->lockname.len, 172e9f0b6a6SSunil Mushran res->lockname.name); 1736714d8e8SKurt Hackel } 1746714d8e8SKurt Hackel 175ba2bf218SKurt Hackel struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, 1766714d8e8SKurt Hackel const char *name, 177a3d33291SMark Fasheh unsigned int len, 178a3d33291SMark Fasheh unsigned int hash) 1796714d8e8SKurt Hackel { 18081f2094aSMark Fasheh struct hlist_head *bucket; 181df53cd3bSDong Fang struct dlm_lock_resource *res; 1826714d8e8SKurt Hackel 183ef6b689bSTao Ma mlog(0, "%.*s\n", len, name); 1846714d8e8SKurt Hackel 1856714d8e8SKurt Hackel assert_spin_locked(&dlm->spinlock); 1866714d8e8SKurt Hackel 18703d864c0SDaniel Phillips bucket = dlm_lockres_hash(dlm, hash); 18803d864c0SDaniel Phillips 189df53cd3bSDong Fang hlist_for_each_entry(res, bucket, hash_node) { 1904198985fSDaniel Phillips if (res->lockname.name[0] != name[0]) 1914198985fSDaniel Phillips continue; 1924198985fSDaniel Phillips if (unlikely(res->lockname.len != len)) 1934198985fSDaniel Phillips continue; 1944198985fSDaniel Phillips if (memcmp(res->lockname.name + 1, name + 1, len - 1)) 1954198985fSDaniel Phillips continue; 1964198985fSDaniel Phillips dlm_lockres_get(res); 1974198985fSDaniel Phillips return res; 1986714d8e8SKurt Hackel } 1994198985fSDaniel Phillips return NULL; 2006714d8e8SKurt Hackel } 2016714d8e8SKurt Hackel 202ba2bf218SKurt Hackel /* intended to be called by functions which do not care about lock 203ba2bf218SKurt Hackel * resources which are being purged (most net _handler functions). 204ba2bf218SKurt Hackel * this will return NULL for any lock resource which is found but 205ba2bf218SKurt Hackel * currently in the process of dropping its mastery reference. 206ba2bf218SKurt Hackel * use __dlm_lookup_lockres_full when you need the lock resource 207ba2bf218SKurt Hackel * regardless (e.g. dlm_get_lock_resource) */ 208ba2bf218SKurt Hackel struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, 209ba2bf218SKurt Hackel const char *name, 210ba2bf218SKurt Hackel unsigned int len, 211ba2bf218SKurt Hackel unsigned int hash) 212ba2bf218SKurt Hackel { 213ba2bf218SKurt Hackel struct dlm_lock_resource *res = NULL; 214ba2bf218SKurt Hackel 215ef6b689bSTao Ma mlog(0, "%.*s\n", len, name); 216ba2bf218SKurt Hackel 217ba2bf218SKurt Hackel assert_spin_locked(&dlm->spinlock); 218ba2bf218SKurt Hackel 219ba2bf218SKurt Hackel res = __dlm_lookup_lockres_full(dlm, name, len, hash); 220ba2bf218SKurt Hackel if (res) { 221ba2bf218SKurt Hackel spin_lock(&res->spinlock); 222ba2bf218SKurt Hackel if (res->state & DLM_LOCK_RES_DROPPING_REF) { 223ba2bf218SKurt Hackel spin_unlock(&res->spinlock); 224ba2bf218SKurt Hackel dlm_lockres_put(res); 225ba2bf218SKurt Hackel return NULL; 226ba2bf218SKurt Hackel } 227ba2bf218SKurt Hackel spin_unlock(&res->spinlock); 228ba2bf218SKurt Hackel } 229ba2bf218SKurt Hackel 230ba2bf218SKurt Hackel return res; 231ba2bf218SKurt Hackel } 232ba2bf218SKurt Hackel 2336714d8e8SKurt Hackel struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, 2346714d8e8SKurt Hackel const char *name, 2356714d8e8SKurt Hackel unsigned int len) 2366714d8e8SKurt Hackel { 2376714d8e8SKurt Hackel struct dlm_lock_resource *res; 238a3d33291SMark Fasheh unsigned int hash = dlm_lockid_hash(name, len); 2396714d8e8SKurt Hackel 2406714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 241a3d33291SMark Fasheh res = __dlm_lookup_lockres(dlm, name, len, hash); 2426714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 2436714d8e8SKurt Hackel return res; 2446714d8e8SKurt Hackel } 2456714d8e8SKurt Hackel 2466714d8e8SKurt Hackel static struct dlm_ctxt * __dlm_lookup_domain_full(const char *domain, int len) 2476714d8e8SKurt Hackel { 248df53cd3bSDong Fang struct dlm_ctxt *tmp; 2496714d8e8SKurt Hackel 2506714d8e8SKurt Hackel assert_spin_locked(&dlm_domain_lock); 2516714d8e8SKurt Hackel 2526714d8e8SKurt Hackel /* tmp->name here is always NULL terminated, 2536714d8e8SKurt Hackel * but domain may not be! */ 254df53cd3bSDong Fang list_for_each_entry(tmp, &dlm_domains, list) { 2556714d8e8SKurt Hackel if (strlen(tmp->name) == len && 2566714d8e8SKurt Hackel memcmp(tmp->name, domain, len)==0) 257df53cd3bSDong Fang return tmp; 2586714d8e8SKurt Hackel } 2596714d8e8SKurt Hackel 260df53cd3bSDong Fang return NULL; 2616714d8e8SKurt Hackel } 2626714d8e8SKurt Hackel 2636714d8e8SKurt Hackel /* For null terminated domain strings ONLY */ 2646714d8e8SKurt Hackel static struct dlm_ctxt * __dlm_lookup_domain(const char *domain) 2656714d8e8SKurt Hackel { 2666714d8e8SKurt Hackel assert_spin_locked(&dlm_domain_lock); 2676714d8e8SKurt Hackel 2686714d8e8SKurt Hackel return __dlm_lookup_domain_full(domain, strlen(domain)); 2696714d8e8SKurt Hackel } 2706714d8e8SKurt Hackel 2716714d8e8SKurt Hackel 2726714d8e8SKurt Hackel /* returns true on one of two conditions: 2736714d8e8SKurt Hackel * 1) the domain does not exist 2746714d8e8SKurt Hackel * 2) the domain exists and it's state is "joined" */ 2756714d8e8SKurt Hackel static int dlm_wait_on_domain_helper(const char *domain) 2766714d8e8SKurt Hackel { 2776714d8e8SKurt Hackel int ret = 0; 2786714d8e8SKurt Hackel struct dlm_ctxt *tmp = NULL; 2796714d8e8SKurt Hackel 2806714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 2816714d8e8SKurt Hackel 2826714d8e8SKurt Hackel tmp = __dlm_lookup_domain(domain); 2836714d8e8SKurt Hackel if (!tmp) 2846714d8e8SKurt Hackel ret = 1; 2856714d8e8SKurt Hackel else if (tmp->dlm_state == DLM_CTXT_JOINED) 2866714d8e8SKurt Hackel ret = 1; 2876714d8e8SKurt Hackel 2886714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 2896714d8e8SKurt Hackel return ret; 2906714d8e8SKurt Hackel } 2916714d8e8SKurt Hackel 2926714d8e8SKurt Hackel static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) 2936714d8e8SKurt Hackel { 2946325b4a2SSunil Mushran dlm_destroy_debugfs_subroot(dlm); 2956325b4a2SSunil Mushran 29681f2094aSMark Fasheh if (dlm->lockres_hash) 29703d864c0SDaniel Phillips dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); 2986714d8e8SKurt Hackel 299e2b66ddcSSunil Mushran if (dlm->master_hash) 300e2b66ddcSSunil Mushran dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES); 301e2b66ddcSSunil Mushran 3026714d8e8SKurt Hackel kfree(dlm->name); 3036714d8e8SKurt Hackel kfree(dlm); 3046714d8e8SKurt Hackel } 3056714d8e8SKurt Hackel 3066714d8e8SKurt Hackel /* A little strange - this function will be called while holding 3076714d8e8SKurt Hackel * dlm_domain_lock and is expected to be holding it on the way out. We 3086714d8e8SKurt Hackel * will however drop and reacquire it multiple times */ 3096714d8e8SKurt Hackel static void dlm_ctxt_release(struct kref *kref) 3106714d8e8SKurt Hackel { 3116714d8e8SKurt Hackel struct dlm_ctxt *dlm; 3126714d8e8SKurt Hackel 3136714d8e8SKurt Hackel dlm = container_of(kref, struct dlm_ctxt, dlm_refs); 3146714d8e8SKurt Hackel 3156714d8e8SKurt Hackel BUG_ON(dlm->num_joins); 3166714d8e8SKurt Hackel BUG_ON(dlm->dlm_state == DLM_CTXT_JOINED); 3176714d8e8SKurt Hackel 3186714d8e8SKurt Hackel /* we may still be in the list if we hit an error during join. */ 3196714d8e8SKurt Hackel list_del_init(&dlm->list); 3206714d8e8SKurt Hackel 3216714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 3226714d8e8SKurt Hackel 3236714d8e8SKurt Hackel mlog(0, "freeing memory from domain %s\n", dlm->name); 3246714d8e8SKurt Hackel 3256714d8e8SKurt Hackel wake_up(&dlm_domain_events); 3266714d8e8SKurt Hackel 3276714d8e8SKurt Hackel dlm_free_ctxt_mem(dlm); 3286714d8e8SKurt Hackel 3296714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 3306714d8e8SKurt Hackel } 3316714d8e8SKurt Hackel 3326714d8e8SKurt Hackel void dlm_put(struct dlm_ctxt *dlm) 3336714d8e8SKurt Hackel { 3346714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 3356714d8e8SKurt Hackel kref_put(&dlm->dlm_refs, dlm_ctxt_release); 3366714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 3376714d8e8SKurt Hackel } 3386714d8e8SKurt Hackel 3396714d8e8SKurt Hackel static void __dlm_get(struct dlm_ctxt *dlm) 3406714d8e8SKurt Hackel { 3416714d8e8SKurt Hackel kref_get(&dlm->dlm_refs); 3426714d8e8SKurt Hackel } 3436714d8e8SKurt Hackel 3446714d8e8SKurt Hackel /* given a questionable reference to a dlm object, gets a reference if 3456714d8e8SKurt Hackel * it can find it in the list, otherwise returns NULL in which case 3466714d8e8SKurt Hackel * you shouldn't trust your pointer. */ 3476714d8e8SKurt Hackel struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm) 3486714d8e8SKurt Hackel { 349df53cd3bSDong Fang struct dlm_ctxt *target; 350df53cd3bSDong Fang struct dlm_ctxt *ret = NULL; 3516714d8e8SKurt Hackel 3526714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 3536714d8e8SKurt Hackel 354df53cd3bSDong Fang list_for_each_entry(target, &dlm_domains, list) { 3556714d8e8SKurt Hackel if (target == dlm) { 3566714d8e8SKurt Hackel __dlm_get(target); 357df53cd3bSDong Fang ret = target; 3586714d8e8SKurt Hackel break; 3596714d8e8SKurt Hackel } 3606714d8e8SKurt Hackel } 3616714d8e8SKurt Hackel 3626714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 3636714d8e8SKurt Hackel 364df53cd3bSDong Fang return ret; 3656714d8e8SKurt Hackel } 3666714d8e8SKurt Hackel 3676714d8e8SKurt Hackel int dlm_domain_fully_joined(struct dlm_ctxt *dlm) 3686714d8e8SKurt Hackel { 3696714d8e8SKurt Hackel int ret; 3706714d8e8SKurt Hackel 3716714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 3726714d8e8SKurt Hackel ret = (dlm->dlm_state == DLM_CTXT_JOINED) || 3736714d8e8SKurt Hackel (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN); 3746714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 3756714d8e8SKurt Hackel 3766714d8e8SKurt Hackel return ret; 3776714d8e8SKurt Hackel } 3786714d8e8SKurt Hackel 3793156d267SKurt Hackel static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm) 3803156d267SKurt Hackel { 3813156d267SKurt Hackel if (dlm->dlm_worker) { 3823156d267SKurt Hackel destroy_workqueue(dlm->dlm_worker); 3833156d267SKurt Hackel dlm->dlm_worker = NULL; 3843156d267SKurt Hackel } 3853156d267SKurt Hackel } 3863156d267SKurt Hackel 3876714d8e8SKurt Hackel static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm) 3886714d8e8SKurt Hackel { 3896714d8e8SKurt Hackel dlm_unregister_domain_handlers(dlm); 390007dce53SSunil Mushran dlm_debug_shutdown(dlm); 3916714d8e8SKurt Hackel dlm_complete_thread(dlm); 3926714d8e8SKurt Hackel dlm_complete_recovery_thread(dlm); 3933156d267SKurt Hackel dlm_destroy_dlm_worker(dlm); 3946714d8e8SKurt Hackel 3956714d8e8SKurt Hackel /* We've left the domain. Now we can take ourselves out of the 3966714d8e8SKurt Hackel * list and allow the kref stuff to help us free the 3976714d8e8SKurt Hackel * memory. */ 3986714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 3996714d8e8SKurt Hackel list_del_init(&dlm->list); 4006714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 4016714d8e8SKurt Hackel 4026714d8e8SKurt Hackel /* Wake up anyone waiting for us to remove this domain */ 4036714d8e8SKurt Hackel wake_up(&dlm_domain_events); 4046714d8e8SKurt Hackel } 4056714d8e8SKurt Hackel 406ba2bf218SKurt Hackel static int dlm_migrate_all_locks(struct dlm_ctxt *dlm) 4076714d8e8SKurt Hackel { 408ba2bf218SKurt Hackel int i, num, n, ret = 0; 4096714d8e8SKurt Hackel struct dlm_lock_resource *res; 410ba2bf218SKurt Hackel struct hlist_node *iter; 411ba2bf218SKurt Hackel struct hlist_head *bucket; 412ba2bf218SKurt Hackel int dropped; 4136714d8e8SKurt Hackel 4146714d8e8SKurt Hackel mlog(0, "Migrating locks from domain %s\n", dlm->name); 415ba2bf218SKurt Hackel 416ba2bf218SKurt Hackel num = 0; 4176714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 41881f2094aSMark Fasheh for (i = 0; i < DLM_HASH_BUCKETS; i++) { 419ba2bf218SKurt Hackel redo_bucket: 420ba2bf218SKurt Hackel n = 0; 421ba2bf218SKurt Hackel bucket = dlm_lockres_hash(dlm, i); 422ba2bf218SKurt Hackel iter = bucket->first; 423ba2bf218SKurt Hackel while (iter) { 424ba2bf218SKurt Hackel n++; 425ba2bf218SKurt Hackel res = hlist_entry(iter, struct dlm_lock_resource, 426ba2bf218SKurt Hackel hash_node); 4276714d8e8SKurt Hackel dlm_lockres_get(res); 428ba2bf218SKurt Hackel /* migrate, if necessary. this will drop the dlm 429ba2bf218SKurt Hackel * spinlock and retake it if it does migration. */ 430ba2bf218SKurt Hackel dropped = dlm_empty_lockres(dlm, res); 4316714d8e8SKurt Hackel 432ba2bf218SKurt Hackel spin_lock(&res->spinlock); 43366effd3cSSunil Mushran if (dropped) 434ba2bf218SKurt Hackel __dlm_lockres_calc_usage(dlm, res); 43566effd3cSSunil Mushran else 436ba2bf218SKurt Hackel iter = res->hash_node.next; 437ba2bf218SKurt Hackel spin_unlock(&res->spinlock); 438ba2bf218SKurt Hackel 4396714d8e8SKurt Hackel dlm_lockres_put(res); 440ba2bf218SKurt Hackel 44166effd3cSSunil Mushran if (dropped) { 44266effd3cSSunil Mushran cond_resched_lock(&dlm->spinlock); 443ba2bf218SKurt Hackel goto redo_bucket; 4446714d8e8SKurt Hackel } 44566effd3cSSunil Mushran } 4460d01af6eSSunil Mushran cond_resched_lock(&dlm->spinlock); 447ba2bf218SKurt Hackel num += n; 4486714d8e8SKurt Hackel } 44960c7ec9eSpiaojun 45060c7ec9eSpiaojun if (!num) { 45160c7ec9eSpiaojun if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { 45260c7ec9eSpiaojun mlog(0, "%s: perhaps there are more lock resources " 45360c7ec9eSpiaojun "need to be migrated after dlm recovery\n", dlm->name); 45460c7ec9eSpiaojun ret = -EAGAIN; 45560c7ec9eSpiaojun } else { 45660c7ec9eSpiaojun mlog(0, "%s: we won't do dlm recovery after migrating " 45760c7ec9eSpiaojun "all lock resources\n", dlm->name); 45860c7ec9eSpiaojun dlm->migrate_done = 1; 45960c7ec9eSpiaojun } 46060c7ec9eSpiaojun } 46160c7ec9eSpiaojun 4626714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 463ba2bf218SKurt Hackel wake_up(&dlm->dlm_thread_wq); 4646714d8e8SKurt Hackel 465ba2bf218SKurt Hackel /* let the dlm thread take care of purging, keep scanning until 466ba2bf218SKurt Hackel * nothing remains in the hash */ 467ba2bf218SKurt Hackel if (num) { 468ba2bf218SKurt Hackel mlog(0, "%s: %d lock resources in hash last pass\n", 469ba2bf218SKurt Hackel dlm->name, num); 470ba2bf218SKurt Hackel ret = -EAGAIN; 471ba2bf218SKurt Hackel } 4726714d8e8SKurt Hackel mlog(0, "DONE Migrating locks from domain %s\n", dlm->name); 473ba2bf218SKurt Hackel return ret; 4746714d8e8SKurt Hackel } 4756714d8e8SKurt Hackel 4766714d8e8SKurt Hackel static int dlm_no_joining_node(struct dlm_ctxt *dlm) 4776714d8e8SKurt Hackel { 4786714d8e8SKurt Hackel int ret; 4796714d8e8SKurt Hackel 4806714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 4816714d8e8SKurt Hackel ret = dlm->joining_node == DLM_LOCK_RES_OWNER_UNKNOWN; 4826714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 4836714d8e8SKurt Hackel 4846714d8e8SKurt Hackel return ret; 4856714d8e8SKurt Hackel } 4866714d8e8SKurt Hackel 487bddefdeeSSunil Mushran static int dlm_begin_exit_domain_handler(struct o2net_msg *msg, u32 len, 488bddefdeeSSunil Mushran void *data, void **ret_data) 489bddefdeeSSunil Mushran { 490bddefdeeSSunil Mushran struct dlm_ctxt *dlm = data; 491bddefdeeSSunil Mushran unsigned int node; 492bddefdeeSSunil Mushran struct dlm_exit_domain *exit_msg = (struct dlm_exit_domain *) msg->buf; 493bddefdeeSSunil Mushran 494bddefdeeSSunil Mushran if (!dlm_grab(dlm)) 495bddefdeeSSunil Mushran return 0; 496bddefdeeSSunil Mushran 497bddefdeeSSunil Mushran node = exit_msg->node_idx; 498bddefdeeSSunil Mushran mlog(0, "%s: Node %u sent a begin exit domain message\n", dlm->name, node); 499bddefdeeSSunil Mushran 500bddefdeeSSunil Mushran spin_lock(&dlm->spinlock); 501bddefdeeSSunil Mushran set_bit(node, dlm->exit_domain_map); 502bddefdeeSSunil Mushran spin_unlock(&dlm->spinlock); 503bddefdeeSSunil Mushran 504bddefdeeSSunil Mushran dlm_put(dlm); 505bddefdeeSSunil Mushran 506bddefdeeSSunil Mushran return 0; 507bddefdeeSSunil Mushran } 508bddefdeeSSunil Mushran 5096714d8e8SKurt Hackel static void dlm_mark_domain_leaving(struct dlm_ctxt *dlm) 5106714d8e8SKurt Hackel { 5116714d8e8SKurt Hackel /* Yikes, a double spinlock! I need domain_lock for the dlm 5126714d8e8SKurt Hackel * state and the dlm spinlock for join state... Sorry! */ 5136714d8e8SKurt Hackel again: 5146714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 5156714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 5166714d8e8SKurt Hackel 5176714d8e8SKurt Hackel if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) { 5186714d8e8SKurt Hackel mlog(0, "Node %d is joining, we wait on it.\n", 5196714d8e8SKurt Hackel dlm->joining_node); 5206714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 5216714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 5226714d8e8SKurt Hackel 5236714d8e8SKurt Hackel wait_event(dlm->dlm_join_events, dlm_no_joining_node(dlm)); 5246714d8e8SKurt Hackel goto again; 5256714d8e8SKurt Hackel } 5266714d8e8SKurt Hackel 5276714d8e8SKurt Hackel dlm->dlm_state = DLM_CTXT_LEAVING; 5286714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 5296714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 5306714d8e8SKurt Hackel } 5316714d8e8SKurt Hackel 5326714d8e8SKurt Hackel static void __dlm_print_nodes(struct dlm_ctxt *dlm) 5336714d8e8SKurt Hackel { 5348decab3cSSunil Mushran int node = -1, num = 0; 5356714d8e8SKurt Hackel 5366714d8e8SKurt Hackel assert_spin_locked(&dlm->spinlock); 5376714d8e8SKurt Hackel 5388decab3cSSunil Mushran printk("( "); 5396714d8e8SKurt Hackel while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 5406714d8e8SKurt Hackel node + 1)) < O2NM_MAX_NODES) { 541781ee3e2SSunil Mushran printk("%d ", node); 5428decab3cSSunil Mushran ++num; 5436714d8e8SKurt Hackel } 5448decab3cSSunil Mushran printk(") %u nodes\n", num); 5456714d8e8SKurt Hackel } 5466714d8e8SKurt Hackel 547d74c9803SKurt Hackel static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, 548d74c9803SKurt Hackel void **ret_data) 5496714d8e8SKurt Hackel { 5506714d8e8SKurt Hackel struct dlm_ctxt *dlm = data; 5516714d8e8SKurt Hackel unsigned int node; 5526714d8e8SKurt Hackel struct dlm_exit_domain *exit_msg = (struct dlm_exit_domain *) msg->buf; 5536714d8e8SKurt Hackel 554ef6b689bSTao Ma mlog(0, "%p %u %p", msg, len, data); 5556714d8e8SKurt Hackel 5566714d8e8SKurt Hackel if (!dlm_grab(dlm)) 5576714d8e8SKurt Hackel return 0; 5586714d8e8SKurt Hackel 5596714d8e8SKurt Hackel node = exit_msg->node_idx; 5606714d8e8SKurt Hackel 5616714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 5626714d8e8SKurt Hackel clear_bit(node, dlm->domain_map); 563bddefdeeSSunil Mushran clear_bit(node, dlm->exit_domain_map); 5648decab3cSSunil Mushran printk(KERN_NOTICE "o2dlm: Node %u leaves domain %s ", node, dlm->name); 5656714d8e8SKurt Hackel __dlm_print_nodes(dlm); 5666714d8e8SKurt Hackel 5676714d8e8SKurt Hackel /* notify anything attached to the heartbeat events */ 5686714d8e8SKurt Hackel dlm_hb_event_notify_attached(dlm, node, 0); 5696714d8e8SKurt Hackel 5706714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 5716714d8e8SKurt Hackel 5726714d8e8SKurt Hackel dlm_put(dlm); 5736714d8e8SKurt Hackel 5746714d8e8SKurt Hackel return 0; 5756714d8e8SKurt Hackel } 5766714d8e8SKurt Hackel 577bddefdeeSSunil Mushran static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, u32 msg_type, 5786714d8e8SKurt Hackel unsigned int node) 5796714d8e8SKurt Hackel { 5806714d8e8SKurt Hackel int status; 5816714d8e8SKurt Hackel struct dlm_exit_domain leave_msg; 5826714d8e8SKurt Hackel 583bddefdeeSSunil Mushran mlog(0, "%s: Sending domain exit message %u to node %u\n", dlm->name, 584bddefdeeSSunil Mushran msg_type, node); 5856714d8e8SKurt Hackel 5866714d8e8SKurt Hackel memset(&leave_msg, 0, sizeof(leave_msg)); 5876714d8e8SKurt Hackel leave_msg.node_idx = dlm->node_num; 5886714d8e8SKurt Hackel 589bddefdeeSSunil Mushran status = o2net_send_message(msg_type, dlm->key, &leave_msg, 590bddefdeeSSunil Mushran sizeof(leave_msg), node, NULL); 591a5196ec5SWengang Wang if (status < 0) 592bddefdeeSSunil Mushran mlog(ML_ERROR, "Error %d sending domain exit message %u " 593bddefdeeSSunil Mushran "to node %u on domain %s\n", status, msg_type, node, 594bddefdeeSSunil Mushran dlm->name); 5956714d8e8SKurt Hackel 5966714d8e8SKurt Hackel return status; 5976714d8e8SKurt Hackel } 5986714d8e8SKurt Hackel 599bddefdeeSSunil Mushran static void dlm_begin_exit_domain(struct dlm_ctxt *dlm) 600bddefdeeSSunil Mushran { 601bddefdeeSSunil Mushran int node = -1; 602bddefdeeSSunil Mushran 603bddefdeeSSunil Mushran /* Support for begin exit domain was added in 1.2 */ 604bddefdeeSSunil Mushran if (dlm->dlm_locking_proto.pv_major == 1 && 605bddefdeeSSunil Mushran dlm->dlm_locking_proto.pv_minor < 2) 606bddefdeeSSunil Mushran return; 607bddefdeeSSunil Mushran 608bddefdeeSSunil Mushran /* 609bddefdeeSSunil Mushran * Unlike DLM_EXIT_DOMAIN_MSG, DLM_BEGIN_EXIT_DOMAIN_MSG is purely 610bddefdeeSSunil Mushran * informational. Meaning if a node does not receive the message, 611bddefdeeSSunil Mushran * so be it. 612bddefdeeSSunil Mushran */ 613bddefdeeSSunil Mushran spin_lock(&dlm->spinlock); 614bddefdeeSSunil Mushran while (1) { 615bddefdeeSSunil Mushran node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, node + 1); 616bddefdeeSSunil Mushran if (node >= O2NM_MAX_NODES) 617bddefdeeSSunil Mushran break; 618bddefdeeSSunil Mushran if (node == dlm->node_num) 619bddefdeeSSunil Mushran continue; 620bddefdeeSSunil Mushran 621bddefdeeSSunil Mushran spin_unlock(&dlm->spinlock); 622bddefdeeSSunil Mushran dlm_send_one_domain_exit(dlm, DLM_BEGIN_EXIT_DOMAIN_MSG, node); 623bddefdeeSSunil Mushran spin_lock(&dlm->spinlock); 624bddefdeeSSunil Mushran } 625bddefdeeSSunil Mushran spin_unlock(&dlm->spinlock); 626bddefdeeSSunil Mushran } 6276714d8e8SKurt Hackel 6286714d8e8SKurt Hackel static void dlm_leave_domain(struct dlm_ctxt *dlm) 6296714d8e8SKurt Hackel { 6306714d8e8SKurt Hackel int node, clear_node, status; 6316714d8e8SKurt Hackel 6326714d8e8SKurt Hackel /* At this point we've migrated away all our locks and won't 6336714d8e8SKurt Hackel * accept mastership of new ones. The dlm is responsible for 6346714d8e8SKurt Hackel * almost nothing now. We make sure not to confuse any joining 6356714d8e8SKurt Hackel * nodes and then commence shutdown procedure. */ 6366714d8e8SKurt Hackel 6376714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 6386714d8e8SKurt Hackel /* Clear ourselves from the domain map */ 6396714d8e8SKurt Hackel clear_bit(dlm->node_num, dlm->domain_map); 6406714d8e8SKurt Hackel while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 6416714d8e8SKurt Hackel 0)) < O2NM_MAX_NODES) { 6426714d8e8SKurt Hackel /* Drop the dlm spinlock. This is safe wrt the domain_map. 6436714d8e8SKurt Hackel * -nodes cannot be added now as the 6446714d8e8SKurt Hackel * query_join_handlers knows to respond with OK_NO_MAP 6456714d8e8SKurt Hackel * -we catch the right network errors if a node is 6466714d8e8SKurt Hackel * removed from the map while we're sending him the 6476714d8e8SKurt Hackel * exit message. */ 6486714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 6496714d8e8SKurt Hackel 6506714d8e8SKurt Hackel clear_node = 1; 6516714d8e8SKurt Hackel 652bddefdeeSSunil Mushran status = dlm_send_one_domain_exit(dlm, DLM_EXIT_DOMAIN_MSG, 653bddefdeeSSunil Mushran node); 6546714d8e8SKurt Hackel if (status < 0 && 6556714d8e8SKurt Hackel status != -ENOPROTOOPT && 6566714d8e8SKurt Hackel status != -ENOTCONN) { 6576714d8e8SKurt Hackel mlog(ML_NOTICE, "Error %d sending domain exit message " 6586714d8e8SKurt Hackel "to node %d\n", status, node); 6596714d8e8SKurt Hackel 6606714d8e8SKurt Hackel /* Not sure what to do here but lets sleep for 6616714d8e8SKurt Hackel * a bit in case this was a transient 6626714d8e8SKurt Hackel * error... */ 6636714d8e8SKurt Hackel msleep(DLM_DOMAIN_BACKOFF_MS); 6646714d8e8SKurt Hackel clear_node = 0; 6656714d8e8SKurt Hackel } 6666714d8e8SKurt Hackel 6676714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 6686714d8e8SKurt Hackel /* If we're not clearing the node bit then we intend 6696714d8e8SKurt Hackel * to loop back around to try again. */ 6706714d8e8SKurt Hackel if (clear_node) 6716714d8e8SKurt Hackel clear_bit(node, dlm->domain_map); 6726714d8e8SKurt Hackel } 6736714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 6746714d8e8SKurt Hackel } 6756714d8e8SKurt Hackel 6766714d8e8SKurt Hackel void dlm_unregister_domain(struct dlm_ctxt *dlm) 6776714d8e8SKurt Hackel { 6786714d8e8SKurt Hackel int leave = 0; 67929576f8bSSunil Mushran struct dlm_lock_resource *res; 6806714d8e8SKurt Hackel 6816714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 6826714d8e8SKurt Hackel BUG_ON(dlm->dlm_state != DLM_CTXT_JOINED); 6836714d8e8SKurt Hackel BUG_ON(!dlm->num_joins); 6846714d8e8SKurt Hackel 6856714d8e8SKurt Hackel dlm->num_joins--; 6866714d8e8SKurt Hackel if (!dlm->num_joins) { 6876714d8e8SKurt Hackel /* We mark it "in shutdown" now so new register 6886714d8e8SKurt Hackel * requests wait until we've completely left the 6896714d8e8SKurt Hackel * domain. Don't use DLM_CTXT_LEAVING yet as we still 6906714d8e8SKurt Hackel * want new domain joins to communicate with us at 6916714d8e8SKurt Hackel * least until we've completed migration of our 6926714d8e8SKurt Hackel * resources. */ 6936714d8e8SKurt Hackel dlm->dlm_state = DLM_CTXT_IN_SHUTDOWN; 6946714d8e8SKurt Hackel leave = 1; 6956714d8e8SKurt Hackel } 6966714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 6976714d8e8SKurt Hackel 6986714d8e8SKurt Hackel if (leave) { 6996714d8e8SKurt Hackel mlog(0, "shutting down domain %s\n", dlm->name); 700bddefdeeSSunil Mushran dlm_begin_exit_domain(dlm); 7016714d8e8SKurt Hackel 7026714d8e8SKurt Hackel /* We changed dlm state, notify the thread */ 7036714d8e8SKurt Hackel dlm_kick_thread(dlm, NULL); 7046714d8e8SKurt Hackel 705ba2bf218SKurt Hackel while (dlm_migrate_all_locks(dlm)) { 7062f5bf1f2SSunil Mushran /* Give dlm_thread time to purge the lockres' */ 7072f5bf1f2SSunil Mushran msleep(500); 708ba2bf218SKurt Hackel mlog(0, "%s: more migration to do\n", dlm->name); 709ba2bf218SKurt Hackel } 71029576f8bSSunil Mushran 71129576f8bSSunil Mushran /* This list should be empty. If not, print remaining lockres */ 71229576f8bSSunil Mushran if (!list_empty(&dlm->tracking_list)) { 71329576f8bSSunil Mushran mlog(ML_ERROR, "Following lockres' are still on the " 71429576f8bSSunil Mushran "tracking list:\n"); 71529576f8bSSunil Mushran list_for_each_entry(res, &dlm->tracking_list, tracking) 71629576f8bSSunil Mushran dlm_print_one_lock_resource(res); 71729576f8bSSunil Mushran } 71829576f8bSSunil Mushran 7196714d8e8SKurt Hackel dlm_mark_domain_leaving(dlm); 7206714d8e8SKurt Hackel dlm_leave_domain(dlm); 7218decab3cSSunil Mushran printk(KERN_NOTICE "o2dlm: Leaving domain %s\n", dlm->name); 7225dad6c39SSrinivas Eeda dlm_force_free_mles(dlm); 7236714d8e8SKurt Hackel dlm_complete_dlm_shutdown(dlm); 7246714d8e8SKurt Hackel } 7256714d8e8SKurt Hackel dlm_put(dlm); 7266714d8e8SKurt Hackel } 7276714d8e8SKurt Hackel EXPORT_SYMBOL_GPL(dlm_unregister_domain); 7286714d8e8SKurt Hackel 729d24fbcdaSJoel Becker static int dlm_query_join_proto_check(char *proto_type, int node, 730d24fbcdaSJoel Becker struct dlm_protocol_version *ours, 731d24fbcdaSJoel Becker struct dlm_protocol_version *request) 732d24fbcdaSJoel Becker { 733d24fbcdaSJoel Becker int rc; 734d24fbcdaSJoel Becker struct dlm_protocol_version proto = *request; 735d24fbcdaSJoel Becker 736d24fbcdaSJoel Becker if (!dlm_protocol_compare(ours, &proto)) { 737d24fbcdaSJoel Becker mlog(0, 738d24fbcdaSJoel Becker "node %u wanted to join with %s locking protocol " 739d24fbcdaSJoel Becker "%u.%u, we respond with %u.%u\n", 740d24fbcdaSJoel Becker node, proto_type, 741d24fbcdaSJoel Becker request->pv_major, 742d24fbcdaSJoel Becker request->pv_minor, 743d24fbcdaSJoel Becker proto.pv_major, proto.pv_minor); 744d24fbcdaSJoel Becker request->pv_minor = proto.pv_minor; 745d24fbcdaSJoel Becker rc = 0; 746d24fbcdaSJoel Becker } else { 747d24fbcdaSJoel Becker mlog(ML_NOTICE, 748d24fbcdaSJoel Becker "Node %u wanted to join with %s locking " 749d24fbcdaSJoel Becker "protocol %u.%u, but we have %u.%u, disallowing\n", 750d24fbcdaSJoel Becker node, proto_type, 751d24fbcdaSJoel Becker request->pv_major, 752d24fbcdaSJoel Becker request->pv_minor, 753d24fbcdaSJoel Becker ours->pv_major, 754d24fbcdaSJoel Becker ours->pv_minor); 755d24fbcdaSJoel Becker rc = 1; 756d24fbcdaSJoel Becker } 757d24fbcdaSJoel Becker 758d24fbcdaSJoel Becker return rc; 759d24fbcdaSJoel Becker } 760d24fbcdaSJoel Becker 7610f71b7b4SJoel Becker /* 7620f71b7b4SJoel Becker * struct dlm_query_join_packet is made up of four one-byte fields. They 7630f71b7b4SJoel Becker * are effectively in big-endian order already. However, little-endian 7640f71b7b4SJoel Becker * machines swap them before putting the packet on the wire (because 7650f71b7b4SJoel Becker * query_join's response is a status, and that status is treated as a u32 7660f71b7b4SJoel Becker * on the wire). Thus, a big-endian and little-endian machines will treat 7670f71b7b4SJoel Becker * this structure differently. 7680f71b7b4SJoel Becker * 7690f71b7b4SJoel Becker * The solution is to have little-endian machines swap the structure when 7700f71b7b4SJoel Becker * converting from the structure to the u32 representation. This will 7710f71b7b4SJoel Becker * result in the structure having the correct format on the wire no matter 7720f71b7b4SJoel Becker * the host endian format. 7730f71b7b4SJoel Becker */ 7740f71b7b4SJoel Becker static void dlm_query_join_packet_to_wire(struct dlm_query_join_packet *packet, 7750f71b7b4SJoel Becker u32 *wire) 7760f71b7b4SJoel Becker { 7770f71b7b4SJoel Becker union dlm_query_join_response response; 7780f71b7b4SJoel Becker 7790f71b7b4SJoel Becker response.packet = *packet; 78085158410SAl Viro *wire = be32_to_cpu(response.intval); 7810f71b7b4SJoel Becker } 7820f71b7b4SJoel Becker 7830f71b7b4SJoel Becker static void dlm_query_join_wire_to_packet(u32 wire, 7840f71b7b4SJoel Becker struct dlm_query_join_packet *packet) 7850f71b7b4SJoel Becker { 7860f71b7b4SJoel Becker union dlm_query_join_response response; 7870f71b7b4SJoel Becker 7880f71b7b4SJoel Becker response.intval = cpu_to_be32(wire); 7890f71b7b4SJoel Becker *packet = response.packet; 7900f71b7b4SJoel Becker } 7910f71b7b4SJoel Becker 792d74c9803SKurt Hackel static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, 793d74c9803SKurt Hackel void **ret_data) 7946714d8e8SKurt Hackel { 7956714d8e8SKurt Hackel struct dlm_query_join_request *query; 7960f71b7b4SJoel Becker struct dlm_query_join_packet packet = { 7970f71b7b4SJoel Becker .code = JOIN_DISALLOW, 798d24fbcdaSJoel Becker }; 7996714d8e8SKurt Hackel struct dlm_ctxt *dlm = NULL; 8000f71b7b4SJoel Becker u32 response; 8011faf2894SSrinivas Eeda u8 nodenum; 8026714d8e8SKurt Hackel 8036714d8e8SKurt Hackel query = (struct dlm_query_join_request *) msg->buf; 8046714d8e8SKurt Hackel 8056714d8e8SKurt Hackel mlog(0, "node %u wants to join domain %s\n", query->node_idx, 8066714d8e8SKurt Hackel query->domain); 8076714d8e8SKurt Hackel 8086714d8e8SKurt Hackel /* 8096714d8e8SKurt Hackel * If heartbeat doesn't consider the node live, tell it 8106714d8e8SKurt Hackel * to back off and try again. This gives heartbeat a chance 8116714d8e8SKurt Hackel * to catch up. 8126714d8e8SKurt Hackel */ 81370e82a12SJoseph Qi if (!o2hb_check_node_heartbeating_no_sem(query->node_idx)) { 8146714d8e8SKurt Hackel mlog(0, "node %u is not in our live map yet\n", 8156714d8e8SKurt Hackel query->node_idx); 8166714d8e8SKurt Hackel 8170f71b7b4SJoel Becker packet.code = JOIN_DISALLOW; 8186714d8e8SKurt Hackel goto respond; 8196714d8e8SKurt Hackel } 8206714d8e8SKurt Hackel 8210f71b7b4SJoel Becker packet.code = JOIN_OK_NO_MAP; 8226714d8e8SKurt Hackel 8236714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 8246714d8e8SKurt Hackel dlm = __dlm_lookup_domain_full(query->domain, query->name_len); 8251faf2894SSrinivas Eeda if (!dlm) 8261faf2894SSrinivas Eeda goto unlock_respond; 8271faf2894SSrinivas Eeda 8281faf2894SSrinivas Eeda /* 8291faf2894SSrinivas Eeda * There is a small window where the joining node may not see the 8301faf2894SSrinivas Eeda * node(s) that just left but still part of the cluster. DISALLOW 8311faf2894SSrinivas Eeda * join request if joining node has different node map. 8321faf2894SSrinivas Eeda */ 8331faf2894SSrinivas Eeda nodenum=0; 8341faf2894SSrinivas Eeda while (nodenum < O2NM_MAX_NODES) { 8351faf2894SSrinivas Eeda if (test_bit(nodenum, dlm->domain_map)) { 8361faf2894SSrinivas Eeda if (!byte_test_bit(nodenum, query->node_map)) { 837e4968476SSunil Mushran mlog(0, "disallow join as node %u does not " 838e4968476SSunil Mushran "have node %u in its nodemap\n", 839e4968476SSunil Mushran query->node_idx, nodenum); 8400f71b7b4SJoel Becker packet.code = JOIN_DISALLOW; 8411faf2894SSrinivas Eeda goto unlock_respond; 8421faf2894SSrinivas Eeda } 8431faf2894SSrinivas Eeda } 8441faf2894SSrinivas Eeda nodenum++; 8451faf2894SSrinivas Eeda } 8461faf2894SSrinivas Eeda 8476714d8e8SKurt Hackel /* Once the dlm ctxt is marked as leaving then we don't want 848e2faea4cSKurt Hackel * to be put in someone's domain map. 849e2faea4cSKurt Hackel * Also, explicitly disallow joining at certain troublesome 850e2faea4cSKurt Hackel * times (ie. during recovery). */ 851b3e3e5afSDan Carpenter if (dlm->dlm_state != DLM_CTXT_LEAVING) { 852e2faea4cSKurt Hackel int bit = query->node_idx; 8536714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 8546714d8e8SKurt Hackel 8556714d8e8SKurt Hackel if (dlm->dlm_state == DLM_CTXT_NEW && 8566714d8e8SKurt Hackel dlm->joining_node == DLM_LOCK_RES_OWNER_UNKNOWN) { 8576714d8e8SKurt Hackel /*If this is a brand new context and we 8586714d8e8SKurt Hackel * haven't started our join process yet, then 8596714d8e8SKurt Hackel * the other node won the race. */ 8600f71b7b4SJoel Becker packet.code = JOIN_OK_NO_MAP; 8616714d8e8SKurt Hackel } else if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) { 8626714d8e8SKurt Hackel /* Disallow parallel joins. */ 8630f71b7b4SJoel Becker packet.code = JOIN_DISALLOW; 864e2faea4cSKurt Hackel } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { 865e4968476SSunil Mushran mlog(0, "node %u trying to join, but recovery " 866e2faea4cSKurt Hackel "is ongoing.\n", bit); 8670f71b7b4SJoel Becker packet.code = JOIN_DISALLOW; 868e2faea4cSKurt Hackel } else if (test_bit(bit, dlm->recovery_map)) { 869e4968476SSunil Mushran mlog(0, "node %u trying to join, but it " 870e2faea4cSKurt Hackel "still needs recovery.\n", bit); 8710f71b7b4SJoel Becker packet.code = JOIN_DISALLOW; 872e2faea4cSKurt Hackel } else if (test_bit(bit, dlm->domain_map)) { 873e4968476SSunil Mushran mlog(0, "node %u trying to join, but it " 874e2faea4cSKurt Hackel "is still in the domain! needs recovery?\n", 875e2faea4cSKurt Hackel bit); 8760f71b7b4SJoel Becker packet.code = JOIN_DISALLOW; 8776714d8e8SKurt Hackel } else { 8786714d8e8SKurt Hackel /* Alright we're fully a part of this domain 8796714d8e8SKurt Hackel * so we keep some state as to who's joining 8806714d8e8SKurt Hackel * and indicate to him that needs to be fixed 8816714d8e8SKurt Hackel * up. */ 882d24fbcdaSJoel Becker 883d24fbcdaSJoel Becker /* Make sure we speak compatible locking protocols. */ 884d24fbcdaSJoel Becker if (dlm_query_join_proto_check("DLM", bit, 885d24fbcdaSJoel Becker &dlm->dlm_locking_proto, 886d24fbcdaSJoel Becker &query->dlm_proto)) { 8870f71b7b4SJoel Becker packet.code = JOIN_PROTOCOL_MISMATCH; 888d24fbcdaSJoel Becker } else if (dlm_query_join_proto_check("fs", bit, 889d24fbcdaSJoel Becker &dlm->fs_locking_proto, 890d24fbcdaSJoel Becker &query->fs_proto)) { 8910f71b7b4SJoel Becker packet.code = JOIN_PROTOCOL_MISMATCH; 892d24fbcdaSJoel Becker } else { 8930f71b7b4SJoel Becker packet.dlm_minor = query->dlm_proto.pv_minor; 8940f71b7b4SJoel Becker packet.fs_minor = query->fs_proto.pv_minor; 8950f71b7b4SJoel Becker packet.code = JOIN_OK; 8966714d8e8SKurt Hackel __dlm_set_joining_node(dlm, query->node_idx); 8976714d8e8SKurt Hackel } 898d24fbcdaSJoel Becker } 8996714d8e8SKurt Hackel 9006714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 9016714d8e8SKurt Hackel } 9021faf2894SSrinivas Eeda unlock_respond: 9036714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 9046714d8e8SKurt Hackel 9056714d8e8SKurt Hackel respond: 9060f71b7b4SJoel Becker mlog(0, "We respond with %u\n", packet.code); 9076714d8e8SKurt Hackel 9080f71b7b4SJoel Becker dlm_query_join_packet_to_wire(&packet, &response); 9090f71b7b4SJoel Becker return response; 9106714d8e8SKurt Hackel } 9116714d8e8SKurt Hackel 912d74c9803SKurt Hackel static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, 913d74c9803SKurt Hackel void **ret_data) 9146714d8e8SKurt Hackel { 9156714d8e8SKurt Hackel struct dlm_assert_joined *assert; 9166714d8e8SKurt Hackel struct dlm_ctxt *dlm = NULL; 9176714d8e8SKurt Hackel 9186714d8e8SKurt Hackel assert = (struct dlm_assert_joined *) msg->buf; 9196714d8e8SKurt Hackel 9206714d8e8SKurt Hackel mlog(0, "node %u asserts join on domain %s\n", assert->node_idx, 9216714d8e8SKurt Hackel assert->domain); 9226714d8e8SKurt Hackel 9236714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 9246714d8e8SKurt Hackel dlm = __dlm_lookup_domain_full(assert->domain, assert->name_len); 9256714d8e8SKurt Hackel /* XXX should we consider no dlm ctxt an error? */ 9266714d8e8SKurt Hackel if (dlm) { 9276714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 9286714d8e8SKurt Hackel 9296714d8e8SKurt Hackel /* Alright, this node has officially joined our 9306714d8e8SKurt Hackel * domain. Set him in the map and clean up our 9316714d8e8SKurt Hackel * leftover join state. */ 9326714d8e8SKurt Hackel BUG_ON(dlm->joining_node != assert->node_idx); 93301c6222fSXue jiufei 93401c6222fSXue jiufei if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { 93501c6222fSXue jiufei mlog(0, "dlm recovery is ongoing, disallow join\n"); 93601c6222fSXue jiufei spin_unlock(&dlm->spinlock); 93701c6222fSXue jiufei spin_unlock(&dlm_domain_lock); 93801c6222fSXue jiufei return -EAGAIN; 93901c6222fSXue jiufei } 94001c6222fSXue jiufei 9416714d8e8SKurt Hackel set_bit(assert->node_idx, dlm->domain_map); 942bddefdeeSSunil Mushran clear_bit(assert->node_idx, dlm->exit_domain_map); 9436714d8e8SKurt Hackel __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); 9446714d8e8SKurt Hackel 9458decab3cSSunil Mushran printk(KERN_NOTICE "o2dlm: Node %u joins domain %s ", 946781ee3e2SSunil Mushran assert->node_idx, dlm->name); 9476714d8e8SKurt Hackel __dlm_print_nodes(dlm); 9486714d8e8SKurt Hackel 9496714d8e8SKurt Hackel /* notify anything attached to the heartbeat events */ 9506714d8e8SKurt Hackel dlm_hb_event_notify_attached(dlm, assert->node_idx, 1); 9516714d8e8SKurt Hackel 9526714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 9536714d8e8SKurt Hackel } 9546714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 9556714d8e8SKurt Hackel 9566714d8e8SKurt Hackel return 0; 9576714d8e8SKurt Hackel } 9586714d8e8SKurt Hackel 959ea203441SSunil Mushran static int dlm_match_regions(struct dlm_ctxt *dlm, 960770c4d81SSunil Mushran struct dlm_query_region *qr, 961770c4d81SSunil Mushran char *local, int locallen) 962ea203441SSunil Mushran { 963770c4d81SSunil Mushran char *remote = qr->qr_regions; 964ea203441SSunil Mushran char *l, *r; 965ea203441SSunil Mushran int localnr, i, j, foundit; 966ea203441SSunil Mushran int status = 0; 967ea203441SSunil Mushran 968ea203441SSunil Mushran if (!o2hb_global_heartbeat_active()) { 969ea203441SSunil Mushran if (qr->qr_numregions) { 970ea203441SSunil Mushran mlog(ML_ERROR, "Domain %s: Joining node %d has global " 971ea203441SSunil Mushran "heartbeat enabled but local node %d does not\n", 972ea203441SSunil Mushran qr->qr_domain, qr->qr_node, dlm->node_num); 973ea203441SSunil Mushran status = -EINVAL; 974ea203441SSunil Mushran } 975ea203441SSunil Mushran goto bail; 976ea203441SSunil Mushran } 977ea203441SSunil Mushran 978ea203441SSunil Mushran if (o2hb_global_heartbeat_active() && !qr->qr_numregions) { 979ea203441SSunil Mushran mlog(ML_ERROR, "Domain %s: Local node %d has global " 980ea203441SSunil Mushran "heartbeat enabled but joining node %d does not\n", 981ea203441SSunil Mushran qr->qr_domain, dlm->node_num, qr->qr_node); 982ea203441SSunil Mushran status = -EINVAL; 983ea203441SSunil Mushran goto bail; 984ea203441SSunil Mushran } 985ea203441SSunil Mushran 986ea203441SSunil Mushran r = remote; 987ea203441SSunil Mushran for (i = 0; i < qr->qr_numregions; ++i) { 988ea203441SSunil Mushran mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, r); 989ea203441SSunil Mushran r += O2HB_MAX_REGION_NAME_LEN; 990ea203441SSunil Mushran } 991ea203441SSunil Mushran 992770c4d81SSunil Mushran localnr = min(O2NM_MAX_REGIONS, locallen/O2HB_MAX_REGION_NAME_LEN); 993770c4d81SSunil Mushran localnr = o2hb_get_all_regions(local, (u8)localnr); 994ea203441SSunil Mushran 995ea203441SSunil Mushran /* compare local regions with remote */ 996ea203441SSunil Mushran l = local; 997ea203441SSunil Mushran for (i = 0; i < localnr; ++i) { 998ea203441SSunil Mushran foundit = 0; 999ea203441SSunil Mushran r = remote; 1000ea203441SSunil Mushran for (j = 0; j <= qr->qr_numregions; ++j) { 1001ea203441SSunil Mushran if (!memcmp(l, r, O2HB_MAX_REGION_NAME_LEN)) { 1002ea203441SSunil Mushran foundit = 1; 1003ea203441SSunil Mushran break; 1004ea203441SSunil Mushran } 1005ea203441SSunil Mushran r += O2HB_MAX_REGION_NAME_LEN; 1006ea203441SSunil Mushran } 1007ea203441SSunil Mushran if (!foundit) { 1008ea203441SSunil Mushran status = -EINVAL; 1009ea203441SSunil Mushran mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " 1010ea203441SSunil Mushran "in local node %d but not in joining node %d\n", 1011ea203441SSunil Mushran qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, l, 1012ea203441SSunil Mushran dlm->node_num, qr->qr_node); 1013ea203441SSunil Mushran goto bail; 1014ea203441SSunil Mushran } 1015ea203441SSunil Mushran l += O2HB_MAX_REGION_NAME_LEN; 1016ea203441SSunil Mushran } 1017ea203441SSunil Mushran 1018ea203441SSunil Mushran /* compare remote with local regions */ 1019ea203441SSunil Mushran r = remote; 1020ea203441SSunil Mushran for (i = 0; i < qr->qr_numregions; ++i) { 1021ea203441SSunil Mushran foundit = 0; 1022ea203441SSunil Mushran l = local; 1023ea203441SSunil Mushran for (j = 0; j < localnr; ++j) { 1024ea203441SSunil Mushran if (!memcmp(r, l, O2HB_MAX_REGION_NAME_LEN)) { 1025ea203441SSunil Mushran foundit = 1; 1026ea203441SSunil Mushran break; 1027ea203441SSunil Mushran } 1028ea203441SSunil Mushran l += O2HB_MAX_REGION_NAME_LEN; 1029ea203441SSunil Mushran } 1030ea203441SSunil Mushran if (!foundit) { 1031ea203441SSunil Mushran status = -EINVAL; 1032ea203441SSunil Mushran mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " 1033ea203441SSunil Mushran "in joining node %d but not in local node %d\n", 1034ea203441SSunil Mushran qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, r, 1035ea203441SSunil Mushran qr->qr_node, dlm->node_num); 1036ea203441SSunil Mushran goto bail; 1037ea203441SSunil Mushran } 1038ea203441SSunil Mushran r += O2HB_MAX_REGION_NAME_LEN; 1039ea203441SSunil Mushran } 1040ea203441SSunil Mushran 1041ea203441SSunil Mushran bail: 1042ea203441SSunil Mushran return status; 1043ea203441SSunil Mushran } 1044ea203441SSunil Mushran 1045ea203441SSunil Mushran static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map) 1046ea203441SSunil Mushran { 1047ea203441SSunil Mushran struct dlm_query_region *qr = NULL; 1048ea203441SSunil Mushran int status, ret = 0, i; 1049ea203441SSunil Mushran char *p; 1050ea203441SSunil Mushran 1051ea203441SSunil Mushran if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) 1052ea203441SSunil Mushran goto bail; 1053ea203441SSunil Mushran 1054ea203441SSunil Mushran qr = kzalloc(sizeof(struct dlm_query_region), GFP_KERNEL); 1055ea203441SSunil Mushran if (!qr) { 1056ea203441SSunil Mushran ret = -ENOMEM; 1057ea203441SSunil Mushran mlog_errno(ret); 1058ea203441SSunil Mushran goto bail; 1059ea203441SSunil Mushran } 1060ea203441SSunil Mushran 1061ea203441SSunil Mushran qr->qr_node = dlm->node_num; 1062ea203441SSunil Mushran qr->qr_namelen = strlen(dlm->name); 1063ea203441SSunil Mushran memcpy(qr->qr_domain, dlm->name, qr->qr_namelen); 1064ea203441SSunil Mushran /* if local hb, the numregions will be zero */ 1065ea203441SSunil Mushran if (o2hb_global_heartbeat_active()) 1066ea203441SSunil Mushran qr->qr_numregions = o2hb_get_all_regions(qr->qr_regions, 1067ea203441SSunil Mushran O2NM_MAX_REGIONS); 1068ea203441SSunil Mushran 1069ea203441SSunil Mushran p = qr->qr_regions; 1070ea203441SSunil Mushran for (i = 0; i < qr->qr_numregions; ++i, p += O2HB_MAX_REGION_NAME_LEN) 1071ea203441SSunil Mushran mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, p); 1072ea203441SSunil Mushran 1073ea203441SSunil Mushran i = -1; 1074ea203441SSunil Mushran while ((i = find_next_bit(node_map, O2NM_MAX_NODES, 1075ea203441SSunil Mushran i + 1)) < O2NM_MAX_NODES) { 1076ea203441SSunil Mushran if (i == dlm->node_num) 1077ea203441SSunil Mushran continue; 1078ea203441SSunil Mushran 1079ea203441SSunil Mushran mlog(0, "Sending regions to node %d\n", i); 1080ea203441SSunil Mushran 1081ea203441SSunil Mushran ret = o2net_send_message(DLM_QUERY_REGION, DLM_MOD_KEY, qr, 1082ea203441SSunil Mushran sizeof(struct dlm_query_region), 1083ea203441SSunil Mushran i, &status); 1084ea203441SSunil Mushran if (ret >= 0) 1085ea203441SSunil Mushran ret = status; 1086ea203441SSunil Mushran if (ret) { 1087ea203441SSunil Mushran mlog(ML_ERROR, "Region mismatch %d, node %d\n", 1088ea203441SSunil Mushran ret, i); 1089ea203441SSunil Mushran break; 1090ea203441SSunil Mushran } 1091ea203441SSunil Mushran } 1092ea203441SSunil Mushran 1093ea203441SSunil Mushran bail: 1094ea203441SSunil Mushran kfree(qr); 1095ea203441SSunil Mushran return ret; 1096ea203441SSunil Mushran } 1097ea203441SSunil Mushran 1098ea203441SSunil Mushran static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, 1099ea203441SSunil Mushran void *data, void **ret_data) 1100ea203441SSunil Mushran { 1101ea203441SSunil Mushran struct dlm_query_region *qr; 1102ea203441SSunil Mushran struct dlm_ctxt *dlm = NULL; 1103770c4d81SSunil Mushran char *local = NULL; 1104ea203441SSunil Mushran int status = 0; 1105ea203441SSunil Mushran 1106ea203441SSunil Mushran qr = (struct dlm_query_region *) msg->buf; 1107ea203441SSunil Mushran 1108ea203441SSunil Mushran mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node, 1109ea203441SSunil Mushran qr->qr_domain); 1110ea203441SSunil Mushran 1111770c4d81SSunil Mushran /* buffer used in dlm_mast_regions() */ 1112770c4d81SSunil Mushran local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL); 1113a35ad97cSZhonghua Guo if (!local) 1114a35ad97cSZhonghua Guo return -ENOMEM; 1115770c4d81SSunil Mushran 1116ea203441SSunil Mushran status = -EINVAL; 1117ea203441SSunil Mushran 1118ea203441SSunil Mushran spin_lock(&dlm_domain_lock); 1119ea203441SSunil Mushran dlm = __dlm_lookup_domain_full(qr->qr_domain, qr->qr_namelen); 1120ea203441SSunil Mushran if (!dlm) { 1121ea203441SSunil Mushran mlog(ML_ERROR, "Node %d queried hb regions on domain %s " 1122ea203441SSunil Mushran "before join domain\n", qr->qr_node, qr->qr_domain); 1123a35ad97cSZhonghua Guo goto out_domain_lock; 1124ea203441SSunil Mushran } 1125ea203441SSunil Mushran 1126ea203441SSunil Mushran spin_lock(&dlm->spinlock); 1127ea203441SSunil Mushran if (dlm->joining_node != qr->qr_node) { 1128ea203441SSunil Mushran mlog(ML_ERROR, "Node %d queried hb regions on domain %s " 1129ea203441SSunil Mushran "but joining node is %d\n", qr->qr_node, qr->qr_domain, 1130ea203441SSunil Mushran dlm->joining_node); 1131a35ad97cSZhonghua Guo goto out_dlm_lock; 1132ea203441SSunil Mushran } 1133ea203441SSunil Mushran 1134ea203441SSunil Mushran /* Support for global heartbeat was added in 1.1 */ 1135ea203441SSunil Mushran if (dlm->dlm_locking_proto.pv_major == 1 && 1136ea203441SSunil Mushran dlm->dlm_locking_proto.pv_minor == 0) { 1137ea203441SSunil Mushran mlog(ML_ERROR, "Node %d queried hb regions on domain %s " 1138ea203441SSunil Mushran "but active dlm protocol is %d.%d\n", qr->qr_node, 1139ea203441SSunil Mushran qr->qr_domain, dlm->dlm_locking_proto.pv_major, 1140ea203441SSunil Mushran dlm->dlm_locking_proto.pv_minor); 1141a35ad97cSZhonghua Guo goto out_dlm_lock; 1142ea203441SSunil Mushran } 1143ea203441SSunil Mushran 1144770c4d81SSunil Mushran status = dlm_match_regions(dlm, qr, local, sizeof(qr->qr_regions)); 1145ea203441SSunil Mushran 1146a35ad97cSZhonghua Guo out_dlm_lock: 1147ea203441SSunil Mushran spin_unlock(&dlm->spinlock); 1148a35ad97cSZhonghua Guo 1149a35ad97cSZhonghua Guo out_domain_lock: 1150ea203441SSunil Mushran spin_unlock(&dlm_domain_lock); 1151ea203441SSunil Mushran 1152770c4d81SSunil Mushran kfree(local); 1153770c4d81SSunil Mushran 1154ea203441SSunil Mushran return status; 1155ea203441SSunil Mushran } 1156ea203441SSunil Mushran 115718cfdf1bSSunil Mushran static int dlm_match_nodes(struct dlm_ctxt *dlm, struct dlm_query_nodeinfo *qn) 115818cfdf1bSSunil Mushran { 115918cfdf1bSSunil Mushran struct o2nm_node *local; 116018cfdf1bSSunil Mushran struct dlm_node_info *remote; 116118cfdf1bSSunil Mushran int i, j; 116218cfdf1bSSunil Mushran int status = 0; 116318cfdf1bSSunil Mushran 116418cfdf1bSSunil Mushran for (j = 0; j < qn->qn_numnodes; ++j) 116518cfdf1bSSunil Mushran mlog(0, "Node %3d, %pI4:%u\n", qn->qn_nodes[j].ni_nodenum, 116618cfdf1bSSunil Mushran &(qn->qn_nodes[j].ni_ipv4_address), 116718cfdf1bSSunil Mushran ntohs(qn->qn_nodes[j].ni_ipv4_port)); 116818cfdf1bSSunil Mushran 116918cfdf1bSSunil Mushran for (i = 0; i < O2NM_MAX_NODES && !status; ++i) { 117018cfdf1bSSunil Mushran local = o2nm_get_node_by_num(i); 117118cfdf1bSSunil Mushran remote = NULL; 117218cfdf1bSSunil Mushran for (j = 0; j < qn->qn_numnodes; ++j) { 117318cfdf1bSSunil Mushran if (qn->qn_nodes[j].ni_nodenum == i) { 117418cfdf1bSSunil Mushran remote = &(qn->qn_nodes[j]); 117518cfdf1bSSunil Mushran break; 117618cfdf1bSSunil Mushran } 117718cfdf1bSSunil Mushran } 117818cfdf1bSSunil Mushran 117918cfdf1bSSunil Mushran if (!local && !remote) 118018cfdf1bSSunil Mushran continue; 118118cfdf1bSSunil Mushran 118218cfdf1bSSunil Mushran if ((local && !remote) || (!local && remote)) 118318cfdf1bSSunil Mushran status = -EINVAL; 118418cfdf1bSSunil Mushran 118518cfdf1bSSunil Mushran if (!status && 118618cfdf1bSSunil Mushran ((remote->ni_nodenum != local->nd_num) || 118718cfdf1bSSunil Mushran (remote->ni_ipv4_port != local->nd_ipv4_port) || 118818cfdf1bSSunil Mushran (remote->ni_ipv4_address != local->nd_ipv4_address))) 118918cfdf1bSSunil Mushran status = -EINVAL; 119018cfdf1bSSunil Mushran 119118cfdf1bSSunil Mushran if (status) { 119218cfdf1bSSunil Mushran if (remote && !local) 119318cfdf1bSSunil Mushran mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " 119418cfdf1bSSunil Mushran "registered in joining node %d but not in " 119518cfdf1bSSunil Mushran "local node %d\n", qn->qn_domain, 119618cfdf1bSSunil Mushran remote->ni_nodenum, 119718cfdf1bSSunil Mushran &(remote->ni_ipv4_address), 119818cfdf1bSSunil Mushran ntohs(remote->ni_ipv4_port), 119918cfdf1bSSunil Mushran qn->qn_nodenum, dlm->node_num); 120018cfdf1bSSunil Mushran if (local && !remote) 120118cfdf1bSSunil Mushran mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " 120218cfdf1bSSunil Mushran "registered in local node %d but not in " 120318cfdf1bSSunil Mushran "joining node %d\n", qn->qn_domain, 120418cfdf1bSSunil Mushran local->nd_num, &(local->nd_ipv4_address), 120518cfdf1bSSunil Mushran ntohs(local->nd_ipv4_port), 120618cfdf1bSSunil Mushran dlm->node_num, qn->qn_nodenum); 120718cfdf1bSSunil Mushran BUG_ON((!local && !remote)); 120818cfdf1bSSunil Mushran } 120918cfdf1bSSunil Mushran 121018cfdf1bSSunil Mushran if (local) 121118cfdf1bSSunil Mushran o2nm_node_put(local); 121218cfdf1bSSunil Mushran } 121318cfdf1bSSunil Mushran 121418cfdf1bSSunil Mushran return status; 121518cfdf1bSSunil Mushran } 121618cfdf1bSSunil Mushran 121718cfdf1bSSunil Mushran static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map) 121818cfdf1bSSunil Mushran { 121918cfdf1bSSunil Mushran struct dlm_query_nodeinfo *qn = NULL; 122018cfdf1bSSunil Mushran struct o2nm_node *node; 122118cfdf1bSSunil Mushran int ret = 0, status, count, i; 122218cfdf1bSSunil Mushran 122318cfdf1bSSunil Mushran if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) 122418cfdf1bSSunil Mushran goto bail; 122518cfdf1bSSunil Mushran 122618cfdf1bSSunil Mushran qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL); 122718cfdf1bSSunil Mushran if (!qn) { 122818cfdf1bSSunil Mushran ret = -ENOMEM; 122918cfdf1bSSunil Mushran mlog_errno(ret); 123018cfdf1bSSunil Mushran goto bail; 123118cfdf1bSSunil Mushran } 123218cfdf1bSSunil Mushran 123318cfdf1bSSunil Mushran for (i = 0, count = 0; i < O2NM_MAX_NODES; ++i) { 123418cfdf1bSSunil Mushran node = o2nm_get_node_by_num(i); 123518cfdf1bSSunil Mushran if (!node) 123618cfdf1bSSunil Mushran continue; 123718cfdf1bSSunil Mushran qn->qn_nodes[count].ni_nodenum = node->nd_num; 123818cfdf1bSSunil Mushran qn->qn_nodes[count].ni_ipv4_port = node->nd_ipv4_port; 123918cfdf1bSSunil Mushran qn->qn_nodes[count].ni_ipv4_address = node->nd_ipv4_address; 124018cfdf1bSSunil Mushran mlog(0, "Node %3d, %pI4:%u\n", node->nd_num, 124118cfdf1bSSunil Mushran &(node->nd_ipv4_address), ntohs(node->nd_ipv4_port)); 124218cfdf1bSSunil Mushran ++count; 124318cfdf1bSSunil Mushran o2nm_node_put(node); 124418cfdf1bSSunil Mushran } 124518cfdf1bSSunil Mushran 124618cfdf1bSSunil Mushran qn->qn_nodenum = dlm->node_num; 124718cfdf1bSSunil Mushran qn->qn_numnodes = count; 124818cfdf1bSSunil Mushran qn->qn_namelen = strlen(dlm->name); 124918cfdf1bSSunil Mushran memcpy(qn->qn_domain, dlm->name, qn->qn_namelen); 125018cfdf1bSSunil Mushran 125118cfdf1bSSunil Mushran i = -1; 125218cfdf1bSSunil Mushran while ((i = find_next_bit(node_map, O2NM_MAX_NODES, 125318cfdf1bSSunil Mushran i + 1)) < O2NM_MAX_NODES) { 125418cfdf1bSSunil Mushran if (i == dlm->node_num) 125518cfdf1bSSunil Mushran continue; 125618cfdf1bSSunil Mushran 125718cfdf1bSSunil Mushran mlog(0, "Sending nodeinfo to node %d\n", i); 125818cfdf1bSSunil Mushran 125918cfdf1bSSunil Mushran ret = o2net_send_message(DLM_QUERY_NODEINFO, DLM_MOD_KEY, 126018cfdf1bSSunil Mushran qn, sizeof(struct dlm_query_nodeinfo), 126118cfdf1bSSunil Mushran i, &status); 126218cfdf1bSSunil Mushran if (ret >= 0) 126318cfdf1bSSunil Mushran ret = status; 126418cfdf1bSSunil Mushran if (ret) { 126518cfdf1bSSunil Mushran mlog(ML_ERROR, "node mismatch %d, node %d\n", ret, i); 126618cfdf1bSSunil Mushran break; 126718cfdf1bSSunil Mushran } 126818cfdf1bSSunil Mushran } 126918cfdf1bSSunil Mushran 127018cfdf1bSSunil Mushran bail: 127118cfdf1bSSunil Mushran kfree(qn); 127218cfdf1bSSunil Mushran return ret; 127318cfdf1bSSunil Mushran } 127418cfdf1bSSunil Mushran 127518cfdf1bSSunil Mushran static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len, 127618cfdf1bSSunil Mushran void *data, void **ret_data) 127718cfdf1bSSunil Mushran { 127818cfdf1bSSunil Mushran struct dlm_query_nodeinfo *qn; 127918cfdf1bSSunil Mushran struct dlm_ctxt *dlm = NULL; 128018cfdf1bSSunil Mushran int locked = 0, status = -EINVAL; 128118cfdf1bSSunil Mushran 128218cfdf1bSSunil Mushran qn = (struct dlm_query_nodeinfo *) msg->buf; 128318cfdf1bSSunil Mushran 128418cfdf1bSSunil Mushran mlog(0, "Node %u queries nodes on domain %s\n", qn->qn_nodenum, 128518cfdf1bSSunil Mushran qn->qn_domain); 128618cfdf1bSSunil Mushran 128718cfdf1bSSunil Mushran spin_lock(&dlm_domain_lock); 128818cfdf1bSSunil Mushran dlm = __dlm_lookup_domain_full(qn->qn_domain, qn->qn_namelen); 128918cfdf1bSSunil Mushran if (!dlm) { 129018cfdf1bSSunil Mushran mlog(ML_ERROR, "Node %d queried nodes on domain %s before " 129118cfdf1bSSunil Mushran "join domain\n", qn->qn_nodenum, qn->qn_domain); 129218cfdf1bSSunil Mushran goto bail; 129318cfdf1bSSunil Mushran } 129418cfdf1bSSunil Mushran 129518cfdf1bSSunil Mushran spin_lock(&dlm->spinlock); 129618cfdf1bSSunil Mushran locked = 1; 129718cfdf1bSSunil Mushran if (dlm->joining_node != qn->qn_nodenum) { 129818cfdf1bSSunil Mushran mlog(ML_ERROR, "Node %d queried nodes on domain %s but " 129918cfdf1bSSunil Mushran "joining node is %d\n", qn->qn_nodenum, qn->qn_domain, 130018cfdf1bSSunil Mushran dlm->joining_node); 130118cfdf1bSSunil Mushran goto bail; 130218cfdf1bSSunil Mushran } 130318cfdf1bSSunil Mushran 130418cfdf1bSSunil Mushran /* Support for node query was added in 1.1 */ 130518cfdf1bSSunil Mushran if (dlm->dlm_locking_proto.pv_major == 1 && 130618cfdf1bSSunil Mushran dlm->dlm_locking_proto.pv_minor == 0) { 130718cfdf1bSSunil Mushran mlog(ML_ERROR, "Node %d queried nodes on domain %s " 130818cfdf1bSSunil Mushran "but active dlm protocol is %d.%d\n", qn->qn_nodenum, 130918cfdf1bSSunil Mushran qn->qn_domain, dlm->dlm_locking_proto.pv_major, 131018cfdf1bSSunil Mushran dlm->dlm_locking_proto.pv_minor); 131118cfdf1bSSunil Mushran goto bail; 131218cfdf1bSSunil Mushran } 131318cfdf1bSSunil Mushran 131418cfdf1bSSunil Mushran status = dlm_match_nodes(dlm, qn); 131518cfdf1bSSunil Mushran 131618cfdf1bSSunil Mushran bail: 131718cfdf1bSSunil Mushran if (locked) 131818cfdf1bSSunil Mushran spin_unlock(&dlm->spinlock); 131918cfdf1bSSunil Mushran spin_unlock(&dlm_domain_lock); 132018cfdf1bSSunil Mushran 132118cfdf1bSSunil Mushran return status; 132218cfdf1bSSunil Mushran } 132318cfdf1bSSunil Mushran 1324d74c9803SKurt Hackel static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, 1325d74c9803SKurt Hackel void **ret_data) 13266714d8e8SKurt Hackel { 13276714d8e8SKurt Hackel struct dlm_cancel_join *cancel; 13286714d8e8SKurt Hackel struct dlm_ctxt *dlm = NULL; 13296714d8e8SKurt Hackel 13306714d8e8SKurt Hackel cancel = (struct dlm_cancel_join *) msg->buf; 13316714d8e8SKurt Hackel 13326714d8e8SKurt Hackel mlog(0, "node %u cancels join on domain %s\n", cancel->node_idx, 13336714d8e8SKurt Hackel cancel->domain); 13346714d8e8SKurt Hackel 13356714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 13366714d8e8SKurt Hackel dlm = __dlm_lookup_domain_full(cancel->domain, cancel->name_len); 13376714d8e8SKurt Hackel 13386714d8e8SKurt Hackel if (dlm) { 13396714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 13406714d8e8SKurt Hackel 13416714d8e8SKurt Hackel /* Yikes, this guy wants to cancel his join. No 13426714d8e8SKurt Hackel * problem, we simply cleanup our join state. */ 13436714d8e8SKurt Hackel BUG_ON(dlm->joining_node != cancel->node_idx); 13446714d8e8SKurt Hackel __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); 13456714d8e8SKurt Hackel 13466714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 13476714d8e8SKurt Hackel } 13486714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 13496714d8e8SKurt Hackel 13506714d8e8SKurt Hackel return 0; 13516714d8e8SKurt Hackel } 13526714d8e8SKurt Hackel 13536714d8e8SKurt Hackel static int dlm_send_one_join_cancel(struct dlm_ctxt *dlm, 13546714d8e8SKurt Hackel unsigned int node) 13556714d8e8SKurt Hackel { 13566714d8e8SKurt Hackel int status; 13576714d8e8SKurt Hackel struct dlm_cancel_join cancel_msg; 13586714d8e8SKurt Hackel 13596714d8e8SKurt Hackel memset(&cancel_msg, 0, sizeof(cancel_msg)); 13606714d8e8SKurt Hackel cancel_msg.node_idx = dlm->node_num; 13616714d8e8SKurt Hackel cancel_msg.name_len = strlen(dlm->name); 13626714d8e8SKurt Hackel memcpy(cancel_msg.domain, dlm->name, cancel_msg.name_len); 13636714d8e8SKurt Hackel 13646714d8e8SKurt Hackel status = o2net_send_message(DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY, 13656714d8e8SKurt Hackel &cancel_msg, sizeof(cancel_msg), node, 13666714d8e8SKurt Hackel NULL); 13676714d8e8SKurt Hackel if (status < 0) { 1368a5196ec5SWengang Wang mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " 1369a5196ec5SWengang Wang "node %u\n", status, DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY, 1370a5196ec5SWengang Wang node); 13716714d8e8SKurt Hackel goto bail; 13726714d8e8SKurt Hackel } 13736714d8e8SKurt Hackel 13746714d8e8SKurt Hackel bail: 13756714d8e8SKurt Hackel return status; 13766714d8e8SKurt Hackel } 13776714d8e8SKurt Hackel 13786714d8e8SKurt Hackel /* map_size should be in bytes. */ 13796714d8e8SKurt Hackel static int dlm_send_join_cancels(struct dlm_ctxt *dlm, 13806714d8e8SKurt Hackel unsigned long *node_map, 13816714d8e8SKurt Hackel unsigned int map_size) 13826714d8e8SKurt Hackel { 13836714d8e8SKurt Hackel int status, tmpstat; 13848d67d3c2SJun Piao int node; 13856714d8e8SKurt Hackel 13866714d8e8SKurt Hackel if (map_size != (BITS_TO_LONGS(O2NM_MAX_NODES) * 13876714d8e8SKurt Hackel sizeof(unsigned long))) { 13886714d8e8SKurt Hackel mlog(ML_ERROR, 13896714d8e8SKurt Hackel "map_size %u != BITS_TO_LONGS(O2NM_MAX_NODES) %u\n", 13903a4780a8SAndrew Morton map_size, (unsigned)BITS_TO_LONGS(O2NM_MAX_NODES)); 13916714d8e8SKurt Hackel return -EINVAL; 13926714d8e8SKurt Hackel } 13936714d8e8SKurt Hackel 13946714d8e8SKurt Hackel status = 0; 13956714d8e8SKurt Hackel node = -1; 13966714d8e8SKurt Hackel while ((node = find_next_bit(node_map, O2NM_MAX_NODES, 13976714d8e8SKurt Hackel node + 1)) < O2NM_MAX_NODES) { 13986714d8e8SKurt Hackel if (node == dlm->node_num) 13996714d8e8SKurt Hackel continue; 14006714d8e8SKurt Hackel 14016714d8e8SKurt Hackel tmpstat = dlm_send_one_join_cancel(dlm, node); 14026714d8e8SKurt Hackel if (tmpstat) { 14036714d8e8SKurt Hackel mlog(ML_ERROR, "Error return %d cancelling join on " 14046714d8e8SKurt Hackel "node %d\n", tmpstat, node); 14056714d8e8SKurt Hackel if (!status) 14066714d8e8SKurt Hackel status = tmpstat; 14076714d8e8SKurt Hackel } 14086714d8e8SKurt Hackel } 14096714d8e8SKurt Hackel 14106714d8e8SKurt Hackel if (status) 14116714d8e8SKurt Hackel mlog_errno(status); 14126714d8e8SKurt Hackel return status; 14136714d8e8SKurt Hackel } 14146714d8e8SKurt Hackel 14156714d8e8SKurt Hackel static int dlm_request_join(struct dlm_ctxt *dlm, 14166714d8e8SKurt Hackel int node, 1417d24fbcdaSJoel Becker enum dlm_query_join_response_code *response) 14186714d8e8SKurt Hackel { 1419d24fbcdaSJoel Becker int status; 14206714d8e8SKurt Hackel struct dlm_query_join_request join_msg; 14210f71b7b4SJoel Becker struct dlm_query_join_packet packet; 14220f71b7b4SJoel Becker u32 join_resp; 14236714d8e8SKurt Hackel 14246714d8e8SKurt Hackel mlog(0, "querying node %d\n", node); 14256714d8e8SKurt Hackel 14266714d8e8SKurt Hackel memset(&join_msg, 0, sizeof(join_msg)); 14276714d8e8SKurt Hackel join_msg.node_idx = dlm->node_num; 14286714d8e8SKurt Hackel join_msg.name_len = strlen(dlm->name); 14296714d8e8SKurt Hackel memcpy(join_msg.domain, dlm->name, join_msg.name_len); 1430d24fbcdaSJoel Becker join_msg.dlm_proto = dlm->dlm_locking_proto; 1431d24fbcdaSJoel Becker join_msg.fs_proto = dlm->fs_locking_proto; 14326714d8e8SKurt Hackel 14331faf2894SSrinivas Eeda /* copy live node map to join message */ 14341faf2894SSrinivas Eeda byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES); 14351faf2894SSrinivas Eeda 14366714d8e8SKurt Hackel status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, 1437a5196ec5SWengang Wang sizeof(join_msg), node, &join_resp); 14386714d8e8SKurt Hackel if (status < 0 && status != -ENOPROTOOPT) { 1439a5196ec5SWengang Wang mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " 1440a5196ec5SWengang Wang "node %u\n", status, DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, 1441a5196ec5SWengang Wang node); 14426714d8e8SKurt Hackel goto bail; 14436714d8e8SKurt Hackel } 14440f71b7b4SJoel Becker dlm_query_join_wire_to_packet(join_resp, &packet); 14456714d8e8SKurt Hackel 14466714d8e8SKurt Hackel /* -ENOPROTOOPT from the net code means the other side isn't 14476714d8e8SKurt Hackel listening for our message type -- that's fine, it means 14486714d8e8SKurt Hackel his dlm isn't up, so we can consider him a 'yes' but not 14496714d8e8SKurt Hackel joined into the domain. */ 14506714d8e8SKurt Hackel if (status == -ENOPROTOOPT) { 14516714d8e8SKurt Hackel status = 0; 14526714d8e8SKurt Hackel *response = JOIN_OK_NO_MAP; 145372f6fe1fSNorton.Zhu } else { 14540f71b7b4SJoel Becker *response = packet.code; 145572f6fe1fSNorton.Zhu switch (packet.code) { 145672f6fe1fSNorton.Zhu case JOIN_DISALLOW: 145772f6fe1fSNorton.Zhu case JOIN_OK_NO_MAP: 145872f6fe1fSNorton.Zhu break; 145972f6fe1fSNorton.Zhu case JOIN_PROTOCOL_MISMATCH: 1460d24fbcdaSJoel Becker mlog(ML_NOTICE, 1461d24fbcdaSJoel Becker "This node requested DLM locking protocol %u.%u and " 1462d24fbcdaSJoel Becker "filesystem locking protocol %u.%u. At least one of " 1463d24fbcdaSJoel Becker "the protocol versions on node %d is not compatible, " 1464d24fbcdaSJoel Becker "disconnecting\n", 1465d24fbcdaSJoel Becker dlm->dlm_locking_proto.pv_major, 1466d24fbcdaSJoel Becker dlm->dlm_locking_proto.pv_minor, 1467d24fbcdaSJoel Becker dlm->fs_locking_proto.pv_major, 1468d24fbcdaSJoel Becker dlm->fs_locking_proto.pv_minor, 1469d24fbcdaSJoel Becker node); 1470d24fbcdaSJoel Becker status = -EPROTO; 147172f6fe1fSNorton.Zhu break; 147272f6fe1fSNorton.Zhu case JOIN_OK: 1473d24fbcdaSJoel Becker /* Use the same locking protocol as the remote node */ 14740f71b7b4SJoel Becker dlm->dlm_locking_proto.pv_minor = packet.dlm_minor; 14750f71b7b4SJoel Becker dlm->fs_locking_proto.pv_minor = packet.fs_minor; 1476d24fbcdaSJoel Becker mlog(0, 1477d24fbcdaSJoel Becker "Node %d responds JOIN_OK with DLM locking protocol " 1478d24fbcdaSJoel Becker "%u.%u and fs locking protocol %u.%u\n", 1479d24fbcdaSJoel Becker node, 1480d24fbcdaSJoel Becker dlm->dlm_locking_proto.pv_major, 1481d24fbcdaSJoel Becker dlm->dlm_locking_proto.pv_minor, 1482d24fbcdaSJoel Becker dlm->fs_locking_proto.pv_major, 1483d24fbcdaSJoel Becker dlm->fs_locking_proto.pv_minor); 148472f6fe1fSNorton.Zhu break; 148572f6fe1fSNorton.Zhu default: 14866714d8e8SKurt Hackel status = -EINVAL; 1487d24fbcdaSJoel Becker mlog(ML_ERROR, "invalid response %d from node %u\n", 14880f71b7b4SJoel Becker packet.code, node); 148972f6fe1fSNorton.Zhu /* Reset response to JOIN_DISALLOW */ 149072f6fe1fSNorton.Zhu *response = JOIN_DISALLOW; 149172f6fe1fSNorton.Zhu break; 149272f6fe1fSNorton.Zhu } 14936714d8e8SKurt Hackel } 14946714d8e8SKurt Hackel 14956714d8e8SKurt Hackel mlog(0, "status %d, node %d response is %d\n", status, node, 14966714d8e8SKurt Hackel *response); 14976714d8e8SKurt Hackel 14986714d8e8SKurt Hackel bail: 14996714d8e8SKurt Hackel return status; 15006714d8e8SKurt Hackel } 15016714d8e8SKurt Hackel 15026714d8e8SKurt Hackel static int dlm_send_one_join_assert(struct dlm_ctxt *dlm, 15036714d8e8SKurt Hackel unsigned int node) 15046714d8e8SKurt Hackel { 15056714d8e8SKurt Hackel int status; 150601c6222fSXue jiufei int ret; 15076714d8e8SKurt Hackel struct dlm_assert_joined assert_msg; 15086714d8e8SKurt Hackel 15096714d8e8SKurt Hackel mlog(0, "Sending join assert to node %u\n", node); 15106714d8e8SKurt Hackel 15116714d8e8SKurt Hackel memset(&assert_msg, 0, sizeof(assert_msg)); 15126714d8e8SKurt Hackel assert_msg.node_idx = dlm->node_num; 15136714d8e8SKurt Hackel assert_msg.name_len = strlen(dlm->name); 15146714d8e8SKurt Hackel memcpy(assert_msg.domain, dlm->name, assert_msg.name_len); 15156714d8e8SKurt Hackel 15166714d8e8SKurt Hackel status = o2net_send_message(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, 15176714d8e8SKurt Hackel &assert_msg, sizeof(assert_msg), node, 151801c6222fSXue jiufei &ret); 15196714d8e8SKurt Hackel if (status < 0) 1520a5196ec5SWengang Wang mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " 1521a5196ec5SWengang Wang "node %u\n", status, DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, 1522a5196ec5SWengang Wang node); 152301c6222fSXue jiufei else 152401c6222fSXue jiufei status = ret; 15256714d8e8SKurt Hackel 15266714d8e8SKurt Hackel return status; 15276714d8e8SKurt Hackel } 15286714d8e8SKurt Hackel 15296714d8e8SKurt Hackel static void dlm_send_join_asserts(struct dlm_ctxt *dlm, 15306714d8e8SKurt Hackel unsigned long *node_map) 15316714d8e8SKurt Hackel { 15326714d8e8SKurt Hackel int status, node, live; 15336714d8e8SKurt Hackel 15346714d8e8SKurt Hackel status = 0; 15356714d8e8SKurt Hackel node = -1; 15366714d8e8SKurt Hackel while ((node = find_next_bit(node_map, O2NM_MAX_NODES, 15376714d8e8SKurt Hackel node + 1)) < O2NM_MAX_NODES) { 15386714d8e8SKurt Hackel if (node == dlm->node_num) 15396714d8e8SKurt Hackel continue; 15406714d8e8SKurt Hackel 15416714d8e8SKurt Hackel do { 15426714d8e8SKurt Hackel /* It is very important that this message be 15436714d8e8SKurt Hackel * received so we spin until either the node 15446714d8e8SKurt Hackel * has died or it gets the message. */ 15456714d8e8SKurt Hackel status = dlm_send_one_join_assert(dlm, node); 15466714d8e8SKurt Hackel 15476714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 15486714d8e8SKurt Hackel live = test_bit(node, dlm->live_nodes_map); 15496714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 15506714d8e8SKurt Hackel 15516714d8e8SKurt Hackel if (status) { 15526714d8e8SKurt Hackel mlog(ML_ERROR, "Error return %d asserting " 15536714d8e8SKurt Hackel "join on node %d\n", status, node); 15546714d8e8SKurt Hackel 15556714d8e8SKurt Hackel /* give us some time between errors... */ 15566714d8e8SKurt Hackel if (live) 15576714d8e8SKurt Hackel msleep(DLM_DOMAIN_BACKOFF_MS); 15586714d8e8SKurt Hackel } 15596714d8e8SKurt Hackel } while (status && live); 15606714d8e8SKurt Hackel } 15616714d8e8SKurt Hackel } 15626714d8e8SKurt Hackel 15636714d8e8SKurt Hackel struct domain_join_ctxt { 15646714d8e8SKurt Hackel unsigned long live_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 15656714d8e8SKurt Hackel unsigned long yes_resp_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 15666714d8e8SKurt Hackel }; 15676714d8e8SKurt Hackel 15686714d8e8SKurt Hackel static int dlm_should_restart_join(struct dlm_ctxt *dlm, 15696714d8e8SKurt Hackel struct domain_join_ctxt *ctxt, 1570d24fbcdaSJoel Becker enum dlm_query_join_response_code response) 15716714d8e8SKurt Hackel { 15726714d8e8SKurt Hackel int ret; 15736714d8e8SKurt Hackel 15746714d8e8SKurt Hackel if (response == JOIN_DISALLOW) { 15756714d8e8SKurt Hackel mlog(0, "Latest response of disallow -- should restart\n"); 15766714d8e8SKurt Hackel return 1; 15776714d8e8SKurt Hackel } 15786714d8e8SKurt Hackel 15796714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 15806714d8e8SKurt Hackel /* For now, we restart the process if the node maps have 15816714d8e8SKurt Hackel * changed at all */ 15826714d8e8SKurt Hackel ret = memcmp(ctxt->live_map, dlm->live_nodes_map, 15836714d8e8SKurt Hackel sizeof(dlm->live_nodes_map)); 15846714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 15856714d8e8SKurt Hackel 15866714d8e8SKurt Hackel if (ret) 15876714d8e8SKurt Hackel mlog(0, "Node maps changed -- should restart\n"); 15886714d8e8SKurt Hackel 15896714d8e8SKurt Hackel return ret; 15906714d8e8SKurt Hackel } 15916714d8e8SKurt Hackel 15926714d8e8SKurt Hackel static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) 15936714d8e8SKurt Hackel { 15946714d8e8SKurt Hackel int status = 0, tmpstat, node; 15956714d8e8SKurt Hackel struct domain_join_ctxt *ctxt; 1596d24fbcdaSJoel Becker enum dlm_query_join_response_code response = JOIN_DISALLOW; 15976714d8e8SKurt Hackel 1598ef6b689bSTao Ma mlog(0, "%p", dlm); 15996714d8e8SKurt Hackel 1600cd861280SRobert P. J. Day ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); 16016714d8e8SKurt Hackel if (!ctxt) { 16026714d8e8SKurt Hackel status = -ENOMEM; 16036714d8e8SKurt Hackel mlog_errno(status); 16046714d8e8SKurt Hackel goto bail; 16056714d8e8SKurt Hackel } 16066714d8e8SKurt Hackel 16076714d8e8SKurt Hackel /* group sem locking should work for us here -- we're already 16086714d8e8SKurt Hackel * registered for heartbeat events so filling this should be 16096714d8e8SKurt Hackel * atomic wrt getting those handlers called. */ 16106714d8e8SKurt Hackel o2hb_fill_node_map(dlm->live_nodes_map, sizeof(dlm->live_nodes_map)); 16116714d8e8SKurt Hackel 16126714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 16136714d8e8SKurt Hackel memcpy(ctxt->live_map, dlm->live_nodes_map, sizeof(ctxt->live_map)); 16146714d8e8SKurt Hackel 16156714d8e8SKurt Hackel __dlm_set_joining_node(dlm, dlm->node_num); 16166714d8e8SKurt Hackel 16176714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 16186714d8e8SKurt Hackel 16196714d8e8SKurt Hackel node = -1; 16206714d8e8SKurt Hackel while ((node = find_next_bit(ctxt->live_map, O2NM_MAX_NODES, 16216714d8e8SKurt Hackel node + 1)) < O2NM_MAX_NODES) { 16226714d8e8SKurt Hackel if (node == dlm->node_num) 16236714d8e8SKurt Hackel continue; 16246714d8e8SKurt Hackel 16256714d8e8SKurt Hackel status = dlm_request_join(dlm, node, &response); 16266714d8e8SKurt Hackel if (status < 0) { 16276714d8e8SKurt Hackel mlog_errno(status); 16286714d8e8SKurt Hackel goto bail; 16296714d8e8SKurt Hackel } 16306714d8e8SKurt Hackel 16316714d8e8SKurt Hackel /* Ok, either we got a response or the node doesn't have a 16326714d8e8SKurt Hackel * dlm up. */ 16336714d8e8SKurt Hackel if (response == JOIN_OK) 16346714d8e8SKurt Hackel set_bit(node, ctxt->yes_resp_map); 16356714d8e8SKurt Hackel 16366714d8e8SKurt Hackel if (dlm_should_restart_join(dlm, ctxt, response)) { 16376714d8e8SKurt Hackel status = -EAGAIN; 16386714d8e8SKurt Hackel goto bail; 16396714d8e8SKurt Hackel } 16406714d8e8SKurt Hackel } 16416714d8e8SKurt Hackel 16426714d8e8SKurt Hackel mlog(0, "Yay, done querying nodes!\n"); 16436714d8e8SKurt Hackel 16446714d8e8SKurt Hackel /* Yay, everyone agree's we can join the domain. My domain is 16456714d8e8SKurt Hackel * comprised of all nodes who were put in the 16466714d8e8SKurt Hackel * yes_resp_map. Copy that into our domain map and send a join 16476714d8e8SKurt Hackel * assert message to clean up everyone elses state. */ 16486714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 16496714d8e8SKurt Hackel memcpy(dlm->domain_map, ctxt->yes_resp_map, 16506714d8e8SKurt Hackel sizeof(ctxt->yes_resp_map)); 16516714d8e8SKurt Hackel set_bit(dlm->node_num, dlm->domain_map); 16526714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 16536714d8e8SKurt Hackel 165418cfdf1bSSunil Mushran /* Support for global heartbeat and node info was added in 1.1 */ 16554da6dc29SSunil Mushran if (dlm->dlm_locking_proto.pv_major > 1 || 16564da6dc29SSunil Mushran dlm->dlm_locking_proto.pv_minor > 0) { 165718cfdf1bSSunil Mushran status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map); 165818cfdf1bSSunil Mushran if (status) { 165918cfdf1bSSunil Mushran mlog_errno(status); 166018cfdf1bSSunil Mushran goto bail; 166118cfdf1bSSunil Mushran } 1662ea203441SSunil Mushran status = dlm_send_regions(dlm, ctxt->yes_resp_map); 1663ea203441SSunil Mushran if (status) { 1664ea203441SSunil Mushran mlog_errno(status); 1665ea203441SSunil Mushran goto bail; 1666ea203441SSunil Mushran } 1667ea203441SSunil Mushran } 1668ea203441SSunil Mushran 16696714d8e8SKurt Hackel dlm_send_join_asserts(dlm, ctxt->yes_resp_map); 16706714d8e8SKurt Hackel 16716714d8e8SKurt Hackel /* Joined state *must* be set before the joining node 16726714d8e8SKurt Hackel * information, otherwise the query_join handler may read no 16736714d8e8SKurt Hackel * current joiner but a state of NEW and tell joining nodes 16746714d8e8SKurt Hackel * we're not in the domain. */ 16756714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 16766714d8e8SKurt Hackel dlm->dlm_state = DLM_CTXT_JOINED; 16776714d8e8SKurt Hackel dlm->num_joins++; 16786714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 16796714d8e8SKurt Hackel 16806714d8e8SKurt Hackel bail: 16816714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 16826714d8e8SKurt Hackel __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); 16838decab3cSSunil Mushran if (!status) { 16848decab3cSSunil Mushran printk(KERN_NOTICE "o2dlm: Joining domain %s ", dlm->name); 16856714d8e8SKurt Hackel __dlm_print_nodes(dlm); 16868decab3cSSunil Mushran } 16876714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 16886714d8e8SKurt Hackel 16896714d8e8SKurt Hackel if (ctxt) { 16906714d8e8SKurt Hackel /* Do we need to send a cancel message to any nodes? */ 16916714d8e8SKurt Hackel if (status < 0) { 16926714d8e8SKurt Hackel tmpstat = dlm_send_join_cancels(dlm, 16936714d8e8SKurt Hackel ctxt->yes_resp_map, 16946714d8e8SKurt Hackel sizeof(ctxt->yes_resp_map)); 16956714d8e8SKurt Hackel if (tmpstat < 0) 16966714d8e8SKurt Hackel mlog_errno(tmpstat); 16976714d8e8SKurt Hackel } 16986714d8e8SKurt Hackel kfree(ctxt); 16996714d8e8SKurt Hackel } 17006714d8e8SKurt Hackel 17016714d8e8SKurt Hackel mlog(0, "returning %d\n", status); 17026714d8e8SKurt Hackel return status; 17036714d8e8SKurt Hackel } 17046714d8e8SKurt Hackel 17056714d8e8SKurt Hackel static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm) 17066714d8e8SKurt Hackel { 170758a3158aSSunil Mushran o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_up); 170858a3158aSSunil Mushran o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_down); 17096714d8e8SKurt Hackel o2net_unregister_handler_list(&dlm->dlm_domain_handlers); 17106714d8e8SKurt Hackel } 17116714d8e8SKurt Hackel 17126714d8e8SKurt Hackel static int dlm_register_domain_handlers(struct dlm_ctxt *dlm) 17136714d8e8SKurt Hackel { 17146714d8e8SKurt Hackel int status; 17156714d8e8SKurt Hackel 17166714d8e8SKurt Hackel mlog(0, "registering handlers.\n"); 17176714d8e8SKurt Hackel 17186714d8e8SKurt Hackel o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB, 17196714d8e8SKurt Hackel dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI); 1720cdd09f49SJoseph Qi o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB, 1721cdd09f49SJoseph Qi dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI); 1722cdd09f49SJoseph Qi 172358a3158aSSunil Mushran status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_down); 17246714d8e8SKurt Hackel if (status) 17256714d8e8SKurt Hackel goto bail; 17266714d8e8SKurt Hackel 172758a3158aSSunil Mushran status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_up); 17286714d8e8SKurt Hackel if (status) 17296714d8e8SKurt Hackel goto bail; 17306714d8e8SKurt Hackel 17316714d8e8SKurt Hackel status = o2net_register_handler(DLM_MASTER_REQUEST_MSG, dlm->key, 17326714d8e8SKurt Hackel sizeof(struct dlm_master_request), 17336714d8e8SKurt Hackel dlm_master_request_handler, 1734d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 17356714d8e8SKurt Hackel if (status) 17366714d8e8SKurt Hackel goto bail; 17376714d8e8SKurt Hackel 17386714d8e8SKurt Hackel status = o2net_register_handler(DLM_ASSERT_MASTER_MSG, dlm->key, 17396714d8e8SKurt Hackel sizeof(struct dlm_assert_master), 17406714d8e8SKurt Hackel dlm_assert_master_handler, 17413b8118cfSKurt Hackel dlm, dlm_assert_master_post_handler, 17423b8118cfSKurt Hackel &dlm->dlm_domain_handlers); 17436714d8e8SKurt Hackel if (status) 17446714d8e8SKurt Hackel goto bail; 17456714d8e8SKurt Hackel 17466714d8e8SKurt Hackel status = o2net_register_handler(DLM_CREATE_LOCK_MSG, dlm->key, 17476714d8e8SKurt Hackel sizeof(struct dlm_create_lock), 17486714d8e8SKurt Hackel dlm_create_lock_handler, 1749d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 17506714d8e8SKurt Hackel if (status) 17516714d8e8SKurt Hackel goto bail; 17526714d8e8SKurt Hackel 17536714d8e8SKurt Hackel status = o2net_register_handler(DLM_CONVERT_LOCK_MSG, dlm->key, 17546714d8e8SKurt Hackel DLM_CONVERT_LOCK_MAX_LEN, 17556714d8e8SKurt Hackel dlm_convert_lock_handler, 1756d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 17576714d8e8SKurt Hackel if (status) 17586714d8e8SKurt Hackel goto bail; 17596714d8e8SKurt Hackel 17606714d8e8SKurt Hackel status = o2net_register_handler(DLM_UNLOCK_LOCK_MSG, dlm->key, 17616714d8e8SKurt Hackel DLM_UNLOCK_LOCK_MAX_LEN, 17626714d8e8SKurt Hackel dlm_unlock_lock_handler, 1763d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 17646714d8e8SKurt Hackel if (status) 17656714d8e8SKurt Hackel goto bail; 17666714d8e8SKurt Hackel 17676714d8e8SKurt Hackel status = o2net_register_handler(DLM_PROXY_AST_MSG, dlm->key, 17686714d8e8SKurt Hackel DLM_PROXY_AST_MAX_LEN, 17696714d8e8SKurt Hackel dlm_proxy_ast_handler, 1770d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 17716714d8e8SKurt Hackel if (status) 17726714d8e8SKurt Hackel goto bail; 17736714d8e8SKurt Hackel 17746714d8e8SKurt Hackel status = o2net_register_handler(DLM_EXIT_DOMAIN_MSG, dlm->key, 17756714d8e8SKurt Hackel sizeof(struct dlm_exit_domain), 17766714d8e8SKurt Hackel dlm_exit_domain_handler, 1777d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 17786714d8e8SKurt Hackel if (status) 17796714d8e8SKurt Hackel goto bail; 17806714d8e8SKurt Hackel 1781ba2bf218SKurt Hackel status = o2net_register_handler(DLM_DEREF_LOCKRES_MSG, dlm->key, 1782ba2bf218SKurt Hackel sizeof(struct dlm_deref_lockres), 1783ba2bf218SKurt Hackel dlm_deref_lockres_handler, 1784d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 1785ba2bf218SKurt Hackel if (status) 1786ba2bf218SKurt Hackel goto bail; 1787ba2bf218SKurt Hackel 17886714d8e8SKurt Hackel status = o2net_register_handler(DLM_MIGRATE_REQUEST_MSG, dlm->key, 17896714d8e8SKurt Hackel sizeof(struct dlm_migrate_request), 17906714d8e8SKurt Hackel dlm_migrate_request_handler, 1791d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 17926714d8e8SKurt Hackel if (status) 17936714d8e8SKurt Hackel goto bail; 17946714d8e8SKurt Hackel 17956714d8e8SKurt Hackel status = o2net_register_handler(DLM_MIG_LOCKRES_MSG, dlm->key, 17966714d8e8SKurt Hackel DLM_MIG_LOCKRES_MAX_LEN, 17976714d8e8SKurt Hackel dlm_mig_lockres_handler, 1798d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 17996714d8e8SKurt Hackel if (status) 18006714d8e8SKurt Hackel goto bail; 18016714d8e8SKurt Hackel 18026714d8e8SKurt Hackel status = o2net_register_handler(DLM_MASTER_REQUERY_MSG, dlm->key, 18036714d8e8SKurt Hackel sizeof(struct dlm_master_requery), 18046714d8e8SKurt Hackel dlm_master_requery_handler, 1805d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 18066714d8e8SKurt Hackel if (status) 18076714d8e8SKurt Hackel goto bail; 18086714d8e8SKurt Hackel 18096714d8e8SKurt Hackel status = o2net_register_handler(DLM_LOCK_REQUEST_MSG, dlm->key, 18106714d8e8SKurt Hackel sizeof(struct dlm_lock_request), 18116714d8e8SKurt Hackel dlm_request_all_locks_handler, 1812d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 18136714d8e8SKurt Hackel if (status) 18146714d8e8SKurt Hackel goto bail; 18156714d8e8SKurt Hackel 18166714d8e8SKurt Hackel status = o2net_register_handler(DLM_RECO_DATA_DONE_MSG, dlm->key, 18176714d8e8SKurt Hackel sizeof(struct dlm_reco_data_done), 18186714d8e8SKurt Hackel dlm_reco_data_done_handler, 1819d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 18206714d8e8SKurt Hackel if (status) 18216714d8e8SKurt Hackel goto bail; 18226714d8e8SKurt Hackel 18236714d8e8SKurt Hackel status = o2net_register_handler(DLM_BEGIN_RECO_MSG, dlm->key, 18246714d8e8SKurt Hackel sizeof(struct dlm_begin_reco), 18256714d8e8SKurt Hackel dlm_begin_reco_handler, 1826d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 18276714d8e8SKurt Hackel if (status) 18286714d8e8SKurt Hackel goto bail; 18296714d8e8SKurt Hackel 18306714d8e8SKurt Hackel status = o2net_register_handler(DLM_FINALIZE_RECO_MSG, dlm->key, 18316714d8e8SKurt Hackel sizeof(struct dlm_finalize_reco), 18326714d8e8SKurt Hackel dlm_finalize_reco_handler, 1833d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 18346714d8e8SKurt Hackel if (status) 18356714d8e8SKurt Hackel goto bail; 18366714d8e8SKurt Hackel 1837bddefdeeSSunil Mushran status = o2net_register_handler(DLM_BEGIN_EXIT_DOMAIN_MSG, dlm->key, 1838bddefdeeSSunil Mushran sizeof(struct dlm_exit_domain), 1839bddefdeeSSunil Mushran dlm_begin_exit_domain_handler, 1840bddefdeeSSunil Mushran dlm, NULL, &dlm->dlm_domain_handlers); 184160d663cbSxuejiufei if (status) 184260d663cbSxuejiufei goto bail; 1843bddefdeeSSunil Mushran 184460d663cbSxuejiufei status = o2net_register_handler(DLM_DEREF_LOCKRES_DONE, dlm->key, 184560d663cbSxuejiufei sizeof(struct dlm_deref_lockres_done), 184660d663cbSxuejiufei dlm_deref_lockres_done_handler, 184760d663cbSxuejiufei dlm, NULL, &dlm->dlm_domain_handlers); 18486714d8e8SKurt Hackel bail: 18496714d8e8SKurt Hackel if (status) 18506714d8e8SKurt Hackel dlm_unregister_domain_handlers(dlm); 18516714d8e8SKurt Hackel 18526714d8e8SKurt Hackel return status; 18536714d8e8SKurt Hackel } 18546714d8e8SKurt Hackel 18556714d8e8SKurt Hackel static int dlm_join_domain(struct dlm_ctxt *dlm) 18566714d8e8SKurt Hackel { 18576714d8e8SKurt Hackel int status; 18580dd82141SSunil Mushran unsigned int backoff; 18590dd82141SSunil Mushran unsigned int total_backoff = 0; 18605afc44e2SJoseph Qi char wq_name[O2NM_MAX_NAME_LEN]; 18616714d8e8SKurt Hackel 18626714d8e8SKurt Hackel BUG_ON(!dlm); 18636714d8e8SKurt Hackel 18646714d8e8SKurt Hackel mlog(0, "Join domain %s\n", dlm->name); 18656714d8e8SKurt Hackel 18666714d8e8SKurt Hackel status = dlm_register_domain_handlers(dlm); 18676714d8e8SKurt Hackel if (status) { 18686714d8e8SKurt Hackel mlog_errno(status); 18696714d8e8SKurt Hackel goto bail; 18706714d8e8SKurt Hackel } 18716714d8e8SKurt Hackel 18726714d8e8SKurt Hackel status = dlm_launch_thread(dlm); 18736714d8e8SKurt Hackel if (status < 0) { 18746714d8e8SKurt Hackel mlog_errno(status); 18756714d8e8SKurt Hackel goto bail; 18766714d8e8SKurt Hackel } 18776714d8e8SKurt Hackel 18786714d8e8SKurt Hackel status = dlm_launch_recovery_thread(dlm); 18796714d8e8SKurt Hackel if (status < 0) { 18806714d8e8SKurt Hackel mlog_errno(status); 18816714d8e8SKurt Hackel goto bail; 18826714d8e8SKurt Hackel } 18836714d8e8SKurt Hackel 1884e581595eSGreg Kroah-Hartman dlm_debug_init(dlm); 1885181a9a04SZongxun Wang 18865afc44e2SJoseph Qi snprintf(wq_name, O2NM_MAX_NAME_LEN, "dlm_wq-%s", dlm->name); 1887055fdcffSBhaktipriya Shridhar dlm->dlm_worker = alloc_workqueue(wq_name, WQ_MEM_RECLAIM, 0); 18883156d267SKurt Hackel if (!dlm->dlm_worker) { 18893156d267SKurt Hackel status = -ENOMEM; 18903156d267SKurt Hackel mlog_errno(status); 18913156d267SKurt Hackel goto bail; 18923156d267SKurt Hackel } 18933156d267SKurt Hackel 18946714d8e8SKurt Hackel do { 18956714d8e8SKurt Hackel status = dlm_try_to_join_domain(dlm); 18966714d8e8SKurt Hackel 18976714d8e8SKurt Hackel /* If we're racing another node to the join, then we 18986714d8e8SKurt Hackel * need to back off temporarily and let them 18996714d8e8SKurt Hackel * complete. */ 19000dd82141SSunil Mushran #define DLM_JOIN_TIMEOUT_MSECS 90000 19016714d8e8SKurt Hackel if (status == -EAGAIN) { 19026714d8e8SKurt Hackel if (signal_pending(current)) { 19036714d8e8SKurt Hackel status = -ERESTARTSYS; 19046714d8e8SKurt Hackel goto bail; 19056714d8e8SKurt Hackel } 19066714d8e8SKurt Hackel 19077567c148SXue jiufei if (total_backoff > DLM_JOIN_TIMEOUT_MSECS) { 19080dd82141SSunil Mushran status = -ERESTARTSYS; 19090dd82141SSunil Mushran mlog(ML_NOTICE, "Timed out joining dlm domain " 19100dd82141SSunil Mushran "%s after %u msecs\n", dlm->name, 19117567c148SXue jiufei total_backoff); 19120dd82141SSunil Mushran goto bail; 19130dd82141SSunil Mushran } 19140dd82141SSunil Mushran 19156714d8e8SKurt Hackel /* 19166714d8e8SKurt Hackel * <chip> After you! 19176714d8e8SKurt Hackel * <dale> No, after you! 19186714d8e8SKurt Hackel * <chip> I insist! 19196714d8e8SKurt Hackel * <dale> But you first! 19206714d8e8SKurt Hackel * ... 19216714d8e8SKurt Hackel */ 19226714d8e8SKurt Hackel backoff = (unsigned int)(jiffies & 0x3); 19236714d8e8SKurt Hackel backoff *= DLM_DOMAIN_BACKOFF_MS; 19240dd82141SSunil Mushran total_backoff += backoff; 19256714d8e8SKurt Hackel mlog(0, "backoff %d\n", backoff); 19266714d8e8SKurt Hackel msleep(backoff); 19276714d8e8SKurt Hackel } 19286714d8e8SKurt Hackel } while (status == -EAGAIN); 19296714d8e8SKurt Hackel 19306714d8e8SKurt Hackel if (status < 0) { 19316714d8e8SKurt Hackel mlog_errno(status); 19326714d8e8SKurt Hackel goto bail; 19336714d8e8SKurt Hackel } 19346714d8e8SKurt Hackel 19356714d8e8SKurt Hackel status = 0; 19366714d8e8SKurt Hackel bail: 19376714d8e8SKurt Hackel wake_up(&dlm_domain_events); 19386714d8e8SKurt Hackel 19396714d8e8SKurt Hackel if (status) { 19406714d8e8SKurt Hackel dlm_unregister_domain_handlers(dlm); 1941007dce53SSunil Mushran dlm_debug_shutdown(dlm); 19426714d8e8SKurt Hackel dlm_complete_thread(dlm); 19436714d8e8SKurt Hackel dlm_complete_recovery_thread(dlm); 19443156d267SKurt Hackel dlm_destroy_dlm_worker(dlm); 19456714d8e8SKurt Hackel } 19466714d8e8SKurt Hackel 19476714d8e8SKurt Hackel return status; 19486714d8e8SKurt Hackel } 19496714d8e8SKurt Hackel 19506714d8e8SKurt Hackel static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, 19516714d8e8SKurt Hackel u32 key) 19526714d8e8SKurt Hackel { 19536714d8e8SKurt Hackel int i; 19546325b4a2SSunil Mushran int ret; 19556714d8e8SKurt Hackel struct dlm_ctxt *dlm = NULL; 19566714d8e8SKurt Hackel 1957cd861280SRobert P. J. Day dlm = kzalloc(sizeof(*dlm), GFP_KERNEL); 19586714d8e8SKurt Hackel if (!dlm) { 1959190a7721SJoseph Qi ret = -ENOMEM; 1960190a7721SJoseph Qi mlog_errno(ret); 19616714d8e8SKurt Hackel goto leave; 19626714d8e8SKurt Hackel } 19636714d8e8SKurt Hackel 1964316ce2baSJulia Lawall dlm->name = kstrdup(domain, GFP_KERNEL); 19656714d8e8SKurt Hackel if (dlm->name == NULL) { 1966190a7721SJoseph Qi ret = -ENOMEM; 1967190a7721SJoseph Qi mlog_errno(ret); 19686714d8e8SKurt Hackel goto leave; 19696714d8e8SKurt Hackel } 19706714d8e8SKurt Hackel 197103d864c0SDaniel Phillips dlm->lockres_hash = (struct hlist_head **)dlm_alloc_pagevec(DLM_HASH_PAGES); 197281f2094aSMark Fasheh if (!dlm->lockres_hash) { 1973190a7721SJoseph Qi ret = -ENOMEM; 1974190a7721SJoseph Qi mlog_errno(ret); 19756714d8e8SKurt Hackel goto leave; 19766714d8e8SKurt Hackel } 19776714d8e8SKurt Hackel 197881f2094aSMark Fasheh for (i = 0; i < DLM_HASH_BUCKETS; i++) 197903d864c0SDaniel Phillips INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i)); 19806714d8e8SKurt Hackel 1981e2b66ddcSSunil Mushran dlm->master_hash = (struct hlist_head **) 1982e2b66ddcSSunil Mushran dlm_alloc_pagevec(DLM_HASH_PAGES); 1983e2b66ddcSSunil Mushran if (!dlm->master_hash) { 1984190a7721SJoseph Qi ret = -ENOMEM; 1985190a7721SJoseph Qi mlog_errno(ret); 1986e2b66ddcSSunil Mushran goto leave; 1987e2b66ddcSSunil Mushran } 1988e2b66ddcSSunil Mushran 1989e2b66ddcSSunil Mushran for (i = 0; i < DLM_HASH_BUCKETS; i++) 1990e2b66ddcSSunil Mushran INIT_HLIST_HEAD(dlm_master_hash(dlm, i)); 1991e2b66ddcSSunil Mushran 19926714d8e8SKurt Hackel dlm->key = key; 19936714d8e8SKurt Hackel dlm->node_num = o2nm_this_node(); 19946714d8e8SKurt Hackel 19956325b4a2SSunil Mushran ret = dlm_create_debugfs_subroot(dlm); 1996190a7721SJoseph Qi if (ret < 0) 19976325b4a2SSunil Mushran goto leave; 19986325b4a2SSunil Mushran 19996714d8e8SKurt Hackel spin_lock_init(&dlm->spinlock); 20006714d8e8SKurt Hackel spin_lock_init(&dlm->master_lock); 20016714d8e8SKurt Hackel spin_lock_init(&dlm->ast_lock); 2002b0d4f817SSunil Mushran spin_lock_init(&dlm->track_lock); 20036714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->list); 20046714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->dirty_list); 20056714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->reco.resources); 20066714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->reco.node_data); 20076714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->purge_list); 20086714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->dlm_domain_handlers); 200929576f8bSSunil Mushran INIT_LIST_HEAD(&dlm->tracking_list); 20106714d8e8SKurt Hackel dlm->reco.state = 0; 20116714d8e8SKurt Hackel 20126714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->pending_asts); 20136714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->pending_basts); 20146714d8e8SKurt Hackel 20156714d8e8SKurt Hackel mlog(0, "dlm->recovery_map=%p, &(dlm->recovery_map[0])=%p\n", 20166714d8e8SKurt Hackel dlm->recovery_map, &(dlm->recovery_map[0])); 20176714d8e8SKurt Hackel 20186714d8e8SKurt Hackel memset(dlm->recovery_map, 0, sizeof(dlm->recovery_map)); 20196714d8e8SKurt Hackel memset(dlm->live_nodes_map, 0, sizeof(dlm->live_nodes_map)); 20206714d8e8SKurt Hackel memset(dlm->domain_map, 0, sizeof(dlm->domain_map)); 20216714d8e8SKurt Hackel 20226714d8e8SKurt Hackel dlm->dlm_thread_task = NULL; 20236714d8e8SKurt Hackel dlm->dlm_reco_thread_task = NULL; 20243156d267SKurt Hackel dlm->dlm_worker = NULL; 20256714d8e8SKurt Hackel init_waitqueue_head(&dlm->dlm_thread_wq); 20266714d8e8SKurt Hackel init_waitqueue_head(&dlm->dlm_reco_thread_wq); 20276714d8e8SKurt Hackel init_waitqueue_head(&dlm->reco.event); 20286714d8e8SKurt Hackel init_waitqueue_head(&dlm->ast_wq); 20296714d8e8SKurt Hackel init_waitqueue_head(&dlm->migration_wq); 20306714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->mle_hb_events); 20316714d8e8SKurt Hackel 20326714d8e8SKurt Hackel dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN; 20336714d8e8SKurt Hackel init_waitqueue_head(&dlm->dlm_join_events); 20346714d8e8SKurt Hackel 203560c7ec9eSpiaojun dlm->migrate_done = 0; 203660c7ec9eSpiaojun 20376714d8e8SKurt Hackel dlm->reco.new_master = O2NM_INVALID_NODE_NUM; 20386714d8e8SKurt Hackel dlm->reco.dead_node = O2NM_INVALID_NODE_NUM; 20396714d8e8SKurt Hackel 20406800791aSSunil Mushran atomic_set(&dlm->res_tot_count, 0); 20416800791aSSunil Mushran atomic_set(&dlm->res_cur_count, 0); 20422041d8fdSSunil Mushran for (i = 0; i < DLM_MLE_NUM_TYPES; ++i) { 20432041d8fdSSunil Mushran atomic_set(&dlm->mle_tot_count[i], 0); 20442041d8fdSSunil Mushran atomic_set(&dlm->mle_cur_count[i], 0); 20452041d8fdSSunil Mushran } 20462041d8fdSSunil Mushran 20476714d8e8SKurt Hackel spin_lock_init(&dlm->work_lock); 20486714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->work_list); 2049c4028958SDavid Howells INIT_WORK(&dlm->dispatched_work, dlm_dispatch_work); 20506714d8e8SKurt Hackel 20516714d8e8SKurt Hackel kref_init(&dlm->dlm_refs); 20526714d8e8SKurt Hackel dlm->dlm_state = DLM_CTXT_NEW; 20536714d8e8SKurt Hackel 20546714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->dlm_eviction_callbacks); 20556714d8e8SKurt Hackel 20566714d8e8SKurt Hackel mlog(0, "context init: refcount %u\n", 20572c935bc5SPeter Zijlstra kref_read(&dlm->dlm_refs)); 20586714d8e8SKurt Hackel 20596714d8e8SKurt Hackel leave: 2060190a7721SJoseph Qi if (ret < 0 && dlm) { 2061190a7721SJoseph Qi if (dlm->master_hash) 2062190a7721SJoseph Qi dlm_free_pagevec((void **)dlm->master_hash, 2063190a7721SJoseph Qi DLM_HASH_PAGES); 2064190a7721SJoseph Qi 2065190a7721SJoseph Qi if (dlm->lockres_hash) 2066190a7721SJoseph Qi dlm_free_pagevec((void **)dlm->lockres_hash, 2067190a7721SJoseph Qi DLM_HASH_PAGES); 2068190a7721SJoseph Qi 2069190a7721SJoseph Qi kfree(dlm->name); 2070190a7721SJoseph Qi kfree(dlm); 2071190a7721SJoseph Qi dlm = NULL; 2072190a7721SJoseph Qi } 20736714d8e8SKurt Hackel return dlm; 20746714d8e8SKurt Hackel } 20756714d8e8SKurt Hackel 20766714d8e8SKurt Hackel /* 2077d24fbcdaSJoel Becker * Compare a requested locking protocol version against the current one. 2078d24fbcdaSJoel Becker * 2079d24fbcdaSJoel Becker * If the major numbers are different, they are incompatible. 2080d24fbcdaSJoel Becker * If the current minor is greater than the request, they are incompatible. 2081d24fbcdaSJoel Becker * If the current minor is less than or equal to the request, they are 2082d24fbcdaSJoel Becker * compatible, and the requester should run at the current minor version. 2083d24fbcdaSJoel Becker */ 2084d24fbcdaSJoel Becker static int dlm_protocol_compare(struct dlm_protocol_version *existing, 2085d24fbcdaSJoel Becker struct dlm_protocol_version *request) 2086d24fbcdaSJoel Becker { 2087d24fbcdaSJoel Becker if (existing->pv_major != request->pv_major) 2088d24fbcdaSJoel Becker return 1; 2089d24fbcdaSJoel Becker 2090d24fbcdaSJoel Becker if (existing->pv_minor > request->pv_minor) 2091d24fbcdaSJoel Becker return 1; 2092d24fbcdaSJoel Becker 2093d24fbcdaSJoel Becker if (existing->pv_minor < request->pv_minor) 2094d24fbcdaSJoel Becker request->pv_minor = existing->pv_minor; 2095d24fbcdaSJoel Becker 2096d24fbcdaSJoel Becker return 0; 2097d24fbcdaSJoel Becker } 2098d24fbcdaSJoel Becker 2099d24fbcdaSJoel Becker /* 2100d24fbcdaSJoel Becker * dlm_register_domain: one-time setup per "domain". 2101d24fbcdaSJoel Becker * 2102d24fbcdaSJoel Becker * The filesystem passes in the requested locking version via proto. 2103d24fbcdaSJoel Becker * If registration was successful, proto will contain the negotiated 2104d24fbcdaSJoel Becker * locking protocol. 21056714d8e8SKurt Hackel */ 21066714d8e8SKurt Hackel struct dlm_ctxt * dlm_register_domain(const char *domain, 2107d24fbcdaSJoel Becker u32 key, 2108d24fbcdaSJoel Becker struct dlm_protocol_version *fs_proto) 21096714d8e8SKurt Hackel { 21106714d8e8SKurt Hackel int ret; 21116714d8e8SKurt Hackel struct dlm_ctxt *dlm = NULL; 21126714d8e8SKurt Hackel struct dlm_ctxt *new_ctxt = NULL; 21136714d8e8SKurt Hackel 2114e372357bSDan Carpenter if (strlen(domain) >= O2NM_MAX_NAME_LEN) { 21156714d8e8SKurt Hackel ret = -ENAMETOOLONG; 21166714d8e8SKurt Hackel mlog(ML_ERROR, "domain name length too long\n"); 21176714d8e8SKurt Hackel goto leave; 21186714d8e8SKurt Hackel } 21196714d8e8SKurt Hackel 21206714d8e8SKurt Hackel mlog(0, "register called for domain \"%s\"\n", domain); 21216714d8e8SKurt Hackel 21226714d8e8SKurt Hackel retry: 21236714d8e8SKurt Hackel dlm = NULL; 21246714d8e8SKurt Hackel if (signal_pending(current)) { 21256714d8e8SKurt Hackel ret = -ERESTARTSYS; 21266714d8e8SKurt Hackel mlog_errno(ret); 21276714d8e8SKurt Hackel goto leave; 21286714d8e8SKurt Hackel } 21296714d8e8SKurt Hackel 21306714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 21316714d8e8SKurt Hackel 21326714d8e8SKurt Hackel dlm = __dlm_lookup_domain(domain); 21336714d8e8SKurt Hackel if (dlm) { 21346714d8e8SKurt Hackel if (dlm->dlm_state != DLM_CTXT_JOINED) { 21356714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 21366714d8e8SKurt Hackel 21376714d8e8SKurt Hackel mlog(0, "This ctxt is not joined yet!\n"); 21386714d8e8SKurt Hackel wait_event_interruptible(dlm_domain_events, 21396714d8e8SKurt Hackel dlm_wait_on_domain_helper( 21406714d8e8SKurt Hackel domain)); 21416714d8e8SKurt Hackel goto retry; 21426714d8e8SKurt Hackel } 21436714d8e8SKurt Hackel 2144d24fbcdaSJoel Becker if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) { 21456469272cSJulia Lawall spin_unlock(&dlm_domain_lock); 2146d24fbcdaSJoel Becker mlog(ML_ERROR, 2147d24fbcdaSJoel Becker "Requested locking protocol version is not " 2148d24fbcdaSJoel Becker "compatible with already registered domain " 2149d24fbcdaSJoel Becker "\"%s\"\n", domain); 2150d24fbcdaSJoel Becker ret = -EPROTO; 2151d24fbcdaSJoel Becker goto leave; 2152d24fbcdaSJoel Becker } 2153d24fbcdaSJoel Becker 21546714d8e8SKurt Hackel __dlm_get(dlm); 21556714d8e8SKurt Hackel dlm->num_joins++; 21566714d8e8SKurt Hackel 21576714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 21586714d8e8SKurt Hackel 21596714d8e8SKurt Hackel ret = 0; 21606714d8e8SKurt Hackel goto leave; 21616714d8e8SKurt Hackel } 21626714d8e8SKurt Hackel 21636714d8e8SKurt Hackel /* doesn't exist */ 21646714d8e8SKurt Hackel if (!new_ctxt) { 21656714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 21666714d8e8SKurt Hackel 21676714d8e8SKurt Hackel new_ctxt = dlm_alloc_ctxt(domain, key); 21686714d8e8SKurt Hackel if (new_ctxt) 21696714d8e8SKurt Hackel goto retry; 21706714d8e8SKurt Hackel 21716714d8e8SKurt Hackel ret = -ENOMEM; 21726714d8e8SKurt Hackel mlog_errno(ret); 21736714d8e8SKurt Hackel goto leave; 21746714d8e8SKurt Hackel } 21756714d8e8SKurt Hackel 21766714d8e8SKurt Hackel /* a little variable switch-a-roo here... */ 21776714d8e8SKurt Hackel dlm = new_ctxt; 21786714d8e8SKurt Hackel new_ctxt = NULL; 21796714d8e8SKurt Hackel 21806714d8e8SKurt Hackel /* add the new domain */ 21816714d8e8SKurt Hackel list_add_tail(&dlm->list, &dlm_domains); 21826714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 21836714d8e8SKurt Hackel 2184d24fbcdaSJoel Becker /* 2185d24fbcdaSJoel Becker * Pass the locking protocol version into the join. If the join 2186d24fbcdaSJoel Becker * succeeds, it will have the negotiated protocol set. 2187d24fbcdaSJoel Becker */ 2188d24fbcdaSJoel Becker dlm->dlm_locking_proto = dlm_protocol; 2189d24fbcdaSJoel Becker dlm->fs_locking_proto = *fs_proto; 2190d24fbcdaSJoel Becker 21916714d8e8SKurt Hackel ret = dlm_join_domain(dlm); 21926714d8e8SKurt Hackel if (ret) { 21936714d8e8SKurt Hackel mlog_errno(ret); 21946714d8e8SKurt Hackel dlm_put(dlm); 21956714d8e8SKurt Hackel goto leave; 21966714d8e8SKurt Hackel } 21976714d8e8SKurt Hackel 2198d24fbcdaSJoel Becker /* Tell the caller what locking protocol we negotiated */ 2199d24fbcdaSJoel Becker *fs_proto = dlm->fs_locking_proto; 2200d24fbcdaSJoel Becker 22016714d8e8SKurt Hackel ret = 0; 22026714d8e8SKurt Hackel leave: 22036714d8e8SKurt Hackel if (new_ctxt) 22046714d8e8SKurt Hackel dlm_free_ctxt_mem(new_ctxt); 22056714d8e8SKurt Hackel 22066714d8e8SKurt Hackel if (ret < 0) 22076714d8e8SKurt Hackel dlm = ERR_PTR(ret); 22086714d8e8SKurt Hackel 22096714d8e8SKurt Hackel return dlm; 22106714d8e8SKurt Hackel } 22116714d8e8SKurt Hackel EXPORT_SYMBOL_GPL(dlm_register_domain); 22126714d8e8SKurt Hackel 22136714d8e8SKurt Hackel static LIST_HEAD(dlm_join_handlers); 22146714d8e8SKurt Hackel 22156714d8e8SKurt Hackel static void dlm_unregister_net_handlers(void) 22166714d8e8SKurt Hackel { 22176714d8e8SKurt Hackel o2net_unregister_handler_list(&dlm_join_handlers); 22186714d8e8SKurt Hackel } 22196714d8e8SKurt Hackel 22206714d8e8SKurt Hackel static int dlm_register_net_handlers(void) 22216714d8e8SKurt Hackel { 22226714d8e8SKurt Hackel int status = 0; 22236714d8e8SKurt Hackel 22246714d8e8SKurt Hackel status = o2net_register_handler(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, 22256714d8e8SKurt Hackel sizeof(struct dlm_query_join_request), 22266714d8e8SKurt Hackel dlm_query_join_handler, 2227d74c9803SKurt Hackel NULL, NULL, &dlm_join_handlers); 22286714d8e8SKurt Hackel if (status) 22296714d8e8SKurt Hackel goto bail; 22306714d8e8SKurt Hackel 22316714d8e8SKurt Hackel status = o2net_register_handler(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, 22326714d8e8SKurt Hackel sizeof(struct dlm_assert_joined), 22336714d8e8SKurt Hackel dlm_assert_joined_handler, 2234d74c9803SKurt Hackel NULL, NULL, &dlm_join_handlers); 22356714d8e8SKurt Hackel if (status) 22366714d8e8SKurt Hackel goto bail; 22376714d8e8SKurt Hackel 22386714d8e8SKurt Hackel status = o2net_register_handler(DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY, 22396714d8e8SKurt Hackel sizeof(struct dlm_cancel_join), 22406714d8e8SKurt Hackel dlm_cancel_join_handler, 2241d74c9803SKurt Hackel NULL, NULL, &dlm_join_handlers); 2242ea203441SSunil Mushran if (status) 2243ea203441SSunil Mushran goto bail; 2244ea203441SSunil Mushran 2245ea203441SSunil Mushran status = o2net_register_handler(DLM_QUERY_REGION, DLM_MOD_KEY, 2246ea203441SSunil Mushran sizeof(struct dlm_query_region), 2247ea203441SSunil Mushran dlm_query_region_handler, 2248ea203441SSunil Mushran NULL, NULL, &dlm_join_handlers); 22496714d8e8SKurt Hackel 225018cfdf1bSSunil Mushran if (status) 225118cfdf1bSSunil Mushran goto bail; 225218cfdf1bSSunil Mushran 225318cfdf1bSSunil Mushran status = o2net_register_handler(DLM_QUERY_NODEINFO, DLM_MOD_KEY, 225418cfdf1bSSunil Mushran sizeof(struct dlm_query_nodeinfo), 225518cfdf1bSSunil Mushran dlm_query_nodeinfo_handler, 225618cfdf1bSSunil Mushran NULL, NULL, &dlm_join_handlers); 22576714d8e8SKurt Hackel bail: 22586714d8e8SKurt Hackel if (status < 0) 22596714d8e8SKurt Hackel dlm_unregister_net_handlers(); 22606714d8e8SKurt Hackel 22616714d8e8SKurt Hackel return status; 22626714d8e8SKurt Hackel } 22636714d8e8SKurt Hackel 22646714d8e8SKurt Hackel /* Domain eviction callback handling. 22656714d8e8SKurt Hackel * 22666714d8e8SKurt Hackel * The file system requires notification of node death *before* the 22676714d8e8SKurt Hackel * dlm completes it's recovery work, otherwise it may be able to 22686714d8e8SKurt Hackel * acquire locks on resources requiring recovery. Since the dlm can 22696714d8e8SKurt Hackel * evict a node from it's domain *before* heartbeat fires, a similar 22706714d8e8SKurt Hackel * mechanism is required. */ 22716714d8e8SKurt Hackel 22726714d8e8SKurt Hackel /* Eviction is not expected to happen often, so a per-domain lock is 22736714d8e8SKurt Hackel * not necessary. Eviction callbacks are allowed to sleep for short 22746714d8e8SKurt Hackel * periods of time. */ 22756714d8e8SKurt Hackel static DECLARE_RWSEM(dlm_callback_sem); 22766714d8e8SKurt Hackel 22776714d8e8SKurt Hackel void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, 22786714d8e8SKurt Hackel int node_num) 22796714d8e8SKurt Hackel { 22806714d8e8SKurt Hackel struct dlm_eviction_cb *cb; 22816714d8e8SKurt Hackel 22826714d8e8SKurt Hackel down_read(&dlm_callback_sem); 2283df53cd3bSDong Fang list_for_each_entry(cb, &dlm->dlm_eviction_callbacks, ec_item) { 22846714d8e8SKurt Hackel cb->ec_func(node_num, cb->ec_data); 22856714d8e8SKurt Hackel } 22866714d8e8SKurt Hackel up_read(&dlm_callback_sem); 22876714d8e8SKurt Hackel } 22886714d8e8SKurt Hackel 22896714d8e8SKurt Hackel void dlm_setup_eviction_cb(struct dlm_eviction_cb *cb, 22906714d8e8SKurt Hackel dlm_eviction_func *f, 22916714d8e8SKurt Hackel void *data) 22926714d8e8SKurt Hackel { 22936714d8e8SKurt Hackel INIT_LIST_HEAD(&cb->ec_item); 22946714d8e8SKurt Hackel cb->ec_func = f; 22956714d8e8SKurt Hackel cb->ec_data = data; 22966714d8e8SKurt Hackel } 22976714d8e8SKurt Hackel EXPORT_SYMBOL_GPL(dlm_setup_eviction_cb); 22986714d8e8SKurt Hackel 22996714d8e8SKurt Hackel void dlm_register_eviction_cb(struct dlm_ctxt *dlm, 23006714d8e8SKurt Hackel struct dlm_eviction_cb *cb) 23016714d8e8SKurt Hackel { 23026714d8e8SKurt Hackel down_write(&dlm_callback_sem); 23036714d8e8SKurt Hackel list_add_tail(&cb->ec_item, &dlm->dlm_eviction_callbacks); 23046714d8e8SKurt Hackel up_write(&dlm_callback_sem); 23056714d8e8SKurt Hackel } 23066714d8e8SKurt Hackel EXPORT_SYMBOL_GPL(dlm_register_eviction_cb); 23076714d8e8SKurt Hackel 23086714d8e8SKurt Hackel void dlm_unregister_eviction_cb(struct dlm_eviction_cb *cb) 23096714d8e8SKurt Hackel { 23106714d8e8SKurt Hackel down_write(&dlm_callback_sem); 23116714d8e8SKurt Hackel list_del_init(&cb->ec_item); 23126714d8e8SKurt Hackel up_write(&dlm_callback_sem); 23136714d8e8SKurt Hackel } 23146714d8e8SKurt Hackel EXPORT_SYMBOL_GPL(dlm_unregister_eviction_cb); 23156714d8e8SKurt Hackel 23166714d8e8SKurt Hackel static int __init dlm_init(void) 23176714d8e8SKurt Hackel { 23186714d8e8SKurt Hackel int status; 23196714d8e8SKurt Hackel 23206714d8e8SKurt Hackel status = dlm_init_mle_cache(); 232112eb0035SSunil Mushran if (status) { 232212eb0035SSunil Mushran mlog(ML_ERROR, "Could not create o2dlm_mle slabcache\n"); 2323724bdca9SSunil Mushran goto error; 2324724bdca9SSunil Mushran } 2325724bdca9SSunil Mushran 2326724bdca9SSunil Mushran status = dlm_init_master_caches(); 2327724bdca9SSunil Mushran if (status) { 2328724bdca9SSunil Mushran mlog(ML_ERROR, "Could not create o2dlm_lockres and " 2329724bdca9SSunil Mushran "o2dlm_lockname slabcaches\n"); 2330724bdca9SSunil Mushran goto error; 2331724bdca9SSunil Mushran } 2332724bdca9SSunil Mushran 2333724bdca9SSunil Mushran status = dlm_init_lock_cache(); 2334724bdca9SSunil Mushran if (status) { 2335724bdca9SSunil Mushran mlog(ML_ERROR, "Count not create o2dlm_lock slabcache\n"); 2336724bdca9SSunil Mushran goto error; 233712eb0035SSunil Mushran } 23386714d8e8SKurt Hackel 23396714d8e8SKurt Hackel status = dlm_register_net_handlers(); 23406714d8e8SKurt Hackel if (status) { 2341724bdca9SSunil Mushran mlog(ML_ERROR, "Unable to register network handlers\n"); 2342724bdca9SSunil Mushran goto error; 23436714d8e8SKurt Hackel } 23446714d8e8SKurt Hackel 2345e581595eSGreg Kroah-Hartman dlm_create_debugfs_root(); 23466325b4a2SSunil Mushran 23476714d8e8SKurt Hackel return 0; 2348724bdca9SSunil Mushran error: 23496325b4a2SSunil Mushran dlm_unregister_net_handlers(); 2350724bdca9SSunil Mushran dlm_destroy_lock_cache(); 2351724bdca9SSunil Mushran dlm_destroy_master_caches(); 2352724bdca9SSunil Mushran dlm_destroy_mle_cache(); 2353724bdca9SSunil Mushran return -1; 23546714d8e8SKurt Hackel } 23556714d8e8SKurt Hackel 23566714d8e8SKurt Hackel static void __exit dlm_exit (void) 23576714d8e8SKurt Hackel { 23586325b4a2SSunil Mushran dlm_destroy_debugfs_root(); 23596714d8e8SKurt Hackel dlm_unregister_net_handlers(); 2360724bdca9SSunil Mushran dlm_destroy_lock_cache(); 2361724bdca9SSunil Mushran dlm_destroy_master_caches(); 23626714d8e8SKurt Hackel dlm_destroy_mle_cache(); 23636714d8e8SKurt Hackel } 23646714d8e8SKurt Hackel 23656714d8e8SKurt Hackel MODULE_AUTHOR("Oracle"); 23666714d8e8SKurt Hackel MODULE_LICENSE("GPL"); 2367ff8fb335SGoldwyn Rodrigues MODULE_DESCRIPTION("OCFS2 Distributed Lock Management"); 23686714d8e8SKurt Hackel 23696714d8e8SKurt Hackel module_init(dlm_init); 23706714d8e8SKurt Hackel module_exit(dlm_exit); 2371