16714d8e8SKurt Hackel /* -*- mode: c; c-basic-offset: 8; -*- 26714d8e8SKurt Hackel * vim: noexpandtab sw=8 ts=8 sts=0: 36714d8e8SKurt Hackel * 46714d8e8SKurt Hackel * dlmdomain.c 56714d8e8SKurt Hackel * 66714d8e8SKurt Hackel * defines domain join / leave apis 76714d8e8SKurt Hackel * 86714d8e8SKurt Hackel * Copyright (C) 2004 Oracle. All rights reserved. 96714d8e8SKurt Hackel * 106714d8e8SKurt Hackel * This program is free software; you can redistribute it and/or 116714d8e8SKurt Hackel * modify it under the terms of the GNU General Public 126714d8e8SKurt Hackel * License as published by the Free Software Foundation; either 136714d8e8SKurt Hackel * version 2 of the License, or (at your option) any later version. 146714d8e8SKurt Hackel * 156714d8e8SKurt Hackel * This program is distributed in the hope that it will be useful, 166714d8e8SKurt Hackel * but WITHOUT ANY WARRANTY; without even the implied warranty of 176714d8e8SKurt Hackel * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 186714d8e8SKurt Hackel * General Public License for more details. 196714d8e8SKurt Hackel * 206714d8e8SKurt Hackel * You should have received a copy of the GNU General Public 216714d8e8SKurt Hackel * License along with this program; if not, write to the 226714d8e8SKurt Hackel * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 236714d8e8SKurt Hackel * Boston, MA 021110-1307, USA. 246714d8e8SKurt Hackel * 256714d8e8SKurt Hackel */ 266714d8e8SKurt Hackel 276714d8e8SKurt Hackel #include <linux/module.h> 286714d8e8SKurt Hackel #include <linux/types.h> 296714d8e8SKurt Hackel #include <linux/slab.h> 306714d8e8SKurt Hackel #include <linux/highmem.h> 316714d8e8SKurt Hackel #include <linux/utsname.h> 326714d8e8SKurt Hackel #include <linux/init.h> 336714d8e8SKurt Hackel #include <linux/spinlock.h> 346714d8e8SKurt Hackel #include <linux/delay.h> 356714d8e8SKurt Hackel #include <linux/err.h> 366714d8e8SKurt Hackel 376714d8e8SKurt Hackel #include "cluster/heartbeat.h" 386714d8e8SKurt Hackel #include "cluster/nodemanager.h" 396714d8e8SKurt Hackel #include "cluster/tcp.h" 406714d8e8SKurt Hackel 416714d8e8SKurt Hackel #include "dlmapi.h" 426714d8e8SKurt Hackel #include "dlmcommon.h" 436714d8e8SKurt Hackel 446714d8e8SKurt Hackel #include "dlmdomain.h" 456714d8e8SKurt Hackel 466714d8e8SKurt Hackel #include "dlmver.h" 476714d8e8SKurt Hackel 486714d8e8SKurt Hackel #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) 496714d8e8SKurt Hackel #include "cluster/masklog.h" 506714d8e8SKurt Hackel 511faf2894SSrinivas Eeda /* 521faf2894SSrinivas Eeda * ocfs2 node maps are array of long int, which limits to send them freely 531faf2894SSrinivas Eeda * across the wire due to endianness issues. To workaround this, we convert 541faf2894SSrinivas Eeda * long ints to byte arrays. Following 3 routines are helper functions to 551faf2894SSrinivas Eeda * set/test/copy bits within those array of bytes 561faf2894SSrinivas Eeda */ 571faf2894SSrinivas Eeda static inline void byte_set_bit(u8 nr, u8 map[]) 581faf2894SSrinivas Eeda { 591faf2894SSrinivas Eeda map[nr >> 3] |= (1UL << (nr & 7)); 601faf2894SSrinivas Eeda } 611faf2894SSrinivas Eeda 621faf2894SSrinivas Eeda static inline int byte_test_bit(u8 nr, u8 map[]) 631faf2894SSrinivas Eeda { 641faf2894SSrinivas Eeda return ((1UL << (nr & 7)) & (map[nr >> 3])) != 0; 651faf2894SSrinivas Eeda } 661faf2894SSrinivas Eeda 671faf2894SSrinivas Eeda static inline void byte_copymap(u8 dmap[], unsigned long smap[], 681faf2894SSrinivas Eeda unsigned int sz) 691faf2894SSrinivas Eeda { 701faf2894SSrinivas Eeda unsigned int nn; 711faf2894SSrinivas Eeda 721faf2894SSrinivas Eeda if (!sz) 731faf2894SSrinivas Eeda return; 741faf2894SSrinivas Eeda 751faf2894SSrinivas Eeda memset(dmap, 0, ((sz + 7) >> 3)); 761faf2894SSrinivas Eeda for (nn = 0 ; nn < sz; nn++) 771faf2894SSrinivas Eeda if (test_bit(nn, smap)) 781faf2894SSrinivas Eeda byte_set_bit(nn, dmap); 791faf2894SSrinivas Eeda } 801faf2894SSrinivas Eeda 8103d864c0SDaniel Phillips static void dlm_free_pagevec(void **vec, int pages) 8203d864c0SDaniel Phillips { 8303d864c0SDaniel Phillips while (pages--) 8403d864c0SDaniel Phillips free_page((unsigned long)vec[pages]); 8503d864c0SDaniel Phillips kfree(vec); 8603d864c0SDaniel Phillips } 8703d864c0SDaniel Phillips 8803d864c0SDaniel Phillips static void **dlm_alloc_pagevec(int pages) 8903d864c0SDaniel Phillips { 9003d864c0SDaniel Phillips void **vec = kmalloc(pages * sizeof(void *), GFP_KERNEL); 9103d864c0SDaniel Phillips int i; 9203d864c0SDaniel Phillips 9303d864c0SDaniel Phillips if (!vec) 9403d864c0SDaniel Phillips return NULL; 9503d864c0SDaniel Phillips 9603d864c0SDaniel Phillips for (i = 0; i < pages; i++) 9703d864c0SDaniel Phillips if (!(vec[i] = (void *)__get_free_page(GFP_KERNEL))) 9803d864c0SDaniel Phillips goto out_free; 99c8f33b6eSJoel Becker 100685f1adbSMark Fasheh mlog(0, "Allocated DLM hash pagevec; %d pages (%lu expected), %lu buckets per page\n", 101f5a923d1SMark Fasheh pages, (unsigned long)DLM_HASH_PAGES, 102f5a923d1SMark Fasheh (unsigned long)DLM_BUCKETS_PER_PAGE); 10303d864c0SDaniel Phillips return vec; 10403d864c0SDaniel Phillips out_free: 10503d864c0SDaniel Phillips dlm_free_pagevec(vec, i); 10603d864c0SDaniel Phillips return NULL; 10703d864c0SDaniel Phillips } 10803d864c0SDaniel Phillips 1096714d8e8SKurt Hackel /* 1106714d8e8SKurt Hackel * 1116714d8e8SKurt Hackel * spinlock lock ordering: if multiple locks are needed, obey this ordering: 1126714d8e8SKurt Hackel * dlm_domain_lock 1136714d8e8SKurt Hackel * struct dlm_ctxt->spinlock 1146714d8e8SKurt Hackel * struct dlm_lock_resource->spinlock 1156714d8e8SKurt Hackel * struct dlm_ctxt->master_lock 1166714d8e8SKurt Hackel * struct dlm_ctxt->ast_lock 1176714d8e8SKurt Hackel * dlm_master_list_entry->spinlock 1186714d8e8SKurt Hackel * dlm_lock->spinlock 1196714d8e8SKurt Hackel * 1206714d8e8SKurt Hackel */ 1216714d8e8SKurt Hackel 12234af946aSIngo Molnar DEFINE_SPINLOCK(dlm_domain_lock); 1236714d8e8SKurt Hackel LIST_HEAD(dlm_domains); 1246714d8e8SKurt Hackel static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); 1256714d8e8SKurt Hackel 1266714d8e8SKurt Hackel #define DLM_DOMAIN_BACKOFF_MS 200 1276714d8e8SKurt Hackel 128d74c9803SKurt Hackel static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, 129d74c9803SKurt Hackel void **ret_data); 130d74c9803SKurt Hackel static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, 131d74c9803SKurt Hackel void **ret_data); 132d74c9803SKurt Hackel static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, 133d74c9803SKurt Hackel void **ret_data); 134d74c9803SKurt Hackel static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, 135d74c9803SKurt Hackel void **ret_data); 1366714d8e8SKurt Hackel 1376714d8e8SKurt Hackel static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm); 1386714d8e8SKurt Hackel 1396714d8e8SKurt Hackel void __dlm_unhash_lockres(struct dlm_lock_resource *lockres) 1406714d8e8SKurt Hackel { 14181f2094aSMark Fasheh hlist_del_init(&lockres->hash_node); 1426714d8e8SKurt Hackel dlm_lockres_put(lockres); 1436714d8e8SKurt Hackel } 1446714d8e8SKurt Hackel 1456714d8e8SKurt Hackel void __dlm_insert_lockres(struct dlm_ctxt *dlm, 1466714d8e8SKurt Hackel struct dlm_lock_resource *res) 1476714d8e8SKurt Hackel { 14881f2094aSMark Fasheh struct hlist_head *bucket; 1496714d8e8SKurt Hackel struct qstr *q; 1506714d8e8SKurt Hackel 1516714d8e8SKurt Hackel assert_spin_locked(&dlm->spinlock); 1526714d8e8SKurt Hackel 1536714d8e8SKurt Hackel q = &res->lockname; 15403d864c0SDaniel Phillips bucket = dlm_lockres_hash(dlm, q->hash); 1556714d8e8SKurt Hackel 1566714d8e8SKurt Hackel /* get a reference for our hashtable */ 1576714d8e8SKurt Hackel dlm_lockres_get(res); 1586714d8e8SKurt Hackel 15981f2094aSMark Fasheh hlist_add_head(&res->hash_node, bucket); 1606714d8e8SKurt Hackel } 1616714d8e8SKurt Hackel 162ba2bf218SKurt Hackel struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, 1636714d8e8SKurt Hackel const char *name, 164a3d33291SMark Fasheh unsigned int len, 165a3d33291SMark Fasheh unsigned int hash) 1666714d8e8SKurt Hackel { 16781f2094aSMark Fasheh struct hlist_head *bucket; 1684198985fSDaniel Phillips struct hlist_node *list; 1696714d8e8SKurt Hackel 1706714d8e8SKurt Hackel mlog_entry("%.*s\n", len, name); 1716714d8e8SKurt Hackel 1726714d8e8SKurt Hackel assert_spin_locked(&dlm->spinlock); 1736714d8e8SKurt Hackel 17403d864c0SDaniel Phillips bucket = dlm_lockres_hash(dlm, hash); 17503d864c0SDaniel Phillips 1764198985fSDaniel Phillips hlist_for_each(list, bucket) { 1774198985fSDaniel Phillips struct dlm_lock_resource *res = hlist_entry(list, 1784198985fSDaniel Phillips struct dlm_lock_resource, hash_node); 1794198985fSDaniel Phillips if (res->lockname.name[0] != name[0]) 1804198985fSDaniel Phillips continue; 1814198985fSDaniel Phillips if (unlikely(res->lockname.len != len)) 1824198985fSDaniel Phillips continue; 1834198985fSDaniel Phillips if (memcmp(res->lockname.name + 1, name + 1, len - 1)) 1844198985fSDaniel Phillips continue; 1854198985fSDaniel Phillips dlm_lockres_get(res); 1864198985fSDaniel Phillips return res; 1876714d8e8SKurt Hackel } 1884198985fSDaniel Phillips return NULL; 1896714d8e8SKurt Hackel } 1906714d8e8SKurt Hackel 191ba2bf218SKurt Hackel /* intended to be called by functions which do not care about lock 192ba2bf218SKurt Hackel * resources which are being purged (most net _handler functions). 193ba2bf218SKurt Hackel * this will return NULL for any lock resource which is found but 194ba2bf218SKurt Hackel * currently in the process of dropping its mastery reference. 195ba2bf218SKurt Hackel * use __dlm_lookup_lockres_full when you need the lock resource 196ba2bf218SKurt Hackel * regardless (e.g. dlm_get_lock_resource) */ 197ba2bf218SKurt Hackel struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, 198ba2bf218SKurt Hackel const char *name, 199ba2bf218SKurt Hackel unsigned int len, 200ba2bf218SKurt Hackel unsigned int hash) 201ba2bf218SKurt Hackel { 202ba2bf218SKurt Hackel struct dlm_lock_resource *res = NULL; 203ba2bf218SKurt Hackel 204ba2bf218SKurt Hackel mlog_entry("%.*s\n", len, name); 205ba2bf218SKurt Hackel 206ba2bf218SKurt Hackel assert_spin_locked(&dlm->spinlock); 207ba2bf218SKurt Hackel 208ba2bf218SKurt Hackel res = __dlm_lookup_lockres_full(dlm, name, len, hash); 209ba2bf218SKurt Hackel if (res) { 210ba2bf218SKurt Hackel spin_lock(&res->spinlock); 211ba2bf218SKurt Hackel if (res->state & DLM_LOCK_RES_DROPPING_REF) { 212ba2bf218SKurt Hackel spin_unlock(&res->spinlock); 213ba2bf218SKurt Hackel dlm_lockres_put(res); 214ba2bf218SKurt Hackel return NULL; 215ba2bf218SKurt Hackel } 216ba2bf218SKurt Hackel spin_unlock(&res->spinlock); 217ba2bf218SKurt Hackel } 218ba2bf218SKurt Hackel 219ba2bf218SKurt Hackel return res; 220ba2bf218SKurt Hackel } 221ba2bf218SKurt Hackel 2226714d8e8SKurt Hackel struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, 2236714d8e8SKurt Hackel const char *name, 2246714d8e8SKurt Hackel unsigned int len) 2256714d8e8SKurt Hackel { 2266714d8e8SKurt Hackel struct dlm_lock_resource *res; 227a3d33291SMark Fasheh unsigned int hash = dlm_lockid_hash(name, len); 2286714d8e8SKurt Hackel 2296714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 230a3d33291SMark Fasheh res = __dlm_lookup_lockres(dlm, name, len, hash); 2316714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 2326714d8e8SKurt Hackel return res; 2336714d8e8SKurt Hackel } 2346714d8e8SKurt Hackel 2356714d8e8SKurt Hackel static struct dlm_ctxt * __dlm_lookup_domain_full(const char *domain, int len) 2366714d8e8SKurt Hackel { 2376714d8e8SKurt Hackel struct dlm_ctxt *tmp = NULL; 2386714d8e8SKurt Hackel struct list_head *iter; 2396714d8e8SKurt Hackel 2406714d8e8SKurt Hackel assert_spin_locked(&dlm_domain_lock); 2416714d8e8SKurt Hackel 2426714d8e8SKurt Hackel /* tmp->name here is always NULL terminated, 2436714d8e8SKurt Hackel * but domain may not be! */ 2446714d8e8SKurt Hackel list_for_each(iter, &dlm_domains) { 2456714d8e8SKurt Hackel tmp = list_entry (iter, struct dlm_ctxt, list); 2466714d8e8SKurt Hackel if (strlen(tmp->name) == len && 2476714d8e8SKurt Hackel memcmp(tmp->name, domain, len)==0) 2486714d8e8SKurt Hackel break; 2496714d8e8SKurt Hackel tmp = NULL; 2506714d8e8SKurt Hackel } 2516714d8e8SKurt Hackel 2526714d8e8SKurt Hackel return tmp; 2536714d8e8SKurt Hackel } 2546714d8e8SKurt Hackel 2556714d8e8SKurt Hackel /* For null terminated domain strings ONLY */ 2566714d8e8SKurt Hackel static struct dlm_ctxt * __dlm_lookup_domain(const char *domain) 2576714d8e8SKurt Hackel { 2586714d8e8SKurt Hackel assert_spin_locked(&dlm_domain_lock); 2596714d8e8SKurt Hackel 2606714d8e8SKurt Hackel return __dlm_lookup_domain_full(domain, strlen(domain)); 2616714d8e8SKurt Hackel } 2626714d8e8SKurt Hackel 2636714d8e8SKurt Hackel 2646714d8e8SKurt Hackel /* returns true on one of two conditions: 2656714d8e8SKurt Hackel * 1) the domain does not exist 2666714d8e8SKurt Hackel * 2) the domain exists and it's state is "joined" */ 2676714d8e8SKurt Hackel static int dlm_wait_on_domain_helper(const char *domain) 2686714d8e8SKurt Hackel { 2696714d8e8SKurt Hackel int ret = 0; 2706714d8e8SKurt Hackel struct dlm_ctxt *tmp = NULL; 2716714d8e8SKurt Hackel 2726714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 2736714d8e8SKurt Hackel 2746714d8e8SKurt Hackel tmp = __dlm_lookup_domain(domain); 2756714d8e8SKurt Hackel if (!tmp) 2766714d8e8SKurt Hackel ret = 1; 2776714d8e8SKurt Hackel else if (tmp->dlm_state == DLM_CTXT_JOINED) 2786714d8e8SKurt Hackel ret = 1; 2796714d8e8SKurt Hackel 2806714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 2816714d8e8SKurt Hackel return ret; 2826714d8e8SKurt Hackel } 2836714d8e8SKurt Hackel 2846714d8e8SKurt Hackel static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) 2856714d8e8SKurt Hackel { 28681f2094aSMark Fasheh if (dlm->lockres_hash) 28703d864c0SDaniel Phillips dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); 2886714d8e8SKurt Hackel 2896714d8e8SKurt Hackel if (dlm->name) 2906714d8e8SKurt Hackel kfree(dlm->name); 2916714d8e8SKurt Hackel 2926714d8e8SKurt Hackel kfree(dlm); 2936714d8e8SKurt Hackel } 2946714d8e8SKurt Hackel 2956714d8e8SKurt Hackel /* A little strange - this function will be called while holding 2966714d8e8SKurt Hackel * dlm_domain_lock and is expected to be holding it on the way out. We 2976714d8e8SKurt Hackel * will however drop and reacquire it multiple times */ 2986714d8e8SKurt Hackel static void dlm_ctxt_release(struct kref *kref) 2996714d8e8SKurt Hackel { 3006714d8e8SKurt Hackel struct dlm_ctxt *dlm; 3016714d8e8SKurt Hackel 3026714d8e8SKurt Hackel dlm = container_of(kref, struct dlm_ctxt, dlm_refs); 3036714d8e8SKurt Hackel 3046714d8e8SKurt Hackel BUG_ON(dlm->num_joins); 3056714d8e8SKurt Hackel BUG_ON(dlm->dlm_state == DLM_CTXT_JOINED); 3066714d8e8SKurt Hackel 3076714d8e8SKurt Hackel /* we may still be in the list if we hit an error during join. */ 3086714d8e8SKurt Hackel list_del_init(&dlm->list); 3096714d8e8SKurt Hackel 3106714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 3116714d8e8SKurt Hackel 3126714d8e8SKurt Hackel mlog(0, "freeing memory from domain %s\n", dlm->name); 3136714d8e8SKurt Hackel 3146714d8e8SKurt Hackel wake_up(&dlm_domain_events); 3156714d8e8SKurt Hackel 3166714d8e8SKurt Hackel dlm_free_ctxt_mem(dlm); 3176714d8e8SKurt Hackel 3186714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 3196714d8e8SKurt Hackel } 3206714d8e8SKurt Hackel 3216714d8e8SKurt Hackel void dlm_put(struct dlm_ctxt *dlm) 3226714d8e8SKurt Hackel { 3236714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 3246714d8e8SKurt Hackel kref_put(&dlm->dlm_refs, dlm_ctxt_release); 3256714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 3266714d8e8SKurt Hackel } 3276714d8e8SKurt Hackel 3286714d8e8SKurt Hackel static void __dlm_get(struct dlm_ctxt *dlm) 3296714d8e8SKurt Hackel { 3306714d8e8SKurt Hackel kref_get(&dlm->dlm_refs); 3316714d8e8SKurt Hackel } 3326714d8e8SKurt Hackel 3336714d8e8SKurt Hackel /* given a questionable reference to a dlm object, gets a reference if 3346714d8e8SKurt Hackel * it can find it in the list, otherwise returns NULL in which case 3356714d8e8SKurt Hackel * you shouldn't trust your pointer. */ 3366714d8e8SKurt Hackel struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm) 3376714d8e8SKurt Hackel { 3386714d8e8SKurt Hackel struct list_head *iter; 3396714d8e8SKurt Hackel struct dlm_ctxt *target = NULL; 3406714d8e8SKurt Hackel 3416714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 3426714d8e8SKurt Hackel 3436714d8e8SKurt Hackel list_for_each(iter, &dlm_domains) { 3446714d8e8SKurt Hackel target = list_entry (iter, struct dlm_ctxt, list); 3456714d8e8SKurt Hackel 3466714d8e8SKurt Hackel if (target == dlm) { 3476714d8e8SKurt Hackel __dlm_get(target); 3486714d8e8SKurt Hackel break; 3496714d8e8SKurt Hackel } 3506714d8e8SKurt Hackel 3516714d8e8SKurt Hackel target = NULL; 3526714d8e8SKurt Hackel } 3536714d8e8SKurt Hackel 3546714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 3556714d8e8SKurt Hackel 3566714d8e8SKurt Hackel return target; 3576714d8e8SKurt Hackel } 3586714d8e8SKurt Hackel 3596714d8e8SKurt Hackel int dlm_domain_fully_joined(struct dlm_ctxt *dlm) 3606714d8e8SKurt Hackel { 3616714d8e8SKurt Hackel int ret; 3626714d8e8SKurt Hackel 3636714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 3646714d8e8SKurt Hackel ret = (dlm->dlm_state == DLM_CTXT_JOINED) || 3656714d8e8SKurt Hackel (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN); 3666714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 3676714d8e8SKurt Hackel 3686714d8e8SKurt Hackel return ret; 3696714d8e8SKurt Hackel } 3706714d8e8SKurt Hackel 3713156d267SKurt Hackel static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm) 3723156d267SKurt Hackel { 3733156d267SKurt Hackel if (dlm->dlm_worker) { 3743156d267SKurt Hackel flush_workqueue(dlm->dlm_worker); 3753156d267SKurt Hackel destroy_workqueue(dlm->dlm_worker); 3763156d267SKurt Hackel dlm->dlm_worker = NULL; 3773156d267SKurt Hackel } 3783156d267SKurt Hackel } 3793156d267SKurt Hackel 3806714d8e8SKurt Hackel static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm) 3816714d8e8SKurt Hackel { 3826714d8e8SKurt Hackel dlm_unregister_domain_handlers(dlm); 3836714d8e8SKurt Hackel dlm_complete_thread(dlm); 3846714d8e8SKurt Hackel dlm_complete_recovery_thread(dlm); 3853156d267SKurt Hackel dlm_destroy_dlm_worker(dlm); 3866714d8e8SKurt Hackel 3876714d8e8SKurt Hackel /* We've left the domain. Now we can take ourselves out of the 3886714d8e8SKurt Hackel * list and allow the kref stuff to help us free the 3896714d8e8SKurt Hackel * memory. */ 3906714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 3916714d8e8SKurt Hackel list_del_init(&dlm->list); 3926714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 3936714d8e8SKurt Hackel 3946714d8e8SKurt Hackel /* Wake up anyone waiting for us to remove this domain */ 3956714d8e8SKurt Hackel wake_up(&dlm_domain_events); 3966714d8e8SKurt Hackel } 3976714d8e8SKurt Hackel 398ba2bf218SKurt Hackel static int dlm_migrate_all_locks(struct dlm_ctxt *dlm) 3996714d8e8SKurt Hackel { 400ba2bf218SKurt Hackel int i, num, n, ret = 0; 4016714d8e8SKurt Hackel struct dlm_lock_resource *res; 402ba2bf218SKurt Hackel struct hlist_node *iter; 403ba2bf218SKurt Hackel struct hlist_head *bucket; 404ba2bf218SKurt Hackel int dropped; 4056714d8e8SKurt Hackel 4066714d8e8SKurt Hackel mlog(0, "Migrating locks from domain %s\n", dlm->name); 407ba2bf218SKurt Hackel 408ba2bf218SKurt Hackel num = 0; 4096714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 41081f2094aSMark Fasheh for (i = 0; i < DLM_HASH_BUCKETS; i++) { 411ba2bf218SKurt Hackel redo_bucket: 412ba2bf218SKurt Hackel n = 0; 413ba2bf218SKurt Hackel bucket = dlm_lockres_hash(dlm, i); 414ba2bf218SKurt Hackel iter = bucket->first; 415ba2bf218SKurt Hackel while (iter) { 416ba2bf218SKurt Hackel n++; 417ba2bf218SKurt Hackel res = hlist_entry(iter, struct dlm_lock_resource, 418ba2bf218SKurt Hackel hash_node); 4196714d8e8SKurt Hackel dlm_lockres_get(res); 420ba2bf218SKurt Hackel /* migrate, if necessary. this will drop the dlm 421ba2bf218SKurt Hackel * spinlock and retake it if it does migration. */ 422ba2bf218SKurt Hackel dropped = dlm_empty_lockres(dlm, res); 4236714d8e8SKurt Hackel 424ba2bf218SKurt Hackel spin_lock(&res->spinlock); 425ba2bf218SKurt Hackel __dlm_lockres_calc_usage(dlm, res); 426ba2bf218SKurt Hackel iter = res->hash_node.next; 427ba2bf218SKurt Hackel spin_unlock(&res->spinlock); 428ba2bf218SKurt Hackel 4296714d8e8SKurt Hackel dlm_lockres_put(res); 430ba2bf218SKurt Hackel 431ba2bf218SKurt Hackel cond_resched_lock(&dlm->spinlock); 432ba2bf218SKurt Hackel 433ba2bf218SKurt Hackel if (dropped) 434ba2bf218SKurt Hackel goto redo_bucket; 4356714d8e8SKurt Hackel } 436ba2bf218SKurt Hackel num += n; 437ba2bf218SKurt Hackel mlog(0, "%s: touched %d lockreses in bucket %d " 438ba2bf218SKurt Hackel "(tot=%d)\n", dlm->name, n, i, num); 4396714d8e8SKurt Hackel } 4406714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 441ba2bf218SKurt Hackel wake_up(&dlm->dlm_thread_wq); 4426714d8e8SKurt Hackel 443ba2bf218SKurt Hackel /* let the dlm thread take care of purging, keep scanning until 444ba2bf218SKurt Hackel * nothing remains in the hash */ 445ba2bf218SKurt Hackel if (num) { 446ba2bf218SKurt Hackel mlog(0, "%s: %d lock resources in hash last pass\n", 447ba2bf218SKurt Hackel dlm->name, num); 448ba2bf218SKurt Hackel ret = -EAGAIN; 449ba2bf218SKurt Hackel } 4506714d8e8SKurt Hackel mlog(0, "DONE Migrating locks from domain %s\n", dlm->name); 451ba2bf218SKurt Hackel return ret; 4526714d8e8SKurt Hackel } 4536714d8e8SKurt Hackel 4546714d8e8SKurt Hackel static int dlm_no_joining_node(struct dlm_ctxt *dlm) 4556714d8e8SKurt Hackel { 4566714d8e8SKurt Hackel int ret; 4576714d8e8SKurt Hackel 4586714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 4596714d8e8SKurt Hackel ret = dlm->joining_node == DLM_LOCK_RES_OWNER_UNKNOWN; 4606714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 4616714d8e8SKurt Hackel 4626714d8e8SKurt Hackel return ret; 4636714d8e8SKurt Hackel } 4646714d8e8SKurt Hackel 4656714d8e8SKurt Hackel static void dlm_mark_domain_leaving(struct dlm_ctxt *dlm) 4666714d8e8SKurt Hackel { 4676714d8e8SKurt Hackel /* Yikes, a double spinlock! I need domain_lock for the dlm 4686714d8e8SKurt Hackel * state and the dlm spinlock for join state... Sorry! */ 4696714d8e8SKurt Hackel again: 4706714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 4716714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 4726714d8e8SKurt Hackel 4736714d8e8SKurt Hackel if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) { 4746714d8e8SKurt Hackel mlog(0, "Node %d is joining, we wait on it.\n", 4756714d8e8SKurt Hackel dlm->joining_node); 4766714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 4776714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 4786714d8e8SKurt Hackel 4796714d8e8SKurt Hackel wait_event(dlm->dlm_join_events, dlm_no_joining_node(dlm)); 4806714d8e8SKurt Hackel goto again; 4816714d8e8SKurt Hackel } 4826714d8e8SKurt Hackel 4836714d8e8SKurt Hackel dlm->dlm_state = DLM_CTXT_LEAVING; 4846714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 4856714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 4866714d8e8SKurt Hackel } 4876714d8e8SKurt Hackel 4886714d8e8SKurt Hackel static void __dlm_print_nodes(struct dlm_ctxt *dlm) 4896714d8e8SKurt Hackel { 4906714d8e8SKurt Hackel int node = -1; 4916714d8e8SKurt Hackel 4926714d8e8SKurt Hackel assert_spin_locked(&dlm->spinlock); 4936714d8e8SKurt Hackel 494781ee3e2SSunil Mushran printk(KERN_INFO "ocfs2_dlm: Nodes in domain (\"%s\"): ", dlm->name); 4956714d8e8SKurt Hackel 4966714d8e8SKurt Hackel while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 4976714d8e8SKurt Hackel node + 1)) < O2NM_MAX_NODES) { 498781ee3e2SSunil Mushran printk("%d ", node); 4996714d8e8SKurt Hackel } 500781ee3e2SSunil Mushran printk("\n"); 5016714d8e8SKurt Hackel } 5026714d8e8SKurt Hackel 503d74c9803SKurt Hackel static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, 504d74c9803SKurt Hackel void **ret_data) 5056714d8e8SKurt Hackel { 5066714d8e8SKurt Hackel struct dlm_ctxt *dlm = data; 5076714d8e8SKurt Hackel unsigned int node; 5086714d8e8SKurt Hackel struct dlm_exit_domain *exit_msg = (struct dlm_exit_domain *) msg->buf; 5096714d8e8SKurt Hackel 5106714d8e8SKurt Hackel mlog_entry("%p %u %p", msg, len, data); 5116714d8e8SKurt Hackel 5126714d8e8SKurt Hackel if (!dlm_grab(dlm)) 5136714d8e8SKurt Hackel return 0; 5146714d8e8SKurt Hackel 5156714d8e8SKurt Hackel node = exit_msg->node_idx; 5166714d8e8SKurt Hackel 517781ee3e2SSunil Mushran printk(KERN_INFO "ocfs2_dlm: Node %u leaves domain %s\n", node, dlm->name); 5186714d8e8SKurt Hackel 5196714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 5206714d8e8SKurt Hackel clear_bit(node, dlm->domain_map); 5216714d8e8SKurt Hackel __dlm_print_nodes(dlm); 5226714d8e8SKurt Hackel 5236714d8e8SKurt Hackel /* notify anything attached to the heartbeat events */ 5246714d8e8SKurt Hackel dlm_hb_event_notify_attached(dlm, node, 0); 5256714d8e8SKurt Hackel 5266714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 5276714d8e8SKurt Hackel 5286714d8e8SKurt Hackel dlm_put(dlm); 5296714d8e8SKurt Hackel 5306714d8e8SKurt Hackel return 0; 5316714d8e8SKurt Hackel } 5326714d8e8SKurt Hackel 5336714d8e8SKurt Hackel static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, 5346714d8e8SKurt Hackel unsigned int node) 5356714d8e8SKurt Hackel { 5366714d8e8SKurt Hackel int status; 5376714d8e8SKurt Hackel struct dlm_exit_domain leave_msg; 5386714d8e8SKurt Hackel 5396714d8e8SKurt Hackel mlog(0, "Asking node %u if we can leave the domain %s me = %u\n", 5406714d8e8SKurt Hackel node, dlm->name, dlm->node_num); 5416714d8e8SKurt Hackel 5426714d8e8SKurt Hackel memset(&leave_msg, 0, sizeof(leave_msg)); 5436714d8e8SKurt Hackel leave_msg.node_idx = dlm->node_num; 5446714d8e8SKurt Hackel 5456714d8e8SKurt Hackel status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key, 5466714d8e8SKurt Hackel &leave_msg, sizeof(leave_msg), node, 5476714d8e8SKurt Hackel NULL); 5486714d8e8SKurt Hackel 5496714d8e8SKurt Hackel mlog(0, "status return %d from o2net_send_message\n", status); 5506714d8e8SKurt Hackel 5516714d8e8SKurt Hackel return status; 5526714d8e8SKurt Hackel } 5536714d8e8SKurt Hackel 5546714d8e8SKurt Hackel 5556714d8e8SKurt Hackel static void dlm_leave_domain(struct dlm_ctxt *dlm) 5566714d8e8SKurt Hackel { 5576714d8e8SKurt Hackel int node, clear_node, status; 5586714d8e8SKurt Hackel 5596714d8e8SKurt Hackel /* At this point we've migrated away all our locks and won't 5606714d8e8SKurt Hackel * accept mastership of new ones. The dlm is responsible for 5616714d8e8SKurt Hackel * almost nothing now. We make sure not to confuse any joining 5626714d8e8SKurt Hackel * nodes and then commence shutdown procedure. */ 5636714d8e8SKurt Hackel 5646714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 5656714d8e8SKurt Hackel /* Clear ourselves from the domain map */ 5666714d8e8SKurt Hackel clear_bit(dlm->node_num, dlm->domain_map); 5676714d8e8SKurt Hackel while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 5686714d8e8SKurt Hackel 0)) < O2NM_MAX_NODES) { 5696714d8e8SKurt Hackel /* Drop the dlm spinlock. This is safe wrt the domain_map. 5706714d8e8SKurt Hackel * -nodes cannot be added now as the 5716714d8e8SKurt Hackel * query_join_handlers knows to respond with OK_NO_MAP 5726714d8e8SKurt Hackel * -we catch the right network errors if a node is 5736714d8e8SKurt Hackel * removed from the map while we're sending him the 5746714d8e8SKurt Hackel * exit message. */ 5756714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 5766714d8e8SKurt Hackel 5776714d8e8SKurt Hackel clear_node = 1; 5786714d8e8SKurt Hackel 5796714d8e8SKurt Hackel status = dlm_send_one_domain_exit(dlm, node); 5806714d8e8SKurt Hackel if (status < 0 && 5816714d8e8SKurt Hackel status != -ENOPROTOOPT && 5826714d8e8SKurt Hackel status != -ENOTCONN) { 5836714d8e8SKurt Hackel mlog(ML_NOTICE, "Error %d sending domain exit message " 5846714d8e8SKurt Hackel "to node %d\n", status, node); 5856714d8e8SKurt Hackel 5866714d8e8SKurt Hackel /* Not sure what to do here but lets sleep for 5876714d8e8SKurt Hackel * a bit in case this was a transient 5886714d8e8SKurt Hackel * error... */ 5896714d8e8SKurt Hackel msleep(DLM_DOMAIN_BACKOFF_MS); 5906714d8e8SKurt Hackel clear_node = 0; 5916714d8e8SKurt Hackel } 5926714d8e8SKurt Hackel 5936714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 5946714d8e8SKurt Hackel /* If we're not clearing the node bit then we intend 5956714d8e8SKurt Hackel * to loop back around to try again. */ 5966714d8e8SKurt Hackel if (clear_node) 5976714d8e8SKurt Hackel clear_bit(node, dlm->domain_map); 5986714d8e8SKurt Hackel } 5996714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 6006714d8e8SKurt Hackel } 6016714d8e8SKurt Hackel 6026714d8e8SKurt Hackel int dlm_joined(struct dlm_ctxt *dlm) 6036714d8e8SKurt Hackel { 6046714d8e8SKurt Hackel int ret = 0; 6056714d8e8SKurt Hackel 6066714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 6076714d8e8SKurt Hackel 6086714d8e8SKurt Hackel if (dlm->dlm_state == DLM_CTXT_JOINED) 6096714d8e8SKurt Hackel ret = 1; 6106714d8e8SKurt Hackel 6116714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 6126714d8e8SKurt Hackel 6136714d8e8SKurt Hackel return ret; 6146714d8e8SKurt Hackel } 6156714d8e8SKurt Hackel 6166714d8e8SKurt Hackel int dlm_shutting_down(struct dlm_ctxt *dlm) 6176714d8e8SKurt Hackel { 6186714d8e8SKurt Hackel int ret = 0; 6196714d8e8SKurt Hackel 6206714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 6216714d8e8SKurt Hackel 6226714d8e8SKurt Hackel if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN) 6236714d8e8SKurt Hackel ret = 1; 6246714d8e8SKurt Hackel 6256714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 6266714d8e8SKurt Hackel 6276714d8e8SKurt Hackel return ret; 6286714d8e8SKurt Hackel } 6296714d8e8SKurt Hackel 6306714d8e8SKurt Hackel void dlm_unregister_domain(struct dlm_ctxt *dlm) 6316714d8e8SKurt Hackel { 6326714d8e8SKurt Hackel int leave = 0; 6336714d8e8SKurt Hackel 6346714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 6356714d8e8SKurt Hackel BUG_ON(dlm->dlm_state != DLM_CTXT_JOINED); 6366714d8e8SKurt Hackel BUG_ON(!dlm->num_joins); 6376714d8e8SKurt Hackel 6386714d8e8SKurt Hackel dlm->num_joins--; 6396714d8e8SKurt Hackel if (!dlm->num_joins) { 6406714d8e8SKurt Hackel /* We mark it "in shutdown" now so new register 6416714d8e8SKurt Hackel * requests wait until we've completely left the 6426714d8e8SKurt Hackel * domain. Don't use DLM_CTXT_LEAVING yet as we still 6436714d8e8SKurt Hackel * want new domain joins to communicate with us at 6446714d8e8SKurt Hackel * least until we've completed migration of our 6456714d8e8SKurt Hackel * resources. */ 6466714d8e8SKurt Hackel dlm->dlm_state = DLM_CTXT_IN_SHUTDOWN; 6476714d8e8SKurt Hackel leave = 1; 6486714d8e8SKurt Hackel } 6496714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 6506714d8e8SKurt Hackel 6516714d8e8SKurt Hackel if (leave) { 6526714d8e8SKurt Hackel mlog(0, "shutting down domain %s\n", dlm->name); 6536714d8e8SKurt Hackel 6546714d8e8SKurt Hackel /* We changed dlm state, notify the thread */ 6556714d8e8SKurt Hackel dlm_kick_thread(dlm, NULL); 6566714d8e8SKurt Hackel 657ba2bf218SKurt Hackel while (dlm_migrate_all_locks(dlm)) { 658ba2bf218SKurt Hackel mlog(0, "%s: more migration to do\n", dlm->name); 659ba2bf218SKurt Hackel } 6606714d8e8SKurt Hackel dlm_mark_domain_leaving(dlm); 6616714d8e8SKurt Hackel dlm_leave_domain(dlm); 6626714d8e8SKurt Hackel dlm_complete_dlm_shutdown(dlm); 6636714d8e8SKurt Hackel } 6646714d8e8SKurt Hackel dlm_put(dlm); 6656714d8e8SKurt Hackel } 6666714d8e8SKurt Hackel EXPORT_SYMBOL_GPL(dlm_unregister_domain); 6676714d8e8SKurt Hackel 668d74c9803SKurt Hackel static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, 669d74c9803SKurt Hackel void **ret_data) 6706714d8e8SKurt Hackel { 6716714d8e8SKurt Hackel struct dlm_query_join_request *query; 6726714d8e8SKurt Hackel enum dlm_query_join_response response; 6736714d8e8SKurt Hackel struct dlm_ctxt *dlm = NULL; 6741faf2894SSrinivas Eeda u8 nodenum; 6756714d8e8SKurt Hackel 6766714d8e8SKurt Hackel query = (struct dlm_query_join_request *) msg->buf; 6776714d8e8SKurt Hackel 6786714d8e8SKurt Hackel mlog(0, "node %u wants to join domain %s\n", query->node_idx, 6796714d8e8SKurt Hackel query->domain); 6806714d8e8SKurt Hackel 6816714d8e8SKurt Hackel /* 6826714d8e8SKurt Hackel * If heartbeat doesn't consider the node live, tell it 6836714d8e8SKurt Hackel * to back off and try again. This gives heartbeat a chance 6846714d8e8SKurt Hackel * to catch up. 6856714d8e8SKurt Hackel */ 6866714d8e8SKurt Hackel if (!o2hb_check_node_heartbeating(query->node_idx)) { 6876714d8e8SKurt Hackel mlog(0, "node %u is not in our live map yet\n", 6886714d8e8SKurt Hackel query->node_idx); 6896714d8e8SKurt Hackel 6906714d8e8SKurt Hackel response = JOIN_DISALLOW; 6916714d8e8SKurt Hackel goto respond; 6926714d8e8SKurt Hackel } 6936714d8e8SKurt Hackel 6946714d8e8SKurt Hackel response = JOIN_OK_NO_MAP; 6956714d8e8SKurt Hackel 6966714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 6976714d8e8SKurt Hackel dlm = __dlm_lookup_domain_full(query->domain, query->name_len); 6981faf2894SSrinivas Eeda if (!dlm) 6991faf2894SSrinivas Eeda goto unlock_respond; 7001faf2894SSrinivas Eeda 7011faf2894SSrinivas Eeda /* 7021faf2894SSrinivas Eeda * There is a small window where the joining node may not see the 7031faf2894SSrinivas Eeda * node(s) that just left but still part of the cluster. DISALLOW 7041faf2894SSrinivas Eeda * join request if joining node has different node map. 7051faf2894SSrinivas Eeda */ 7061faf2894SSrinivas Eeda nodenum=0; 7071faf2894SSrinivas Eeda while (nodenum < O2NM_MAX_NODES) { 7081faf2894SSrinivas Eeda if (test_bit(nodenum, dlm->domain_map)) { 7091faf2894SSrinivas Eeda if (!byte_test_bit(nodenum, query->node_map)) { 7101faf2894SSrinivas Eeda response = JOIN_DISALLOW; 7111faf2894SSrinivas Eeda goto unlock_respond; 7121faf2894SSrinivas Eeda } 7131faf2894SSrinivas Eeda } 7141faf2894SSrinivas Eeda nodenum++; 7151faf2894SSrinivas Eeda } 7161faf2894SSrinivas Eeda 7176714d8e8SKurt Hackel /* Once the dlm ctxt is marked as leaving then we don't want 718e2faea4cSKurt Hackel * to be put in someone's domain map. 719e2faea4cSKurt Hackel * Also, explicitly disallow joining at certain troublesome 720e2faea4cSKurt Hackel * times (ie. during recovery). */ 7216714d8e8SKurt Hackel if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) { 722e2faea4cSKurt Hackel int bit = query->node_idx; 7236714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 7246714d8e8SKurt Hackel 7256714d8e8SKurt Hackel if (dlm->dlm_state == DLM_CTXT_NEW && 7266714d8e8SKurt Hackel dlm->joining_node == DLM_LOCK_RES_OWNER_UNKNOWN) { 7276714d8e8SKurt Hackel /*If this is a brand new context and we 7286714d8e8SKurt Hackel * haven't started our join process yet, then 7296714d8e8SKurt Hackel * the other node won the race. */ 7306714d8e8SKurt Hackel response = JOIN_OK_NO_MAP; 7316714d8e8SKurt Hackel } else if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) { 7326714d8e8SKurt Hackel /* Disallow parallel joins. */ 7336714d8e8SKurt Hackel response = JOIN_DISALLOW; 734e2faea4cSKurt Hackel } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { 735e2faea4cSKurt Hackel mlog(ML_NOTICE, "node %u trying to join, but recovery " 736e2faea4cSKurt Hackel "is ongoing.\n", bit); 737e2faea4cSKurt Hackel response = JOIN_DISALLOW; 738e2faea4cSKurt Hackel } else if (test_bit(bit, dlm->recovery_map)) { 739e2faea4cSKurt Hackel mlog(ML_NOTICE, "node %u trying to join, but it " 740e2faea4cSKurt Hackel "still needs recovery.\n", bit); 741e2faea4cSKurt Hackel response = JOIN_DISALLOW; 742e2faea4cSKurt Hackel } else if (test_bit(bit, dlm->domain_map)) { 743e2faea4cSKurt Hackel mlog(ML_NOTICE, "node %u trying to join, but it " 744e2faea4cSKurt Hackel "is still in the domain! needs recovery?\n", 745e2faea4cSKurt Hackel bit); 746e2faea4cSKurt Hackel response = JOIN_DISALLOW; 7476714d8e8SKurt Hackel } else { 7486714d8e8SKurt Hackel /* Alright we're fully a part of this domain 7496714d8e8SKurt Hackel * so we keep some state as to who's joining 7506714d8e8SKurt Hackel * and indicate to him that needs to be fixed 7516714d8e8SKurt Hackel * up. */ 7526714d8e8SKurt Hackel response = JOIN_OK; 7536714d8e8SKurt Hackel __dlm_set_joining_node(dlm, query->node_idx); 7546714d8e8SKurt Hackel } 7556714d8e8SKurt Hackel 7566714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 7576714d8e8SKurt Hackel } 7581faf2894SSrinivas Eeda unlock_respond: 7596714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 7606714d8e8SKurt Hackel 7616714d8e8SKurt Hackel respond: 7626714d8e8SKurt Hackel mlog(0, "We respond with %u\n", response); 7636714d8e8SKurt Hackel 7646714d8e8SKurt Hackel return response; 7656714d8e8SKurt Hackel } 7666714d8e8SKurt Hackel 767d74c9803SKurt Hackel static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, 768d74c9803SKurt Hackel void **ret_data) 7696714d8e8SKurt Hackel { 7706714d8e8SKurt Hackel struct dlm_assert_joined *assert; 7716714d8e8SKurt Hackel struct dlm_ctxt *dlm = NULL; 7726714d8e8SKurt Hackel 7736714d8e8SKurt Hackel assert = (struct dlm_assert_joined *) msg->buf; 7746714d8e8SKurt Hackel 7756714d8e8SKurt Hackel mlog(0, "node %u asserts join on domain %s\n", assert->node_idx, 7766714d8e8SKurt Hackel assert->domain); 7776714d8e8SKurt Hackel 7786714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 7796714d8e8SKurt Hackel dlm = __dlm_lookup_domain_full(assert->domain, assert->name_len); 7806714d8e8SKurt Hackel /* XXX should we consider no dlm ctxt an error? */ 7816714d8e8SKurt Hackel if (dlm) { 7826714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 7836714d8e8SKurt Hackel 7846714d8e8SKurt Hackel /* Alright, this node has officially joined our 7856714d8e8SKurt Hackel * domain. Set him in the map and clean up our 7866714d8e8SKurt Hackel * leftover join state. */ 7876714d8e8SKurt Hackel BUG_ON(dlm->joining_node != assert->node_idx); 7886714d8e8SKurt Hackel set_bit(assert->node_idx, dlm->domain_map); 7896714d8e8SKurt Hackel __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); 7906714d8e8SKurt Hackel 791781ee3e2SSunil Mushran printk(KERN_INFO "ocfs2_dlm: Node %u joins domain %s\n", 792781ee3e2SSunil Mushran assert->node_idx, dlm->name); 7936714d8e8SKurt Hackel __dlm_print_nodes(dlm); 7946714d8e8SKurt Hackel 7956714d8e8SKurt Hackel /* notify anything attached to the heartbeat events */ 7966714d8e8SKurt Hackel dlm_hb_event_notify_attached(dlm, assert->node_idx, 1); 7976714d8e8SKurt Hackel 7986714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 7996714d8e8SKurt Hackel } 8006714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 8016714d8e8SKurt Hackel 8026714d8e8SKurt Hackel return 0; 8036714d8e8SKurt Hackel } 8046714d8e8SKurt Hackel 805d74c9803SKurt Hackel static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, 806d74c9803SKurt Hackel void **ret_data) 8076714d8e8SKurt Hackel { 8086714d8e8SKurt Hackel struct dlm_cancel_join *cancel; 8096714d8e8SKurt Hackel struct dlm_ctxt *dlm = NULL; 8106714d8e8SKurt Hackel 8116714d8e8SKurt Hackel cancel = (struct dlm_cancel_join *) msg->buf; 8126714d8e8SKurt Hackel 8136714d8e8SKurt Hackel mlog(0, "node %u cancels join on domain %s\n", cancel->node_idx, 8146714d8e8SKurt Hackel cancel->domain); 8156714d8e8SKurt Hackel 8166714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 8176714d8e8SKurt Hackel dlm = __dlm_lookup_domain_full(cancel->domain, cancel->name_len); 8186714d8e8SKurt Hackel 8196714d8e8SKurt Hackel if (dlm) { 8206714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 8216714d8e8SKurt Hackel 8226714d8e8SKurt Hackel /* Yikes, this guy wants to cancel his join. No 8236714d8e8SKurt Hackel * problem, we simply cleanup our join state. */ 8246714d8e8SKurt Hackel BUG_ON(dlm->joining_node != cancel->node_idx); 8256714d8e8SKurt Hackel __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); 8266714d8e8SKurt Hackel 8276714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 8286714d8e8SKurt Hackel } 8296714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 8306714d8e8SKurt Hackel 8316714d8e8SKurt Hackel return 0; 8326714d8e8SKurt Hackel } 8336714d8e8SKurt Hackel 8346714d8e8SKurt Hackel static int dlm_send_one_join_cancel(struct dlm_ctxt *dlm, 8356714d8e8SKurt Hackel unsigned int node) 8366714d8e8SKurt Hackel { 8376714d8e8SKurt Hackel int status; 8386714d8e8SKurt Hackel struct dlm_cancel_join cancel_msg; 8396714d8e8SKurt Hackel 8406714d8e8SKurt Hackel memset(&cancel_msg, 0, sizeof(cancel_msg)); 8416714d8e8SKurt Hackel cancel_msg.node_idx = dlm->node_num; 8426714d8e8SKurt Hackel cancel_msg.name_len = strlen(dlm->name); 8436714d8e8SKurt Hackel memcpy(cancel_msg.domain, dlm->name, cancel_msg.name_len); 8446714d8e8SKurt Hackel 8456714d8e8SKurt Hackel status = o2net_send_message(DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY, 8466714d8e8SKurt Hackel &cancel_msg, sizeof(cancel_msg), node, 8476714d8e8SKurt Hackel NULL); 8486714d8e8SKurt Hackel if (status < 0) { 8496714d8e8SKurt Hackel mlog_errno(status); 8506714d8e8SKurt Hackel goto bail; 8516714d8e8SKurt Hackel } 8526714d8e8SKurt Hackel 8536714d8e8SKurt Hackel bail: 8546714d8e8SKurt Hackel return status; 8556714d8e8SKurt Hackel } 8566714d8e8SKurt Hackel 8576714d8e8SKurt Hackel /* map_size should be in bytes. */ 8586714d8e8SKurt Hackel static int dlm_send_join_cancels(struct dlm_ctxt *dlm, 8596714d8e8SKurt Hackel unsigned long *node_map, 8606714d8e8SKurt Hackel unsigned int map_size) 8616714d8e8SKurt Hackel { 8626714d8e8SKurt Hackel int status, tmpstat; 8636714d8e8SKurt Hackel unsigned int node; 8646714d8e8SKurt Hackel 8656714d8e8SKurt Hackel if (map_size != (BITS_TO_LONGS(O2NM_MAX_NODES) * 8666714d8e8SKurt Hackel sizeof(unsigned long))) { 8676714d8e8SKurt Hackel mlog(ML_ERROR, 8686714d8e8SKurt Hackel "map_size %u != BITS_TO_LONGS(O2NM_MAX_NODES) %u\n", 8696714d8e8SKurt Hackel map_size, BITS_TO_LONGS(O2NM_MAX_NODES)); 8706714d8e8SKurt Hackel return -EINVAL; 8716714d8e8SKurt Hackel } 8726714d8e8SKurt Hackel 8736714d8e8SKurt Hackel status = 0; 8746714d8e8SKurt Hackel node = -1; 8756714d8e8SKurt Hackel while ((node = find_next_bit(node_map, O2NM_MAX_NODES, 8766714d8e8SKurt Hackel node + 1)) < O2NM_MAX_NODES) { 8776714d8e8SKurt Hackel if (node == dlm->node_num) 8786714d8e8SKurt Hackel continue; 8796714d8e8SKurt Hackel 8806714d8e8SKurt Hackel tmpstat = dlm_send_one_join_cancel(dlm, node); 8816714d8e8SKurt Hackel if (tmpstat) { 8826714d8e8SKurt Hackel mlog(ML_ERROR, "Error return %d cancelling join on " 8836714d8e8SKurt Hackel "node %d\n", tmpstat, node); 8846714d8e8SKurt Hackel if (!status) 8856714d8e8SKurt Hackel status = tmpstat; 8866714d8e8SKurt Hackel } 8876714d8e8SKurt Hackel } 8886714d8e8SKurt Hackel 8896714d8e8SKurt Hackel if (status) 8906714d8e8SKurt Hackel mlog_errno(status); 8916714d8e8SKurt Hackel return status; 8926714d8e8SKurt Hackel } 8936714d8e8SKurt Hackel 8946714d8e8SKurt Hackel static int dlm_request_join(struct dlm_ctxt *dlm, 8956714d8e8SKurt Hackel int node, 8966714d8e8SKurt Hackel enum dlm_query_join_response *response) 8976714d8e8SKurt Hackel { 8986714d8e8SKurt Hackel int status, retval; 8996714d8e8SKurt Hackel struct dlm_query_join_request join_msg; 9006714d8e8SKurt Hackel 9016714d8e8SKurt Hackel mlog(0, "querying node %d\n", node); 9026714d8e8SKurt Hackel 9036714d8e8SKurt Hackel memset(&join_msg, 0, sizeof(join_msg)); 9046714d8e8SKurt Hackel join_msg.node_idx = dlm->node_num; 9056714d8e8SKurt Hackel join_msg.name_len = strlen(dlm->name); 9066714d8e8SKurt Hackel memcpy(join_msg.domain, dlm->name, join_msg.name_len); 9076714d8e8SKurt Hackel 9081faf2894SSrinivas Eeda /* copy live node map to join message */ 9091faf2894SSrinivas Eeda byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES); 9101faf2894SSrinivas Eeda 9116714d8e8SKurt Hackel status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, 9126714d8e8SKurt Hackel sizeof(join_msg), node, &retval); 9136714d8e8SKurt Hackel if (status < 0 && status != -ENOPROTOOPT) { 9146714d8e8SKurt Hackel mlog_errno(status); 9156714d8e8SKurt Hackel goto bail; 9166714d8e8SKurt Hackel } 9176714d8e8SKurt Hackel 9186714d8e8SKurt Hackel /* -ENOPROTOOPT from the net code means the other side isn't 9196714d8e8SKurt Hackel listening for our message type -- that's fine, it means 9206714d8e8SKurt Hackel his dlm isn't up, so we can consider him a 'yes' but not 9216714d8e8SKurt Hackel joined into the domain. */ 9226714d8e8SKurt Hackel if (status == -ENOPROTOOPT) { 9236714d8e8SKurt Hackel status = 0; 9246714d8e8SKurt Hackel *response = JOIN_OK_NO_MAP; 9256714d8e8SKurt Hackel } else if (retval == JOIN_DISALLOW || 9266714d8e8SKurt Hackel retval == JOIN_OK || 9276714d8e8SKurt Hackel retval == JOIN_OK_NO_MAP) { 9286714d8e8SKurt Hackel *response = retval; 9296714d8e8SKurt Hackel } else { 9306714d8e8SKurt Hackel status = -EINVAL; 9316714d8e8SKurt Hackel mlog(ML_ERROR, "invalid response %d from node %u\n", retval, 9326714d8e8SKurt Hackel node); 9336714d8e8SKurt Hackel } 9346714d8e8SKurt Hackel 9356714d8e8SKurt Hackel mlog(0, "status %d, node %d response is %d\n", status, node, 9366714d8e8SKurt Hackel *response); 9376714d8e8SKurt Hackel 9386714d8e8SKurt Hackel bail: 9396714d8e8SKurt Hackel return status; 9406714d8e8SKurt Hackel } 9416714d8e8SKurt Hackel 9426714d8e8SKurt Hackel static int dlm_send_one_join_assert(struct dlm_ctxt *dlm, 9436714d8e8SKurt Hackel unsigned int node) 9446714d8e8SKurt Hackel { 9456714d8e8SKurt Hackel int status; 9466714d8e8SKurt Hackel struct dlm_assert_joined assert_msg; 9476714d8e8SKurt Hackel 9486714d8e8SKurt Hackel mlog(0, "Sending join assert to node %u\n", node); 9496714d8e8SKurt Hackel 9506714d8e8SKurt Hackel memset(&assert_msg, 0, sizeof(assert_msg)); 9516714d8e8SKurt Hackel assert_msg.node_idx = dlm->node_num; 9526714d8e8SKurt Hackel assert_msg.name_len = strlen(dlm->name); 9536714d8e8SKurt Hackel memcpy(assert_msg.domain, dlm->name, assert_msg.name_len); 9546714d8e8SKurt Hackel 9556714d8e8SKurt Hackel status = o2net_send_message(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, 9566714d8e8SKurt Hackel &assert_msg, sizeof(assert_msg), node, 9576714d8e8SKurt Hackel NULL); 9586714d8e8SKurt Hackel if (status < 0) 9596714d8e8SKurt Hackel mlog_errno(status); 9606714d8e8SKurt Hackel 9616714d8e8SKurt Hackel return status; 9626714d8e8SKurt Hackel } 9636714d8e8SKurt Hackel 9646714d8e8SKurt Hackel static void dlm_send_join_asserts(struct dlm_ctxt *dlm, 9656714d8e8SKurt Hackel unsigned long *node_map) 9666714d8e8SKurt Hackel { 9676714d8e8SKurt Hackel int status, node, live; 9686714d8e8SKurt Hackel 9696714d8e8SKurt Hackel status = 0; 9706714d8e8SKurt Hackel node = -1; 9716714d8e8SKurt Hackel while ((node = find_next_bit(node_map, O2NM_MAX_NODES, 9726714d8e8SKurt Hackel node + 1)) < O2NM_MAX_NODES) { 9736714d8e8SKurt Hackel if (node == dlm->node_num) 9746714d8e8SKurt Hackel continue; 9756714d8e8SKurt Hackel 9766714d8e8SKurt Hackel do { 9776714d8e8SKurt Hackel /* It is very important that this message be 9786714d8e8SKurt Hackel * received so we spin until either the node 9796714d8e8SKurt Hackel * has died or it gets the message. */ 9806714d8e8SKurt Hackel status = dlm_send_one_join_assert(dlm, node); 9816714d8e8SKurt Hackel 9826714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 9836714d8e8SKurt Hackel live = test_bit(node, dlm->live_nodes_map); 9846714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 9856714d8e8SKurt Hackel 9866714d8e8SKurt Hackel if (status) { 9876714d8e8SKurt Hackel mlog(ML_ERROR, "Error return %d asserting " 9886714d8e8SKurt Hackel "join on node %d\n", status, node); 9896714d8e8SKurt Hackel 9906714d8e8SKurt Hackel /* give us some time between errors... */ 9916714d8e8SKurt Hackel if (live) 9926714d8e8SKurt Hackel msleep(DLM_DOMAIN_BACKOFF_MS); 9936714d8e8SKurt Hackel } 9946714d8e8SKurt Hackel } while (status && live); 9956714d8e8SKurt Hackel } 9966714d8e8SKurt Hackel } 9976714d8e8SKurt Hackel 9986714d8e8SKurt Hackel struct domain_join_ctxt { 9996714d8e8SKurt Hackel unsigned long live_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 10006714d8e8SKurt Hackel unsigned long yes_resp_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 10016714d8e8SKurt Hackel }; 10026714d8e8SKurt Hackel 10036714d8e8SKurt Hackel static int dlm_should_restart_join(struct dlm_ctxt *dlm, 10046714d8e8SKurt Hackel struct domain_join_ctxt *ctxt, 10056714d8e8SKurt Hackel enum dlm_query_join_response response) 10066714d8e8SKurt Hackel { 10076714d8e8SKurt Hackel int ret; 10086714d8e8SKurt Hackel 10096714d8e8SKurt Hackel if (response == JOIN_DISALLOW) { 10106714d8e8SKurt Hackel mlog(0, "Latest response of disallow -- should restart\n"); 10116714d8e8SKurt Hackel return 1; 10126714d8e8SKurt Hackel } 10136714d8e8SKurt Hackel 10146714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 10156714d8e8SKurt Hackel /* For now, we restart the process if the node maps have 10166714d8e8SKurt Hackel * changed at all */ 10176714d8e8SKurt Hackel ret = memcmp(ctxt->live_map, dlm->live_nodes_map, 10186714d8e8SKurt Hackel sizeof(dlm->live_nodes_map)); 10196714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 10206714d8e8SKurt Hackel 10216714d8e8SKurt Hackel if (ret) 10226714d8e8SKurt Hackel mlog(0, "Node maps changed -- should restart\n"); 10236714d8e8SKurt Hackel 10246714d8e8SKurt Hackel return ret; 10256714d8e8SKurt Hackel } 10266714d8e8SKurt Hackel 10276714d8e8SKurt Hackel static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) 10286714d8e8SKurt Hackel { 10296714d8e8SKurt Hackel int status = 0, tmpstat, node; 10306714d8e8SKurt Hackel struct domain_join_ctxt *ctxt; 10316714d8e8SKurt Hackel enum dlm_query_join_response response; 10326714d8e8SKurt Hackel 10336714d8e8SKurt Hackel mlog_entry("%p", dlm); 10346714d8e8SKurt Hackel 1035cd861280SRobert P. J. Day ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); 10366714d8e8SKurt Hackel if (!ctxt) { 10376714d8e8SKurt Hackel status = -ENOMEM; 10386714d8e8SKurt Hackel mlog_errno(status); 10396714d8e8SKurt Hackel goto bail; 10406714d8e8SKurt Hackel } 10416714d8e8SKurt Hackel 10426714d8e8SKurt Hackel /* group sem locking should work for us here -- we're already 10436714d8e8SKurt Hackel * registered for heartbeat events so filling this should be 10446714d8e8SKurt Hackel * atomic wrt getting those handlers called. */ 10456714d8e8SKurt Hackel o2hb_fill_node_map(dlm->live_nodes_map, sizeof(dlm->live_nodes_map)); 10466714d8e8SKurt Hackel 10476714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 10486714d8e8SKurt Hackel memcpy(ctxt->live_map, dlm->live_nodes_map, sizeof(ctxt->live_map)); 10496714d8e8SKurt Hackel 10506714d8e8SKurt Hackel __dlm_set_joining_node(dlm, dlm->node_num); 10516714d8e8SKurt Hackel 10526714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 10536714d8e8SKurt Hackel 10546714d8e8SKurt Hackel node = -1; 10556714d8e8SKurt Hackel while ((node = find_next_bit(ctxt->live_map, O2NM_MAX_NODES, 10566714d8e8SKurt Hackel node + 1)) < O2NM_MAX_NODES) { 10576714d8e8SKurt Hackel if (node == dlm->node_num) 10586714d8e8SKurt Hackel continue; 10596714d8e8SKurt Hackel 10606714d8e8SKurt Hackel status = dlm_request_join(dlm, node, &response); 10616714d8e8SKurt Hackel if (status < 0) { 10626714d8e8SKurt Hackel mlog_errno(status); 10636714d8e8SKurt Hackel goto bail; 10646714d8e8SKurt Hackel } 10656714d8e8SKurt Hackel 10666714d8e8SKurt Hackel /* Ok, either we got a response or the node doesn't have a 10676714d8e8SKurt Hackel * dlm up. */ 10686714d8e8SKurt Hackel if (response == JOIN_OK) 10696714d8e8SKurt Hackel set_bit(node, ctxt->yes_resp_map); 10706714d8e8SKurt Hackel 10716714d8e8SKurt Hackel if (dlm_should_restart_join(dlm, ctxt, response)) { 10726714d8e8SKurt Hackel status = -EAGAIN; 10736714d8e8SKurt Hackel goto bail; 10746714d8e8SKurt Hackel } 10756714d8e8SKurt Hackel } 10766714d8e8SKurt Hackel 10776714d8e8SKurt Hackel mlog(0, "Yay, done querying nodes!\n"); 10786714d8e8SKurt Hackel 10796714d8e8SKurt Hackel /* Yay, everyone agree's we can join the domain. My domain is 10806714d8e8SKurt Hackel * comprised of all nodes who were put in the 10816714d8e8SKurt Hackel * yes_resp_map. Copy that into our domain map and send a join 10826714d8e8SKurt Hackel * assert message to clean up everyone elses state. */ 10836714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 10846714d8e8SKurt Hackel memcpy(dlm->domain_map, ctxt->yes_resp_map, 10856714d8e8SKurt Hackel sizeof(ctxt->yes_resp_map)); 10866714d8e8SKurt Hackel set_bit(dlm->node_num, dlm->domain_map); 10876714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 10886714d8e8SKurt Hackel 10896714d8e8SKurt Hackel dlm_send_join_asserts(dlm, ctxt->yes_resp_map); 10906714d8e8SKurt Hackel 10916714d8e8SKurt Hackel /* Joined state *must* be set before the joining node 10926714d8e8SKurt Hackel * information, otherwise the query_join handler may read no 10936714d8e8SKurt Hackel * current joiner but a state of NEW and tell joining nodes 10946714d8e8SKurt Hackel * we're not in the domain. */ 10956714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 10966714d8e8SKurt Hackel dlm->dlm_state = DLM_CTXT_JOINED; 10976714d8e8SKurt Hackel dlm->num_joins++; 10986714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 10996714d8e8SKurt Hackel 11006714d8e8SKurt Hackel bail: 11016714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 11026714d8e8SKurt Hackel __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); 11036714d8e8SKurt Hackel if (!status) 11046714d8e8SKurt Hackel __dlm_print_nodes(dlm); 11056714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 11066714d8e8SKurt Hackel 11076714d8e8SKurt Hackel if (ctxt) { 11086714d8e8SKurt Hackel /* Do we need to send a cancel message to any nodes? */ 11096714d8e8SKurt Hackel if (status < 0) { 11106714d8e8SKurt Hackel tmpstat = dlm_send_join_cancels(dlm, 11116714d8e8SKurt Hackel ctxt->yes_resp_map, 11126714d8e8SKurt Hackel sizeof(ctxt->yes_resp_map)); 11136714d8e8SKurt Hackel if (tmpstat < 0) 11146714d8e8SKurt Hackel mlog_errno(tmpstat); 11156714d8e8SKurt Hackel } 11166714d8e8SKurt Hackel kfree(ctxt); 11176714d8e8SKurt Hackel } 11186714d8e8SKurt Hackel 11196714d8e8SKurt Hackel mlog(0, "returning %d\n", status); 11206714d8e8SKurt Hackel return status; 11216714d8e8SKurt Hackel } 11226714d8e8SKurt Hackel 11236714d8e8SKurt Hackel static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm) 11246714d8e8SKurt Hackel { 11256714d8e8SKurt Hackel o2hb_unregister_callback(&dlm->dlm_hb_up); 11266714d8e8SKurt Hackel o2hb_unregister_callback(&dlm->dlm_hb_down); 11276714d8e8SKurt Hackel o2net_unregister_handler_list(&dlm->dlm_domain_handlers); 11286714d8e8SKurt Hackel } 11296714d8e8SKurt Hackel 11306714d8e8SKurt Hackel static int dlm_register_domain_handlers(struct dlm_ctxt *dlm) 11316714d8e8SKurt Hackel { 11326714d8e8SKurt Hackel int status; 11336714d8e8SKurt Hackel 11346714d8e8SKurt Hackel mlog(0, "registering handlers.\n"); 11356714d8e8SKurt Hackel 11366714d8e8SKurt Hackel o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB, 11376714d8e8SKurt Hackel dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI); 11386714d8e8SKurt Hackel status = o2hb_register_callback(&dlm->dlm_hb_down); 11396714d8e8SKurt Hackel if (status) 11406714d8e8SKurt Hackel goto bail; 11416714d8e8SKurt Hackel 11426714d8e8SKurt Hackel o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB, 11436714d8e8SKurt Hackel dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI); 11446714d8e8SKurt Hackel status = o2hb_register_callback(&dlm->dlm_hb_up); 11456714d8e8SKurt Hackel if (status) 11466714d8e8SKurt Hackel goto bail; 11476714d8e8SKurt Hackel 11486714d8e8SKurt Hackel status = o2net_register_handler(DLM_MASTER_REQUEST_MSG, dlm->key, 11496714d8e8SKurt Hackel sizeof(struct dlm_master_request), 11506714d8e8SKurt Hackel dlm_master_request_handler, 1151d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 11526714d8e8SKurt Hackel if (status) 11536714d8e8SKurt Hackel goto bail; 11546714d8e8SKurt Hackel 11556714d8e8SKurt Hackel status = o2net_register_handler(DLM_ASSERT_MASTER_MSG, dlm->key, 11566714d8e8SKurt Hackel sizeof(struct dlm_assert_master), 11576714d8e8SKurt Hackel dlm_assert_master_handler, 11583b8118cfSKurt Hackel dlm, dlm_assert_master_post_handler, 11593b8118cfSKurt Hackel &dlm->dlm_domain_handlers); 11606714d8e8SKurt Hackel if (status) 11616714d8e8SKurt Hackel goto bail; 11626714d8e8SKurt Hackel 11636714d8e8SKurt Hackel status = o2net_register_handler(DLM_CREATE_LOCK_MSG, dlm->key, 11646714d8e8SKurt Hackel sizeof(struct dlm_create_lock), 11656714d8e8SKurt Hackel dlm_create_lock_handler, 1166d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 11676714d8e8SKurt Hackel if (status) 11686714d8e8SKurt Hackel goto bail; 11696714d8e8SKurt Hackel 11706714d8e8SKurt Hackel status = o2net_register_handler(DLM_CONVERT_LOCK_MSG, dlm->key, 11716714d8e8SKurt Hackel DLM_CONVERT_LOCK_MAX_LEN, 11726714d8e8SKurt Hackel dlm_convert_lock_handler, 1173d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 11746714d8e8SKurt Hackel if (status) 11756714d8e8SKurt Hackel goto bail; 11766714d8e8SKurt Hackel 11776714d8e8SKurt Hackel status = o2net_register_handler(DLM_UNLOCK_LOCK_MSG, dlm->key, 11786714d8e8SKurt Hackel DLM_UNLOCK_LOCK_MAX_LEN, 11796714d8e8SKurt Hackel dlm_unlock_lock_handler, 1180d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 11816714d8e8SKurt Hackel if (status) 11826714d8e8SKurt Hackel goto bail; 11836714d8e8SKurt Hackel 11846714d8e8SKurt Hackel status = o2net_register_handler(DLM_PROXY_AST_MSG, dlm->key, 11856714d8e8SKurt Hackel DLM_PROXY_AST_MAX_LEN, 11866714d8e8SKurt Hackel dlm_proxy_ast_handler, 1187d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 11886714d8e8SKurt Hackel if (status) 11896714d8e8SKurt Hackel goto bail; 11906714d8e8SKurt Hackel 11916714d8e8SKurt Hackel status = o2net_register_handler(DLM_EXIT_DOMAIN_MSG, dlm->key, 11926714d8e8SKurt Hackel sizeof(struct dlm_exit_domain), 11936714d8e8SKurt Hackel dlm_exit_domain_handler, 1194d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 11956714d8e8SKurt Hackel if (status) 11966714d8e8SKurt Hackel goto bail; 11976714d8e8SKurt Hackel 1198ba2bf218SKurt Hackel status = o2net_register_handler(DLM_DEREF_LOCKRES_MSG, dlm->key, 1199ba2bf218SKurt Hackel sizeof(struct dlm_deref_lockres), 1200ba2bf218SKurt Hackel dlm_deref_lockres_handler, 1201d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 1202ba2bf218SKurt Hackel if (status) 1203ba2bf218SKurt Hackel goto bail; 1204ba2bf218SKurt Hackel 12056714d8e8SKurt Hackel status = o2net_register_handler(DLM_MIGRATE_REQUEST_MSG, dlm->key, 12066714d8e8SKurt Hackel sizeof(struct dlm_migrate_request), 12076714d8e8SKurt Hackel dlm_migrate_request_handler, 1208d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 12096714d8e8SKurt Hackel if (status) 12106714d8e8SKurt Hackel goto bail; 12116714d8e8SKurt Hackel 12126714d8e8SKurt Hackel status = o2net_register_handler(DLM_MIG_LOCKRES_MSG, dlm->key, 12136714d8e8SKurt Hackel DLM_MIG_LOCKRES_MAX_LEN, 12146714d8e8SKurt Hackel dlm_mig_lockres_handler, 1215d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 12166714d8e8SKurt Hackel if (status) 12176714d8e8SKurt Hackel goto bail; 12186714d8e8SKurt Hackel 12196714d8e8SKurt Hackel status = o2net_register_handler(DLM_MASTER_REQUERY_MSG, dlm->key, 12206714d8e8SKurt Hackel sizeof(struct dlm_master_requery), 12216714d8e8SKurt Hackel dlm_master_requery_handler, 1222d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 12236714d8e8SKurt Hackel if (status) 12246714d8e8SKurt Hackel goto bail; 12256714d8e8SKurt Hackel 12266714d8e8SKurt Hackel status = o2net_register_handler(DLM_LOCK_REQUEST_MSG, dlm->key, 12276714d8e8SKurt Hackel sizeof(struct dlm_lock_request), 12286714d8e8SKurt Hackel dlm_request_all_locks_handler, 1229d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 12306714d8e8SKurt Hackel if (status) 12316714d8e8SKurt Hackel goto bail; 12326714d8e8SKurt Hackel 12336714d8e8SKurt Hackel status = o2net_register_handler(DLM_RECO_DATA_DONE_MSG, dlm->key, 12346714d8e8SKurt Hackel sizeof(struct dlm_reco_data_done), 12356714d8e8SKurt Hackel dlm_reco_data_done_handler, 1236d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 12376714d8e8SKurt Hackel if (status) 12386714d8e8SKurt Hackel goto bail; 12396714d8e8SKurt Hackel 12406714d8e8SKurt Hackel status = o2net_register_handler(DLM_BEGIN_RECO_MSG, dlm->key, 12416714d8e8SKurt Hackel sizeof(struct dlm_begin_reco), 12426714d8e8SKurt Hackel dlm_begin_reco_handler, 1243d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 12446714d8e8SKurt Hackel if (status) 12456714d8e8SKurt Hackel goto bail; 12466714d8e8SKurt Hackel 12476714d8e8SKurt Hackel status = o2net_register_handler(DLM_FINALIZE_RECO_MSG, dlm->key, 12486714d8e8SKurt Hackel sizeof(struct dlm_finalize_reco), 12496714d8e8SKurt Hackel dlm_finalize_reco_handler, 1250d74c9803SKurt Hackel dlm, NULL, &dlm->dlm_domain_handlers); 12516714d8e8SKurt Hackel if (status) 12526714d8e8SKurt Hackel goto bail; 12536714d8e8SKurt Hackel 12546714d8e8SKurt Hackel bail: 12556714d8e8SKurt Hackel if (status) 12566714d8e8SKurt Hackel dlm_unregister_domain_handlers(dlm); 12576714d8e8SKurt Hackel 12586714d8e8SKurt Hackel return status; 12596714d8e8SKurt Hackel } 12606714d8e8SKurt Hackel 12616714d8e8SKurt Hackel static int dlm_join_domain(struct dlm_ctxt *dlm) 12626714d8e8SKurt Hackel { 12636714d8e8SKurt Hackel int status; 12646714d8e8SKurt Hackel 12656714d8e8SKurt Hackel BUG_ON(!dlm); 12666714d8e8SKurt Hackel 12676714d8e8SKurt Hackel mlog(0, "Join domain %s\n", dlm->name); 12686714d8e8SKurt Hackel 12696714d8e8SKurt Hackel status = dlm_register_domain_handlers(dlm); 12706714d8e8SKurt Hackel if (status) { 12716714d8e8SKurt Hackel mlog_errno(status); 12726714d8e8SKurt Hackel goto bail; 12736714d8e8SKurt Hackel } 12746714d8e8SKurt Hackel 12756714d8e8SKurt Hackel status = dlm_launch_thread(dlm); 12766714d8e8SKurt Hackel if (status < 0) { 12776714d8e8SKurt Hackel mlog_errno(status); 12786714d8e8SKurt Hackel goto bail; 12796714d8e8SKurt Hackel } 12806714d8e8SKurt Hackel 12816714d8e8SKurt Hackel status = dlm_launch_recovery_thread(dlm); 12826714d8e8SKurt Hackel if (status < 0) { 12836714d8e8SKurt Hackel mlog_errno(status); 12846714d8e8SKurt Hackel goto bail; 12856714d8e8SKurt Hackel } 12866714d8e8SKurt Hackel 12873156d267SKurt Hackel dlm->dlm_worker = create_singlethread_workqueue("dlm_wq"); 12883156d267SKurt Hackel if (!dlm->dlm_worker) { 12893156d267SKurt Hackel status = -ENOMEM; 12903156d267SKurt Hackel mlog_errno(status); 12913156d267SKurt Hackel goto bail; 12923156d267SKurt Hackel } 12933156d267SKurt Hackel 12946714d8e8SKurt Hackel do { 12956714d8e8SKurt Hackel unsigned int backoff; 12966714d8e8SKurt Hackel status = dlm_try_to_join_domain(dlm); 12976714d8e8SKurt Hackel 12986714d8e8SKurt Hackel /* If we're racing another node to the join, then we 12996714d8e8SKurt Hackel * need to back off temporarily and let them 13006714d8e8SKurt Hackel * complete. */ 13016714d8e8SKurt Hackel if (status == -EAGAIN) { 13026714d8e8SKurt Hackel if (signal_pending(current)) { 13036714d8e8SKurt Hackel status = -ERESTARTSYS; 13046714d8e8SKurt Hackel goto bail; 13056714d8e8SKurt Hackel } 13066714d8e8SKurt Hackel 13076714d8e8SKurt Hackel /* 13086714d8e8SKurt Hackel * <chip> After you! 13096714d8e8SKurt Hackel * <dale> No, after you! 13106714d8e8SKurt Hackel * <chip> I insist! 13116714d8e8SKurt Hackel * <dale> But you first! 13126714d8e8SKurt Hackel * ... 13136714d8e8SKurt Hackel */ 13146714d8e8SKurt Hackel backoff = (unsigned int)(jiffies & 0x3); 13156714d8e8SKurt Hackel backoff *= DLM_DOMAIN_BACKOFF_MS; 13166714d8e8SKurt Hackel mlog(0, "backoff %d\n", backoff); 13176714d8e8SKurt Hackel msleep(backoff); 13186714d8e8SKurt Hackel } 13196714d8e8SKurt Hackel } while (status == -EAGAIN); 13206714d8e8SKurt Hackel 13216714d8e8SKurt Hackel if (status < 0) { 13226714d8e8SKurt Hackel mlog_errno(status); 13236714d8e8SKurt Hackel goto bail; 13246714d8e8SKurt Hackel } 13256714d8e8SKurt Hackel 13266714d8e8SKurt Hackel status = 0; 13276714d8e8SKurt Hackel bail: 13286714d8e8SKurt Hackel wake_up(&dlm_domain_events); 13296714d8e8SKurt Hackel 13306714d8e8SKurt Hackel if (status) { 13316714d8e8SKurt Hackel dlm_unregister_domain_handlers(dlm); 13326714d8e8SKurt Hackel dlm_complete_thread(dlm); 13336714d8e8SKurt Hackel dlm_complete_recovery_thread(dlm); 13343156d267SKurt Hackel dlm_destroy_dlm_worker(dlm); 13356714d8e8SKurt Hackel } 13366714d8e8SKurt Hackel 13376714d8e8SKurt Hackel return status; 13386714d8e8SKurt Hackel } 13396714d8e8SKurt Hackel 13406714d8e8SKurt Hackel static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, 13416714d8e8SKurt Hackel u32 key) 13426714d8e8SKurt Hackel { 13436714d8e8SKurt Hackel int i; 13446714d8e8SKurt Hackel struct dlm_ctxt *dlm = NULL; 13456714d8e8SKurt Hackel 1346cd861280SRobert P. J. Day dlm = kzalloc(sizeof(*dlm), GFP_KERNEL); 13476714d8e8SKurt Hackel if (!dlm) { 13486714d8e8SKurt Hackel mlog_errno(-ENOMEM); 13496714d8e8SKurt Hackel goto leave; 13506714d8e8SKurt Hackel } 13516714d8e8SKurt Hackel 13526714d8e8SKurt Hackel dlm->name = kmalloc(strlen(domain) + 1, GFP_KERNEL); 13536714d8e8SKurt Hackel if (dlm->name == NULL) { 13546714d8e8SKurt Hackel mlog_errno(-ENOMEM); 13556714d8e8SKurt Hackel kfree(dlm); 13566714d8e8SKurt Hackel dlm = NULL; 13576714d8e8SKurt Hackel goto leave; 13586714d8e8SKurt Hackel } 13596714d8e8SKurt Hackel 136003d864c0SDaniel Phillips dlm->lockres_hash = (struct hlist_head **)dlm_alloc_pagevec(DLM_HASH_PAGES); 136181f2094aSMark Fasheh if (!dlm->lockres_hash) { 13626714d8e8SKurt Hackel mlog_errno(-ENOMEM); 13636714d8e8SKurt Hackel kfree(dlm->name); 13646714d8e8SKurt Hackel kfree(dlm); 13656714d8e8SKurt Hackel dlm = NULL; 13666714d8e8SKurt Hackel goto leave; 13676714d8e8SKurt Hackel } 13686714d8e8SKurt Hackel 136981f2094aSMark Fasheh for (i = 0; i < DLM_HASH_BUCKETS; i++) 137003d864c0SDaniel Phillips INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i)); 13716714d8e8SKurt Hackel 13726714d8e8SKurt Hackel strcpy(dlm->name, domain); 13736714d8e8SKurt Hackel dlm->key = key; 13746714d8e8SKurt Hackel dlm->node_num = o2nm_this_node(); 13756714d8e8SKurt Hackel 13766714d8e8SKurt Hackel spin_lock_init(&dlm->spinlock); 13776714d8e8SKurt Hackel spin_lock_init(&dlm->master_lock); 13786714d8e8SKurt Hackel spin_lock_init(&dlm->ast_lock); 13796714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->list); 13806714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->dirty_list); 13816714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->reco.resources); 13826714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->reco.received); 13836714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->reco.node_data); 13846714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->purge_list); 13856714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->dlm_domain_handlers); 13866714d8e8SKurt Hackel dlm->reco.state = 0; 13876714d8e8SKurt Hackel 13886714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->pending_asts); 13896714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->pending_basts); 13906714d8e8SKurt Hackel 13916714d8e8SKurt Hackel mlog(0, "dlm->recovery_map=%p, &(dlm->recovery_map[0])=%p\n", 13926714d8e8SKurt Hackel dlm->recovery_map, &(dlm->recovery_map[0])); 13936714d8e8SKurt Hackel 13946714d8e8SKurt Hackel memset(dlm->recovery_map, 0, sizeof(dlm->recovery_map)); 13956714d8e8SKurt Hackel memset(dlm->live_nodes_map, 0, sizeof(dlm->live_nodes_map)); 13966714d8e8SKurt Hackel memset(dlm->domain_map, 0, sizeof(dlm->domain_map)); 13976714d8e8SKurt Hackel 13986714d8e8SKurt Hackel dlm->dlm_thread_task = NULL; 13996714d8e8SKurt Hackel dlm->dlm_reco_thread_task = NULL; 14003156d267SKurt Hackel dlm->dlm_worker = NULL; 14016714d8e8SKurt Hackel init_waitqueue_head(&dlm->dlm_thread_wq); 14026714d8e8SKurt Hackel init_waitqueue_head(&dlm->dlm_reco_thread_wq); 14036714d8e8SKurt Hackel init_waitqueue_head(&dlm->reco.event); 14046714d8e8SKurt Hackel init_waitqueue_head(&dlm->ast_wq); 14056714d8e8SKurt Hackel init_waitqueue_head(&dlm->migration_wq); 14066714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->master_list); 14076714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->mle_hb_events); 14086714d8e8SKurt Hackel 14096714d8e8SKurt Hackel dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN; 14106714d8e8SKurt Hackel init_waitqueue_head(&dlm->dlm_join_events); 14116714d8e8SKurt Hackel 14126714d8e8SKurt Hackel dlm->reco.new_master = O2NM_INVALID_NODE_NUM; 14136714d8e8SKurt Hackel dlm->reco.dead_node = O2NM_INVALID_NODE_NUM; 14146714d8e8SKurt Hackel atomic_set(&dlm->local_resources, 0); 14156714d8e8SKurt Hackel atomic_set(&dlm->remote_resources, 0); 14166714d8e8SKurt Hackel atomic_set(&dlm->unknown_resources, 0); 14176714d8e8SKurt Hackel 14186714d8e8SKurt Hackel spin_lock_init(&dlm->work_lock); 14196714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->work_list); 1420c4028958SDavid Howells INIT_WORK(&dlm->dispatched_work, dlm_dispatch_work); 14216714d8e8SKurt Hackel 14226714d8e8SKurt Hackel kref_init(&dlm->dlm_refs); 14236714d8e8SKurt Hackel dlm->dlm_state = DLM_CTXT_NEW; 14246714d8e8SKurt Hackel 14256714d8e8SKurt Hackel INIT_LIST_HEAD(&dlm->dlm_eviction_callbacks); 14266714d8e8SKurt Hackel 14276714d8e8SKurt Hackel mlog(0, "context init: refcount %u\n", 14286714d8e8SKurt Hackel atomic_read(&dlm->dlm_refs.refcount)); 14296714d8e8SKurt Hackel 14306714d8e8SKurt Hackel leave: 14316714d8e8SKurt Hackel return dlm; 14326714d8e8SKurt Hackel } 14336714d8e8SKurt Hackel 14346714d8e8SKurt Hackel /* 14356714d8e8SKurt Hackel * dlm_register_domain: one-time setup per "domain" 14366714d8e8SKurt Hackel */ 14376714d8e8SKurt Hackel struct dlm_ctxt * dlm_register_domain(const char *domain, 14386714d8e8SKurt Hackel u32 key) 14396714d8e8SKurt Hackel { 14406714d8e8SKurt Hackel int ret; 14416714d8e8SKurt Hackel struct dlm_ctxt *dlm = NULL; 14426714d8e8SKurt Hackel struct dlm_ctxt *new_ctxt = NULL; 14436714d8e8SKurt Hackel 14446714d8e8SKurt Hackel if (strlen(domain) > O2NM_MAX_NAME_LEN) { 14456714d8e8SKurt Hackel ret = -ENAMETOOLONG; 14466714d8e8SKurt Hackel mlog(ML_ERROR, "domain name length too long\n"); 14476714d8e8SKurt Hackel goto leave; 14486714d8e8SKurt Hackel } 14496714d8e8SKurt Hackel 14506714d8e8SKurt Hackel if (!o2hb_check_local_node_heartbeating()) { 14516714d8e8SKurt Hackel mlog(ML_ERROR, "the local node has not been configured, or is " 14526714d8e8SKurt Hackel "not heartbeating\n"); 14536714d8e8SKurt Hackel ret = -EPROTO; 14546714d8e8SKurt Hackel goto leave; 14556714d8e8SKurt Hackel } 14566714d8e8SKurt Hackel 14576714d8e8SKurt Hackel mlog(0, "register called for domain \"%s\"\n", domain); 14586714d8e8SKurt Hackel 14596714d8e8SKurt Hackel retry: 14606714d8e8SKurt Hackel dlm = NULL; 14616714d8e8SKurt Hackel if (signal_pending(current)) { 14626714d8e8SKurt Hackel ret = -ERESTARTSYS; 14636714d8e8SKurt Hackel mlog_errno(ret); 14646714d8e8SKurt Hackel goto leave; 14656714d8e8SKurt Hackel } 14666714d8e8SKurt Hackel 14676714d8e8SKurt Hackel spin_lock(&dlm_domain_lock); 14686714d8e8SKurt Hackel 14696714d8e8SKurt Hackel dlm = __dlm_lookup_domain(domain); 14706714d8e8SKurt Hackel if (dlm) { 14716714d8e8SKurt Hackel if (dlm->dlm_state != DLM_CTXT_JOINED) { 14726714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 14736714d8e8SKurt Hackel 14746714d8e8SKurt Hackel mlog(0, "This ctxt is not joined yet!\n"); 14756714d8e8SKurt Hackel wait_event_interruptible(dlm_domain_events, 14766714d8e8SKurt Hackel dlm_wait_on_domain_helper( 14776714d8e8SKurt Hackel domain)); 14786714d8e8SKurt Hackel goto retry; 14796714d8e8SKurt Hackel } 14806714d8e8SKurt Hackel 14816714d8e8SKurt Hackel __dlm_get(dlm); 14826714d8e8SKurt Hackel dlm->num_joins++; 14836714d8e8SKurt Hackel 14846714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 14856714d8e8SKurt Hackel 14866714d8e8SKurt Hackel ret = 0; 14876714d8e8SKurt Hackel goto leave; 14886714d8e8SKurt Hackel } 14896714d8e8SKurt Hackel 14906714d8e8SKurt Hackel /* doesn't exist */ 14916714d8e8SKurt Hackel if (!new_ctxt) { 14926714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 14936714d8e8SKurt Hackel 14946714d8e8SKurt Hackel new_ctxt = dlm_alloc_ctxt(domain, key); 14956714d8e8SKurt Hackel if (new_ctxt) 14966714d8e8SKurt Hackel goto retry; 14976714d8e8SKurt Hackel 14986714d8e8SKurt Hackel ret = -ENOMEM; 14996714d8e8SKurt Hackel mlog_errno(ret); 15006714d8e8SKurt Hackel goto leave; 15016714d8e8SKurt Hackel } 15026714d8e8SKurt Hackel 15036714d8e8SKurt Hackel /* a little variable switch-a-roo here... */ 15046714d8e8SKurt Hackel dlm = new_ctxt; 15056714d8e8SKurt Hackel new_ctxt = NULL; 15066714d8e8SKurt Hackel 15076714d8e8SKurt Hackel /* add the new domain */ 15086714d8e8SKurt Hackel list_add_tail(&dlm->list, &dlm_domains); 15096714d8e8SKurt Hackel spin_unlock(&dlm_domain_lock); 15106714d8e8SKurt Hackel 15116714d8e8SKurt Hackel ret = dlm_join_domain(dlm); 15126714d8e8SKurt Hackel if (ret) { 15136714d8e8SKurt Hackel mlog_errno(ret); 15146714d8e8SKurt Hackel dlm_put(dlm); 15156714d8e8SKurt Hackel goto leave; 15166714d8e8SKurt Hackel } 15176714d8e8SKurt Hackel 15186714d8e8SKurt Hackel ret = 0; 15196714d8e8SKurt Hackel leave: 15206714d8e8SKurt Hackel if (new_ctxt) 15216714d8e8SKurt Hackel dlm_free_ctxt_mem(new_ctxt); 15226714d8e8SKurt Hackel 15236714d8e8SKurt Hackel if (ret < 0) 15246714d8e8SKurt Hackel dlm = ERR_PTR(ret); 15256714d8e8SKurt Hackel 15266714d8e8SKurt Hackel return dlm; 15276714d8e8SKurt Hackel } 15286714d8e8SKurt Hackel EXPORT_SYMBOL_GPL(dlm_register_domain); 15296714d8e8SKurt Hackel 15306714d8e8SKurt Hackel static LIST_HEAD(dlm_join_handlers); 15316714d8e8SKurt Hackel 15326714d8e8SKurt Hackel static void dlm_unregister_net_handlers(void) 15336714d8e8SKurt Hackel { 15346714d8e8SKurt Hackel o2net_unregister_handler_list(&dlm_join_handlers); 15356714d8e8SKurt Hackel } 15366714d8e8SKurt Hackel 15376714d8e8SKurt Hackel static int dlm_register_net_handlers(void) 15386714d8e8SKurt Hackel { 15396714d8e8SKurt Hackel int status = 0; 15406714d8e8SKurt Hackel 15416714d8e8SKurt Hackel status = o2net_register_handler(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, 15426714d8e8SKurt Hackel sizeof(struct dlm_query_join_request), 15436714d8e8SKurt Hackel dlm_query_join_handler, 1544d74c9803SKurt Hackel NULL, NULL, &dlm_join_handlers); 15456714d8e8SKurt Hackel if (status) 15466714d8e8SKurt Hackel goto bail; 15476714d8e8SKurt Hackel 15486714d8e8SKurt Hackel status = o2net_register_handler(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, 15496714d8e8SKurt Hackel sizeof(struct dlm_assert_joined), 15506714d8e8SKurt Hackel dlm_assert_joined_handler, 1551d74c9803SKurt Hackel NULL, NULL, &dlm_join_handlers); 15526714d8e8SKurt Hackel if (status) 15536714d8e8SKurt Hackel goto bail; 15546714d8e8SKurt Hackel 15556714d8e8SKurt Hackel status = o2net_register_handler(DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY, 15566714d8e8SKurt Hackel sizeof(struct dlm_cancel_join), 15576714d8e8SKurt Hackel dlm_cancel_join_handler, 1558d74c9803SKurt Hackel NULL, NULL, &dlm_join_handlers); 15596714d8e8SKurt Hackel 15606714d8e8SKurt Hackel bail: 15616714d8e8SKurt Hackel if (status < 0) 15626714d8e8SKurt Hackel dlm_unregister_net_handlers(); 15636714d8e8SKurt Hackel 15646714d8e8SKurt Hackel return status; 15656714d8e8SKurt Hackel } 15666714d8e8SKurt Hackel 15676714d8e8SKurt Hackel /* Domain eviction callback handling. 15686714d8e8SKurt Hackel * 15696714d8e8SKurt Hackel * The file system requires notification of node death *before* the 15706714d8e8SKurt Hackel * dlm completes it's recovery work, otherwise it may be able to 15716714d8e8SKurt Hackel * acquire locks on resources requiring recovery. Since the dlm can 15726714d8e8SKurt Hackel * evict a node from it's domain *before* heartbeat fires, a similar 15736714d8e8SKurt Hackel * mechanism is required. */ 15746714d8e8SKurt Hackel 15756714d8e8SKurt Hackel /* Eviction is not expected to happen often, so a per-domain lock is 15766714d8e8SKurt Hackel * not necessary. Eviction callbacks are allowed to sleep for short 15776714d8e8SKurt Hackel * periods of time. */ 15786714d8e8SKurt Hackel static DECLARE_RWSEM(dlm_callback_sem); 15796714d8e8SKurt Hackel 15806714d8e8SKurt Hackel void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, 15816714d8e8SKurt Hackel int node_num) 15826714d8e8SKurt Hackel { 15836714d8e8SKurt Hackel struct list_head *iter; 15846714d8e8SKurt Hackel struct dlm_eviction_cb *cb; 15856714d8e8SKurt Hackel 15866714d8e8SKurt Hackel down_read(&dlm_callback_sem); 15876714d8e8SKurt Hackel list_for_each(iter, &dlm->dlm_eviction_callbacks) { 15886714d8e8SKurt Hackel cb = list_entry(iter, struct dlm_eviction_cb, ec_item); 15896714d8e8SKurt Hackel 15906714d8e8SKurt Hackel cb->ec_func(node_num, cb->ec_data); 15916714d8e8SKurt Hackel } 15926714d8e8SKurt Hackel up_read(&dlm_callback_sem); 15936714d8e8SKurt Hackel } 15946714d8e8SKurt Hackel 15956714d8e8SKurt Hackel void dlm_setup_eviction_cb(struct dlm_eviction_cb *cb, 15966714d8e8SKurt Hackel dlm_eviction_func *f, 15976714d8e8SKurt Hackel void *data) 15986714d8e8SKurt Hackel { 15996714d8e8SKurt Hackel INIT_LIST_HEAD(&cb->ec_item); 16006714d8e8SKurt Hackel cb->ec_func = f; 16016714d8e8SKurt Hackel cb->ec_data = data; 16026714d8e8SKurt Hackel } 16036714d8e8SKurt Hackel EXPORT_SYMBOL_GPL(dlm_setup_eviction_cb); 16046714d8e8SKurt Hackel 16056714d8e8SKurt Hackel void dlm_register_eviction_cb(struct dlm_ctxt *dlm, 16066714d8e8SKurt Hackel struct dlm_eviction_cb *cb) 16076714d8e8SKurt Hackel { 16086714d8e8SKurt Hackel down_write(&dlm_callback_sem); 16096714d8e8SKurt Hackel list_add_tail(&cb->ec_item, &dlm->dlm_eviction_callbacks); 16106714d8e8SKurt Hackel up_write(&dlm_callback_sem); 16116714d8e8SKurt Hackel } 16126714d8e8SKurt Hackel EXPORT_SYMBOL_GPL(dlm_register_eviction_cb); 16136714d8e8SKurt Hackel 16146714d8e8SKurt Hackel void dlm_unregister_eviction_cb(struct dlm_eviction_cb *cb) 16156714d8e8SKurt Hackel { 16166714d8e8SKurt Hackel down_write(&dlm_callback_sem); 16176714d8e8SKurt Hackel list_del_init(&cb->ec_item); 16186714d8e8SKurt Hackel up_write(&dlm_callback_sem); 16196714d8e8SKurt Hackel } 16206714d8e8SKurt Hackel EXPORT_SYMBOL_GPL(dlm_unregister_eviction_cb); 16216714d8e8SKurt Hackel 16226714d8e8SKurt Hackel static int __init dlm_init(void) 16236714d8e8SKurt Hackel { 16246714d8e8SKurt Hackel int status; 16256714d8e8SKurt Hackel 16266714d8e8SKurt Hackel dlm_print_version(); 16276714d8e8SKurt Hackel 16286714d8e8SKurt Hackel status = dlm_init_mle_cache(); 16296714d8e8SKurt Hackel if (status) 16306714d8e8SKurt Hackel return -1; 16316714d8e8SKurt Hackel 16326714d8e8SKurt Hackel status = dlm_register_net_handlers(); 16336714d8e8SKurt Hackel if (status) { 16346714d8e8SKurt Hackel dlm_destroy_mle_cache(); 16356714d8e8SKurt Hackel return -1; 16366714d8e8SKurt Hackel } 16376714d8e8SKurt Hackel 16386714d8e8SKurt Hackel return 0; 16396714d8e8SKurt Hackel } 16406714d8e8SKurt Hackel 16416714d8e8SKurt Hackel static void __exit dlm_exit (void) 16426714d8e8SKurt Hackel { 16436714d8e8SKurt Hackel dlm_unregister_net_handlers(); 16446714d8e8SKurt Hackel dlm_destroy_mle_cache(); 16456714d8e8SKurt Hackel } 16466714d8e8SKurt Hackel 16476714d8e8SKurt Hackel MODULE_AUTHOR("Oracle"); 16486714d8e8SKurt Hackel MODULE_LICENSE("GPL"); 16496714d8e8SKurt Hackel 16506714d8e8SKurt Hackel module_init(dlm_init); 16516714d8e8SKurt Hackel module_exit(dlm_exit); 1652