1 /* 2 * Copyright (C) 2011 Red Hat, Inc. 3 * 4 * This file is released under the GPL. 5 */ 6 #ifndef _LINUX_DM_BTREE_H 7 #define _LINUX_DM_BTREE_H 8 9 #include "dm-block-manager.h" 10 11 struct dm_transaction_manager; 12 13 /*----------------------------------------------------------------*/ 14 15 /* 16 * Annotations used to check on-disk metadata is handled as little-endian. 17 */ 18 #ifdef __CHECKER__ 19 # define __dm_written_to_disk(x) __releases(x) 20 # define __dm_reads_from_disk(x) __acquires(x) 21 # define __dm_bless_for_disk(x) __acquire(x) 22 # define __dm_unbless_for_disk(x) __release(x) 23 #else 24 # define __dm_written_to_disk(x) 25 # define __dm_reads_from_disk(x) 26 # define __dm_bless_for_disk(x) 27 # define __dm_unbless_for_disk(x) 28 #endif 29 30 /*----------------------------------------------------------------*/ 31 32 /* 33 * Manipulates hierarchical B+ trees with 64-bit keys and arbitrary-sized 34 * values. 35 */ 36 37 /* 38 * Information about the values stored within the btree. 39 */ 40 struct dm_btree_value_type { 41 void *context; 42 43 /* 44 * The size in bytes of each value. 45 */ 46 uint32_t size; 47 48 /* 49 * Any of these methods can be safely set to NULL if you do not 50 * need the corresponding feature. 51 */ 52 53 /* 54 * The btree is making a duplicate of a run of values, for instance 55 * because previously-shared btree nodes have now diverged. 56 * @value argument is the new copy that the copy function may modify. 57 * (Probably it just wants to increment a reference count 58 * somewhere.) This method is _not_ called for insertion of a new 59 * value: It is assumed the ref count is already 1. 60 */ 61 void (*inc)(void *context, const void *value, unsigned count); 62 63 /* 64 * These values are being deleted. The btree takes care of freeing 65 * the memory pointed to by @value. Often the del function just 66 * needs to decrement a reference counts somewhere. 67 */ 68 void (*dec)(void *context, const void *value, unsigned count); 69 70 /* 71 * A test for equality between two values. When a value is 72 * overwritten with a new one, the old one has the dec method 73 * called _unless_ the new and old value are deemed equal. 74 */ 75 int (*equal)(void *context, const void *value1, const void *value2); 76 }; 77 78 /* 79 * The shape and contents of a btree. 80 */ 81 struct dm_btree_info { 82 struct dm_transaction_manager *tm; 83 84 /* 85 * Number of nested btrees. (Not the depth of a single tree.) 86 */ 87 unsigned levels; 88 struct dm_btree_value_type value_type; 89 }; 90 91 /* 92 * Set up an empty tree. O(1). 93 */ 94 int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root); 95 96 /* 97 * Delete a tree. O(n) - this is the slow one! It can also block, so 98 * please don't call it on an IO path. 99 */ 100 int dm_btree_del(struct dm_btree_info *info, dm_block_t root); 101 102 /* 103 * All the lookup functions return -ENODATA if the key cannot be found. 104 */ 105 106 /* 107 * Tries to find a key that matches exactly. O(ln(n)) 108 */ 109 int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root, 110 uint64_t *keys, void *value_le); 111 112 /* 113 * Tries to find the first key where the bottom level key is >= to that 114 * given. Useful for skipping empty sections of the btree. 115 */ 116 int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root, 117 uint64_t *keys, uint64_t *rkey, void *value_le); 118 119 /* 120 * Insertion (or overwrite an existing value). O(ln(n)) 121 */ 122 int dm_btree_insert(struct dm_btree_info *info, dm_block_t root, 123 uint64_t *keys, void *value, dm_block_t *new_root) 124 __dm_written_to_disk(value); 125 126 /* 127 * A variant of insert that indicates whether it actually inserted or just 128 * overwrote. Useful if you're keeping track of the number of entries in a 129 * tree. 130 */ 131 int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root, 132 uint64_t *keys, void *value, dm_block_t *new_root, 133 int *inserted) 134 __dm_written_to_disk(value); 135 136 /* 137 * Remove a key if present. This doesn't remove empty sub trees. Normally 138 * subtrees represent a separate entity, like a snapshot map, so this is 139 * correct behaviour. O(ln(n)). 140 */ 141 int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, 142 uint64_t *keys, dm_block_t *new_root); 143 144 /* 145 * Removes a _contiguous_ run of values starting from 'keys' and not 146 * reaching keys2 (where keys2 is keys with the final key replaced with 147 * 'end_key'). 'end_key' is the one-past-the-end value. 'keys' may be 148 * altered. 149 */ 150 int dm_btree_remove_leaves(struct dm_btree_info *info, dm_block_t root, 151 uint64_t *keys, uint64_t end_key, 152 dm_block_t *new_root, unsigned *nr_removed); 153 154 /* 155 * Returns < 0 on failure. Otherwise the number of key entries that have 156 * been filled out. Remember trees can have zero entries, and as such have 157 * no lowest key. 158 */ 159 int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root, 160 uint64_t *result_keys); 161 162 /* 163 * Returns < 0 on failure. Otherwise the number of key entries that have 164 * been filled out. Remember trees can have zero entries, and as such have 165 * no highest key. 166 */ 167 int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, 168 uint64_t *result_keys); 169 170 /* 171 * Iterate through the a btree, calling fn() on each entry. 172 * It only works for single level trees and is internally recursive, so 173 * monitor stack usage carefully. 174 */ 175 int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, 176 int (*fn)(void *context, uint64_t *keys, void *leaf), 177 void *context); 178 179 180 /*----------------------------------------------------------------*/ 181 182 /* 183 * Cursor API. This does not follow the rolling lock convention. Since we 184 * know the order that values are required we can issue prefetches to speed 185 * up iteration. Use on a single level btree only. 186 */ 187 #define DM_BTREE_CURSOR_MAX_DEPTH 16 188 189 struct cursor_node { 190 struct dm_block *b; 191 unsigned index; 192 }; 193 194 struct dm_btree_cursor { 195 struct dm_btree_info *info; 196 dm_block_t root; 197 198 bool prefetch_leaves; 199 unsigned depth; 200 struct cursor_node nodes[DM_BTREE_CURSOR_MAX_DEPTH]; 201 }; 202 203 /* 204 * Creates a fresh cursor. If prefetch_leaves is set then it is assumed 205 * the btree contains block indexes that will be prefetched. The cursor is 206 * quite large, so you probably don't want to put it on the stack. 207 */ 208 int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root, 209 bool prefetch_leaves, struct dm_btree_cursor *c); 210 void dm_btree_cursor_end(struct dm_btree_cursor *c); 211 int dm_btree_cursor_next(struct dm_btree_cursor *c); 212 int dm_btree_cursor_skip(struct dm_btree_cursor *c, uint32_t count); 213 int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le); 214 215 #endif /* _LINUX_DM_BTREE_H */ 216