btrfs: delayed-ref: Use list to replace the ref_root in ref_head.
This patch replaces the rbtree used in ref_head with a list. This has the following advantage: 1) Easier merge logic. With the new list implementation, we only need to care about merging the tail ref_node with the new ref_node. This can be done quite easily at insert time, with no need to do a dedicated merge at run_delayed_refs(). Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Signed-off-by: Chris Mason <clm@fb.com>
This commit is contained in:
parent
00db646d3f
commit
c6fc245499
|
@ -574,8 +574,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
|
|||
struct list_head *prefs, u64 *total_refs,
|
||||
u64 inum)
|
||||
{
|
||||
struct btrfs_delayed_ref_node *node;
|
||||
struct btrfs_delayed_extent_op *extent_op = head->extent_op;
|
||||
struct rb_node *n = &head->node.rb_node;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_key op_key = {0};
|
||||
int sgn;
|
||||
|
@ -585,12 +585,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
|
|||
btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
|
||||
|
||||
spin_lock(&head->lock);
|
||||
n = rb_first(&head->ref_root);
|
||||
while (n) {
|
||||
struct btrfs_delayed_ref_node *node;
|
||||
node = rb_entry(n, struct btrfs_delayed_ref_node,
|
||||
rb_node);
|
||||
n = rb_next(n);
|
||||
list_for_each_entry(node, &head->ref_list, list) {
|
||||
if (node->seq > seq)
|
||||
continue;
|
||||
|
||||
|
|
|
@ -268,7 +268,7 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
|
|||
rb_erase(&head->href_node, &delayed_refs->href_root);
|
||||
} else {
|
||||
assert_spin_locked(&head->lock);
|
||||
rb_erase(&ref->rb_node, &head->ref_root);
|
||||
list_del(&ref->list);
|
||||
}
|
||||
ref->in_tree = 0;
|
||||
btrfs_put_delayed_ref(ref);
|
||||
|
@ -328,48 +328,6 @@ static int merge_ref(struct btrfs_trans_handle *trans,
|
|||
return done;
|
||||
}
|
||||
|
||||
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_delayed_ref_head *head)
|
||||
{
|
||||
struct rb_node *node;
|
||||
u64 seq = 0;
|
||||
|
||||
assert_spin_locked(&head->lock);
|
||||
/*
|
||||
* We don't have too much refs to merge in the case of delayed data
|
||||
* refs.
|
||||
*/
|
||||
if (head->is_data)
|
||||
return;
|
||||
|
||||
spin_lock(&fs_info->tree_mod_seq_lock);
|
||||
if (!list_empty(&fs_info->tree_mod_seq_list)) {
|
||||
struct seq_list *elem;
|
||||
|
||||
elem = list_first_entry(&fs_info->tree_mod_seq_list,
|
||||
struct seq_list, list);
|
||||
seq = elem->seq;
|
||||
}
|
||||
spin_unlock(&fs_info->tree_mod_seq_lock);
|
||||
|
||||
node = rb_first(&head->ref_root);
|
||||
while (node) {
|
||||
struct btrfs_delayed_ref_node *ref;
|
||||
|
||||
ref = rb_entry(node, struct btrfs_delayed_ref_node,
|
||||
rb_node);
|
||||
/* We can't merge refs that are outside of our seq count */
|
||||
if (seq && ref->seq >= seq)
|
||||
break;
|
||||
if (merge_ref(trans, delayed_refs, head, ref, seq))
|
||||
node = rb_first(&head->ref_root);
|
||||
else
|
||||
node = rb_next(&ref->rb_node);
|
||||
}
|
||||
}
|
||||
|
||||
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
u64 seq)
|
||||
|
@ -484,6 +442,74 @@ update_existing_ref(struct btrfs_trans_handle *trans,
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper to insert the ref_node to the tail or merge with tail.
|
||||
*
|
||||
* Return 0 for insert.
|
||||
* Return >0 for merge.
*
* href->lock is taken and released inside this helper.
*
* Only the current tail of href->ref_list is considered as a merge
* candidate; earlier entries in the list are never examined.
*
* On insert (0): ref is linked at the tail of href->ref_list, and both
* root->num_entries and trans->delayed_ref_updates are incremented.
* On merge (>0): ref is NOT linked into the list; its ref_mod has been
* folded into the tail node. The callers visible in this change free the
* containing tree/data ref when the return value is > 0.
|
||||
*/
|
||||
static int
|
||||
add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_delayed_ref_root *root,
|
||||
struct btrfs_delayed_ref_head *href,
|
||||
struct btrfs_delayed_ref_node *ref)
|
||||
{
|
||||
struct btrfs_delayed_ref_node *exist;
|
||||
int mod;
|
||||
int ret = 0;
|
||||
|
||||
spin_lock(&href->lock);
|
||||
/* Check whether we can merge the tail node with ref */
|
||||
if (list_empty(&href->ref_list))
|
||||
goto add_tail;
|
||||
/* exist is the last entry of the list, i.e. the current tail */
exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
|
||||
list);
|
||||
/* No need to compare bytenr nor is_head */
|
||||
if (exist->type != ref->type || exist->no_quota != ref->no_quota ||
|
||||
exist->seq != ref->seq)
|
||||
goto add_tail;
|
||||
|
||||
/*
 * type, no_quota and seq match; now compare the type-specific fields.
 * A non-zero result from comp_tree_refs()/comp_data_refs() is treated
 * as "different", so the new ref is appended instead of merged.
 */
if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY ||
|
||||
exist->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
|
||||
comp_tree_refs(btrfs_delayed_node_to_tree_ref(exist),
|
||||
btrfs_delayed_node_to_tree_ref(ref),
|
||||
ref->type))
|
||||
goto add_tail;
|
||||
if ((exist->type == BTRFS_EXTENT_DATA_REF_KEY ||
|
||||
exist->type == BTRFS_SHARED_DATA_REF_KEY) &&
|
||||
comp_data_refs(btrfs_delayed_node_to_data_ref(exist),
|
||||
btrfs_delayed_node_to_data_ref(ref)))
|
||||
goto add_tail;
|
||||
|
||||
/* Now we are sure we can merge */
|
||||
ret = 1;
|
||||
/*
 * Compute the delta to apply to exist->ref_mod:
 *  - same action: the two counts simply add up;
 *  - different actions: the counts offset each other, leaving the
 *    difference of the two ref_mod values, with exist taking over the
 *    action of whichever side had the larger ref_mod (on a tie exist
 *    keeps its action and ends up with ref_mod == 0).
 */
if (exist->action == ref->action) {
|
||||
mod = ref->ref_mod;
|
||||
} else {
|
||||
/* Need to change action */
|
||||
if (exist->ref_mod < ref->ref_mod) {
|
||||
exist->action = ref->action;
|
||||
mod = -exist->ref_mod;
|
||||
exist->ref_mod = ref->ref_mod;
|
||||
} else
|
||||
mod = -ref->ref_mod;
|
||||
}
|
||||
exist->ref_mod += mod;
|
||||
|
||||
/* remove existing tail if its ref_mod is zero */
|
||||
/* drop_delayed_ref() is called here with href->lock still held */
if (exist->ref_mod == 0)
|
||||
drop_delayed_ref(trans, root, href, exist);
|
||||
spin_unlock(&href->lock);
|
||||
return ret;
|
||||
|
||||
/* No merge possible: ret is still 0 on this path */
add_tail:
|
||||
list_add_tail(&ref->list, &href->ref_list);
|
||||
atomic_inc(&root->num_entries);
|
||||
trans->delayed_ref_updates++;
|
||||
spin_unlock(&href->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* helper function to update the accounting in the head ref
|
||||
* existing and update must have the same bytenr
|
||||
|
@ -618,7 +644,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
|
|||
head_ref = btrfs_delayed_node_to_head(ref);
|
||||
head_ref->must_insert_reserved = must_insert_reserved;
|
||||
head_ref->is_data = is_data;
|
||||
head_ref->ref_root = RB_ROOT;
|
||||
INIT_LIST_HEAD(&head_ref->ref_list);
|
||||
head_ref->processing = 0;
|
||||
head_ref->total_ref_mod = count_mod;
|
||||
|
||||
|
@ -659,10 +685,10 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
|
|||
u64 num_bytes, u64 parent, u64 ref_root, int level,
|
||||
int action, int no_quota)
|
||||
{
|
||||
struct btrfs_delayed_ref_node *existing;
|
||||
struct btrfs_delayed_tree_ref *full_ref;
|
||||
struct btrfs_delayed_ref_root *delayed_refs;
|
||||
u64 seq = 0;
|
||||
int ret;
|
||||
|
||||
if (action == BTRFS_ADD_DELAYED_EXTENT)
|
||||
action = BTRFS_ADD_DELAYED_REF;
|
||||
|
@ -693,21 +719,14 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
|
|||
|
||||
trace_add_delayed_tree_ref(ref, full_ref, action);
|
||||
|
||||
spin_lock(&head_ref->lock);
|
||||
existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
|
||||
if (existing) {
|
||||
update_existing_ref(trans, delayed_refs, head_ref, existing,
|
||||
ref);
|
||||
/*
|
||||
* we've updated the existing ref, free the newly
|
||||
* allocated ref
|
||||
*/
|
||||
ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
|
||||
|
||||
/*
|
||||
* XXX: memory should be freed at the same level allocated.
|
||||
* But bad practice is anywhere... Follow it now. Need cleanup.
|
||||
*/
|
||||
if (ret > 0)
|
||||
kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
|
||||
} else {
|
||||
atomic_inc(&delayed_refs->num_entries);
|
||||
trans->delayed_ref_updates++;
|
||||
}
|
||||
spin_unlock(&head_ref->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -721,10 +740,10 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
|
|||
u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
|
||||
u64 offset, int action, int no_quota)
|
||||
{
|
||||
struct btrfs_delayed_ref_node *existing;
|
||||
struct btrfs_delayed_data_ref *full_ref;
|
||||
struct btrfs_delayed_ref_root *delayed_refs;
|
||||
u64 seq = 0;
|
||||
int ret;
|
||||
|
||||
if (action == BTRFS_ADD_DELAYED_EXTENT)
|
||||
action = BTRFS_ADD_DELAYED_REF;
|
||||
|
@ -758,21 +777,10 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
|
|||
|
||||
trace_add_delayed_data_ref(ref, full_ref, action);
|
||||
|
||||
spin_lock(&head_ref->lock);
|
||||
existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
|
||||
if (existing) {
|
||||
update_existing_ref(trans, delayed_refs, head_ref, existing,
|
||||
ref);
|
||||
/*
|
||||
* we've updated the existing ref, free the newly
|
||||
* allocated ref
|
||||
*/
|
||||
ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
|
||||
|
||||
if (ret > 0)
|
||||
kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
|
||||
} else {
|
||||
atomic_inc(&delayed_refs->num_entries);
|
||||
trans->delayed_ref_updates++;
|
||||
}
|
||||
spin_unlock(&head_ref->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -24,9 +24,25 @@
|
|||
#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
|
||||
#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
|
||||
|
||||
/*
|
||||
* XXX: Qu: I really hate the design that ref_head and tree/data ref shares the
|
||||
* same ref_node structure.
|
||||
* Ref_head is in a higher logic level than tree/data ref, and duplicated
|
||||
* bytenr/num_bytes in ref_node is really a waste of memory, they should be
|
||||
* referred from ref_head.
|
||||
* This gets more disgusting after we use list to store tree/data ref in
|
||||
* ref_head. Must clean this mess up later.
|
||||
*/
|
||||
struct btrfs_delayed_ref_node {
|
||||
/*
|
||||
* ref_head use rb tree, stored in ref_root->href.
|
||||
* indexed by bytenr
|
||||
*/
|
||||
struct rb_node rb_node;
|
||||
|
||||
/*data/tree ref use list, stored in ref_head->ref_list. */
|
||||
struct list_head list;
|
||||
|
||||
/* the starting bytenr of the extent */
|
||||
u64 bytenr;
|
||||
|
||||
|
@ -83,7 +99,7 @@ struct btrfs_delayed_ref_head {
|
|||
struct mutex mutex;
|
||||
|
||||
spinlock_t lock;
|
||||
struct rb_root ref_root;
|
||||
struct list_head ref_list;
|
||||
|
||||
struct rb_node href_node;
|
||||
|
||||
|
|
|
@ -4062,6 +4062,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
|
|||
|
||||
while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
|
||||
struct btrfs_delayed_ref_head *head;
|
||||
struct btrfs_delayed_ref_node *tmp;
|
||||
bool pin_bytes = false;
|
||||
|
||||
head = rb_entry(node, struct btrfs_delayed_ref_head,
|
||||
|
@ -4077,11 +4078,10 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
|
|||
continue;
|
||||
}
|
||||
spin_lock(&head->lock);
|
||||
while ((node = rb_first(&head->ref_root)) != NULL) {
|
||||
ref = rb_entry(node, struct btrfs_delayed_ref_node,
|
||||
rb_node);
|
||||
list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list,
|
||||
list) {
|
||||
ref->in_tree = 0;
|
||||
rb_erase(&ref->rb_node, &head->ref_root);
|
||||
list_del(&ref->list);
|
||||
atomic_dec(&delayed_refs->num_entries);
|
||||
btrfs_put_delayed_ref(ref);
|
||||
}
|
||||
|
|
|
@ -2323,28 +2323,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static noinline struct btrfs_delayed_ref_node *
|
||||
static inline struct btrfs_delayed_ref_node *
|
||||
select_delayed_ref(struct btrfs_delayed_ref_head *head)
|
||||
{
|
||||
struct rb_node *node;
|
||||
struct btrfs_delayed_ref_node *ref, *last = NULL;;
|
||||
if (list_empty(&head->ref_list))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* select delayed ref of type BTRFS_ADD_DELAYED_REF first.
|
||||
* this prevents ref count from going down to zero when
|
||||
* there still are pending delayed ref.
|
||||
*/
|
||||
node = rb_first(&head->ref_root);
|
||||
while (node) {
|
||||
ref = rb_entry(node, struct btrfs_delayed_ref_node,
|
||||
rb_node);
|
||||
if (ref->action == BTRFS_ADD_DELAYED_REF)
|
||||
return ref;
|
||||
else if (last == NULL)
|
||||
last = ref;
|
||||
node = rb_next(node);
|
||||
}
|
||||
return last;
|
||||
return list_entry(head->ref_list.next, struct btrfs_delayed_ref_node,
|
||||
list);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2396,16 +2382,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to try and merge add/drops of the same ref since we
|
||||
* can run into issues with relocate dropping the implicit ref
|
||||
* and then it being added back again before the drop can
|
||||
* finish. If we merged anything we need to re-loop so we can
|
||||
* get a good ref.
|
||||
*/
|
||||
spin_lock(&locked_ref->lock);
|
||||
btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
|
||||
locked_ref);
|
||||
|
||||
/*
|
||||
* locked_ref is the head node, so we have to go one
|
||||
|
@ -2482,7 +2459,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
|
|||
spin_unlock(&locked_ref->lock);
|
||||
spin_lock(&delayed_refs->lock);
|
||||
spin_lock(&locked_ref->lock);
|
||||
if (rb_first(&locked_ref->ref_root) ||
|
||||
if (!list_empty(&locked_ref->ref_list) ||
|
||||
locked_ref->extent_op) {
|
||||
spin_unlock(&locked_ref->lock);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
@ -2496,7 +2473,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
|
|||
} else {
|
||||
actual_count++;
|
||||
ref->in_tree = 0;
|
||||
rb_erase(&ref->rb_node, &locked_ref->ref_root);
|
||||
list_del(&ref->list);
|
||||
}
|
||||
atomic_dec(&delayed_refs->num_entries);
|
||||
|
||||
|
@ -2905,7 +2882,6 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_delayed_ref_node *ref;
|
||||
struct btrfs_delayed_data_ref *data_ref;
|
||||
struct btrfs_delayed_ref_root *delayed_refs;
|
||||
struct rb_node *node;
|
||||
int ret = 0;
|
||||
|
||||
delayed_refs = &trans->transaction->delayed_refs;
|
||||
|
@ -2934,11 +2910,7 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
|
|||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
spin_lock(&head->lock);
|
||||
node = rb_first(&head->ref_root);
|
||||
while (node) {
|
||||
ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
|
||||
node = rb_next(node);
|
||||
|
||||
list_for_each_entry(ref, &head->ref_list, list) {
|
||||
/* If it's a shared ref we know a cross reference exists */
|
||||
if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
|
||||
ret = 1;
|
||||
|
@ -6448,7 +6420,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
|
|||
goto out_delayed_unlock;
|
||||
|
||||
spin_lock(&head->lock);
|
||||
if (rb_first(&head->ref_root))
|
||||
if (!list_empty(&head->ref_list))
|
||||
goto out;
|
||||
|
||||
if (head->extent_op) {
|
||||
|
|
Loading…
Reference in New Issue