Commit | Line | Data |
---|---|---|
925baedd CM |
1 | /* |
2 | * Copyright (C) 2008 Oracle. All rights reserved. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU General Public | |
6 | * License v2 as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, | |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | * | |
13 | * You should have received a copy of the GNU General Public | |
14 | * License along with this program; if not, write to the | |
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
16 | * Boston, MA 021110-1307, USA. | |
17 | */ | |
18 | #include <linux/sched.h> | |
925baedd CM |
19 | #include <linux/pagemap.h> |
20 | #include <linux/spinlock.h> | |
21 | #include <linux/page-flags.h> | |
4881ee5a | 22 | #include <asm/bug.h> |
925baedd CM |
23 | #include "ctree.h" |
24 | #include "extent_io.h" | |
25 | #include "locking.h" | |
26 | ||
/*
 * Take the per-extent-buffer spinlock.  All spinlock acquisitions in this
 * file funnel through this helper; the name reflects that tree locks are
 * routinely taken while locks on other tree blocks are already held.
 */
static inline void spin_nested(struct extent_buffer *eb)
{
	spin_lock(&eb->lock);
}
d397712b | 31 | |
/*
 * Setting a lock to blocking will drop the spinlock and set the
 * flag that forces other procs who want the lock to wait.  After
 * this you can safely schedule with the lock held.
 *
 * NOTE(review): this assumes the caller currently holds eb->lock
 * (it is released here) -- only the lock holder may mark it blocking,
 * which is why the test_bit/set_bit pair needs no extra atomicity.
 */
void btrfs_set_lock_blocking(struct extent_buffer *eb)
{
	if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
		/*
		 * The bit is set while still holding the spinlock, so any
		 * task that subsequently acquires eb->lock observes
		 * BLOCKING and knows the real owner is still active.
		 */
		set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
		spin_unlock(&eb->lock);
	}
	/* exit with the spin lock released and the bit set */
}
f9efa9c7 | 45 | |
/*
 * clearing the blocking flag will take the spinlock again.
 * After this you can't safely schedule
 */
void btrfs_clear_lock_blocking(struct extent_buffer *eb)
{
	if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
		/* re-acquire the spinlock before dropping the blocking bit */
		spin_nested(eb);
		clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
		/*
		 * order the clear against later loads/stores so other cpus
		 * spinning in btrfs_spin_on_block() see the bit go away
		 */
		smp_mb__after_clear_bit();
	}
	/* exit with the spin lock held */
}
59 | ||
60 | /* | |
61 | * unfortunately, many of the places that currently set a lock to blocking | |
d4a78947 WF |
62 | * don't end up blocking for very long, and often they don't block |
63 | * at all. For a dbench 50 run, if we don't spin on the blocking bit | |
b4ce94de CM |
64 | * at all, the context switch rate can jump up to 400,000/sec or more. |
65 | * | |
66 | * So, we're still stuck with this crummy spin on the blocking bit, | |
67 | * at least until the most common causes of the short blocks | |
68 | * can be dealt with. | |
69 | */ | |
70 | static int btrfs_spin_on_block(struct extent_buffer *eb) | |
71 | { | |
72 | int i; | |
66d7e85e | 73 | |
f9efa9c7 | 74 | for (i = 0; i < 512; i++) { |
b4ce94de CM |
75 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) |
76 | return 1; | |
77 | if (need_resched()) | |
78 | break; | |
66d7e85e | 79 | cpu_relax(); |
b4ce94de CM |
80 | } |
81 | return 0; | |
82 | } | |
83 | ||
84 | /* | |
85 | * This is somewhat different from trylock. It will take the | |
86 | * spinlock but if it finds the lock is set to blocking, it will | |
87 | * return without the lock held. | |
88 | * | |
89 | * returns 1 if it was able to take the lock and zero otherwise | |
90 | * | |
91 | * After this call, scheduling is not safe without first calling | |
92 | * btrfs_set_lock_blocking() | |
93 | */ | |
94 | int btrfs_try_spin_lock(struct extent_buffer *eb) | |
95 | { | |
96 | int i; | |
97 | ||
b9473439 CM |
98 | if (btrfs_spin_on_block(eb)) { |
99 | spin_nested(eb); | |
100 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | |
101 | return 1; | |
102 | spin_unlock(&eb->lock); | |
103 | } | |
b4ce94de CM |
104 | /* spin for a bit on the BLOCKING flag */ |
105 | for (i = 0; i < 2; i++) { | |
66d7e85e | 106 | cpu_relax(); |
b4ce94de CM |
107 | if (!btrfs_spin_on_block(eb)) |
108 | break; | |
109 | ||
110 | spin_nested(eb); | |
111 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | |
112 | return 1; | |
113 | spin_unlock(&eb->lock); | |
114 | } | |
115 | return 0; | |
116 | } | |
117 | ||
/*
 * the autoremove wake function will return 0 if it tried to wake up
 * a process that was already awake, which means that process won't
 * count as an exclusive wakeup.  The waitq code will continue waking
 * procs until it finds one that was actually sleeping.
 *
 * For btrfs, this isn't quite what we want.  We want a single proc
 * to be notified that the lock is ready for taking.  If that proc
 * already happens to be awake, great, it will loop around and try for
 * the lock.
 *
 * So, btrfs_wake_function always returns 1, even when the proc that we
 * tried to wake up was already awake.
 */
static int btrfs_wake_function(wait_queue_t *wait, unsigned mode,
			       int sync, void *key)
{
	/* attempt the wakeup; the result is deliberately discarded */
	autoremove_wake_function(wait, mode, sync, key);
	return 1;
}
138 | ||
/*
 * returns with the extent buffer spinlocked.
 *
 * This will spin and/or wait as required to take the lock, and then
 * return with the spinlock held.
 *
 * After this call, scheduling is not safe without first calling
 * btrfs_set_lock_blocking()
 */
int btrfs_tree_lock(struct extent_buffer *eb)
{
	DEFINE_WAIT(wait);
	/* override the default wake func so one wakeup always counts */
	wait.func = btrfs_wake_function;

	/* fast path failed to spin the bit clear: sleep straight away */
	if (!btrfs_spin_on_block(eb))
		goto sleep;

	while(1) {
		spin_nested(eb);

		/* nobody is blocking, exit with the spinlock held */
		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
			return 0;

		/*
		 * we have the spinlock, but the real owner is blocking.
		 * wait for them
		 */
		spin_unlock(&eb->lock);

		/*
		 * spin for a bit, and if the blocking flag goes away,
		 * loop around
		 */
		cpu_relax();
		if (btrfs_spin_on_block(eb))
			continue;
		/* NOTE: the label below is also entered from the goto above */
sleep:
		prepare_to_wait_exclusive(&eb->lock_wq, &wait,
					  TASK_UNINTERRUPTIBLE);

		/* re-check under the waitqueue to avoid a missed wakeup */
		if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
			schedule();

		finish_wait(&eb->lock_wq, &wait);
	}
	/* not reached; the loop only exits via return 0 above */
	return 0;
}
187 | ||
b4ce94de CM |
188 | /* |
189 | * Very quick trylock, this does not spin or schedule. It returns | |
190 | * 1 with the spinlock held if it was able to take the lock, or it | |
191 | * returns zero if it was unable to take the lock. | |
192 | * | |
193 | * After this call, scheduling is not safe without first calling | |
194 | * btrfs_set_lock_blocking() | |
195 | */ | |
925baedd CM |
196 | int btrfs_try_tree_lock(struct extent_buffer *eb) |
197 | { | |
b4ce94de CM |
198 | if (spin_trylock(&eb->lock)) { |
199 | if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { | |
200 | /* | |
201 | * we've got the spinlock, but the real owner is | |
202 | * blocking. Drop the spinlock and return failure | |
203 | */ | |
204 | spin_unlock(&eb->lock); | |
205 | return 0; | |
206 | } | |
207 | return 1; | |
208 | } | |
209 | /* someone else has the spinlock giveup */ | |
210 | return 0; | |
925baedd CM |
211 | } |
212 | ||
/*
 * Release the tree lock, whichever form (blocking or spinning) the
 * current owner holds, and wake a single waiter if there is one.
 */
int btrfs_tree_unlock(struct extent_buffer *eb)
{
	/*
	 * if we were a blocking owner, we don't have the spinlock held
	 * just clear the bit and look for waiters
	 */
	if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
		/* order the clear before the waitqueue_active() check below */
		smp_mb__after_clear_bit();
	else
		spin_unlock(&eb->lock);

	if (waitqueue_active(&eb->lock_wq))
		wake_up(&eb->lock_wq);
	return 0;
}
228 | ||
/*
 * Sanity check that the current task holds the tree lock on @eb.
 * A blocking owner does not hold the spinlock (see btrfs_set_lock_blocking),
 * so the spinlock assertion only applies when the BLOCKING bit is clear.
 */
void btrfs_assert_tree_locked(struct extent_buffer *eb)
{
	if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
		assert_spin_locked(&eb->lock);
}