/*
 * Copyright (C) 2008 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
#include <linux/sched.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"

/* helper to take the per-extent-buffer spinlock */
static inline void spin_nested(struct extent_buffer *eb)
{
        spin_lock(&eb->lock);
}

/*
 * Setting a lock to blocking will drop the spinlock and set the
 * flag that forces other procs who want the lock to wait. After
 * this you can safely schedule with the lock held.
 */
void btrfs_set_lock_blocking(struct extent_buffer *eb)
{
        if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
                set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
                spin_unlock(&eb->lock);
        }
        /* exit with the spin lock released and the bit set */
}

/*
 * Clearing the blocking flag will take the spinlock again.
 * After this you can't safely schedule.
 */
void btrfs_clear_lock_blocking(struct extent_buffer *eb)
{
        if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
                spin_nested(eb);
                clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
                smp_mb__after_clear_bit();
        }
        /* exit with the spin lock held */
}

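/*
 * Usage sketch (illustrative only, not part of the original file): a
 * caller that must sleep while logically holding the tree lock flips
 * the lock to blocking first, does the sleeping work, and then flips
 * it back. read_block_from_disk() is a hypothetical helper named only
 * for illustration.
 *
 *      btrfs_tree_lock(eb);            spinning lock held, no scheduling
 *      btrfs_set_lock_blocking(eb);    spinlock dropped, BLOCKING bit set
 *      read_block_from_disk(eb);       safe to schedule() here
 *      btrfs_clear_lock_blocking(eb);  spinlock re-taken, bit cleared
 *      btrfs_tree_unlock(eb);
 */
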
/*
 * Unfortunately, many of the places that currently set a lock to blocking
 * don't end up blocking for very long, and often they don't block
 * at all. For a dbench 50 run, if we don't spin on the blocking bit
 * at all, the context switch rate can jump up to 400,000/sec or more.
 *
 * So, we're still stuck with this crummy spin on the blocking bit,
 * at least until the most common causes of the short blocks
 * can be dealt with.
 */
static int btrfs_spin_on_block(struct extent_buffer *eb)
{
        int i;

        for (i = 0; i < 512; i++) {
                if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
                        return 1;
                if (need_resched())
                        break;
                cpu_relax();
        }
        return 0;
}

/*
 * This is somewhat different from trylock. It will take the
 * spinlock but if it finds the lock is set to blocking, it will
 * return without the lock held.
 *
 * returns 1 if it was able to take the lock and zero otherwise
 *
 * After this call, scheduling is not safe without first calling
 * btrfs_set_lock_blocking()
 */
int btrfs_try_spin_lock(struct extent_buffer *eb)
{
        int i;

        if (btrfs_spin_on_block(eb)) {
                spin_nested(eb);
                if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
                        return 1;
                spin_unlock(&eb->lock);
        }
        /* spin for a bit on the BLOCKING flag */
        for (i = 0; i < 2; i++) {
                cpu_relax();
                if (!btrfs_spin_on_block(eb))
                        break;

                spin_nested(eb);
                if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
                        return 1;
                spin_unlock(&eb->lock);
        }
        return 0;
}

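/*
 * Usage sketch (illustrative only): btrfs_try_spin_lock() works as a
 * fast path that avoids sleeping when the lock is uncontended or only
 * briefly blocked, with the full btrfs_tree_lock() as the fallback.
 *
 *      if (!btrfs_try_spin_lock(eb))
 *              btrfs_tree_lock(eb);
 *      ... eb->lock is now held; don't schedule without first
 *      ... calling btrfs_set_lock_blocking(eb)
 */
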
/*
 * The autoremove wake function will return 0 if it tried to wake up
 * a process that was already awake, which means that process won't
 * count as an exclusive wakeup. The waitqueue code will continue waking
 * procs until it finds one that was actually sleeping.
 *
 * For btrfs, this isn't quite what we want. We want a single proc
 * to be notified that the lock is ready for taking. If that proc
 * already happens to be awake, great, it will loop around and try for
 * the lock.
 *
 * So, btrfs_wake_function always returns 1, even when the proc that we
 * tried to wake up was already awake.
 */
static int btrfs_wake_function(wait_queue_t *wait, unsigned mode,
                               int sync, void *key)
{
        autoremove_wake_function(wait, mode, sync, key);
        return 1;
}

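/*
 * For reference (paraphrased from the generic waitqueue code of this
 * era, not part of this file): __wake_up_common() only charges a wakeup
 * against the exclusive-wakeup budget when the wake function returns
 * nonzero, roughly:
 *
 *      if (curr->func(curr, mode, sync, key) &&
 *          (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
 *              break;
 *
 * Always returning 1 therefore makes wake_up() stop at the first
 * exclusive waiter, even if that task was already running.
 */
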
/*
 * returns with the extent buffer spinlocked.
 *
 * This will spin and/or wait as required to take the lock, and then
 * return with the spinlock held.
 *
 * After this call, scheduling is not safe without first calling
 * btrfs_set_lock_blocking()
 */
int btrfs_tree_lock(struct extent_buffer *eb)
{
        DEFINE_WAIT(wait);
        wait.func = btrfs_wake_function;

        if (!btrfs_spin_on_block(eb))
                goto sleep;

        while (1) {
                spin_nested(eb);

                /* nobody is blocking, exit with the spinlock held */
                if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
                        return 0;

                /*
                 * we have the spinlock, but the real owner is blocking.
                 * wait for them
                 */
                spin_unlock(&eb->lock);

                /*
                 * spin for a bit, and if the blocking flag goes away,
                 * loop around
                 */
                cpu_relax();
                if (btrfs_spin_on_block(eb))
                        continue;
sleep:
                prepare_to_wait_exclusive(&eb->lock_wq, &wait,
                                          TASK_UNINTERRUPTIBLE);

                if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
                        schedule();

                finish_wait(&eb->lock_wq, &wait);
        }
        return 0;
}

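/*
 * Usage sketch (illustrative only): the common short critical section,
 * where the caller never sleeps and so never needs the blocking
 * transition. update_item() is a hypothetical stand-in for real
 * modification code.
 *
 *      btrfs_tree_lock(eb);       spins and/or waits until acquired
 *      update_item(eb);           short, non-sleeping work
 *      btrfs_tree_unlock(eb);     wakes the next waiter, if any
 */
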
/*
 * Very quick trylock, this does not spin or schedule. It returns
 * 1 with the spinlock held if it was able to take the lock, or it
 * returns zero if it was unable to take the lock.
 *
 * After this call, scheduling is not safe without first calling
 * btrfs_set_lock_blocking()
 */
int btrfs_try_tree_lock(struct extent_buffer *eb)
{
        if (spin_trylock(&eb->lock)) {
                if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
                        /*
                         * we've got the spinlock, but the real owner is
                         * blocking. Drop the spinlock and return failure
                         */
                        spin_unlock(&eb->lock);
                        return 0;
                }
                return 1;
        }
        /* someone else has the spinlock; give up */
        return 0;
}

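/*
 * Usage sketch (illustrative only): opportunistic work that skips
 * contended buffers rather than waiting on them. process_buffer() is
 * a hypothetical helper.
 *
 *      if (btrfs_try_tree_lock(eb)) {
 *              process_buffer(eb);     must not schedule here
 *              btrfs_tree_unlock(eb);
 *      }
 *      ... otherwise skip eb and move on ...
 */
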
int btrfs_tree_unlock(struct extent_buffer *eb)
{
        /*
         * if we were a blocking owner, we don't have the spinlock held;
         * just clear the bit and look for waiters
         */
        if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
                smp_mb__after_clear_bit();
        else
                spin_unlock(&eb->lock);

        if (waitqueue_active(&eb->lock_wq))
                wake_up(&eb->lock_wq);
        return 0;
}

void btrfs_assert_tree_locked(struct extent_buffer *eb)
{
        if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
                assert_spin_locked(&eb->lock);
}
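
/*
 * Usage sketch (illustrative only): helpers that require the lock can
 * check that requirement up front. The wrapper below is hypothetical;
 * btrfs_set_header_generation() is the real setget helper from ctree.h.
 *
 *      static void set_generation_locked(struct extent_buffer *eb, u64 gen)
 *      {
 *              btrfs_assert_tree_locked(eb);
 *              btrfs_set_header_generation(eb, gen);
 *      }
 */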