Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/kernel/power/swsusp.c | |
3 | * | |
96bc7aec | 4 | * This file provides code to write suspend image to swap and read it back. |
1da177e4 LT |
5 | * |
6 | * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> | |
25761b6e | 7 | * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz> |
1da177e4 LT |
8 | * |
9 | * This file is released under the GPLv2. | |
10 | * | |
11 | * I'd like to thank the following people for their work: | |
2e4d5822 | 12 | * |
1da177e4 LT |
13 | * Pavel Machek <pavel@ucw.cz>: |
14 | * Modifications, defectiveness pointing, being with me at the very beginning, | |
15 | * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17. | |
16 | * | |
2e4d5822 | 17 | * Steve Doddi <dirk@loth.demon.co.uk>: |
1da177e4 LT |
18 | * Support the possibility of hardware state restoring. |
19 | * | |
20 | * Raph <grey.havens@earthling.net>: | |
21 | * Support for preserving states of network devices and virtual console | |
22 | * (including X and svgatextmode) | |
23 | * | |
24 | * Kurt Garloff <garloff@suse.de>: | |
25 | * Straightened the critical function in order to prevent compilers from | |
26 | * playing tricks with local variables. | |
27 | * | |
28 | * Andreas Mohr <a.mohr@mailto.de> | |
29 | * | |
30 | * Alex Badea <vampire@go.ro>: | |
31 | * Fixed runaway init | |
32 | * | |
7088a5c0 RW |
33 | * Rafael J. Wysocki <rjw@sisk.pl> |
34 | * Added the swap map data structure and reworked the handling of swap | |
35 | * | |
1da177e4 LT |
36 | * More state savers are welcome. Especially for the scsi layer... |
37 | * | |
38 | * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt | |
39 | */ | |
40 | ||
41 | #include <linux/module.h> | |
42 | #include <linux/mm.h> | |
43 | #include <linux/suspend.h> | |
44 | #include <linux/smp_lock.h> | |
45 | #include <linux/file.h> | |
46 | #include <linux/utsname.h> | |
47 | #include <linux/version.h> | |
48 | #include <linux/delay.h> | |
1da177e4 | 49 | #include <linux/bitops.h> |
1da177e4 LT |
50 | #include <linux/spinlock.h> |
51 | #include <linux/genhd.h> | |
52 | #include <linux/kernel.h> | |
53 | #include <linux/major.h> | |
54 | #include <linux/swap.h> | |
55 | #include <linux/pm.h> | |
56 | #include <linux/device.h> | |
57 | #include <linux/buffer_head.h> | |
58 | #include <linux/swapops.h> | |
59 | #include <linux/bootmem.h> | |
60 | #include <linux/syscalls.h> | |
1da177e4 LT |
61 | #include <linux/highmem.h> |
62 | #include <linux/bio.h> | |
63 | ||
64 | #include <asm/uaccess.h> | |
65 | #include <asm/mmu_context.h> | |
66 | #include <asm/pgtable.h> | |
67 | #include <asm/tlbflush.h> | |
68 | #include <asm/io.h> | |
69 | ||
70 | #include "power.h" | |
71 | ||
/*
 * Highmem pages are not covered by the normal image copy, so they are
 * saved/restored separately when CONFIG_HIGHMEM is set; otherwise these
 * are no-op stubs that always succeed.
 */
#ifdef CONFIG_HIGHMEM
int save_highmem(void);
int restore_highmem(void);
#else
static int save_highmem(void) { return 0; }
static int restore_highmem(void) { return 0; }
#endif
79 | ||
/* Name of the resume partition, parsed from the "resume=" option elsewhere */
extern char resume_file[];

/* Signature written over the swap signature while a suspend image exists */
#define SWSUSP_SIG "S1SUSPEND"

/*
 * First page of the swap device.  The original swap signature is preserved
 * in orig_sig so it can be restored on resume; swsusp_info holds the swap
 * entry of the image header page.  The struct is padded to exactly one page.
 */
static struct swsusp_header {
	char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
	swp_entry_t swsusp_info;
	char orig_sig[10];
	char sig[10];
} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;

/* In-memory copy of the image header (version, utsname, page counts) */
static struct swsusp_info swsusp_info;
92 | ||
/*
 * Saving part...
 */

/* We memorize in swapfile_used what swap devices are used for suspension */
#define SWAPFILE_UNUSED 0
#define SWAPFILE_SUSPEND 1	/* This is the suspending device */
#define SWAPFILE_IGNORED 2	/* Those are other swap devices ignored for suspension */

/* Per-swap-device role, indexed by swap type; filled by swsusp_swap_check() */
static unsigned short swapfile_used[MAX_SWAPFILES];
/* Index of the device we suspend to; 0xFFFF means "none found" */
static unsigned short root_swap;
104 | ||
105 | static int mark_swapfiles(swp_entry_t prev) | |
106 | { | |
107 | int error; | |
108 | ||
2e4d5822 | 109 | rw_swap_page_sync(READ, |
1da177e4 LT |
110 | swp_entry(root_swap, 0), |
111 | virt_to_page((unsigned long)&swsusp_header)); | |
112 | if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || | |
113 | !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { | |
114 | memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); | |
115 | memcpy(swsusp_header.sig,SWSUSP_SIG, 10); | |
116 | swsusp_header.swsusp_info = prev; | |
2e4d5822 | 117 | error = rw_swap_page_sync(WRITE, |
1da177e4 LT |
118 | swp_entry(root_swap, 0), |
119 | virt_to_page((unsigned long) | |
120 | &swsusp_header)); | |
121 | } else { | |
122 | pr_debug("swsusp: Partition is not swap space.\n"); | |
123 | error = -ENODEV; | |
124 | } | |
125 | return error; | |
126 | } | |
127 | ||
128 | /* | |
129 | * Check whether the swap device is the specified resume | |
130 | * device, irrespective of whether they are specified by | |
131 | * identical names. | |
132 | * | |
133 | * (Thus, device inode aliasing is allowed. You can say /dev/hda4 | |
134 | * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs] | |
135 | * and they'll be considered the same device. This is *necessary* for | |
136 | * devfs, since the resume code can only recognize the form /dev/hda4, | |
137 | * but the suspend code would see the long name.) | |
138 | */ | |
139 | static int is_resume_device(const struct swap_info_struct *swap_info) | |
140 | { | |
141 | struct file *file = swap_info->swap_file; | |
142 | struct inode *inode = file->f_dentry->d_inode; | |
143 | ||
144 | return S_ISBLK(inode->i_mode) && | |
145 | swsusp_resume_device == MKDEV(imajor(inode), iminor(inode)); | |
146 | } | |
147 | ||
148 | static int swsusp_swap_check(void) /* This is called before saving image */ | |
149 | { | |
150 | int i, len; | |
2e4d5822 | 151 | |
1da177e4 LT |
152 | len=strlen(resume_file); |
153 | root_swap = 0xFFFF; | |
2e4d5822 | 154 | |
dae06ac4 | 155 | spin_lock(&swap_lock); |
2e4d5822 | 156 | for (i=0; i<MAX_SWAPFILES; i++) { |
dae06ac4 | 157 | if (!(swap_info[i].flags & SWP_WRITEOK)) { |
1da177e4 LT |
158 | swapfile_used[i]=SWAPFILE_UNUSED; |
159 | } else { | |
2e4d5822 | 160 | if (!len) { |
1da177e4 | 161 | printk(KERN_WARNING "resume= option should be used to set suspend device" ); |
2e4d5822 | 162 | if (root_swap == 0xFFFF) { |
1da177e4 LT |
163 | swapfile_used[i] = SWAPFILE_SUSPEND; |
164 | root_swap = i; | |
165 | } else | |
2e4d5822 | 166 | swapfile_used[i] = SWAPFILE_IGNORED; |
1da177e4 LT |
167 | } else { |
168 | /* we ignore all swap devices that are not the resume_file */ | |
169 | if (is_resume_device(&swap_info[i])) { | |
170 | swapfile_used[i] = SWAPFILE_SUSPEND; | |
171 | root_swap = i; | |
172 | } else { | |
173 | swapfile_used[i] = SWAPFILE_IGNORED; | |
174 | } | |
175 | } | |
176 | } | |
177 | } | |
dae06ac4 | 178 | spin_unlock(&swap_lock); |
1da177e4 LT |
179 | return (root_swap != 0xffff) ? 0 : -ENODEV; |
180 | } | |
181 | ||
/**
 *	This is called after saving image so modification
 *	will be lost after resume... and that's what we want.
 *	we make the device unusable. A new call to
 *	lock_swapdevices can unlock the devices.
 *
 *	Note: SWP_WRITEOK is toggled with XOR, so calling this function
 *	twice restores the original state — the second call after writing
 *	the image re-enables the ignored devices.
 */
static void lock_swapdevices(void)
{
	int i;

	spin_lock(&swap_lock);
	for (i = 0; i < MAX_SWAPFILES; i++)
		if (swapfile_used[i] == SWAPFILE_IGNORED) {
			/* XOR toggles the flag: lock on first call, unlock on second */
			swap_info[i].flags ^= SWP_WRITEOK;
		}
	spin_unlock(&swap_lock);
}
199 | ||
200 | /** | |
8686bcd0 | 201 | * write_page - Write one page to a fresh swap location. |
1da177e4 LT |
202 | * @addr: Address we're writing. |
203 | * @loc: Place to store the entry we used. | |
204 | * | |
205 | * Allocate a new swap entry and 'sync' it. Note we discard -EIO | |
2e4d5822 | 206 | * errors. That is an artifact left over from swsusp. It did not |
1da177e4 LT |
207 | * check the return of rw_swap_page_sync() at all, since most pages |
208 | * written back to swap would return -EIO. | |
209 | * This is a partial improvement, since we will at least return other | |
210 | * errors, though we need to eventually fix the damn code. | |
211 | */ | |
dc19d507 | 212 | static int write_page(unsigned long addr, swp_entry_t *loc) |
1da177e4 LT |
213 | { |
214 | swp_entry_t entry; | |
215 | int error = 0; | |
216 | ||
217 | entry = get_swap_page(); | |
2e4d5822 | 218 | if (swp_offset(entry) && |
1da177e4 LT |
219 | swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) { |
220 | error = rw_swap_page_sync(WRITE, entry, | |
221 | virt_to_page(addr)); | |
222 | if (error == -EIO) | |
223 | error = 0; | |
224 | if (!error) | |
225 | *loc = entry; | |
226 | } else | |
227 | error = -ENOSPC; | |
228 | return error; | |
229 | } | |
230 | ||
231 | /** | |
7088a5c0 RW |
232 | * Swap map-handling functions |
233 | * | |
234 | * The swap map is a data structure used for keeping track of each page | |
235 | * written to the swap. It consists of many swap_map_page structures | |
236 | * that contain each an array of MAP_PAGE_SIZE swap entries. | |
237 | * These structures are linked together with the help of either the | |
238 | * .next (in memory) or the .next_swap (in swap) member. | |
1da177e4 | 239 | * |
7088a5c0 RW |
240 | * The swap map is created during suspend. At that time we need to keep |
241 | * it in memory, because we have to free all of the allocated swap | |
242 | * entries if an error occurs. The memory needed is preallocated | |
243 | * so that we know in advance if there's enough of it. | |
244 | * | |
245 | * The first swap_map_page structure is filled with the swap entries that | |
246 | * correspond to the first MAP_PAGE_SIZE data pages written to swap and | |
247 | * so on. After the all of the data pages have been written, the order | |
248 | * of the swap_map_page structures in the map is reversed so that they | |
249 | * can be read from swap in the original order. This causes the data | |
250 | * pages to be loaded in exactly the same order in which they have been | |
251 | * saved. | |
252 | * | |
253 | * During resume we only need to use one swap_map_page structure | |
254 | * at a time, which means that we only need to use two memory pages for | |
255 | * reading the image - one for reading the swap_map_page structures | |
256 | * and the second for reading the data pages from swap. | |
1da177e4 | 257 | */ |
7088a5c0 RW |
258 | |
259 | #define MAP_PAGE_SIZE ((PAGE_SIZE - sizeof(swp_entry_t) - sizeof(void *)) \ | |
260 | / sizeof(swp_entry_t)) | |
261 | ||
262 | struct swap_map_page { | |
263 | swp_entry_t entries[MAP_PAGE_SIZE]; | |
264 | swp_entry_t next_swap; | |
265 | struct swap_map_page *next; | |
266 | }; | |
267 | ||
268 | static inline void free_swap_map(struct swap_map_page *swap_map) | |
1da177e4 | 269 | { |
7088a5c0 | 270 | struct swap_map_page *swp; |
1da177e4 | 271 | |
7088a5c0 RW |
272 | while (swap_map) { |
273 | swp = swap_map->next; | |
274 | free_page((unsigned long)swap_map); | |
275 | swap_map = swp; | |
276 | } | |
277 | } | |
278 | ||
279 | static struct swap_map_page *alloc_swap_map(unsigned int nr_pages) | |
280 | { | |
281 | struct swap_map_page *swap_map, *swp; | |
282 | unsigned n = 0; | |
283 | ||
284 | if (!nr_pages) | |
285 | return NULL; | |
286 | ||
287 | pr_debug("alloc_swap_map(): nr_pages = %d\n", nr_pages); | |
288 | swap_map = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); | |
289 | swp = swap_map; | |
290 | for (n = MAP_PAGE_SIZE; n < nr_pages; n += MAP_PAGE_SIZE) { | |
291 | swp->next = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); | |
292 | swp = swp->next; | |
293 | if (!swp) { | |
294 | free_swap_map(swap_map); | |
295 | return NULL; | |
296 | } | |
1da177e4 | 297 | } |
7088a5c0 | 298 | return swap_map; |
1da177e4 LT |
299 | } |
300 | ||
301 | /** | |
7088a5c0 RW |
302 | * reverse_swap_map - reverse the order of pages in the swap map |
303 | * @swap_map | |
1da177e4 | 304 | */ |
7088a5c0 RW |
305 | |
306 | static inline struct swap_map_page *reverse_swap_map(struct swap_map_page *swap_map) | |
1da177e4 | 307 | { |
7088a5c0 RW |
308 | struct swap_map_page *prev, *next; |
309 | ||
310 | prev = NULL; | |
311 | while (swap_map) { | |
312 | next = swap_map->next; | |
313 | swap_map->next = prev; | |
314 | prev = swap_map; | |
315 | swap_map = next; | |
316 | } | |
317 | return prev; | |
318 | } | |
1da177e4 | 319 | |
7088a5c0 RW |
320 | /** |
321 | * free_swap_map_entries - free the swap entries allocated to store | |
322 | * the swap map @swap_map (this is only called in case of an error) | |
323 | */ | |
324 | static inline void free_swap_map_entries(struct swap_map_page *swap_map) | |
325 | { | |
326 | while (swap_map) { | |
327 | if (swap_map->next_swap.val) | |
328 | swap_free(swap_map->next_swap); | |
329 | swap_map = swap_map->next; | |
330 | } | |
331 | } | |
1da177e4 | 332 | |
7088a5c0 RW |
333 | /** |
334 | * save_swap_map - save the swap map used for tracing the data pages | |
335 | * stored in the swap | |
336 | */ | |
337 | ||
338 | static int save_swap_map(struct swap_map_page *swap_map, swp_entry_t *start) | |
339 | { | |
340 | swp_entry_t entry = (swp_entry_t){0}; | |
341 | int error; | |
342 | ||
343 | while (swap_map) { | |
344 | swap_map->next_swap = entry; | |
345 | if ((error = write_page((unsigned long)swap_map, &entry))) | |
1da177e4 | 346 | return error; |
7088a5c0 | 347 | swap_map = swap_map->next; |
1da177e4 | 348 | } |
7088a5c0 RW |
349 | *start = entry; |
350 | return 0; | |
351 | } | |
352 | ||
353 | /** | |
354 | * free_image_entries - free the swap entries allocated to store | |
355 | * the image data pages (this is only called in case of an error) | |
356 | */ | |
357 | ||
358 | static inline void free_image_entries(struct swap_map_page *swp) | |
359 | { | |
360 | unsigned k; | |
361 | ||
362 | while (swp) { | |
363 | for (k = 0; k < MAP_PAGE_SIZE; k++) | |
364 | if (swp->entries[k].val) | |
365 | swap_free(swp->entries[k]); | |
366 | swp = swp->next; | |
367 | } | |
368 | } | |
369 | ||
/**
 *	The swap_map_handle structure is used for handling the swap map in
 *	a file-alike way
 */

struct swap_map_handle {
	struct swap_map_page *cur;	/* map page currently being filled/read */
	unsigned int k;			/* next entry index within *cur */
};

/* Position the handle at the first entry of map page @map. */
static inline void init_swap_map_handle(struct swap_map_handle *handle,
					struct swap_map_page *map)
{
	handle->cur = map;
	handle->k = 0;
}
386 | ||
387 | static inline int swap_map_write_page(struct swap_map_handle *handle, | |
388 | unsigned long addr) | |
389 | { | |
390 | int error; | |
391 | ||
392 | error = write_page(addr, handle->cur->entries + handle->k); | |
393 | if (error) | |
394 | return error; | |
395 | if (++handle->k >= MAP_PAGE_SIZE) { | |
396 | handle->cur = handle->cur->next; | |
397 | handle->k = 0; | |
398 | } | |
399 | return 0; | |
400 | } | |
401 | ||
402 | /** | |
403 | * save_image_data - save the data pages pointed to by the PBEs | |
404 | * from the list @pblist using the swap map handle @handle | |
405 | * (assume there are @nr_pages data pages to save) | |
406 | */ | |
407 | ||
408 | static int save_image_data(struct pbe *pblist, | |
409 | struct swap_map_handle *handle, | |
410 | unsigned int nr_pages) | |
411 | { | |
412 | unsigned int m; | |
413 | struct pbe *p; | |
414 | int error = 0; | |
415 | ||
416 | printk("Saving image data pages (%u pages) ... ", nr_pages); | |
417 | m = nr_pages / 100; | |
418 | if (!m) | |
419 | m = 1; | |
420 | nr_pages = 0; | |
421 | for_each_pbe (p, pblist) { | |
422 | error = swap_map_write_page(handle, p->address); | |
423 | if (error) | |
424 | break; | |
425 | if (!(nr_pages % m)) | |
426 | printk("\b\b\b\b%3d%%", nr_pages / m); | |
427 | nr_pages++; | |
428 | } | |
429 | if (!error) | |
430 | printk("\b\b\b\bdone\n"); | |
1da177e4 LT |
431 | return error; |
432 | } | |
433 | ||
/* Log the contents of the image header (debug builds only). */
static void dump_info(void)
{
	pr_debug(" swsusp: Version: %u\n",swsusp_info.version_code);
	pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info.num_physpages);
	pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info.uts.sysname);
	pr_debug(" swsusp: UTS Node: %s\n",swsusp_info.uts.nodename);
	pr_debug(" swsusp: UTS Release: %s\n",swsusp_info.uts.release);
	pr_debug(" swsusp: UTS Version: %s\n",swsusp_info.uts.version);
	pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info.uts.machine);
	pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname);
	pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus);
	pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages);
	pr_debug(" swsusp: Total: %ld Pages\n", swsusp_info.pages);
}
448 | ||
7088a5c0 | 449 | static void init_header(unsigned int nr_pages) |
1da177e4 LT |
450 | { |
451 | memset(&swsusp_info, 0, sizeof(swsusp_info)); | |
452 | swsusp_info.version_code = LINUX_VERSION_CODE; | |
453 | swsusp_info.num_physpages = num_physpages; | |
454 | memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname)); | |
455 | ||
1da177e4 | 456 | swsusp_info.cpus = num_online_cpus(); |
7088a5c0 RW |
457 | swsusp_info.image_pages = nr_pages; |
458 | swsusp_info.pages = nr_pages + | |
459 | ((nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT); | |
1da177e4 LT |
460 | } |
461 | ||
462 | static int close_swap(void) | |
463 | { | |
464 | swp_entry_t entry; | |
465 | int error; | |
466 | ||
467 | dump_info(); | |
468 | error = write_page((unsigned long)&swsusp_info, &entry); | |
2e4d5822 | 469 | if (!error) { |
1da177e4 LT |
470 | printk( "S" ); |
471 | error = mark_swapfiles(entry); | |
472 | printk( "|\n" ); | |
473 | } | |
474 | return error; | |
475 | } | |
476 | ||
477 | /** | |
7088a5c0 RW |
478 | * pack_orig_addresses - the .orig_address fields of the PBEs from the |
479 | * list starting at @pbe are stored in the array @buf[] (1 page) | |
1da177e4 LT |
480 | */ |
481 | ||
7088a5c0 RW |
482 | static inline struct pbe *pack_orig_addresses(unsigned long *buf, |
483 | struct pbe *pbe) | |
1da177e4 | 484 | { |
7088a5c0 | 485 | int j; |
1da177e4 | 486 | |
7088a5c0 RW |
487 | for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) { |
488 | buf[j] = pbe->orig_address; | |
489 | pbe = pbe->next; | |
490 | } | |
491 | if (!pbe) | |
492 | for (; j < PAGE_SIZE / sizeof(long); j++) | |
493 | buf[j] = 0; | |
494 | return pbe; | |
1da177e4 LT |
495 | } |
496 | ||
1da177e4 | 497 | /** |
7088a5c0 RW |
498 | * save_image_metadata - save the .orig_address fields of the PBEs |
499 | * from the list @pblist using the swap map handle @handle | |
1da177e4 LT |
500 | */ |
501 | ||
7088a5c0 RW |
502 | static int save_image_metadata(struct pbe *pblist, |
503 | struct swap_map_handle *handle) | |
1da177e4 | 504 | { |
7088a5c0 | 505 | unsigned long *buf; |
dc19d507 | 506 | unsigned int n = 0; |
7088a5c0 RW |
507 | struct pbe *p; |
508 | int error = 0; | |
1da177e4 | 509 | |
7088a5c0 RW |
510 | printk("Saving image metadata ... "); |
511 | buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC); | |
512 | if (!buf) | |
513 | return -ENOMEM; | |
514 | p = pblist; | |
515 | while (p) { | |
516 | p = pack_orig_addresses(buf, p); | |
517 | error = swap_map_write_page(handle, (unsigned long)buf); | |
518 | if (error) | |
519 | break; | |
520 | n++; | |
1da177e4 | 521 | } |
7088a5c0 RW |
522 | free_page((unsigned long)buf); |
523 | if (!error) | |
524 | printk("done (%u pages saved)\n", n); | |
1da177e4 LT |
525 | return error; |
526 | } | |
527 | ||
0fbeb5a4 RW |
528 | /** |
529 | * enough_swap - Make sure we have enough swap to save the image. | |
530 | * | |
531 | * Returns TRUE or FALSE after checking the total amount of swap | |
532 | * space avaiable. | |
533 | * | |
534 | * FIXME: si_swapinfo(&i) returns all swap devices information. | |
535 | * We should only consider resume_device. | |
536 | */ | |
537 | ||
538 | static int enough_swap(unsigned int nr_pages) | |
539 | { | |
540 | struct sysinfo i; | |
541 | ||
542 | si_swapinfo(&i); | |
543 | pr_debug("swsusp: available swap: %lu pages\n", i.freeswap); | |
544 | return i.freeswap > (nr_pages + PAGES_FOR_IO + | |
545 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); | |
546 | } | |
547 | ||
1da177e4 LT |
548 | /** |
549 | * write_suspend_image - Write entire image and metadata. | |
1da177e4 | 550 | */ |
7088a5c0 | 551 | static int write_suspend_image(struct pbe *pblist, unsigned int nr_pages) |
1da177e4 | 552 | { |
7088a5c0 RW |
553 | struct swap_map_page *swap_map; |
554 | struct swap_map_handle handle; | |
1da177e4 LT |
555 | int error; |
556 | ||
7088a5c0 | 557 | if (!enough_swap(nr_pages)) { |
0fbeb5a4 RW |
558 | printk(KERN_ERR "swsusp: Not enough free swap\n"); |
559 | return -ENOSPC; | |
560 | } | |
561 | ||
7088a5c0 RW |
562 | init_header(nr_pages); |
563 | swap_map = alloc_swap_map(swsusp_info.pages); | |
564 | if (!swap_map) | |
565 | return -ENOMEM; | |
566 | init_swap_map_handle(&handle, swap_map); | |
1da177e4 | 567 | |
7088a5c0 RW |
568 | error = save_image_metadata(pblist, &handle); |
569 | if (!error) | |
570 | error = save_image_data(pblist, &handle, nr_pages); | |
571 | if (error) | |
572 | goto Free_image_entries; | |
1da177e4 | 573 | |
7088a5c0 RW |
574 | swap_map = reverse_swap_map(swap_map); |
575 | error = save_swap_map(swap_map, &swsusp_info.start); | |
576 | if (error) | |
577 | goto Free_map_entries; | |
578 | ||
579 | error = close_swap(); | |
580 | if (error) | |
581 | goto Free_map_entries; | |
582 | ||
583 | Free_swap_map: | |
584 | free_swap_map(swap_map); | |
1da177e4 | 585 | return error; |
7088a5c0 RW |
586 | |
587 | Free_map_entries: | |
588 | free_swap_map_entries(swap_map); | |
589 | Free_image_entries: | |
590 | free_image_entries(swap_map); | |
591 | goto Free_swap_map; | |
1da177e4 LT |
592 | } |
593 | ||
1da177e4 LT |
594 | /* It is important _NOT_ to umount filesystems at this point. We want |
595 | * them synced (in case something goes wrong) but we DO not want to mark | |
596 | * filesystem clean: it is not. (And it does not matter, if we resume | |
597 | * correctly, we'll mark system clean, anyway.) | |
598 | */ | |
7088a5c0 | 599 | int swsusp_write(struct pbe *pblist, unsigned int nr_pages) |
1da177e4 LT |
600 | { |
601 | int error; | |
0245b3e7 | 602 | |
0fbeb5a4 RW |
603 | if ((error = swsusp_swap_check())) { |
604 | printk(KERN_ERR "swsusp: cannot find swap device, try swapon -a.\n"); | |
605 | return error; | |
606 | } | |
1da177e4 | 607 | lock_swapdevices(); |
7088a5c0 | 608 | error = write_suspend_image(pblist, nr_pages); |
1da177e4 LT |
609 | /* This will unlock ignored swap devices since writing is finished */ |
610 | lock_swapdevices(); | |
611 | return error; | |
1da177e4 LT |
612 | } |
613 | ||
1da177e4 LT |
614 | int swsusp_suspend(void) |
615 | { | |
616 | int error; | |
0fbeb5a4 | 617 | |
1da177e4 LT |
618 | if ((error = arch_prepare_suspend())) |
619 | return error; | |
620 | local_irq_disable(); | |
621 | /* At this point, device_suspend() has been called, but *not* | |
622 | * device_power_down(). We *must* device_power_down() now. | |
623 | * Otherwise, drivers for some devices (e.g. interrupt controllers) | |
624 | * become desynchronized with the actual state of the hardware | |
625 | * at resume time, and evil weirdness ensues. | |
626 | */ | |
627 | if ((error = device_power_down(PMSG_FREEZE))) { | |
99dc7d63 | 628 | printk(KERN_ERR "Some devices failed to power down, aborting suspend\n"); |
0fbeb5a4 | 629 | goto Enable_irqs; |
1da177e4 | 630 | } |
47b724f3 | 631 | |
0fbeb5a4 RW |
632 | if ((error = save_highmem())) { |
633 | printk(KERN_ERR "swsusp: Not enough free pages for highmem\n"); | |
634 | goto Restore_highmem; | |
47b724f3 PM |
635 | } |
636 | ||
1da177e4 LT |
637 | save_processor_state(); |
638 | if ((error = swsusp_arch_suspend())) | |
99dc7d63 | 639 | printk(KERN_ERR "Error %d suspending\n", error); |
1da177e4 LT |
640 | /* Restore control flow magically appears here */ |
641 | restore_processor_state(); | |
0fbeb5a4 | 642 | Restore_highmem: |
1da177e4 LT |
643 | restore_highmem(); |
644 | device_power_up(); | |
0fbeb5a4 | 645 | Enable_irqs: |
1da177e4 LT |
646 | local_irq_enable(); |
647 | return error; | |
648 | } | |
649 | ||
/* Hand control to the image: power down devices and jump into the arch
 * resume code.  On success execution never returns here — it continues
 * where swsusp_arch_suspend() was called in the saved image. */
int swsusp_resume(void)
{
	int error;
	local_irq_disable();
	if (device_power_down(PMSG_FREEZE))
		printk(KERN_ERR "Some devices failed to power down, very bad\n");
	/* We'll ignore saved state, but this gets preempt count (etc) right */
	save_processor_state();
	error = swsusp_arch_resume();
	/* Code below is only ever reached in case of failure. Otherwise
	 * execution continues at place where swsusp_arch_suspend was called
	 */
	BUG_ON(!error);
	/* The only reason why swsusp_arch_resume() can fail is memory being
	 * very tight, so we have to free it as soon as we can to avoid
	 * subsequent failures
	 */
	swsusp_free();
	restore_processor_state();
	restore_highmem();
	touch_softlockup_watchdog();
	device_power_up();
	local_irq_enable();
	return error;
}
675 | ||
1da177e4 | 676 | /** |
ed14b527 RW |
677 | * mark_unsafe_pages - mark the pages that cannot be used for storing |
678 | * the image during resume, because they conflict with the pages that | |
679 | * had been used before suspend | |
1da177e4 LT |
680 | */ |
681 | ||
ed14b527 | 682 | static void mark_unsafe_pages(struct pbe *pblist) |
1da177e4 LT |
683 | { |
684 | struct zone *zone; | |
685 | unsigned long zone_pfn; | |
ed14b527 | 686 | struct pbe *p; |
1da177e4 LT |
687 | |
688 | if (!pblist) /* a sanity check */ | |
ed14b527 | 689 | return; |
1da177e4 | 690 | |
2c1b4a5c | 691 | /* Clear page flags */ |
2e4d5822 | 692 | for_each_zone (zone) { |
ed14b527 RW |
693 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) |
694 | if (pfn_valid(zone_pfn + zone->zone_start_pfn)) | |
695 | ClearPageNosaveFree(pfn_to_page(zone_pfn + | |
1da177e4 LT |
696 | zone->zone_start_pfn)); |
697 | } | |
698 | ||
2c1b4a5c | 699 | /* Mark orig addresses */ |
1da177e4 | 700 | for_each_pbe (p, pblist) |
2c1b4a5c | 701 | SetPageNosaveFree(virt_to_page(p->orig_address)); |
1da177e4 | 702 | |
ed14b527 | 703 | } |
1da177e4 | 704 | |
ed14b527 RW |
705 | static void copy_page_backup_list(struct pbe *dst, struct pbe *src) |
706 | { | |
707 | /* We assume both lists contain the same number of elements */ | |
708 | while (src) { | |
709 | dst->orig_address = src->orig_address; | |
ed14b527 RW |
710 | dst = dst->next; |
711 | src = src->next; | |
2c1b4a5c | 712 | } |
1da177e4 LT |
713 | } |
714 | ||
4dc3b16b | 715 | /* |
1da177e4 LT |
716 | * Using bio to read from swap. |
717 | * This code requires a bit more work than just using buffer heads | |
718 | * but, it is the recommended way for 2.5/2.6. | |
719 | * The following are to signal the beginning and end of I/O. Bios | |
720 | * finish asynchronously, while we want them to happen synchronously. | |
721 | * A simple atomic_t, and a wait loop take care of this problem. | |
722 | */ | |
723 | ||
724 | static atomic_t io_done = ATOMIC_INIT(0); | |
725 | ||
dc19d507 | 726 | static int end_io(struct bio *bio, unsigned int num, int err) |
1da177e4 LT |
727 | { |
728 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | |
729 | panic("I/O error reading memory image"); | |
730 | atomic_set(&io_done, 0); | |
731 | return 0; | |
732 | } | |
733 | ||
dc19d507 | 734 | static struct block_device *resume_bdev; |
1da177e4 LT |
735 | |
736 | /** | |
737 | * submit - submit BIO request. | |
738 | * @rw: READ or WRITE. | |
739 | * @off physical offset of page. | |
740 | * @page: page we're reading or writing. | |
741 | * | |
742 | * Straight from the textbook - allocate and initialize the bio. | |
743 | * If we're writing, make sure the page is marked as dirty. | |
744 | * Then submit it and wait. | |
745 | */ | |
746 | ||
dc19d507 | 747 | static int submit(int rw, pgoff_t page_off, void *page) |
1da177e4 LT |
748 | { |
749 | int error = 0; | |
dc19d507 | 750 | struct bio *bio; |
1da177e4 LT |
751 | |
752 | bio = bio_alloc(GFP_ATOMIC, 1); | |
753 | if (!bio) | |
754 | return -ENOMEM; | |
755 | bio->bi_sector = page_off * (PAGE_SIZE >> 9); | |
756 | bio_get(bio); | |
757 | bio->bi_bdev = resume_bdev; | |
758 | bio->bi_end_io = end_io; | |
759 | ||
760 | if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) { | |
761 | printk("swsusp: ERROR: adding page to bio at %ld\n",page_off); | |
762 | error = -EFAULT; | |
763 | goto Done; | |
764 | } | |
765 | ||
766 | if (rw == WRITE) | |
767 | bio_set_pages_dirty(bio); | |
768 | ||
769 | atomic_set(&io_done, 1); | |
770 | submit_bio(rw | (1 << BIO_RW_SYNC), bio); | |
771 | while (atomic_read(&io_done)) | |
772 | yield(); | |
773 | ||
774 | Done: | |
775 | bio_put(bio); | |
776 | return error; | |
777 | } | |
778 | ||
dc19d507 | 779 | static int bio_read_page(pgoff_t page_off, void *page) |
1da177e4 LT |
780 | { |
781 | return submit(READ, page_off, page); | |
782 | } | |
783 | ||
dc19d507 | 784 | static int bio_write_page(pgoff_t page_off, void *page) |
1da177e4 LT |
785 | { |
786 | return submit(WRITE, page_off, page); | |
787 | } | |
788 | ||
7088a5c0 RW |
789 | /** |
790 | * The following functions allow us to read data using a swap map | |
791 | * in a file-alike way | |
792 | */ | |
793 | ||
794 | static inline void release_swap_map_reader(struct swap_map_handle *handle) | |
795 | { | |
796 | if (handle->cur) | |
797 | free_page((unsigned long)handle->cur); | |
798 | handle->cur = NULL; | |
799 | } | |
800 | ||
801 | static inline int get_swap_map_reader(struct swap_map_handle *handle, | |
802 | swp_entry_t start) | |
803 | { | |
804 | int error; | |
805 | ||
806 | if (!swp_offset(start)) | |
807 | return -EINVAL; | |
808 | handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); | |
809 | if (!handle->cur) | |
810 | return -ENOMEM; | |
811 | error = bio_read_page(swp_offset(start), handle->cur); | |
812 | if (error) { | |
813 | release_swap_map_reader(handle); | |
814 | return error; | |
815 | } | |
816 | handle->k = 0; | |
817 | return 0; | |
818 | } | |
819 | ||
/* Read the next data page of the image into @buf, advancing the handle.
 * When the current map page is exhausted, the next one is loaded from
 * swap in place (or the reader is released at end of chain). */
static inline int swap_map_read_page(struct swap_map_handle *handle, void *buf)
{
	unsigned long offset;
	int error;

	if (!handle->cur)
		return -EINVAL;
	offset = swp_offset(handle->cur->entries[handle->k]);
	if (!offset)
		return -EINVAL;
	error = bio_read_page(offset, buf);
	if (error)
		return error;
	if (++handle->k >= MAP_PAGE_SIZE) {
		handle->k = 0;
		offset = swp_offset(handle->cur->next_swap);
		if (!offset)
			/* end of the on-swap map chain */
			release_swap_map_reader(handle);
		else
			/* load the next map page over the current one */
			error = bio_read_page(offset, handle->cur);
	}
	return error;
}
843 | ||
1da177e4 LT |
844 | /* |
845 | * Sanity check if this image makes sense with this kernel/swap context | |
846 | * I really don't think that it's foolproof but more than nothing.. | |
847 | */ | |
848 | ||
dc19d507 | 849 | static const char *sanity_check(void) |
1da177e4 LT |
850 | { |
851 | dump_info(); | |
47b724f3 | 852 | if (swsusp_info.version_code != LINUX_VERSION_CODE) |
1da177e4 | 853 | return "kernel version"; |
47b724f3 | 854 | if (swsusp_info.num_physpages != num_physpages) |
1da177e4 LT |
855 | return "memory size"; |
856 | if (strcmp(swsusp_info.uts.sysname,system_utsname.sysname)) | |
857 | return "system type"; | |
858 | if (strcmp(swsusp_info.uts.release,system_utsname.release)) | |
859 | return "kernel release"; | |
860 | if (strcmp(swsusp_info.uts.version,system_utsname.version)) | |
861 | return "version"; | |
862 | if (strcmp(swsusp_info.uts.machine,system_utsname.machine)) | |
863 | return "machine"; | |
5a72e04d | 864 | #if 0 |
99dc7d63 PM |
865 | /* We can't use number of online CPUs when we use hotplug to remove them ;-))) */ |
866 | if (swsusp_info.cpus != num_possible_cpus()) | |
1da177e4 | 867 | return "number of cpus"; |
5a72e04d | 868 | #endif |
1da177e4 LT |
869 | return NULL; |
870 | } | |
871 | ||
1da177e4 LT |
872 | static int check_header(void) |
873 | { | |
dc19d507 | 874 | const char *reason = NULL; |
1da177e4 LT |
875 | int error; |
876 | ||
877 | if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info))) | |
878 | return error; | |
879 | ||
880 | /* Is this same machine? */ | |
881 | if ((reason = sanity_check())) { | |
882 | printk(KERN_ERR "swsusp: Resume mismatch: %s\n",reason); | |
883 | return -EPERM; | |
884 | } | |
1da177e4 LT |
885 | return error; |
886 | } | |
887 | ||
888 | static int check_sig(void) | |
889 | { | |
890 | int error; | |
891 | ||
892 | memset(&swsusp_header, 0, sizeof(swsusp_header)); | |
893 | if ((error = bio_read_page(0, &swsusp_header))) | |
894 | return error; | |
895 | if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { | |
896 | memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); | |
897 | ||
898 | /* | |
899 | * Reset swap signature now. | |
900 | */ | |
901 | error = bio_write_page(0, &swsusp_header); | |
0fbeb5a4 | 902 | } else { |
1da177e4 LT |
903 | return -EINVAL; |
904 | } | |
905 | if (!error) | |
906 | pr_debug("swsusp: Signature found, resuming\n"); | |
907 | return error; | |
908 | } | |
909 | ||
910 | /** | |
7088a5c0 RW |
911 | * load_image_data - load the image data using the swap map handle |
912 | * @handle and store them using the page backup list @pblist | |
913 | * (assume there are @nr_pages pages to load) | |
1da177e4 LT |
914 | */ |
915 | ||
7088a5c0 RW |
916 | static int load_image_data(struct pbe *pblist, |
917 | struct swap_map_handle *handle, | |
918 | unsigned int nr_pages) | |
1da177e4 | 919 | { |
7088a5c0 RW |
920 | int error; |
921 | unsigned int m; | |
dc19d507 | 922 | struct pbe *p; |
1da177e4 | 923 | |
7088a5c0 RW |
924 | if (!pblist) |
925 | return -EINVAL; | |
926 | printk("Loading image data pages (%u pages) ... ", nr_pages); | |
927 | m = nr_pages / 100; | |
928 | if (!m) | |
929 | m = 1; | |
930 | nr_pages = 0; | |
931 | p = pblist; | |
932 | while (p) { | |
933 | error = swap_map_read_page(handle, (void *)p->address); | |
934 | if (error) | |
935 | break; | |
936 | p = p->next; | |
937 | if (!(nr_pages % m)) | |
938 | printk("\b\b\b\b%3d%%", nr_pages / m); | |
939 | nr_pages++; | |
1da177e4 | 940 | } |
7088a5c0 RW |
941 | if (!error) |
942 | printk("\b\b\b\bdone\n"); | |
1da177e4 LT |
943 | return error; |
944 | } | |
945 | ||
1da177e4 | 946 | /** |
7088a5c0 RW |
947 | * unpack_orig_addresses - copy the elements of @buf[] (1 page) to |
948 | * the PBEs in the list starting at @pbe | |
1da177e4 LT |
949 | */ |
950 | ||
7088a5c0 RW |
951 | static inline struct pbe *unpack_orig_addresses(unsigned long *buf, |
952 | struct pbe *pbe) | |
1da177e4 | 953 | { |
7088a5c0 | 954 | int j; |
1da177e4 | 955 | |
7088a5c0 RW |
956 | for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) { |
957 | pbe->orig_address = buf[j]; | |
958 | pbe = pbe->next; | |
959 | } | |
960 | return pbe; | |
961 | } | |
1da177e4 | 962 | |
7088a5c0 RW |
963 | /** |
964 | * load_image_metadata - load the image metadata using the swap map | |
965 | * handle @handle and put them into the PBEs in the list @pblist | |
966 | */ | |
1da177e4 | 967 | |
7088a5c0 RW |
968 | static int load_image_metadata(struct pbe *pblist, struct swap_map_handle *handle) |
969 | { | |
970 | struct pbe *p; | |
971 | unsigned long *buf; | |
972 | unsigned int n = 0; | |
973 | int error = 0; | |
1da177e4 | 974 | |
7088a5c0 RW |
975 | printk("Loading image metadata ... "); |
976 | buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC); | |
977 | if (!buf) | |
978 | return -ENOMEM; | |
979 | p = pblist; | |
980 | while (p) { | |
981 | error = swap_map_read_page(handle, buf); | |
1da177e4 LT |
982 | if (error) |
983 | break; | |
7088a5c0 RW |
984 | p = unpack_orig_addresses(buf, p); |
985 | n++; | |
1da177e4 | 986 | } |
7088a5c0 | 987 | free_page((unsigned long)buf); |
2c1b4a5c | 988 | if (!error) |
7088a5c0 | 989 | printk("done (%u pages loaded)\n", n); |
1da177e4 LT |
990 | return error; |
991 | } | |
992 | ||
1da177e4 LT |
/*
 * Verify both the on-disk signature and the image header; the first
 * failing check wins.
 */
static int check_suspend_image(void)
{
	int error;

	error = check_sig();
	if (!error)
		error = check_header();
	return error;
}
1005 | ||
7088a5c0 | 1006 | static int read_suspend_image(struct pbe **pblist_ptr) |
1da177e4 LT |
1007 | { |
1008 | int error = 0; | |
7088a5c0 RW |
1009 | struct pbe *p, *pblist; |
1010 | struct swap_map_handle handle; | |
1011 | unsigned int nr_pages = swsusp_info.image_pages; | |
1da177e4 | 1012 | |
7088a5c0 RW |
1013 | p = alloc_pagedir(nr_pages, GFP_ATOMIC, 0); |
1014 | if (!p) | |
1da177e4 | 1015 | return -ENOMEM; |
7088a5c0 RW |
1016 | error = get_swap_map_reader(&handle, swsusp_info.start); |
1017 | if (error) | |
1018 | /* The PBE list at p will be released by swsusp_free() */ | |
1da177e4 | 1019 | return error; |
7088a5c0 RW |
1020 | error = load_image_metadata(p, &handle); |
1021 | if (!error) { | |
1022 | mark_unsafe_pages(p); | |
1023 | pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1); | |
1024 | if (pblist) | |
1025 | copy_page_backup_list(pblist, p); | |
1026 | free_pagedir(p); | |
1027 | if (!pblist) | |
1028 | error = -ENOMEM; | |
1029 | ||
1030 | /* Allocate memory for the image and read the data from swap */ | |
1031 | if (!error) | |
1032 | error = alloc_data_pages(pblist, GFP_ATOMIC, 1); | |
1033 | if (!error) | |
1034 | error = load_image_data(pblist, &handle, nr_pages); | |
1035 | if (!error) | |
1036 | *pblist_ptr = pblist; | |
ed14b527 | 1037 | } |
7088a5c0 | 1038 | release_swap_map_reader(&handle); |
1da177e4 LT |
1039 | return error; |
1040 | } | |
1041 | ||
1042 | /** | |
1043 | * swsusp_check - Check for saved image in swap | |
1044 | */ | |
1045 | ||
1046 | int swsusp_check(void) | |
1047 | { | |
1048 | int error; | |
1049 | ||
1da177e4 LT |
1050 | resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); |
1051 | if (!IS_ERR(resume_bdev)) { | |
1052 | set_blocksize(resume_bdev, PAGE_SIZE); | |
1053 | error = check_suspend_image(); | |
1054 | if (error) | |
1055 | blkdev_put(resume_bdev); | |
1056 | } else | |
1057 | error = PTR_ERR(resume_bdev); | |
1058 | ||
1059 | if (!error) | |
1060 | pr_debug("swsusp: resume file found\n"); | |
1061 | else | |
1062 | pr_debug("swsusp: Error %d check for resume file\n", error); | |
1063 | return error; | |
1064 | } | |
1065 | ||
1066 | /** | |
1067 | * swsusp_read - Read saved image from swap. | |
1068 | */ | |
1069 | ||
7088a5c0 | 1070 | int swsusp_read(struct pbe **pblist_ptr) |
1da177e4 LT |
1071 | { |
1072 | int error; | |
1073 | ||
1074 | if (IS_ERR(resume_bdev)) { | |
1075 | pr_debug("swsusp: block device not initialised\n"); | |
1076 | return PTR_ERR(resume_bdev); | |
1077 | } | |
1078 | ||
7088a5c0 | 1079 | error = read_suspend_image(pblist_ptr); |
1da177e4 LT |
1080 | blkdev_put(resume_bdev); |
1081 | ||
1082 | if (!error) | |
1083 | pr_debug("swsusp: Reading resume file was successful\n"); | |
1084 | else | |
1085 | pr_debug("swsusp: Error %d resuming\n", error); | |
1086 | return error; | |
1087 | } | |
1088 | ||
1089 | /** | |
1090 | * swsusp_close - close swap device. | |
1091 | */ | |
1092 | ||
1093 | void swsusp_close(void) | |
1094 | { | |
1095 | if (IS_ERR(resume_bdev)) { | |
1096 | pr_debug("swsusp: block device not initialised\n"); | |
1097 | return; | |
1098 | } | |
1099 | ||
1100 | blkdev_put(resume_bdev); | |
1101 | } |