Commit | Line | Data |
---|---|---|
daa6afa6 DM |
1 | /* |
2 | * tmem.h | |
3 | * | |
4 | * Transcendent memory | |
5 | * | |
6 | * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp. | |
7 | */ | |
8 | ||
9 | #ifndef _TMEM_H_ | |
10 | #define _TMEM_H_ | |
11 | ||
12 | #include <linux/types.h> | |
13 | #include <linux/highmem.h> | |
14 | #include <linux/hash.h> | |
15 | #include <linux/atomic.h> | |
16 | ||
17 | /* | |
18 | * These are pre-defined by the Xen<->Linux ABI | |
19 | */ | |
/*
 * Numeric values fixed by the Xen<->Linux ABI; they must not be renumbered.
 * NOTE(review): the grouping below follows the name prefixes only -- confirm
 * the exact meaning of each constant against the ABI definition.
 */

/* TMEM_<verb>_<target> values */
#define TMEM_PUT_PAGE			4
#define TMEM_GET_PAGE			5
#define TMEM_FLUSH_PAGE			6
#define TMEM_FLUSH_OBJECT		7

/* TMEM_POOL_* values */
#define TMEM_POOL_PERSIST		1
#define TMEM_POOL_SHARED		2
#define TMEM_POOL_PRECOMPRESSED		4
#define TMEM_POOL_PAGESIZE_SHIFT	4
#define TMEM_POOL_PAGESIZE_MASK		0xf
#define TMEM_POOL_RESERVED_BITS		0x00ffff00
30 | ||
31 | /* | |
32 | * sentinels have proven very useful for debugging but can be removed | |
33 | * or disabled before final merge. | |
34 | */ | |
/*
 * Debugging sentinels.
 *
 * With SENTINELS defined, DECL_SENTINEL adds a uint32_t member named
 * "sentinel" to a structure, and the helpers below store or check a
 * per-type constant whose name is built by token-pasting the second
 * argument with "_SENTINEL" (e.g. FOO -> FOO_SENTINEL).
 *
 *   _x  pointer to the structure holding the sentinel; it is parenthesized
 *       in every expansion so that expressions such as &obj or p + 1 may
 *       be passed without precedence surprises.
 *   _y  prefix of the <_y>_SENTINEL constant to use.
 *
 * The ASSERT_* forms report a mismatch through WARN_ON().
 *
 * DECL_SENTINEL supplies its own trailing ';' -- do not add another one
 * at the point of use.
 *
 * With SENTINELS undefined, DECL_SENTINEL expands to nothing and the
 * helpers become empty statements.
 */
#define SENTINELS
#ifdef SENTINELS
#define DECL_SENTINEL uint32_t sentinel;
#define SET_SENTINEL(_x, _y) ((_x)->sentinel = _y##_SENTINEL)
#define INVERT_SENTINEL(_x, _y) ((_x)->sentinel = ~_y##_SENTINEL)
#define ASSERT_SENTINEL(_x, _y) WARN_ON((_x)->sentinel != _y##_SENTINEL)
#define ASSERT_INVERTED_SENTINEL(_x, _y) \
	WARN_ON((_x)->sentinel != ~_y##_SENTINEL)
#else
#define DECL_SENTINEL
#define SET_SENTINEL(_x, _y) do { } while (0)
#define INVERT_SENTINEL(_x, _y) do { } while (0)
#define ASSERT_SENTINEL(_x, _y) do { } while (0)
#define ASSERT_INVERTED_SENTINEL(_x, _y) do { } while (0)
#endif
49 | ||
/* Forward the lock pointer to lockdep_assert_held(); nothing else is done. */
#define ASSERT_SPINLOCK(lock_ptr) lockdep_assert_held(lock_ptr)
daa6afa6 DM |
51 | |
52 | /* | |
53 | * A pool is the highest-level data structure managed by tmem and | |
54 | * usually corresponds to a large independent set of pages such as | |
55 | * a filesystem. Each pool has an id, and certain attributes and counters. | |
56 | * It also contains a set of hash buckets, each of which contains an rbtree | |
57 | * of objects and a lock to manage concurrency within the pool. | |
58 | */ | |
59 | ||
/* Number of hash buckets per pool, expressed as a power of two. */
#define TMEM_HASH_BUCKET_BITS	8
#define TMEM_HASH_BUCKETS	(1 << TMEM_HASH_BUCKET_BITS)
62 | ||
63 | struct tmem_hashbucket { | |
64 | struct rb_root obj_rb_root; | |
65 | spinlock_t lock; | |
66 | }; | |
67 | ||
68 | struct tmem_pool { | |
69 | void *client; /* "up" for some clients, avoids table lookup */ | |
70 | struct list_head pool_list; | |
71 | uint32_t pool_id; | |
72 | bool persistent; | |
73 | bool shared; | |
74 | atomic_t obj_count; | |
75 | atomic_t refcount; | |
76 | struct tmem_hashbucket hashbucket[TMEM_HASH_BUCKETS]; | |
77 | DECL_SENTINEL | |
78 | }; | |
79 | ||
/*
 * Pool persistence predicates.
 *
 * _p is a pointer to a structure with a "persistent" member (struct
 * tmem_pool in this header).  The argument is parenthesized so that
 * expressions such as &pool or obj->pool + 0 expand correctly.
 *
 * is_persistent() yields the member's value; is_ephemeral() yields its
 * logical negation.
 */
#define is_persistent(_p) ((_p)->persistent)
#define is_ephemeral(_p) (!((_p)->persistent))
82 | ||
83 | /* | |
84 | * An object id ("oid") is large: 192-bits (to ensure, for example, files | |
85 | * in a modern filesystem can be uniquely identified). | |
86 | */ | |
87 | ||
/* Object identifier: three 64-bit words, 192 bits in total. */
struct tmem_oid {
	uint64_t oid[3];	/* oid[2] is the most significant word in tmem_oid_compare() */
};
91 | ||
92 | static inline void tmem_oid_set_invalid(struct tmem_oid *oidp) | |
93 | { | |
94 | oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL; | |
95 | } | |
96 | ||
97 | static inline bool tmem_oid_valid(struct tmem_oid *oidp) | |
98 | { | |
99 | return oidp->oid[0] != -1UL || oidp->oid[1] != -1UL || | |
100 | oidp->oid[2] != -1UL; | |
101 | } | |
102 | ||
103 | static inline int tmem_oid_compare(struct tmem_oid *left, | |
104 | struct tmem_oid *right) | |
105 | { | |
106 | int ret; | |
107 | ||
108 | if (left->oid[2] == right->oid[2]) { | |
109 | if (left->oid[1] == right->oid[1]) { | |
110 | if (left->oid[0] == right->oid[0]) | |
111 | ret = 0; | |
112 | else if (left->oid[0] < right->oid[0]) | |
113 | ret = -1; | |
114 | else | |
115 | return 1; | |
116 | } else if (left->oid[1] < right->oid[1]) | |
117 | ret = -1; | |
118 | else | |
119 | ret = 1; | |
120 | } else if (left->oid[2] < right->oid[2]) | |
121 | ret = -1; | |
122 | else | |
123 | ret = 1; | |
124 | return ret; | |
125 | } | |
126 | ||
127 | static inline unsigned tmem_oid_hash(struct tmem_oid *oidp) | |
128 | { | |
129 | return hash_long(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2], | |
130 | TMEM_HASH_BUCKET_BITS); | |
131 | } | |
132 | ||
133 | /* | |
134 | * A tmem_obj contains an identifier (oid), pointers to the parent | |
135 | * pool and the rb_tree to which it belongs, counters, and an ordered | |
136 | * set of pampds, structured in a radix-tree-like tree. The intermediate | |
137 | * nodes of the tree are called tmem_objnodes. | |
138 | */ | |
139 | ||
140 | struct tmem_objnode; | |
141 | ||
142 | struct tmem_obj { | |
143 | struct tmem_oid oid; | |
144 | struct tmem_pool *pool; | |
145 | struct rb_node rb_tree_node; | |
146 | struct tmem_objnode *objnode_tree_root; | |
147 | unsigned int objnode_tree_height; | |
148 | unsigned long objnode_count; | |
149 | long pampd_count; | |
966b9016 | 150 | void *extra; /* for private use by pampd implementation */ |
daa6afa6 DM |
151 | DECL_SENTINEL |
152 | }; | |
153 | ||
/*
 * Geometry of the objnode tree: each node holds
 * OBJNODE_TREE_MAP_SIZE (1 << OBJNODE_TREE_MAP_SHIFT) slots.
 * OBJNODE_TREE_INDEX_BITS is the width of an unsigned long in bits, and
 * OBJNODE_TREE_MAX_PATH is that width divided by the shift, plus two.
 */
#define OBJNODE_TREE_MAP_SHIFT	6
#define OBJNODE_TREE_MAP_SIZE	(1UL << OBJNODE_TREE_MAP_SHIFT)
#define OBJNODE_TREE_MAP_MASK	(OBJNODE_TREE_MAP_SIZE - 1)
#define OBJNODE_TREE_INDEX_BITS	(8 /* CHAR_BIT */ * sizeof(unsigned long))
#define OBJNODE_TREE_MAX_PATH	\
	(OBJNODE_TREE_INDEX_BITS / OBJNODE_TREE_MAP_SHIFT + 2)
160 | ||
161 | struct tmem_objnode { | |
162 | struct tmem_obj *obj; | |
163 | DECL_SENTINEL | |
164 | void *slots[OBJNODE_TREE_MAP_SIZE]; | |
165 | unsigned int slots_in_use; | |
166 | }; | |
167 | ||
/*
 * pampd abstract datatype methods, supplied by the PAM implementation and
 * handed to tmem through tmem_register_pamops().
 *
 * NOTE(review): the parameters are unnamed in this header; their meaning
 * has to be taken from the implementation that registers the table.
 */
struct tmem_pamops {
	void *(*create)(char *, size_t, bool, int,
			struct tmem_pool *, struct tmem_oid *, uint32_t);

	int (*get_data)(char *, size_t *, bool, void *,
			struct tmem_pool *, struct tmem_oid *, uint32_t);

	int (*get_data_and_free)(char *, size_t *, bool, void *,
				 struct tmem_pool *, struct tmem_oid *,
				 uint32_t);

	void (*free)(void *, struct tmem_pool *, struct tmem_oid *, uint32_t);

	void (*free_obj)(struct tmem_pool *, struct tmem_obj *);

	bool (*is_remote)(void *);

	void (*new_obj)(struct tmem_obj *);

	int (*replace_in_obj)(void *, struct tmem_obj *);
};
/* Register the pampd method table; defined outside this header. */
void tmem_register_pamops(struct tmem_pamops *m);
184 | ||
/*
 * Memory allocation methods supplied by the host implementation and handed
 * to tmem through tmem_register_hostops(): an alloc/free pair for
 * struct tmem_obj and another for struct tmem_objnode, each taking the pool.
 */
struct tmem_hostops {
	/* struct tmem_obj */
	struct tmem_obj *(*obj_alloc)(struct tmem_pool *);
	void (*obj_free)(struct tmem_obj *, struct tmem_pool *);

	/* struct tmem_objnode */
	struct tmem_objnode *(*objnode_alloc)(struct tmem_pool *);
	void (*objnode_free)(struct tmem_objnode *, struct tmem_pool *);
};
/* Register the host allocation method table; defined outside this header. */
void tmem_register_hostops(struct tmem_hostops *m);
193 | ||
/*
 * Core tmem accessor functions (declarations only; the definitions live
 * outside this header).
 *
 * NOTE(review): the meaning of the int return values and of the unnamed
 * parameters is not visible here -- confirm against the implementation.
 */
int tmem_put(struct tmem_pool *, struct tmem_oid *, uint32_t index,
	     char *, size_t, bool, bool);
int tmem_get(struct tmem_pool *, struct tmem_oid *, uint32_t index,
	     char *, size_t *, bool, int);
int tmem_replace(struct tmem_pool *, struct tmem_oid *, uint32_t index,
		 void *);
int tmem_flush_page(struct tmem_pool *, struct tmem_oid *, uint32_t index);
int tmem_flush_object(struct tmem_pool *, struct tmem_oid *);
int tmem_destroy_pool(struct tmem_pool *);
void tmem_new_pool(struct tmem_pool *, uint32_t);
206 | #endif /* _TMEM_H_ */ |