#include <net/tcp.h>
#include <net/tcp_memcontrol.h>
#include <net/sock.h>
#include <net/ip.h>
#include <linux/nsproxy.h>
#include <linux/memcontrol.h>
#include <linux/module.h>

1d62e436 | 9 | int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) |
d1a4c0b3 GC |
10 | { |
11 | /* | |
3e32cb2e | 12 | * The root cgroup does not use page_counters, but rather, |
d1a4c0b3 GC |
13 | * rely on the data already collected by the network |
14 | * subsystem | |
15 | */ | |
d1a4c0b3 | 16 | struct mem_cgroup *parent = parent_mem_cgroup(memcg); |
3e32cb2e JW |
17 | struct page_counter *counter_parent = NULL; |
18 | struct cg_proto *cg_proto, *parent_cg; | |
d1a4c0b3 GC |
19 | |
20 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
21 | if (!cg_proto) | |
6bc10349 | 22 | return 0; |
d1a4c0b3 | 23 | |
2e685cad EB |
24 | cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0]; |
25 | cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1]; | |
26 | cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2]; | |
27 | cg_proto->memory_pressure = 0; | |
28 | cg_proto->memcg = memcg; | |
d1a4c0b3 GC |
29 | |
30 | parent_cg = tcp_prot.proto_cgroup(parent); | |
31 | if (parent_cg) | |
3e32cb2e | 32 | counter_parent = &parent_cg->memory_allocated; |
d1a4c0b3 | 33 | |
3e32cb2e | 34 | page_counter_init(&cg_proto->memory_allocated, counter_parent); |
d1a4c0b3 | 35 | |
6bc10349 | 36 | return 0; |
d1a4c0b3 GC |
37 | } |
38 | EXPORT_SYMBOL(tcp_init_cgroup); | |
39 | ||
1d62e436 | 40 | void tcp_destroy_cgroup(struct mem_cgroup *memcg) |
d1a4c0b3 | 41 | { |
d1a4c0b3 | 42 | struct cg_proto *cg_proto; |
d1a4c0b3 GC |
43 | |
44 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
45 | if (!cg_proto) | |
46 | return; | |
47 | ||
9ee11ba4 | 48 | if (cg_proto->active) |
f48b80a5 VD |
49 | static_key_slow_dec(&memcg_socket_limit_enabled); |
50 | ||
d1a4c0b3 GC |
51 | } |
52 | EXPORT_SYMBOL(tcp_destroy_cgroup); | |
3aaabe23 | 53 | |
3e32cb2e | 54 | static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) |
3aaabe23 | 55 | { |
3aaabe23 | 56 | struct cg_proto *cg_proto; |
3aaabe23 GC |
57 | int i; |
58 | int ret; | |
59 | ||
60 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
61 | if (!cg_proto) | |
62 | return -EINVAL; | |
63 | ||
3e32cb2e | 64 | ret = page_counter_limit(&cg_proto->memory_allocated, nr_pages); |
3aaabe23 GC |
65 | if (ret) |
66 | return ret; | |
67 | ||
68 | for (i = 0; i < 3; i++) | |
3e32cb2e | 69 | cg_proto->sysctl_mem[i] = min_t(long, nr_pages, |
2e685cad | 70 | sysctl_tcp_mem[i]); |
3aaabe23 | 71 | |
9ee11ba4 | 72 | if (!cg_proto->active) { |
3f134619 | 73 | /* |
9ee11ba4 | 74 | * The active flag needs to be written after the static_key |
3f134619 GC |
75 | * update. This is what guarantees that the socket activation |
76 | * function is the last one to run. See sock_update_memcg() for | |
77 | * details, and note that we don't mark any socket as belonging | |
78 | * to this memcg until that flag is up. | |
79 | * | |
80 | * We need to do this, because static_keys will span multiple | |
81 | * sites, but we can't control their order. If we mark a socket | |
82 | * as accounted, but the accounting functions are not patched in | |
83 | * yet, we'll lose accounting. | |
84 | * | |
85 | * We never race with the readers in sock_update_memcg(), | |
86 | * because when this value change, the code to process it is not | |
87 | * patched in yet. | |
3f134619 | 88 | */ |
9ee11ba4 VD |
89 | static_key_slow_inc(&memcg_socket_limit_enabled); |
90 | cg_proto->active = true; | |
3f134619 | 91 | } |
3aaabe23 GC |
92 | |
93 | return 0; | |
94 | } | |
95 | ||
/* File identifiers stored in cftype->private for the kmem.tcp.* files. */
enum {
	RES_USAGE,	/* current memory charged, in bytes */
	RES_LIMIT,	/* configured hard limit, in bytes */
	RES_MAX_USAGE,	/* historical high-water mark, in bytes */
	RES_FAILCNT,	/* number of failed charge attempts */
};

/* Serializes concurrent limit updates via tcp_cgroup_write(). */
static DEFINE_MUTEX(tcp_limit_mutex);
104 | ||
451af504 TH |
105 | static ssize_t tcp_cgroup_write(struct kernfs_open_file *of, |
106 | char *buf, size_t nbytes, loff_t off) | |
3aaabe23 | 107 | { |
451af504 | 108 | struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); |
3e32cb2e | 109 | unsigned long nr_pages; |
3aaabe23 GC |
110 | int ret = 0; |
111 | ||
451af504 TH |
112 | buf = strstrip(buf); |
113 | ||
114 | switch (of_cft(of)->private) { | |
3aaabe23 GC |
115 | case RES_LIMIT: |
116 | /* see memcontrol.c */ | |
650c5e56 | 117 | ret = page_counter_memparse(buf, "-1", &nr_pages); |
3aaabe23 GC |
118 | if (ret) |
119 | break; | |
3e32cb2e JW |
120 | mutex_lock(&tcp_limit_mutex); |
121 | ret = tcp_update_limit(memcg, nr_pages); | |
122 | mutex_unlock(&tcp_limit_mutex); | |
3aaabe23 GC |
123 | break; |
124 | default: | |
125 | ret = -EINVAL; | |
126 | break; | |
127 | } | |
451af504 | 128 | return ret ?: nbytes; |
3aaabe23 GC |
129 | } |
130 | ||
182446d0 | 131 | static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) |
3aaabe23 | 132 | { |
182446d0 | 133 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
3e32cb2e | 134 | struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg); |
3aaabe23 GC |
135 | u64 val; |
136 | ||
137 | switch (cft->private) { | |
138 | case RES_LIMIT: | |
3e32cb2e JW |
139 | if (!cg_proto) |
140 | return PAGE_COUNTER_MAX; | |
141 | val = cg_proto->memory_allocated.limit; | |
142 | val *= PAGE_SIZE; | |
3aaabe23 | 143 | break; |
5a6dd343 | 144 | case RES_USAGE: |
3e32cb2e JW |
145 | if (!cg_proto) |
146 | val = atomic_long_read(&tcp_memory_allocated); | |
147 | else | |
148 | val = page_counter_read(&cg_proto->memory_allocated); | |
149 | val *= PAGE_SIZE; | |
5a6dd343 | 150 | break; |
ffea59e5 | 151 | case RES_FAILCNT: |
3e32cb2e JW |
152 | if (!cg_proto) |
153 | return 0; | |
154 | val = cg_proto->memory_allocated.failcnt; | |
155 | break; | |
0850f0f5 | 156 | case RES_MAX_USAGE: |
3e32cb2e JW |
157 | if (!cg_proto) |
158 | return 0; | |
159 | val = cg_proto->memory_allocated.watermark; | |
160 | val *= PAGE_SIZE; | |
ffea59e5 | 161 | break; |
3aaabe23 GC |
162 | default: |
163 | BUG(); | |
164 | } | |
165 | return val; | |
166 | } | |
167 | ||
6770c64e TH |
168 | static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of, |
169 | char *buf, size_t nbytes, loff_t off) | |
ffea59e5 GC |
170 | { |
171 | struct mem_cgroup *memcg; | |
ffea59e5 GC |
172 | struct cg_proto *cg_proto; |
173 | ||
6770c64e | 174 | memcg = mem_cgroup_from_css(of_css(of)); |
ffea59e5 GC |
175 | cg_proto = tcp_prot.proto_cgroup(memcg); |
176 | if (!cg_proto) | |
6770c64e | 177 | return nbytes; |
ffea59e5 | 178 | |
6770c64e | 179 | switch (of_cft(of)->private) { |
0850f0f5 | 180 | case RES_MAX_USAGE: |
3e32cb2e | 181 | page_counter_reset_watermark(&cg_proto->memory_allocated); |
0850f0f5 | 182 | break; |
ffea59e5 | 183 | case RES_FAILCNT: |
3e32cb2e | 184 | cg_proto->memory_allocated.failcnt = 0; |
ffea59e5 GC |
185 | break; |
186 | } | |
187 | ||
6770c64e | 188 | return nbytes; |
ffea59e5 GC |
189 | } |
190 | ||
676f7c8f TH |
/* Legacy (cgroup v1) control files for TCP kernel-memory accounting. */
static struct cftype tcp_files[] = {
	{
		.name = "kmem.tcp.limit_in_bytes",
		.write = tcp_cgroup_write,
		.read_u64 = tcp_cgroup_read,
		.private = RES_LIMIT,
	},
	{
		.name = "kmem.tcp.usage_in_bytes",
		.read_u64 = tcp_cgroup_read,
		.private = RES_USAGE,
	},
	{
		.name = "kmem.tcp.failcnt",
		.private = RES_FAILCNT,
		.write = tcp_cgroup_reset,
		.read_u64 = tcp_cgroup_read,
	},
	{
		.name = "kmem.tcp.max_usage_in_bytes",
		.private = RES_MAX_USAGE,
		.write = tcp_cgroup_reset,
		.read_u64 = tcp_cgroup_read,
	},
	{ }	/* terminate */
};
6bc10349 TH |
217 | |
218 | static int __init tcp_memcontrol_init(void) | |
219 | { | |
2cf669a5 | 220 | WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, tcp_files)); |
6bc10349 TH |
221 | return 0; |
222 | } | |
223 | __initcall(tcp_memcontrol_init); |