Commit | Line | Data |
---|---|---|
d1a4c0b3 GC |
1 | #include <net/tcp.h> |
2 | #include <net/tcp_memcontrol.h> | |
3 | #include <net/sock.h> | |
3dc43e3e GC |
4 | #include <net/ip.h> |
5 | #include <linux/nsproxy.h> | |
d1a4c0b3 GC |
6 | #include <linux/memcontrol.h> |
7 | #include <linux/module.h> | |
8 | ||
3aaabe23 GC |
9 | static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft); |
10 | static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft, | |
11 | const char *buffer); | |
ffea59e5 | 12 | static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event); |
3aaabe23 GC |
13 | |
14 | static struct cftype tcp_files[] = { | |
15 | { | |
16 | .name = "kmem.tcp.limit_in_bytes", | |
17 | .write_string = tcp_cgroup_write, | |
18 | .read_u64 = tcp_cgroup_read, | |
19 | .private = RES_LIMIT, | |
20 | }, | |
5a6dd343 GC |
21 | { |
22 | .name = "kmem.tcp.usage_in_bytes", | |
23 | .read_u64 = tcp_cgroup_read, | |
24 | .private = RES_USAGE, | |
25 | }, | |
ffea59e5 GC |
26 | { |
27 | .name = "kmem.tcp.failcnt", | |
28 | .private = RES_FAILCNT, | |
29 | .trigger = tcp_cgroup_reset, | |
30 | .read_u64 = tcp_cgroup_read, | |
31 | }, | |
0850f0f5 GC |
32 | { |
33 | .name = "kmem.tcp.max_usage_in_bytes", | |
34 | .private = RES_MAX_USAGE, | |
35 | .trigger = tcp_cgroup_reset, | |
36 | .read_u64 = tcp_cgroup_read, | |
37 | }, | |
3aaabe23 GC |
38 | }; |
39 | ||
d1a4c0b3 GC |
40 | static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto) |
41 | { | |
42 | return container_of(cg_proto, struct tcp_memcontrol, cg_proto); | |
43 | } | |
44 | ||
45 | static void memcg_tcp_enter_memory_pressure(struct sock *sk) | |
46 | { | |
c48e074c | 47 | if (sk->sk_cgrp->memory_pressure) |
d1a4c0b3 GC |
48 | *sk->sk_cgrp->memory_pressure = 1; |
49 | } | |
50 | EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); | |
51 | ||
52 | int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) | |
53 | { | |
54 | /* | |
55 | * The root cgroup does not use res_counters, but rather, | |
56 | * rely on the data already collected by the network | |
57 | * subsystem | |
58 | */ | |
59 | struct res_counter *res_parent = NULL; | |
60 | struct cg_proto *cg_proto, *parent_cg; | |
61 | struct tcp_memcontrol *tcp; | |
62 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | |
63 | struct mem_cgroup *parent = parent_mem_cgroup(memcg); | |
3dc43e3e | 64 | struct net *net = current->nsproxy->net_ns; |
d1a4c0b3 GC |
65 | |
66 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
67 | if (!cg_proto) | |
3aaabe23 | 68 | goto create_files; |
d1a4c0b3 GC |
69 | |
70 | tcp = tcp_from_cgproto(cg_proto); | |
71 | ||
3dc43e3e GC |
72 | tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0]; |
73 | tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1]; | |
74 | tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2]; | |
d1a4c0b3 GC |
75 | tcp->tcp_memory_pressure = 0; |
76 | ||
77 | parent_cg = tcp_prot.proto_cgroup(parent); | |
78 | if (parent_cg) | |
79 | res_parent = parent_cg->memory_allocated; | |
80 | ||
81 | res_counter_init(&tcp->tcp_memory_allocated, res_parent); | |
82 | percpu_counter_init(&tcp->tcp_sockets_allocated, 0); | |
83 | ||
84 | cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure; | |
85 | cg_proto->memory_pressure = &tcp->tcp_memory_pressure; | |
86 | cg_proto->sysctl_mem = tcp->tcp_prot_mem; | |
87 | cg_proto->memory_allocated = &tcp->tcp_memory_allocated; | |
88 | cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated; | |
89 | cg_proto->memcg = memcg; | |
90 | ||
3aaabe23 GC |
91 | create_files: |
92 | return cgroup_add_files(cgrp, ss, tcp_files, | |
93 | ARRAY_SIZE(tcp_files)); | |
d1a4c0b3 GC |
94 | } |
95 | EXPORT_SYMBOL(tcp_init_cgroup); | |
96 | ||
97 | void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) | |
98 | { | |
99 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | |
100 | struct cg_proto *cg_proto; | |
101 | struct tcp_memcontrol *tcp; | |
3aaabe23 | 102 | u64 val; |
d1a4c0b3 GC |
103 | |
104 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
105 | if (!cg_proto) | |
106 | return; | |
107 | ||
108 | tcp = tcp_from_cgproto(cg_proto); | |
109 | percpu_counter_destroy(&tcp->tcp_sockets_allocated); | |
3aaabe23 | 110 | |
1398eee0 | 111 | val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); |
3aaabe23 GC |
112 | |
113 | if (val != RESOURCE_MAX) | |
c5905afb | 114 | static_key_slow_dec(&memcg_socket_limit_enabled); |
d1a4c0b3 GC |
115 | } |
116 | EXPORT_SYMBOL(tcp_destroy_cgroup); | |
3aaabe23 GC |
117 | |
118 | static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) | |
119 | { | |
120 | struct net *net = current->nsproxy->net_ns; | |
121 | struct tcp_memcontrol *tcp; | |
122 | struct cg_proto *cg_proto; | |
123 | u64 old_lim; | |
124 | int i; | |
125 | int ret; | |
126 | ||
127 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
128 | if (!cg_proto) | |
129 | return -EINVAL; | |
130 | ||
131 | if (val > RESOURCE_MAX) | |
132 | val = RESOURCE_MAX; | |
133 | ||
134 | tcp = tcp_from_cgproto(cg_proto); | |
135 | ||
136 | old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); | |
137 | ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val); | |
138 | if (ret) | |
139 | return ret; | |
140 | ||
141 | for (i = 0; i < 3; i++) | |
142 | tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT, | |
143 | net->ipv4.sysctl_tcp_mem[i]); | |
144 | ||
145 | if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX) | |
c5905afb | 146 | static_key_slow_dec(&memcg_socket_limit_enabled); |
3aaabe23 | 147 | else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX) |
c5905afb | 148 | static_key_slow_inc(&memcg_socket_limit_enabled); |
3aaabe23 GC |
149 | |
150 | return 0; | |
151 | } | |
152 | ||
153 | static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft, | |
154 | const char *buffer) | |
155 | { | |
156 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | |
157 | unsigned long long val; | |
158 | int ret = 0; | |
159 | ||
160 | switch (cft->private) { | |
161 | case RES_LIMIT: | |
162 | /* see memcontrol.c */ | |
163 | ret = res_counter_memparse_write_strategy(buffer, &val); | |
164 | if (ret) | |
165 | break; | |
166 | ret = tcp_update_limit(memcg, val); | |
167 | break; | |
168 | default: | |
169 | ret = -EINVAL; | |
170 | break; | |
171 | } | |
172 | return ret; | |
173 | } | |
174 | ||
175 | static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val) | |
176 | { | |
177 | struct tcp_memcontrol *tcp; | |
178 | struct cg_proto *cg_proto; | |
179 | ||
180 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
181 | if (!cg_proto) | |
182 | return default_val; | |
183 | ||
184 | tcp = tcp_from_cgproto(cg_proto); | |
185 | return res_counter_read_u64(&tcp->tcp_memory_allocated, type); | |
186 | } | |
187 | ||
5a6dd343 GC |
188 | static u64 tcp_read_usage(struct mem_cgroup *memcg) |
189 | { | |
190 | struct tcp_memcontrol *tcp; | |
191 | struct cg_proto *cg_proto; | |
192 | ||
193 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
194 | if (!cg_proto) | |
195 | return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT; | |
196 | ||
197 | tcp = tcp_from_cgproto(cg_proto); | |
198 | return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE); | |
199 | } | |
200 | ||
3aaabe23 GC |
201 | static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft) |
202 | { | |
203 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | |
204 | u64 val; | |
205 | ||
206 | switch (cft->private) { | |
207 | case RES_LIMIT: | |
208 | val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX); | |
209 | break; | |
5a6dd343 GC |
210 | case RES_USAGE: |
211 | val = tcp_read_usage(memcg); | |
212 | break; | |
ffea59e5 | 213 | case RES_FAILCNT: |
0850f0f5 GC |
214 | case RES_MAX_USAGE: |
215 | val = tcp_read_stat(memcg, cft->private, 0); | |
ffea59e5 | 216 | break; |
3aaabe23 GC |
217 | default: |
218 | BUG(); | |
219 | } | |
220 | return val; | |
221 | } | |
222 | ||
ffea59e5 GC |
223 | static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event) |
224 | { | |
225 | struct mem_cgroup *memcg; | |
226 | struct tcp_memcontrol *tcp; | |
227 | struct cg_proto *cg_proto; | |
228 | ||
229 | memcg = mem_cgroup_from_cont(cont); | |
230 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
231 | if (!cg_proto) | |
232 | return 0; | |
233 | tcp = tcp_from_cgproto(cg_proto); | |
234 | ||
235 | switch (event) { | |
0850f0f5 GC |
236 | case RES_MAX_USAGE: |
237 | res_counter_reset_max(&tcp->tcp_memory_allocated); | |
238 | break; | |
ffea59e5 GC |
239 | case RES_FAILCNT: |
240 | res_counter_reset_failcnt(&tcp->tcp_memory_allocated); | |
241 | break; | |
242 | } | |
243 | ||
244 | return 0; | |
245 | } | |
246 | ||
3aaabe23 GC |
247 | unsigned long long tcp_max_memory(const struct mem_cgroup *memcg) |
248 | { | |
249 | struct tcp_memcontrol *tcp; | |
250 | struct cg_proto *cg_proto; | |
251 | ||
252 | cg_proto = tcp_prot.proto_cgroup((struct mem_cgroup *)memcg); | |
253 | if (!cg_proto) | |
254 | return 0; | |
255 | ||
256 | tcp = tcp_from_cgproto(cg_proto); | |
257 | return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); | |
258 | } | |
259 | ||
260 | void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx) | |
261 | { | |
262 | struct tcp_memcontrol *tcp; | |
263 | struct cg_proto *cg_proto; | |
264 | ||
265 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
266 | if (!cg_proto) | |
267 | return; | |
268 | ||
269 | tcp = tcp_from_cgproto(cg_proto); | |
270 | ||
271 | tcp->tcp_prot_mem[idx] = val; | |
272 | } |