Commit | Line | Data |
---|---|---|
be3de80d JB |
1 | /* |
2 | * mem-memset.c | |
3 | * | |
4 | * memset: Simple memory set in various ways | |
5 | * | |
6 | * Trivial clone of mem-memcpy.c. | |
7 | */ | |
be3de80d JB |
8 | |
9 | #include "../perf.h" | |
10 | #include "../util/util.h" | |
11 | #include "../util/parse-options.h" | |
12 | #include "../util/header.h" | |
13 | #include "bench.h" | |
14 | #include "mem-memset-arch.h" | |
15 | ||
16 | #include <stdio.h> | |
17 | #include <stdlib.h> | |
18 | #include <string.h> | |
19 | #include <sys/time.h> | |
20 | #include <errno.h> | |
21 | ||
/* Scaling step between the B, KB, MB and GB units used when printing. */
#define K 1024

/* Values filled in from the command line; these are the defaults. */
static const char	*length_str	= "1MB";
static const char	*routine	= "default";
static int		iterations	= 1;
static bool		use_cycle;
static bool		only_prefault;
static bool		no_prefault;

/* File descriptor of the cycles counter, assigned by init_cycle(). */
static int		cycle_fd;
31 | ||
32 | static const struct option options[] = { | |
33 | OPT_STRING('l', "length", &length_str, "1MB", | |
08942f6d NK |
34 | "Specify length of memory to set. " |
35 | "Available units: B, KB, MB, GB and TB (upper and lower)"), | |
be3de80d | 36 | OPT_STRING('r', "routine", &routine, "default", |
08942f6d | 37 | "Specify routine to set"), |
e3e877e7 JB |
38 | OPT_INTEGER('i', "iterations", &iterations, |
39 | "repeat memset() invocation this number of times"), | |
17d7a112 | 40 | OPT_BOOLEAN('c', "cycle", &use_cycle, |
08942f6d | 41 | "Use cycles event instead of gettimeofday() for measuring"), |
be3de80d JB |
42 | OPT_BOOLEAN('o', "only-prefault", &only_prefault, |
43 | "Show only the result with page faults before memset()"), | |
44 | OPT_BOOLEAN('n', "no-prefault", &no_prefault, | |
45 | "Show only the result without page faults before memset()"), | |
46 | OPT_END() | |
47 | }; | |
48 | ||
/* Signature shared by every benchmarked routine; same shape as memset(). */
typedef void *(*memset_t)(void *dst, int c, size_t n);

/*
 * One selectable memset implementation.
 * Field order matters: the routines[] table uses positional initialisers.
 */
struct routine {
	const char	*name;	/* value matched against the --routine option */
	const char	*desc;	/* text shown when listing available routines */
	memset_t	fn;	/* function that gets benchmarked */
};
56 | ||
57 | static const struct routine routines[] = { | |
58 | { "default", | |
59 | "Default memset() provided by glibc", | |
60 | memset }, | |
89fe808a | 61 | #ifdef HAVE_ARCH_X86_64_SUPPORT |
be3de80d JB |
62 | |
63 | #define MEMSET_FN(fn, name, desc) { name, desc, fn }, | |
64 | #include "mem-memset-x86-64-asm-def.h" | |
65 | #undef MEMSET_FN | |
66 | ||
67 | #endif | |
68 | ||
69 | { NULL, | |
70 | NULL, | |
71 | NULL } | |
72 | }; | |
73 | ||
/* NULL-terminated usage lines passed to parse_options(). */
static const char * const bench_mem_memset_usage[] = {
	"perf bench mem memset <options>",
	NULL,
};
78 | ||
17d7a112 | 79 | static struct perf_event_attr cycle_attr = { |
be3de80d JB |
80 | .type = PERF_TYPE_HARDWARE, |
81 | .config = PERF_COUNT_HW_CPU_CYCLES | |
82 | }; | |
83 | ||
17d7a112 | 84 | static void init_cycle(void) |
be3de80d | 85 | { |
17d7a112 | 86 | cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 0); |
be3de80d | 87 | |
17d7a112 | 88 | if (cycle_fd < 0 && errno == ENOSYS) |
be3de80d JB |
89 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); |
90 | else | |
17d7a112 | 91 | BUG_ON(cycle_fd < 0); |
be3de80d JB |
92 | } |
93 | ||
17d7a112 | 94 | static u64 get_cycle(void) |
be3de80d JB |
95 | { |
96 | int ret; | |
97 | u64 clk; | |
98 | ||
17d7a112 | 99 | ret = read(cycle_fd, &clk, sizeof(u64)); |
be3de80d JB |
100 | BUG_ON(ret != sizeof(u64)); |
101 | ||
102 | return clk; | |
103 | } | |
104 | ||
/* Convert a struct timeval to seconds, keeping the microsecond fraction. */
static double timeval2double(struct timeval *ts)
{
	double seconds = (double)ts->tv_sec;
	double fraction = (double)ts->tv_usec / (double)1000000;

	return seconds + fraction;
}
110 | ||
/*
 * Allocate `length` bytes with zalloc() and store the pointer in *dst.
 * Dies with an error message when the allocation fails.
 */
static void alloc_mem(void **dst, size_t length)
{
	void *buf = zalloc(length);

	*dst = buf;
	if (buf == NULL)
		die("memory allocation failed - maybe length is too large?\n");
}
117 | ||
17d7a112 | 118 | static u64 do_memset_cycle(memset_t fn, size_t len, bool prefault) |
be3de80d | 119 | { |
17d7a112 | 120 | u64 cycle_start = 0ULL, cycle_end = 0ULL; |
be3de80d | 121 | void *dst = NULL; |
e3e877e7 | 122 | int i; |
be3de80d JB |
123 | |
124 | alloc_mem(&dst, len); | |
125 | ||
126 | if (prefault) | |
127 | fn(dst, -1, len); | |
128 | ||
17d7a112 | 129 | cycle_start = get_cycle(); |
e3e877e7 JB |
130 | for (i = 0; i < iterations; ++i) |
131 | fn(dst, i, len); | |
17d7a112 | 132 | cycle_end = get_cycle(); |
be3de80d JB |
133 | |
134 | free(dst); | |
17d7a112 | 135 | return cycle_end - cycle_start; |
be3de80d JB |
136 | } |
137 | ||
138 | static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) | |
139 | { | |
140 | struct timeval tv_start, tv_end, tv_diff; | |
141 | void *dst = NULL; | |
e3e877e7 | 142 | int i; |
be3de80d JB |
143 | |
144 | alloc_mem(&dst, len); | |
145 | ||
146 | if (prefault) | |
147 | fn(dst, -1, len); | |
148 | ||
149 | BUG_ON(gettimeofday(&tv_start, NULL)); | |
e3e877e7 JB |
150 | for (i = 0; i < iterations; ++i) |
151 | fn(dst, i, len); | |
be3de80d JB |
152 | BUG_ON(gettimeofday(&tv_end, NULL)); |
153 | ||
154 | timersub(&tv_end, &tv_start, &tv_diff); | |
155 | ||
156 | free(dst); | |
157 | return (double)((double)len / timeval2double(&tv_diff)); | |
158 | } | |
159 | ||
/* Result slot for a single run: 0 when --no-prefault is set, otherwise 1. */
#define pf (!no_prefault)
161 | ||
/*
 * Print a bytes-per-second value scaled to the largest fitting unit
 * (B, KB, MB or GB per second, in steps of K). No newline is emitted.
 *
 * The argument is expanded several times, so pass a plain value without
 * side effects.
 */
#define print_bps(x) do {					\
		if ((x) < K)					\
			printf(" %14lf B/Sec", (x));		\
		else if ((x) < K * K)				\
			printf(" %14lf KB/Sec", (x) / K);	\
		else if ((x) < K * K * K)			\
			printf(" %14lf MB/Sec", (x) / K / K);	\
		else						\
			printf(" %14lf GB/Sec", (x) / K / K / K); \
	} while (0)
172 | ||
173 | int bench_mem_memset(int argc, const char **argv, | |
1d037ca1 | 174 | const char *prefix __maybe_unused) |
be3de80d JB |
175 | { |
176 | int i; | |
177 | size_t len; | |
178 | double result_bps[2]; | |
17d7a112 | 179 | u64 result_cycle[2]; |
be3de80d JB |
180 | |
181 | argc = parse_options(argc, argv, options, | |
182 | bench_mem_memset_usage, 0); | |
183 | ||
17d7a112 HM |
184 | if (use_cycle) |
185 | init_cycle(); | |
be3de80d JB |
186 | |
187 | len = (size_t)perf_atoll((char *)length_str); | |
188 | ||
17d7a112 | 189 | result_cycle[0] = result_cycle[1] = 0ULL; |
be3de80d JB |
190 | result_bps[0] = result_bps[1] = 0.0; |
191 | ||
192 | if ((s64)len <= 0) { | |
193 | fprintf(stderr, "Invalid length:%s\n", length_str); | |
194 | return 1; | |
195 | } | |
196 | ||
197 | /* same to without specifying either of prefault and no-prefault */ | |
198 | if (only_prefault && no_prefault) | |
199 | only_prefault = no_prefault = false; | |
200 | ||
201 | for (i = 0; routines[i].name; i++) { | |
202 | if (!strcmp(routines[i].name, routine)) | |
203 | break; | |
204 | } | |
205 | if (!routines[i].name) { | |
206 | printf("Unknown routine:%s\n", routine); | |
207 | printf("Available routines...\n"); | |
208 | for (i = 0; routines[i].name; i++) { | |
209 | printf("\t%s ... %s\n", | |
210 | routines[i].name, routines[i].desc); | |
211 | } | |
212 | return 1; | |
213 | } | |
214 | ||
215 | if (bench_format == BENCH_FORMAT_DEFAULT) | |
216 | printf("# Copying %s Bytes ...\n\n", length_str); | |
217 | ||
218 | if (!only_prefault && !no_prefault) { | |
219 | /* show both of results */ | |
17d7a112 HM |
220 | if (use_cycle) { |
221 | result_cycle[0] = | |
222 | do_memset_cycle(routines[i].fn, len, false); | |
223 | result_cycle[1] = | |
224 | do_memset_cycle(routines[i].fn, len, true); | |
be3de80d JB |
225 | } else { |
226 | result_bps[0] = | |
227 | do_memset_gettimeofday(routines[i].fn, | |
228 | len, false); | |
229 | result_bps[1] = | |
230 | do_memset_gettimeofday(routines[i].fn, | |
231 | len, true); | |
232 | } | |
233 | } else { | |
17d7a112 HM |
234 | if (use_cycle) { |
235 | result_cycle[pf] = | |
236 | do_memset_cycle(routines[i].fn, | |
be3de80d JB |
237 | len, only_prefault); |
238 | } else { | |
239 | result_bps[pf] = | |
240 | do_memset_gettimeofday(routines[i].fn, | |
241 | len, only_prefault); | |
242 | } | |
243 | } | |
244 | ||
245 | switch (bench_format) { | |
246 | case BENCH_FORMAT_DEFAULT: | |
247 | if (!only_prefault && !no_prefault) { | |
17d7a112 HM |
248 | if (use_cycle) { |
249 | printf(" %14lf Cycle/Byte\n", | |
250 | (double)result_cycle[0] | |
be3de80d | 251 | / (double)len); |
17d7a112 HM |
252 | printf(" %14lf Cycle/Byte (with prefault)\n ", |
253 | (double)result_cycle[1] | |
be3de80d JB |
254 | / (double)len); |
255 | } else { | |
256 | print_bps(result_bps[0]); | |
257 | printf("\n"); | |
258 | print_bps(result_bps[1]); | |
259 | printf(" (with prefault)\n"); | |
260 | } | |
261 | } else { | |
17d7a112 HM |
262 | if (use_cycle) { |
263 | printf(" %14lf Cycle/Byte", | |
264 | (double)result_cycle[pf] | |
be3de80d JB |
265 | / (double)len); |
266 | } else | |
267 | print_bps(result_bps[pf]); | |
268 | ||
269 | printf("%s\n", only_prefault ? " (with prefault)" : ""); | |
270 | } | |
271 | break; | |
272 | case BENCH_FORMAT_SIMPLE: | |
273 | if (!only_prefault && !no_prefault) { | |
17d7a112 | 274 | if (use_cycle) { |
be3de80d | 275 | printf("%lf %lf\n", |
17d7a112 HM |
276 | (double)result_cycle[0] / (double)len, |
277 | (double)result_cycle[1] / (double)len); | |
be3de80d JB |
278 | } else { |
279 | printf("%lf %lf\n", | |
280 | result_bps[0], result_bps[1]); | |
281 | } | |
282 | } else { | |
17d7a112 HM |
283 | if (use_cycle) { |
284 | printf("%lf\n", (double)result_cycle[pf] | |
be3de80d JB |
285 | / (double)len); |
286 | } else | |
287 | printf("%lf\n", result_bps[pf]); | |
288 | } | |
289 | break; | |
290 | default: | |
291 | /* reaching this means there's some disaster: */ | |
292 | die("unknown format: %d\n", bench_format); | |
293 | break; | |
294 | } | |
295 | ||
296 | return 0; | |
297 | } |