1 : /*
2 : * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 : * Use of this source code is governed by a BSD-style license that can be
4 : * found in the LICENSE file.
5 : */
6 :
#include <inttypes.h>
#include <limits.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
11 :
12 : #include "native_client/src/include/nacl_assert.h"
13 : #include "native_client/src/include/nacl_macros.h"
14 : #include "native_client/tests/performance/perf_test_compat_osx.h"
15 : #include "native_client/tests/performance/perf_test_runner.h"
16 :
17 :
18 66 : double TimeIterations(PerfTest *test, int iterations) {
19 66 : struct timespec start_time;
20 66 : struct timespec end_time;
21 198 : ASSERT_EQ(clock_gettime(CLOCK_MONOTONIC, &start_time), 0);
22 371699232 : for (int i = 0; i < iterations; i++) {
23 185849550 : test->run();
24 185849550 : }
25 198 : ASSERT_EQ(clock_gettime(CLOCK_MONOTONIC, &end_time), 0);
26 66 : double total_time =
27 : (end_time.tv_sec - start_time.tv_sec
28 : + (double) (end_time.tv_nsec - start_time.tv_nsec) / 1e9);
29 : // Output the raw data.
30 66 : printf(" %.3f usec (%g sec) per iteration: %g sec for %i iterations\n",
31 : total_time / iterations * 1e6,
32 : total_time / iterations,
33 : total_time, iterations);
34 66 : return total_time;
35 : }
36 :
37 10 : int CalibrateIterationCount(PerfTest *test, double target_time,
38 10 : int sample_count) {
39 10 : int calibration_iterations = 100;
40 10 : double calibration_time;
41 10 : for (;;) {
42 16 : calibration_time = TimeIterations(test, calibration_iterations);
43 : // If the test completed too quickly to get an accurate
44 : // measurement, try a larger number of iterations.
45 16 : if (calibration_time >= 1e-5)
46 10 : break;
47 18 : ASSERT_LE(calibration_iterations, INT_MAX / 10);
48 6 : calibration_iterations *= 10;
49 6 : }
50 :
51 10 : double iterations_d =
52 : (target_time / (calibration_time / calibration_iterations)
53 : / sample_count);
54 : // Sanity checks for very fast or very slow tests.
55 30 : ASSERT_LE(iterations_d, INT_MAX);
56 10 : int iterations = iterations_d;
57 10 : if (iterations < 1)
58 0 : iterations = 1;
59 10 : return iterations;
60 : }
61 :
62 10 : void TimePerfTest(PerfTest *test, double *mean, double *stddev) {
63 : // 'target_time' is the amount of time we aim to run this perf test
64 : // for in total.
65 10 : double target_time = 0.5; // seconds
66 : // 'sample_count' is the number of separate timings we take in order
67 : // to measure the variability of the results.
68 10 : int sample_count = 5;
69 10 : int iterations = CalibrateIterationCount(test, target_time, sample_count);
70 :
71 10 : double sum = 0;
72 10 : double sum_of_squares = 0;
73 120 : for (int i = 0; i < sample_count; i++) {
74 50 : double time = TimeIterations(test, iterations) / iterations;
75 50 : sum += time;
76 50 : sum_of_squares += time * time;
77 50 : }
78 10 : *mean = sum / sample_count;
79 10 : *stddev = sqrt(sum_of_squares / sample_count - *mean * *mean);
80 10 : }
81 :
82 10 : void PerfTestRealTime(const char *description_string, const char *test_name,
83 10 : PerfTest *test, double *result_mean) {
84 10 : double mean;
85 10 : double stddev;
86 10 : printf("Measuring real time:\n");
87 10 : TimePerfTest(test, &mean, &stddev);
88 10 : printf(" mean: %.6f usec\n", mean * 1e6);
89 10 : printf(" stddev: %.6f usec\n", stddev * 1e6);
90 10 : printf(" relative stddev: %.2f%%\n", stddev / mean * 100);
91 : // Output the result in a format that Buildbot will recognise in the
92 : // logs and record, using the Chromium perf testing infrastructure.
93 10 : printf("RESULT %s: %s= {%.6f, %.6f} us\n",
94 : test_name, description_string, mean * 1e6, stddev * 1e6);
95 10 : *result_mean = mean;
96 10 : }
97 :
98 : #if defined(__i386__) || defined(__x86_64__)
99 :
100 : static INLINE uint64_t ReadTimestampCounter() {
101 2020 : uint32_t edx; // Top 32 bits of timestamp
102 2020 : uint32_t eax; // Bottom 32 bits of timestamp
103 : // NaCl's x86 validators don't allow rdtscp, so we can't check
104 : // whether the thread has been moved to a different core.
105 2020 : __asm__ volatile("rdtsc" : "=d"(edx), "=a"(eax));
106 2020 : return (((uint64_t) edx) << 32) | eax;
107 : }
108 :
// qsort() comparator: orders uint64_t values ascending.
static int CompareUint64(const void *val1, const void *val2) {
  uint64_t a = *(const uint64_t *) val1;
  uint64_t b = *(const uint64_t *) val2;
  if (a < b)
    return -1;
  if (a > b)
    return 1;
  return 0;
}
116 :
117 10 : void PerfTestCycleCount(const char *description_string, const char *test_name,
118 10 : PerfTest *test, uint64_t *result_cycles) {
119 10 : printf("Measuring clock cycles:\n");
120 10 : uint64_t times[101];
121 2040 : for (size_t i = 0; i < NACL_ARRAY_SIZE(times); i++) {
122 1010 : uint64_t start_time = ReadTimestampCounter();
123 1010 : test->run();
124 1010 : uint64_t end_time = ReadTimestampCounter();
125 1010 : times[i] = end_time - start_time;
126 1010 : }
127 :
128 : // We expect the first run to be slower because caches won't be
129 : // warm. We print the first and slowest runs so that we can verify
130 : // this.
131 10 : printf(" first runs (cycles): ");
132 220 : for (size_t i = 0; i < 10; i++)
133 100 : printf(" %" PRId64, times[i]);
134 10 : printf(" ...\n");
135 :
136 10 : qsort(times, NACL_ARRAY_SIZE(times), sizeof(times[0]), CompareUint64);
137 :
138 10 : printf(" slowest runs (cycles): ...");
139 220 : for (size_t i = NACL_ARRAY_SIZE(times) - 10; i < NACL_ARRAY_SIZE(times); i++)
140 100 : printf(" %" PRId64, times[i]);
141 10 : printf("\n");
142 :
143 10 : int count = NACL_ARRAY_SIZE(times) - 1;
144 10 : uint64_t q1 = times[count * 1 / 4]; // First quartile
145 10 : uint64_t q2 = times[count * 1 / 2]; // Median
146 10 : uint64_t q3 = times[count * 3 / 4]; // Third quartile
147 10 : printf(" min: %" PRId64 " cycles\n", times[0]);
148 10 : printf(" q1: %" PRId64 " cycles\n", q1);
149 10 : printf(" median: %" PRId64 " cycles\n", q2);
150 10 : printf(" q3: %" PRId64 " cycles\n", q3);
151 10 : printf(" max: %" PRId64 " cycles\n", times[count]);
152 : // The "{...}" RESULT syntax usually means standard deviation but
153 : // here we report the interquartile range.
154 10 : printf("RESULT %s_CycleCount: %s= {%" PRId64 ", %" PRId64 "} count\n",
155 : test_name, description_string, q2, q3 - q1);
156 10 : *result_cycles = q2;
157 10 : }
158 :
159 : #endif
160 :
// Runs 'test' under the real-time measurement (and, on x86, the cycle-count
// measurement too), printing results under the heading 'test_name'.
// Takes ownership of 'test' and deletes it before returning.
void RunPerfTest(const char *description_string, const char *test_name,
                 PerfTest *test) {
  printf("\n%s:\n", test_name);
  double mean_time;
  PerfTestRealTime(description_string, test_name, test, &mean_time);
#if defined(__i386__) || defined(__x86_64__)
  uint64_t cycles;
  PerfTestCycleCount(description_string, test_name, test, &cycles);
  // The apparent clock speed can be used to sanity-check the results,
  // e.g. to see whether the CPU is in power-saving mode.
  printf("Apparent clock speed: %.0f MHz\n", cycles / mean_time / 1e6);
#endif
  delete test;
}
175 :
// Entry point: runs every registered perf test in sequence.
int main(int argc, char **argv) {
  // An optional first argument overrides the metric label used in the
  // RESULT lines (defaults to "time").
  const char *description_string = argc >= 2 ? argv[1] : "time";

  // Turn off stdout buffering to aid debugging.
  setvbuf(stdout, NULL, _IONBF, 0);

  // Each Make<class_name>() factory is defined in another file of this
  // test suite; RunPerfTest() takes ownership of the returned object.
#define RUN_TEST(class_name) \
    extern PerfTest *Make##class_name(); \
    RunPerfTest(description_string, #class_name, Make##class_name());

  RUN_TEST(TestNull);
#if defined(__native_client__)
  RUN_TEST(TestNaClSyscall);
#endif
#if NACL_LINUX || NACL_OSX
  RUN_TEST(TestHostSyscall);
#endif
  RUN_TEST(TestSetjmpLongjmp);
  RUN_TEST(TestClockGetTime);
#if !NACL_OSX
  RUN_TEST(TestTlsVariable);
#endif
  RUN_TEST(TestMmapAnonymous);
  RUN_TEST(TestAtomicIncrement);
  RUN_TEST(TestUncontendedMutexLock);
  RUN_TEST(TestCondvarSignalNoOp);
  RUN_TEST(TestThreadCreateAndJoin);
  RUN_TEST(TestThreadWakeup);

#if defined(__native_client__)
  // Test untrusted fault handling.  This should come last because, on
  // Windows, registering a fault handler has a performance impact on
  // thread creation and exit.  This is because when the Windows debug
  // exception handler is attached to sel_ldr as a debugger, Windows
  // suspends the whole sel_ldr process every time a thread is created
  // or exits.
  RUN_TEST(TestCatchingFault);
  // Measure that overhead by running MakeTestThreadCreateAndJoin again.
  RunPerfTest(description_string,
              "TestThreadCreateAndJoinAfterSettingFaultHandler",
              MakeTestThreadCreateAndJoin());
#endif

#undef RUN_TEST

  return 0;
}
|