1 : /*
2 : * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 : * Use of this source code is governed by a BSD-style license that can be
4 : * found in the LICENSE file.
5 : */
6 :
#include <inttypes.h>
#include <limits.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
11 :
12 : #include "native_client/src/include/nacl_assert.h"
13 : #include "native_client/src/include/nacl_macros.h"
14 : #include "native_client/tests/performance/perf_test_compat_osx.h"
15 : #include "native_client/tests/performance/perf_test_runner.h"
16 :
17 :
18 66 : double TimeIterations(PerfTest *test, int iterations) {
19 66 : struct timespec start_time;
20 66 : struct timespec end_time;
21 198 : ASSERT_EQ(clock_gettime(CLOCK_MONOTONIC, &start_time), 0);
22 371699232 : for (int i = 0; i < iterations; i++) {
23 185849550 : test->run();
24 185849550 : }
25 198 : ASSERT_EQ(clock_gettime(CLOCK_MONOTONIC, &end_time), 0);
26 66 : double total_time =
27 : (end_time.tv_sec - start_time.tv_sec
28 : + (double) (end_time.tv_nsec - start_time.tv_nsec) / 1e9);
29 : // Output the raw data.
30 66 : printf(" %.3f usec (%g sec) per iteration: %g sec for %i iterations\n",
31 : total_time / iterations * 1e6,
32 : total_time / iterations,
33 : total_time, iterations);
34 66 : return total_time;
35 : }
36 :
37 10 : int CalibrateIterationCount(PerfTest *test, double target_time,
38 10 : int sample_count) {
39 10 : int calibration_iterations = 100;
40 10 : double calibration_time;
41 10 : for (;;) {
42 16 : calibration_time = TimeIterations(test, calibration_iterations);
43 : // If the test completed too quickly to get an accurate
44 : // measurement, try a larger number of iterations.
45 16 : if (calibration_time >= 1e-5)
46 10 : break;
47 18 : ASSERT_LE(calibration_iterations, INT_MAX / 10);
48 6 : calibration_iterations *= 10;
49 6 : }
50 :
51 10 : double iterations_d =
52 : (target_time / (calibration_time / calibration_iterations)
53 : / sample_count);
54 : // Sanity checks for very fast or very slow tests.
55 30 : ASSERT_LE(iterations_d, INT_MAX);
56 10 : int iterations = iterations_d;
57 10 : if (iterations < 1)
58 0 : iterations = 1;
59 10 : return iterations;
60 : }
61 :
62 10 : void TimePerfTest(PerfTest *test, double *mean, double *stddev) {
63 : // 'target_time' is the amount of time we aim to run this perf test
64 : // for in total.
65 10 : double target_time = 0.5; // seconds
66 : // 'sample_count' is the number of separate timings we take in order
67 : // to measure the variability of the results.
68 10 : int sample_count = 5;
69 10 : int iterations = CalibrateIterationCount(test, target_time, sample_count);
70 :
71 10 : double sum = 0;
72 10 : double sum_of_squares = 0;
73 120 : for (int i = 0; i < sample_count; i++) {
74 50 : double time = TimeIterations(test, iterations) / iterations;
75 50 : sum += time;
76 50 : sum_of_squares += time * time;
77 50 : }
78 10 : *mean = sum / sample_count;
79 10 : *stddev = sqrt(sum_of_squares / sample_count - *mean * *mean);
80 10 : }
81 :
82 10 : void PerfTestRealTime(const char *description_string, const char *test_name,
83 10 : PerfTest *test, double *result_mean) {
84 10 : double mean;
85 10 : double stddev;
86 10 : printf("Measuring real time:\n");
87 10 : TimePerfTest(test, &mean, &stddev);
88 10 : printf(" mean: %.6f usec\n", mean * 1e6);
89 10 : printf(" stddev: %.6f usec\n", stddev * 1e6);
90 10 : printf(" relative stddev: %.2f%%\n", stddev / mean * 100);
91 : // Output the result in a format that Buildbot will recognise in the
92 : // logs and record, using the Chromium perf testing infrastructure.
93 10 : printf("RESULT %s: %s= {%.6f, %.6f} us\n",
94 : test_name, description_string, mean * 1e6, stddev * 1e6);
95 10 : *result_mean = mean;
96 10 : }
97 :
98 : #if defined(__i386__) || defined(__x86_64__)
99 :
100 : static INLINE uint64_t ReadTimestampCounter() {
101 2020 : uint32_t edx; // Top 32 bits of timestamp
102 2020 : uint32_t eax; // Bottom 32 bits of timestamp
103 : // NaCl's x86 validators don't allow rdtscp, so we can't check
104 : // whether the thread has been moved to a different core.
105 2020 : __asm__ volatile("rdtsc" : "=d"(edx), "=a"(eax));
106 2020 : return (((uint64_t) edx) << 32) | eax;
107 : }
108 :
// qsort() comparator: orders uint64_t values ascending.
static int CompareUint64(const void *val1, const void *val2) {
  uint64_t a = *(const uint64_t *) val1;
  uint64_t b = *(const uint64_t *) val2;
  if (a < b)
    return -1;
  if (a > b)
    return 1;
  return 0;
}
116 :
117 10 : void PerfTestCycleCount(const char *description_string, const char *test_name,
118 10 : PerfTest *test, uint64_t *result_cycles) {
119 10 : printf("Measuring clock cycles:\n");
120 10 : uint64_t times[101];
121 2040 : for (size_t i = 0; i < NACL_ARRAY_SIZE(times); i++) {
122 1010 : uint64_t start_time = ReadTimestampCounter();
123 1010 : test->run();
124 1010 : uint64_t end_time = ReadTimestampCounter();
125 1010 : times[i] = end_time - start_time;
126 1010 : }
127 :
128 : // We expect the first run to be slower because caches won't be
129 : // warm. We print the first and slowest runs so that we can verify
130 : // this.
131 10 : printf(" first runs (cycles): ");
132 220 : for (size_t i = 0; i < 10; i++)
133 100 : printf(" %" PRId64, times[i]);
134 10 : printf(" ...\n");
135 :
136 10 : qsort(times, NACL_ARRAY_SIZE(times), sizeof(times[0]), CompareUint64);
137 :
138 10 : printf(" slowest runs (cycles): ...");
139 220 : for (size_t i = NACL_ARRAY_SIZE(times) - 10; i < NACL_ARRAY_SIZE(times); i++)
140 100 : printf(" %" PRId64, times[i]);
141 10 : printf("\n");
142 :
143 10 : int count = NACL_ARRAY_SIZE(times) - 1;
144 10 : uint64_t q1 = times[count * 1 / 4]; // First quartile
145 10 : uint64_t q2 = times[count * 1 / 2]; // Median
146 10 : uint64_t q3 = times[count * 3 / 4]; // Third quartile
147 10 : printf(" min: %" PRId64 " cycles\n", times[0]);
148 10 : printf(" q1: %" PRId64 " cycles\n", q1);
149 10 : printf(" median: %" PRId64 " cycles\n", q2);
150 10 : printf(" q3: %" PRId64 " cycles\n", q3);
151 10 : printf(" max: %" PRId64 " cycles\n", times[count]);
152 : // The "{...}" RESULT syntax usually means standard deviation but
153 : // here we report the interquartile range.
154 10 : printf("RESULT %s_CycleCount: %s= {%" PRId64 ", %" PRId64 "} count\n",
155 : test_name, description_string, q2, q3 - q1);
156 10 : *result_cycles = q2;
157 10 : }
158 :
159 : #endif
160 :
// Runs 'test' under the real-time measurement (and, on x86, the cycle-count
// measurement too), printing results under the heading 'test_name'.
// Takes ownership of 'test' and deletes it before returning.
void RunPerfTest(const char *description_string, const char *test_name,
                 PerfTest *test) {
  printf("\n%s:\n", test_name);
  double mean_time;
  PerfTestRealTime(description_string, test_name, test, &mean_time);
#if defined(__i386__) || defined(__x86_64__)
  uint64_t cycles;
  PerfTestCycleCount(description_string, test_name, test, &cycles);
  // The apparent clock speed can be used to sanity-check the results,
  // e.g. to see whether the CPU is in power-saving mode.
  printf("Apparent clock speed: %.0f MHz\n", cycles / mean_time / 1e6);
#endif
  delete test;
}
175 :
// Entry point: runs every registered perf test in sequence.
int main(int argc, char **argv) {
  // An optional first argument overrides the metric label used in the
  // RESULT lines (defaults to "time").
  const char *description_string = argc >= 2 ? argv[1] : "time";

  // Turn off stdout buffering to aid debugging.
  setvbuf(stdout, NULL, _IONBF, 0);

  // Each Make<class_name>() factory is defined in another file of this
  // test suite; RunPerfTest() takes ownership of the returned object.
#define RUN_TEST(class_name) \
    extern PerfTest *Make##class_name(); \
    RunPerfTest(description_string, #class_name, Make##class_name());

  RUN_TEST(TestNull);
#if defined(__native_client__)
  RUN_TEST(TestNaClSyscall);
#endif
#if NACL_LINUX || NACL_OSX
  RUN_TEST(TestHostSyscall);
#endif
  RUN_TEST(TestSetjmpLongjmp);
  RUN_TEST(TestClockGetTime);
#if !NACL_OSX
  RUN_TEST(TestTlsVariable);
#endif
  RUN_TEST(TestMmapAnonymous);
  RUN_TEST(TestAtomicIncrement);
  RUN_TEST(TestUncontendedMutexLock);
  RUN_TEST(TestCondvarSignalNoOp);
  RUN_TEST(TestThreadCreateAndJoin);
  RUN_TEST(TestThreadWakeup);

#if defined(__native_client__)
  // Test untrusted fault handling.  This should come last because, on
  // Windows, registering a fault handler has a performance impact on
  // thread creation and exit.  This is because when the Windows debug
  // exception handler is attached to sel_ldr as a debugger, Windows
  // suspends the whole sel_ldr process every time a thread is created
  // or exits.
  RUN_TEST(TestCatchingFault);
  // Measure that overhead by running MakeTestThreadCreateAndJoin again.
  RunPerfTest(description_string,
              "TestThreadCreateAndJoinAfterSettingFaultHandler",
              MakeTestThreadCreateAndJoin());
#endif

#undef RUN_TEST

  return 0;
}
|